WHO Statistical Performance Indicators

Top 10 countries with the biggest improvement in SPI.
relplot
TidyTuesday
PyDyTuesday
Author

Manish Datt

Published

November 25, 2025

TidyTuesday dataset of November 25, 2025

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.dates as mdates
import matplotlib.transforms as mtransforms
spi_indicators = pd.read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2025/2025-11-25/spi_indicators.csv')
spi_indicators
iso3c country region income year population overall_score data_use_score data_services_score data_products_score data_sources_score data_infrastructure_score
0 DNK Denmark Europe & Central Asia High income 2023 5946952 95.255833 100.0 98.466667 90.71250 87.100 100.0
1 FIN Finland Europe & Central Asia High income 2023 5584264 95.115417 100.0 96.433333 90.96875 88.175 100.0
2 POL Poland Europe & Central Asia High income 2023 36685849 94.653750 100.0 97.300000 84.54375 91.425 100.0
3 SWE Sweden Europe & Central Asia High income 2023 10536632 94.410000 100.0 96.000000 90.57500 85.475 100.0
4 ESP Spain Europe & Central Asia High income 2023 48373336 94.325000 100.0 91.200000 92.62500 87.800 100.0
... ... ... ... ... ... ... ... ... ... ... ... ...
4335 VIR Virgin Islands (U.S.) Latin America & Caribbean High income 2004 108466 NaN 0.0 NaN NaN NaN NaN
4336 PSE West Bank and Gaza Middle East & North Africa Lower middle income 2004 3236626 NaN 20.0 NaN NaN NaN NaN
4337 YEM Yemen, Rep. Middle East & North Africa Low income 2004 20733406 NaN 20.0 NaN NaN NaN NaN
4338 ZMB Zambia Sub-Saharan Africa Lower middle income 2004 11188040 NaN 40.0 NaN NaN NaN NaN
4339 ZWE Zimbabwe Sub-Saharan Africa Lower middle income 2004 12160881 NaN 20.0 NaN NaN NaN NaN

4340 rows × 12 columns

spi_indicators['year'] = pd.to_datetime(spi_indicators['year'], format='%Y')
spi_indicators.columns
Index(['iso3c', 'country', 'region', 'income', 'year', 'population',
       'overall_score', 'data_use_score', 'data_services_score',
       'data_products_score', 'data_sources_score',
       'data_infrastructure_score'],
      dtype='object')
country_desc = spi_indicators.groupby(['country'])['overall_score'].describe()
country_desc['diff'] = country_desc['max']-country_desc['min']
top10_diff = country_desc.nlargest(10, 'diff')
spi_top10 = spi_indicators[spi_indicators['country'].isin(top10_diff.index)]
spi_top10
iso3c country region income year population overall_score data_use_score data_services_score data_products_score data_sources_score data_infrastructure_score
54 SAU Saudi Arabia Middle East & North Africa High income 2023-01-01 36947025 81.533333 70.0 88.366667 75.22500 79.075000 95.0
61 UZB Uzbekistan Europe & Central Asia Lower middle income 2023-01-01 36412350 80.332083 80.0 76.633333 85.04375 64.983333 95.0
71 ARE United Arab Emirates Middle East & North Africa High income 2023-01-01 9516871 77.635000 90.0 79.500000 71.15000 62.525000 85.0
80 BFA Burkina Faso Sub-Saharan Africa Low income 2023-01-01 23251485 74.309167 100.0 69.000000 80.38750 42.158333 80.0
92 ZWE Zimbabwe Sub-Saharan Africa Lower middle income 2023-01-01 16665409 69.732500 100.0 67.033333 83.38750 38.241667 60.0
... ... ... ... ... ... ... ... ... ... ... ... ...
4289 SAU Saudi Arabia Middle East & North Africa High income 2004-01-01 23661808 NaN 20.0 NaN NaN NaN NaN
4299 SOM Somalia Sub-Saharan Africa Low income 2004-01-01 10117354 NaN 20.0 NaN NaN NaN NaN
4327 ARE United Arab Emirates Middle East & North Africa High income 2004-01-01 3993339 NaN 20.0 NaN NaN NaN NaN
4331 UZB Uzbekistan Europe & Central Asia Lower middle income 2004-01-01 25864350 NaN 40.0 NaN NaN NaN NaN
4339 ZWE Zimbabwe Sub-Saharan Africa Lower middle income 2004-01-01 12160881 NaN 20.0 NaN NaN NaN NaN

200 rows × 12 columns

df_melt_top10 = pd.melt(spi_top10, id_vars=['country','year'], \
            value_vars=['overall_score', 'data_use_score', 'data_services_score', 'data_products_score', 'data_sources_score', 'data_infrastructure_score'],\
            var_name='Score_type', value_name='Scores')
df_melt_top10
country year Score_type Scores
0 Saudi Arabia 2023-01-01 overall_score 81.533333
1 Uzbekistan 2023-01-01 overall_score 80.332083
2 United Arab Emirates 2023-01-01 overall_score 77.635000
3 Burkina Faso 2023-01-01 overall_score 74.309167
4 Zimbabwe 2023-01-01 overall_score 69.732500
... ... ... ... ...
1195 Saudi Arabia 2004-01-01 data_infrastructure_score NaN
1196 Somalia 2004-01-01 data_infrastructure_score NaN
1197 United Arab Emirates 2004-01-01 data_infrastructure_score NaN
1198 Uzbekistan 2004-01-01 data_infrastructure_score NaN
1199 Zimbabwe 2004-01-01 data_infrastructure_score NaN

1200 rows × 4 columns

sns.set_context("notebook", font_scale=1.5)

g = sns.relplot(data=df_melt_top10, x='year', y='Scores', hue='country', kind='line', col='Score_type', \
            col_wrap=3, legend=False, col_order=['data_use_score', 'data_sources_score', 'data_infrastructure_score', 'data_products_score', 'data_services_score', 'overall_score'])

original_ticks = g.axes.flatten()[0].get_xticks()   # or use one representative axis

keep = original_ticks#[::2]
#if original_ticks[-1] not in keep:
#    keep = list(keep) + [original_ticks[-1]]

for ind, ax in enumerate(g.axes.flatten()):
    new_title = ' '.join(ax.get_title().removesuffix('_score').split()[-1].split('_')).capitalize()
    
    ax.set_title(new_title, y=0.9, color='#555555')
#    ax.tick_params(axis='x', rotation=45)
    ax.set_xlabel("")
    ax.set_xticks(keep)
    ax.xaxis.set_major_formatter(mdates.DateFormatter("%y"))
    ax.margins(x=0)
    ax.set_facecolor('#F5F5F5')
    for labelx, labely in zip(ax.get_xticklabels(), ax.get_yticklabels()):
        labelx.set_fontfamily('monospace')
        labely.set_fontfamily('monospace')
    if (ind not in [0,3]):
        ax.spines['left'].set_visible(False)
        ax.tick_params(axis='y', left=False, labelleft=False)
        labels = ax.get_xticklabels()
        if labels:
            labels[1].set_visible(False)   

fig = g.fig
ax = g.axes.flatten()[1]   # facet to annotate

# create an offset transform: 5 points right, 0 points up
text_trans = mtransforms.offset_copy(ax.transData, fig=fig,
                                     x=5, y=0, units='points')

y_offset = 3
for line, country in zip(ax.lines, df_melt_top10['country'].unique()):
    x_end = line.get_xdata()[-1]
    y_end = line.get_ydata()[-1]

    if (country in ['Uzbekistan', 'Lebanon']):
        y_end += y_offset
    if (country in ['Marshall Islands', 'Zimbabwe']):
        y_end -= y_offset
    

    fig.text(
        x_end,
        y_end,
        country,
        transform=text_trans,   
        ha='left',
        va='center',
        color=line.get_color(),
        clip_on=False           
    )


g.fig.subplots_adjust(wspace=0, hspace=0)
g.fig.suptitle("Top 10 countries with the greatest improvement in the overall score \nof the World Bank Statistical Performance Indicators from 2004 \u2013 2023", fontfamily='Serif', y=1.025)
g.fig.patch.set_facecolor('#F5F5F5')
plt.savefig('WHO_SPI.png', dpi=300, bbox_inches='tight')
plt.show()