import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
import textwrap
TidyTuesday dataset of 2025-08-05
# Load the TidyTuesday 2025-08-05 income-inequality tables directly from
# GitHub (requires network access). Two separate CSVs are published: the
# processed table and the raw table it was derived from.
income_inequality_processed = pd.read_csv(
    'https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2025/2025-08-05/income_inequality_processed.csv'
)
income_inequality_raw = pd.read_csv(
    'https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2025/2025-08-05/income_inequality_raw.csv'
)

# Notebook-style display of the processed table (a no-op when run as a script).
income_inequality_processed
| | Entity | Code | Year | gini_mi_eq | gini_dhi_eq |
---|---|---|---|---|---|
0 | Australia | AUS | 1989 | 0.431 | 0.304 |
1 | Australia | AUS | 1995 | 0.470 | 0.311 |
2 | Australia | AUS | 2001 | 0.481 | 0.320 |
3 | Australia | AUS | 2003 | 0.469 | 0.316 |
4 | Australia | AUS | 2004 | 0.467 | 0.316 |
... | ... | ... | ... | ... | ... |
942 | Vietnam | VNM | 2005 | NaN | 0.369 |
943 | Vietnam | VNM | 2007 | NaN | 0.401 |
944 | Vietnam | VNM | 2009 | NaN | 0.398 |
945 | Vietnam | VNM | 2011 | NaN | 0.364 |
946 | Vietnam | VNM | 2013 | NaN | 0.350 |
947 rows × 5 columns
# Notebook-style display of the raw table (a no-op when run as a script).
income_inequality_raw
| | Entity | Code | Year | gini_disposable__age_total | gini_market__age_total | population_historical | owid_region |
---|---|---|---|---|---|---|---|
0 | Afghanistan | AFG | -10000 | NaN | NaN | 14737.0 | NaN |
1 | Afghanistan | AFG | -9000 | NaN | NaN | 20405.0 | NaN |
2 | Afghanistan | AFG | -8000 | NaN | NaN | 28253.0 | NaN |
3 | Afghanistan | AFG | -7000 | NaN | NaN | 39120.0 | NaN |
4 | Afghanistan | AFG | -6000 | NaN | NaN | 54166.0 | NaN |
... | ... | ... | ... | ... | ... | ... | ... |
58920 | Zimbabwe | ZWE | 2019 | NaN | NaN | 15271330.0 | NaN |
58921 | Zimbabwe | ZWE | 2020 | NaN | NaN | 15526837.0 | NaN |
58922 | Zimbabwe | ZWE | 2021 | NaN | NaN | 15797165.0 | NaN |
58923 | Zimbabwe | ZWE | 2022 | NaN | NaN | 16069010.0 | NaN |
58924 | Zimbabwe | ZWE | 2023 | NaN | NaN | 16340778.0 | Africa |
58925 rows × 7 columns
# Build one row per country holding its most recent Gini pair and the gap
# between them.
#
# Rows missing either Gini value are dropped up front: the difference needs
# both, and GroupBy.last() returns the last *non-null* value per column, so a
# trailing row with a missing 'gini_dhi_eq' would otherwise be silently mixed
# with values from an earlier year.
# .copy() makes the filtered frame independent so adding a column below does
# not trigger pandas' SettingWithCopyWarning.
income_inequality_processed_noNA = income_inequality_processed.dropna(
    subset=['gini_mi_eq', 'gini_dhi_eq']
).copy()

# 'diff' = first plotted value minus second plotted value; the figure caption
# further down describes these as pre- and post-tax Gini coefficients.
income_inequality_processed_noNA['diff'] = (
    income_inequality_processed_noNA['gini_mi_eq']
    - income_inequality_processed_noNA['gini_dhi_eq']
)

# Sort chronologically so that .last() picks each country's latest year.
df1 = income_inequality_processed_noNA.sort_values('Year').groupby('Entity').last()

# Largest gap first.
df1 = df1.sort_values('diff', ascending=False)

# Notebook-style display (a no-op when run as a script).
df1
Entity | Code | Year | gini_mi_eq | gini_dhi_eq | diff |
---|---|---|---|---|---|
Belgium | BEL | 2021 | 0.486 | 0.255 | 0.231 |
Italy | ITA | 2020 | 0.563 | 0.335 | 0.228 |
Ireland | IRL | 2021 | 0.514 | 0.290 | 0.224 |
Austria | AUT | 2022 | 0.494 | 0.287 | 0.207 |
Germany | DEU | 2020 | 0.506 | 0.302 | 0.204 |
Norway | NOR | 2004 | 0.452 | 0.261 | 0.191 |
Czechia | CZE | 2016 | 0.444 | 0.254 | 0.190 |
Denmark | DNK | 2022 | 0.477 | 0.288 | 0.189 |
Greece | GRC | 2021 | 0.488 | 0.308 | 0.180 |
United Kingdom | GBR | 2021 | 0.479 | 0.302 | 0.177 |
Sweden | SWE | 2021 | 0.458 | 0.285 | 0.173 |
Romania | ROU | 2021 | 0.463 | 0.295 | 0.168 |
Spain | ESP | 2022 | 0.481 | 0.314 | 0.167 |
Lithuania | LTU | 2021 | 0.518 | 0.366 | 0.152 |
Slovakia | SVK | 2018 | 0.387 | 0.236 | 0.151 |
Luxembourg | LUX | 2021 | 0.434 | 0.288 | 0.146 |
Canada | CAN | 2021 | 0.436 | 0.291 | 0.145 |
Bulgaria | BGR | 2022 | 0.517 | 0.373 | 0.144 |
Netherlands | NLD | 2021 | 0.413 | 0.270 | 0.143 |
Estonia | EST | 2016 | 0.456 | 0.314 | 0.142 |
Australia | AUS | 2020 | 0.459 | 0.325 | 0.134 |
Israel | ISR | 2021 | 0.472 | 0.345 | 0.127 |
Finland | FIN | 2016 | 0.382 | 0.258 | 0.124 |
Japan | JPN | 2020 | 0.423 | 0.305 | 0.118 |
United States | USA | 2023 | 0.507 | 0.392 | 0.115 |
Brazil | BRA | 2015 | 0.555 | 0.446 | 0.109 |
Switzerland | CHE | 2019 | 0.401 | 0.310 | 0.091 |
South Africa | ZAF | 2017 | 0.706 | 0.616 | 0.090 |
Iceland | ISL | 2017 | 0.326 | 0.251 | 0.075 |
Dominican Republic | DOM | 2007 | 0.523 | 0.515 | 0.008 |
Plotting
# Threshold on 'gini_mi_eq' used to split the countries across the two
# panels: set1 holds countries at or above it, set2 those below it.
cutoff = 0.460
set1 = df1[df1['gini_mi_eq'] >= cutoff]
set2 = df1[df1['gini_mi_eq'] < cutoff]

# x positions of the two end points of every slope line (left, right).
x = [0, 1]

# Sorted distinct Gini values appearing in each panel, and their
# concatenation. NOTE(review): nothing in the visible code reads these;
# kept in case another cell depends on them.
categories1 = sorted(set(set1['gini_mi_eq']).union(set(set1['gini_dhi_eq'])))
categories2 = sorted(set(set2['gini_mi_eq']).union(set(set2['gini_dhi_eq'])))
categories = categories1 + categories2
# Two side-by-side panels sharing the y axis (the Gini scale).
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(6, 8), sharey=True)

# Left panel: one slope line per country in set1, running from
# 'gini_mi_eq' at x=0 to 'gini_dhi_eq' at x=1, labelled next to the right end.
for row_ind, row in set1.iterrows():
    ax1.plot(x, [row['gini_mi_eq'], row['gini_dhi_eq']],
             marker='o', label=row_ind, alpha=0.6)
    if row_ind in ['Germany', 'Denmark']:
        # These labels are hand-placed mid-panel instead of at the right end.
        ax1.annotate(row_ind, (0.5, row['gini_dhi_eq']))
    elif row_ind in ['Austria']:
        # Nudged slightly downwards relative to the default label position.
        ax1.annotate(row_ind, (1.01, row['gini_dhi_eq'] - 0.01))
    else:
        ax1.annotate(row_ind, (1.01, row['gini_dhi_eq']))

# Right panel: same construction for the countries in set2.
for row_ind, row in set2.iterrows():
    ax2.plot(x, [row['gini_mi_eq'], row['gini_dhi_eq']],
             marker='o', label=row_ind, alpha=0.6)
    if row_ind in ['Switzerland', 'Luxembourg', 'Finland', 'Iceland']:
        # These labels are hand-placed inside the panel instead of at the right end.
        ax2.annotate(row_ind, (0.4, row['gini_dhi_eq']))
    else:
        ax2.annotate(row_ind, (1.01, row['gini_dhi_eq']))
# Strip the chart frame, the x ticks and the y tick marks from both panels.
for ax in (ax1, ax2):
    ax.spines[['top', 'right', 'bottom', 'left']].set_visible(False)
    ax.set_xticks([])
    ax.tick_params(axis='y', length=0)

# Extra padding between the left panel's y tick labels and the axes, leaving
# room for the arrow drawn further down.
ax1.tick_params(axis='y', pad=15)

#ax1.set_ylabel(r'Income inequality $\longrightarrow\!\!\!\!\!\!>$', fontsize=12)

# Caption, wrapped to 30 characters per line and placed in figure coordinates.
title_text = textwrap.fill("Gini coefficient for different countries pre- (left) and post- (right) tax deductions. Data for the most recent year are shown. The plot is split into two columns at the dotted line for better visualization.", 30)
fig.text(x=.55, y=.75, s=title_text, fontsize=12, fontfamily='Serif')

# Thick light-grey arrow along the left edge of the left panel, drawn from the
# bottom (xytext) to the top (xy) in axes-fraction coordinates.
ax1.annotate(
    '',
    xy=(-0.05, 1), xycoords='axes fraction',
    xytext=(-0.05, 0), textcoords='axes fraction',
    arrowprops=dict(arrowstyle='->', lw=10, mutation_scale=70, color='lightgrey'),
    zorder=-1,
)

# Rotated text label next to the arrow; the text itself is placed at xytext.
ax1.annotate(
    'Income inequality',
    xy=(-0.09, 1), xycoords='axes fraction',
    xytext=(-0.09, 0.10), textcoords='axes fraction',
    rotation=90,
)
# Finalise the layout first: the data -> display transform used below is only
# meaningful once the axes have their final position.
plt.tight_layout()

# Convert the cutoff from ax1 data coordinates to display (pixel) coordinates,
# then to figure-fraction coordinates, keeping only the y component.
y_display = ax1.transData.transform((0, cutoff))[1]
y_fig = fig.transFigure.inverted().transform((0, y_display))[1]

# Add horizontal line across the figure at the data-level cutoff.
# Both x and y are given in figure coordinates, so the line spans both panels.
line = Line2D(
    [0.125, 0.9], [y_fig, y_fig],
    transform=fig.transFigure,
    color='lightgrey', linewidth=1, linestyle='dotted',
)

fig.add_artist(line)
#plt.savefig("income_inequality.png", dpi=300)
plt.show()