import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
import seaborn as sns
TidyTuesday dataset of September 30, 2025
# Load the crane-count CSV published in the TidyTuesday repository
# (data/2025/2025-09-30/cranes.csv) straight from GitHub.
cranes = pd.read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2025/2025-09-30/cranes.csv')
# Bare expression: displays the frame when run as a notebook cell.
cranes
date | observations | comment | weather_disruption | |
---|---|---|---|---|
0 | 2024-10-03 | 408.0 | Last count of season | False |
1 | 2024-09-30 | 14940.0 | NaN | False |
2 | 2024-09-26 | NaN | Canceled/No count | True |
3 | 2024-09-23 | 18450.0 | NaN | False |
4 | 2024-09-19 | 14900.0 | NaN | False |
... | ... | ... | ... | ... |
1543 | 1994-03-28 | NaN | Canceled/No count | False |
1544 | 1994-03-27 | NaN | Canceled/No count | False |
1545 | 1994-03-26 | NaN | Canceled/No count | False |
1546 | 1994-03-25 | 200.0 | NaN | False |
1547 | 1994-03-24 | 200.0 | NaN | False |
1548 rows × 4 columns
# Parse the date strings so the .dt accessor can be used below.
cranes['date'] = pd.to_datetime(cranes['date'])

# Add year and month columns. Month is the three-letter abbreviation
# produced by strftime('%b').
cranes['year'] = cranes['date'].dt.year
cranes['month'] = cranes['date'].dt.strftime('%b')

# Label each row by half of the calendar year. The month *number* is used
# rather than the abbreviated name so the result does not depend on how
# '%b' is rendered; months 1-6 are 'first_half', everything else
# (including missing dates) is 'second_half'.
cranes['year_half'] = np.where(
    cranes['date'].dt.month <= 6, 'first_half', 'second_half'
)
# Bare expression: displays the frame when run as a notebook cell.
cranes
date | observations | comment | weather_disruption | year | month | year_half | |
---|---|---|---|---|---|---|---|
0 | 2024-10-03 | 408.0 | Last count of season | False | 2024 | Oct | second_half |
1 | 2024-09-30 | 14940.0 | NaN | False | 2024 | Sep | second_half |
2 | 2024-09-26 | NaN | Canceled/No count | True | 2024 | Sep | second_half |
3 | 2024-09-23 | 18450.0 | NaN | False | 2024 | Sep | second_half |
4 | 2024-09-19 | 14900.0 | NaN | False | 2024 | Sep | second_half |
... | ... | ... | ... | ... | ... | ... | ... |
1543 | 1994-03-28 | NaN | Canceled/No count | False | 1994 | Mar | first_half |
1544 | 1994-03-27 | NaN | Canceled/No count | False | 1994 | Mar | first_half |
1545 | 1994-03-26 | NaN | Canceled/No count | False | 1994 | Mar | first_half |
1546 | 1994-03-25 | 200.0 | NaN | False | 1994 | Mar | first_half |
1547 | 1994-03-24 | 200.0 | NaN | False | 1994 | Mar | first_half |
1548 rows × 7 columns
# Total observations per (year, half-year, month) combination.
df_obs = cranes.groupby(['year', 'year_half', 'month'])['observations'].sum().reset_index()

# Display names used for the legend entries, keyed by the '%b' abbreviation.
month_mapping = {'Mar': 'Mars', 'Apr': 'April', 'Aug': 'Augusti', 'Sep': 'September', 'Oct': 'Oktober'}
bg_color = '#135E4B'
fg_color = '#CCDCDB'
custom_map = ['#FCC5F8', '#B3F7FE', '#E7F8BE']

sns.set_theme(font_scale=1.75)
sns.set_style('white')

# One point-plot panel per half-year, one line per month.
g = sns.catplot(
    data=df_obs, x='year', y='observations', col='year_half', kind='point', hue='month',
    aspect=2, alpha=1, legend=True, linestyles=':', height=5, palette=custom_map,
)
g.set_titles(template="")

# Remember the hue labels before the shared legend is discarded; they are
# reused later when per-panel legends are built.
legend_labels = list(g._legend_data.keys())

# remove legend
g._legend.remove()

# Style every panel: a tick at every 5th x position, no axis labels,
# dotted horizontal grid, hidden left/bottom spines, monospace tick labels.
for ax in g.axes.flat:
    ax.set_xticks(range(0, 32, 5))
    ax.set_xlabel("")
    ax.set_ylabel("")
    ax.yaxis.grid(True, linestyle=':', linewidth=0.7)
    # ax.xaxis.grid(True,linestyle=':', linewidth=0.3,)
    ax.spines[['left', 'bottom']].set_visible(False)
    # Show y ticks in thousands ("12K"); non-positive ticks are shown as "0".
    # NOTE: `yticks` is intentionally left in scope after the loop; it is
    # reused further down when the right-hand panel's labels are re-aligned.
    yticks = [f'{int(x/1000)}K' if x > 0 else '0' for x in ax.get_yticks()]
    ax.set_yticklabels(yticks, fontfamily='monospace')
    ax.set_xticklabels(ax.get_xticklabels(), fontfamily='monospace')
    ax.set_facecolor(bg_color)
    ax.tick_params(colors=fg_color)
# Create separate legends for each subplot
for ind, ax in enumerate(g.axes.flat):
    # Keep only labelled lines that carry at least one non-NaN y-value:
    # lines with an empty label (error bars) and all-NaN lines are skipped.
    valid_lines = [
        line
        for line in ax.lines
        if line.get_label() != '' and not np.all(np.isnan(line.get_ydata()))
    ]
    labels = [line.get_label() for line in valid_lines]

    # Sort numerically when every label parses as an int, otherwise
    # alphabetically. sorted() is stable, so ties keep their plot order.
    try:
        sort_keys = [int(label) for label in labels]
    except ValueError:
        sort_keys = labels
    sorted_lines = [
        line
        for _, line in sorted(zip(sort_keys, valid_lines), key=lambda pair: pair[0])
    ]

    # Invisible handles: the legend shows coloured text only, no markers.
    dummy_handles = [Line2D([], [], marker='none', linestyle='none')
                     for _ in sorted_lines]

    # The first panel gets the first two hue labels, every other panel the
    # remaining ones.
    panel_labels = legend_labels[:2] if ind == 0 else legend_labels[2:]

    # Add legend to subplot (no title).
    legend = ax.legend(
        handles=dummy_handles,
        labels=[month_mapping[label] for label in panel_labels],
        loc='upper left',
        frameon=False,
        ncol=3,
        bbox_to_anchor=(0.05, 0.9),
    )

    # Colour each legend entry like the corresponding plotted line.
    for text, line in zip(legend.get_texts(), sorted_lines):
        text.set_color(line.get_color())
# Move the y tick labels from the left panel to the right panel, pushed
# away from the axis and left-aligned.
g.axes[0][0].tick_params(labelleft=False)
g.axes[0][1].tick_params(labelleft=True)
g.axes[0][1].tick_params(axis='y', labelleft=True, pad=20)
# `yticks` is the label list left over from the per-axis styling loop.
g.axes[0][1].set_yticklabels(yticks, ha='left')

g.figure.subplots_adjust(wspace=0.05)
g.figure.suptitle(
    'Crane Observations at Lake Hornborgasjön in Sweden',
    fontsize=24, family='Serif', weight='bold', color=fg_color,
)

g.figure.patch.set_facecolor(bg_color)
plt.savefig("cranes_observations.png", dpi=300, bbox_inches='tight', pad_inches=0.1)
plt.show()