TidyTuesday dataset of October 21, 2025

import marimo as mo
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.cm as cm
import matplotlib.colors as mcolors
import textwrap

# TidyTuesday 2025-10-21 source files: monthly station readings and station metadata.
MET_CSV_URL = 'https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2025/2025-10-21/historic_station_met.csv'
META_CSV_URL = 'https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2025/2025-10-21/station_meta.csv'

historic_station_met = pd.read_csv(MET_CSV_URL)
station_meta = pd.read_csv(META_CSV_URL)

# Display the readings table as the cell output.
historic_station_met

	station	year	month	tmax	tmin	af	rain	sun
0	aberporth	1941	1	NaN	NaN	NaN	74.7	NaN
1	aberporth	1941	2	NaN	NaN	NaN	69.1	NaN
2	aberporth	1941	3	NaN	NaN	NaN	76.2	NaN
3	aberporth	1941	4	NaN	NaN	NaN	33.7	NaN
4	aberporth	1941	5	NaN	NaN	NaN	51.3	NaN
...	...	...	...	...	...	...	...	...
39143	yeovilton	2024	8	22.2	12.8	0.0	27.4	141.1
39144	yeovilton	2024	9	18.3	10.7	0.0	142.8	107.6
39145	yeovilton	2024	10	16.2	8.1	0.0	102.0	85.2
39146	yeovilton	2024	11	11.7	5.1	7.0	88.6	48.8
39147	yeovilton	2024	12	10.5	5.0	1.0	29.6	27.9

39148 rows × 8 columns

# `year` holds plain calendar years, so cast it straight to int.
# Routing it through pd.to_datetime would treat integers as nanosecond offsets
# from the epoch and only round-trip by accident (and would break outright if
# the column were ever parsed as strings).
historic_station_met['year'] = historic_station_met['year'].astype(int)

# Bucket years into periods; pd.cut uses right-inclusive edges by default,
# e.g. (1925, 1950] -> '1926–1950'.
bins = [1850, 1925, 1950, 1975, 2000, 2025]
labels = ['till 1925', '1926–1950', '1951–1975', '1976–2000', '2001 onwards']
historic_station_met['quarter'] = pd.cut(historic_station_met['year'], bins=bins, labels=labels)

# Difference between the tmax and tmin columns of each row.
historic_station_met['tdiff'] = historic_station_met['tmax'] - historic_station_met['tmin']
# Station names arrive lower-case; capitalise them for display in plot titles.
historic_station_met['station'] = historic_station_met['station'].str.capitalize()

# Display the resulting column set as the cell output.
historic_station_met.columns

Index(['station', 'year', 'month', 'tmax', 'tmin', 'af', 'rain', 'sun',
       'quarter', 'tdiff'],
      dtype='object')

# Aggregate readings per station / year / month / period.
#
# `quarter` is a categorical column (produced by pd.cut). Without
# observed=True, groupby emits the full Cartesian product of every key
# combination, which inflates the result with rows for periods a given year
# can never belong to (all-NaN temperatures, 0.0 sums). observed=True keeps
# only the combinations that actually occur in the data.
#
# NOTE: 'sum' skips NaN, so a group whose readings are all missing reports 0.0
# for af / rain / sun rather than NaN.
df_grp = (
    historic_station_met
    .groupby(['station', 'year', 'month', 'quarter'], observed=True)
    .agg({
        'tmax': 'max',
        'tmin': 'min',
        'tdiff': 'mean',
        'af': 'sum',
        'rain': 'sum',
        'sun': 'sum',
    })
    .reset_index()
)
df_grp

	station	year	month	quarter	tmax	tmin	tdiff	af	rain	sun
0	Aberporth	1853	1	till 1925	NaN	NaN	NaN	0.0	0.0	0.0
1	Aberporth	1853	1	1926–1950	NaN	NaN	NaN	0.0	0.0	0.0
2	Aberporth	1853	1	1951–1975	NaN	NaN	NaN	0.0	0.0	0.0
3	Aberporth	1853	1	1976–2000	NaN	NaN	NaN	0.0	0.0	0.0
4	Aberporth	1853	1	2001 onwards	NaN	NaN	NaN	0.0	0.0	0.0
...	...	...	...	...	...	...	...	...	...	...
381835	Yeovilton	2024	12	till 1925	NaN	NaN	NaN	0.0	0.0	0.0
381836	Yeovilton	2024	12	1926–1950	NaN	NaN	NaN	0.0	0.0	0.0
381837	Yeovilton	2024	12	1951–1975	NaN	NaN	NaN	0.0	0.0	0.0
381838	Yeovilton	2024	12	1976–2000	NaN	NaN	NaN	0.0	0.0	0.0
381839	Yeovilton	2024	12	2001 onwards	10.5	5.0	5.5	1.0	29.6	27.9

381840 rows × 10 columns

# One group per (station, year) pair.
grouped = historic_station_met.groupby(['station', 'year'])

# Map each (station, year) key to the rain/sun correlation of that group's rows,
# read from the off-diagonal cell of the 2x2 correlation matrix.
correlations = {
    key: yearly_rows[['rain', 'sun']].corr().loc['rain', 'sun']
    for key, yearly_rows in grouped
}

# The tuple keys become a two-level index; name the levels and flatten them
# into ordinary 'station' and 'year' columns.
correlation_df = (
    pd.Series(correlations, name='rain_sun_corr')
    .rename_axis(['station', 'year'])
    .reset_index()
)

print(correlation_df)

        station  year  rain_sun_corr
0     Aberporth  1941            NaN
1     Aberporth  1942      -0.432952
2     Aberporth  1943      -0.527707
3     Aberporth  1944      -0.440227
4     Aberporth  1945      -0.251566
...         ...   ...            ...
3268  Yeovilton  2020      -0.653444
3269  Yeovilton  2021      -0.141005
3270  Yeovilton  2022      -0.438216
3271  Yeovilton  2023      -0.555453
3272  Yeovilton  2024      -0.234487

[3273 rows x 3 columns]

# Yearly rain/sun correlation, one translucent point per (station, year).
# Points are coloured by station; the legend is turned off.
scatter_options = {
    'x': 'year',
    'y': 'rain_sun_corr',
    'hue': 'station',
    'alpha': 0.5,
    'legend': False,
}
sns.scatterplot(data=correlation_df, **scatter_options)
plt.show()

# Show only the aggregated rows labelled with the 1926–1950 period.
df_grp.loc[df_grp['quarter'] == '1926–1950']

	station	year	month	quarter	tmax	tmin	tdiff	af	rain	sun
1	Aberporth	1853	1	1926–1950	NaN	NaN	NaN	0.0	0.0	0.0
6	Aberporth	1853	2	1926–1950	NaN	NaN	NaN	0.0	0.0	0.0
11	Aberporth	1853	3	1926–1950	NaN	NaN	NaN	0.0	0.0	0.0
16	Aberporth	1853	4	1926–1950	NaN	NaN	NaN	0.0	0.0	0.0
21	Aberporth	1853	5	1926–1950	NaN	NaN	NaN	0.0	0.0	0.0
...	...	...	...	...	...	...	...	...	...	...
381816	Yeovilton	2024	8	1926–1950	NaN	NaN	NaN	0.0	0.0	0.0
381821	Yeovilton	2024	9	1926–1950	NaN	NaN	NaN	0.0	0.0	0.0
381826	Yeovilton	2024	10	1926–1950	NaN	NaN	NaN	0.0	0.0	0.0
381831	Yeovilton	2024	11	1926–1950	NaN	NaN	NaN	0.0	0.0	0.0
381836	Yeovilton	2024	12	1926–1950	NaN	NaN	NaN	0.0	0.0	0.0

76368 rows × 10 columns

## Plotting
# Faceted strip plot: one panel per station, month on x, tmax on y,
# points coloured by tdiff. The figure is saved to UK_weather.png.

col_palette = 'autumn_r'  # alternative: 'Wistia'
col_wrap = 8  # panels per row; also used below to detect the first column of each row
month_labels = ['J', 'F', 'M', 'A', 'M', 'J', 'J', 'A', 'S', 'O', 'N', 'D']
sns.set_context("talk", font_scale=2.2)
bg_color = '#390099'
fg_color = '#eef4ed'

# Create a faceted stripplot
g = sns.catplot(
    data=df_grp,
    x='month',
    y='tmax',
    hue='tdiff',
    col='station',
    kind='strip',
    palette=col_palette,
    dodge=False,
    sharey=True,
    height=5,
    aspect=1.2,
    col_wrap=col_wrap,
    legend=False,
)

# `figure` is the preferred accessor; `g.fig` is documented as deprecated.
fig = g.figure
fig.patch.set_facecolor(bg_color)

# Style every panel in a single pass.
for i, ax in enumerate(g.axes.flat):
    ax.set_facecolor(bg_color)
    ax.tick_params(axis='both', colors=fg_color)
    for spine in ax.spines.values():
        spine.set_color(fg_color)

    if i % col_wrap != 0:  # Not the first column in each row
        # Only the left-most panel of a row keeps its y axis decorations.
        ax.set_ylabel('')
        ax.tick_params(axis='y', left=False, labelleft=False)
        sns.despine(ax=ax, left=True)

# Titles, axis labels and a single shared y-axis caption
g.set_titles("{col_name}", color=fg_color)
g.set_axis_labels("", "")
g.set_xticklabels(month_labels, fontdict={'family': 'monospace', 'color': fg_color})
fig.text(-0.005, 0.5, 'Maximum temperature (°C)', va='center', rotation='vertical', color=fg_color)

# Colorbar for the hue variable. It reuses `col_palette` so it cannot drift
# from the colours used for the points if the palette is changed.
norm = mcolors.Normalize(vmin=df_grp['tdiff'].min(), vmax=df_grp['tdiff'].max())
sm = cm.ScalarMappable(cmap=col_palette, norm=norm)
sm.set_array([])  # Required for colorbar

cbar_ax = fig.add_axes([0.7, 0.08, 0.2, 0.01])  # [left, bottom, width, height]
cbar = fig.colorbar(sm, cax=cbar_ax, orientation='horizontal')
cbar.set_label('Temperature Difference', color=fg_color)

# Change tick mark and tick label colour on the colorbar
cbar.ax.xaxis.set_tick_params(color=fg_color)
for label in cbar.ax.get_xticklabels():
    label.set_color(fg_color)

# Wrapped caption placed in figure coordinates, just above the colorbar
title = 'Monthly variations in maximum temperature at 37 weather stations in the UK. Points are colored based on the difference in maximum and minimum temperatures.'
fig.text(0.63, 0.13, textwrap.fill(title, width=55), color=fg_color, family='Serif', fontweight='bold', fontsize=38)

plt.tight_layout()
plt.savefig("UK_weather.png", dpi=300, bbox_inches='tight', pad_inches=0.2)
plt.show()