import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
TidyTuesday data for 2025-07-01
# Load the TidyTuesday weekly gas price data set straight from GitHub.
weekly_gas_prices = pd.read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2025/2025-07-01/weekly_gas_prices.csv')
# Parse the 'date' column into datetimes so the .dt accessor can be used on it later.
weekly_gas_prices['date'] = pd.to_datetime(weekly_gas_prices['date'])
# Bare expression: displays the frame when run as a notebook cell.
weekly_gas_prices
| | date | fuel | grade | formulation | price |
---|---|---|---|---|---|
0 | 1990-08-20 | gasoline | regular | all | 1.191 |
1 | 1990-08-20 | gasoline | regular | conventional | 1.191 |
2 | 1990-08-27 | gasoline | regular | all | 1.245 |
3 | 1990-08-27 | gasoline | regular | conventional | 1.245 |
4 | 1990-09-03 | gasoline | regular | all | 1.242 |
... | ... | ... | ... | ... | ... |
22355 | 2025-06-23 | gasoline | premium | all | 4.128 |
22356 | 2025-06-23 | gasoline | premium | conventional | 3.950 |
22357 | 2025-06-23 | gasoline | premium | reformulated | 4.333 |
22358 | 2025-06-23 | diesel | all | NaN | 3.775 |
22359 | 2025-06-23 | diesel | ultra_low_sulfur | NaN | 3.775 |
22360 rows × 5 columns
# Calendar year of each observation, used later to group prices per year.
weekly_gas_prices['year'] = weekly_gas_prices['date'].dt.year
# create a new col fuel_grade by combining values from fuel and grade columns
weekly_gas_prices['fuel_grade'] = weekly_gas_prices['fuel'] + '-' + weekly_gas_prices['grade']
# Bare expression: displays the frame when run as a notebook cell.
weekly_gas_prices
| | date | fuel | grade | formulation | price | year | fuel_grade |
---|---|---|---|---|---|---|---|
0 | 1990-08-20 | gasoline | regular | all | 1.191 | 1990 | gasoline-regular |
1 | 1990-08-20 | gasoline | regular | conventional | 1.191 | 1990 | gasoline-regular |
2 | 1990-08-27 | gasoline | regular | all | 1.245 | 1990 | gasoline-regular |
3 | 1990-08-27 | gasoline | regular | conventional | 1.245 | 1990 | gasoline-regular |
4 | 1990-09-03 | gasoline | regular | all | 1.242 | 1990 | gasoline-regular |
... | ... | ... | ... | ... | ... | ... | ... |
22355 | 2025-06-23 | gasoline | premium | all | 4.128 | 2025 | gasoline-premium |
22356 | 2025-06-23 | gasoline | premium | conventional | 3.950 | 2025 | gasoline-premium |
22357 | 2025-06-23 | gasoline | premium | reformulated | 4.333 | 2025 | gasoline-premium |
22358 | 2025-06-23 | diesel | all | NaN | 3.775 | 2025 | diesel-all |
22359 | 2025-06-23 | diesel | ultra_low_sulfur | NaN | 3.775 | 2025 | diesel-ultra_low_sulfur |
22360 rows × 7 columns
Plotting
# Draw a year x fuel-grade grid of square markers coloured by the yearly
# maximum price, add an inset colorbar, label each series' highest and lowest
# value, then save the figure to 'gas_prices.png' and show it.
fig, ax = plt.subplots(figsize=(8, 4))

# Price bin edges (6 edges -> 5 bins) and one colour per bin.
intervals = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]
colors = ['#D6ECF3', '#87CEEB', '#1E90FF', '#0000CD', 'red']

cmap = mcolors.ListedColormap(colors)
norm = mcolors.BoundaryNorm(intervals, cmap.N)

# Highest weekly price per (year, fuel_grade).
df_max_price = weekly_gas_prices.groupby(['year', 'fuel_grade'])['price'].max().reset_index()

fuel_cat = ['gasoline-all', 'gasoline-premium', 'gasoline-midgrade', 'gasoline-regular', 'diesel-all', 'diesel-ultra_low_sulfur', 'diesel-low_sulfur']
df_max_price['fuel_grade'] = pd.Categorical(df_max_price['fuel_grade'], categories=fuel_cat, ordered=True)
# sort by fuel_grade (ordered categorical) so rows follow the fuel_cat order;
# matplotlib lays string categories out on the axis in order of first appearance
df_max_price = df_max_price.sort_values(by=['fuel_grade'])

# remove rows where fuel_grade contains "all"
df_max_price = df_max_price[~df_max_price['fuel_grade'].str.contains('all')]

s1 = ax.scatter(
    df_max_price['year'],
    df_max_price['fuel_grade'],
    c=df_max_price['price'],
    cmap=cmap,
    norm=norm,
    marker='s',
)
ax.invert_yaxis()

cbar_ax = inset_axes(
    ax,
    width="50%",  # Width as a percent of the parent axis
    height="5%",  # Height as a percent of the parent axis
    loc='lower center',  # Position inside the main plot
    bbox_to_anchor=(-0.025, 0.15, 0.5, 0.5),
    bbox_transform=ax.transAxes,
)

# The colorbar is drawn into the explicit inset axes; `shrink` only applies
# when matplotlib creates the colorbar axes itself, so it is not passed here.
cbar = plt.colorbar(s1, cax=cbar_ax, orientation='horizontal')
# Pin the tick positions to the bin edges before assigning their labels.
cbar.set_ticks(intervals)
cbar.set_ticklabels([f'${interval:.0f}' for interval in intervals])
cbar.outline.set_visible(False)
cbar.set_label('Price per gallon', fontsize=8)
cbar.ax.xaxis.set_label_position('top')
cbar.ax.tick_params(length=0, labelsize=8)

# Grey out the diesel rows; done before the labels lose their fuel prefix below.
for label in ax.get_yticklabels():
    if "diesel" in label.get_text():
        label.set_color('#666666')

# Fix the tick positions, then keep only the grade part of each 'fuel-grade' label.
ax.set_yticks(ax.get_yticks())
ax.set_yticklabels([k.get_text().split('-')[1] for k in ax.get_yticklabels()])

# Vertical group captions: joining the characters with '\n' puts one letter per line.
ax.text(0.01, 0.925, '\n'.join("GASOLINE"), transform=ax.transAxes, fontsize=10,
        verticalalignment='top', fontfamily='Consolas')
ax.text(0.01, 0.30, '\n'.join("DIESEL"), transform=ax.transAxes, fontsize=10,
        verticalalignment='top', color='#666666', fontfamily='Consolas')
ax.spines[['top', 'right']].set_visible(False)

# Arrow style shared by every annotation.
arrow = dict(arrowstyle='<-', color='black', linewidth=0.8)

# Label max and min for each fuel_grade
for grade in df_max_price['fuel_grade'].unique():
    subset = df_max_price[df_max_price['fuel_grade'] == grade]

    # Skip if empty or NaNs
    if subset.empty or subset['price'].isnull().all():
        continue

    max_row = subset.loc[subset['price'].idxmax()]
    min_row = subset.loc[subset['price'].idxmin()]

    # Annotate max
    ax.annotate(
        f"${max_row['price']:.2f}",
        xy=(max_row['year'], max_row['fuel_grade']),
        xytext=(0, 13),
        textcoords='offset points',
        ha='center', va='bottom',
        fontsize=8, color='red',
        arrowprops=arrow,
    )
    # Annotate min
    ax.annotate(
        f"${min_row['price']:.2f}",
        xy=(min_row['year'], min_row['fuel_grade']),
        xytext=(0, -20),
        textcoords='offset points',
        ha='center', va='bottom',
        fontsize=8, color='blue',
        arrowprops=arrow,
    )

fig.suptitle('Year-wise maximum fuel prices in the US across five different categories. \n For each series, the highest and lowest prices are labeled.', x=0, ha='left', fontfamily='Serif')
plt.savefig('gas_prices.png', dpi=300, bbox_inches='tight')
plt.show()