import pandas as pd
# Load the two TidyTuesday (2025-06-10) tables straight from GitHub.
# judges_appointments holds the appointment records; judges_people holds the
# biographical records. Both carry a `judge_id` column.
judges_appointments = pd.read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2025/2025-06-10/judges_appointments.csv')
judges_people = pd.read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2025/2025-06-10/judges_people.csv')

# Notebook display of the appointments table.
judges_appointments
judge_id | court_name | court_type | president_name | president_party | nomination_date | predecessor_last_name | predecessor_first_name | senate_confirmation_date | commission_date | chief_judge_begin | chief_judge_end | retirement_from_active_service | termination_date | termination_reason | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 3419 | U. S. District Court, Southern District of New... | USDC | Barack Obama | Democratic | 07/28/2011 | Kaplan | Lewis A. | 03/22/2012 | 03/23/2012 | NaN | NaN | NaN | NaN | NaN |
1 | 1 | U. S. District Court, Eastern District of New ... | USDC | Franklin D. Roosevelt | Democratic | 02/03/1936 | new | NaN | 02/12/1936 | 02/15/1936 | NaN | NaN | 02/15/1966 | 05/28/1971 | Death |
2 | 2 | U. S. District Court, Western District of Penn... | USDC | Rutherford B. Hayes | Republican | 01/06/1880 | Ketcham | Winthrop | 01/14/1880 | 01/14/1880 | NaN | NaN | NaN | 02/09/1891 | Appointment to Another Judicial Position |
3 | 3 | U. S. District Court, Northern District of Ala... | USDC | Ronald Reagan | Republican | 07/22/1982 | McFadden | Frank H. | 08/18/1982 | 08/18/1982 | NaN | NaN | 05/31/1996 | NaN | NaN |
4 | 4 | U. S. District Court, District of New Jersey | USDC | Jimmy Carter | Democratic | 09/28/1979 | Barlow | George H. | 10/31/1979 | 11/02/1979 | NaN | NaN | 02/15/1994 | 12/02/2009 | Death |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
4197 | 2201 | U. S. District Courts, Albemarle, Cape Fear & ... | USDC | Reassignment | Reassignment | NaN | new | NaN | NaN | 02/13/1801 | NaN | NaN | NaN | 03/04/1802 | Death |
4198 | 2689 | U. S. District Court, Eastern District of Miss... | USDC | Harry S Truman | Democratic | 01/13/1949 | Harper | Roy Winfield | 01/31/1949 | 02/02/1949 | NaN | NaN | 01/05/1971 | 02/13/1994 | Death |
4199 | 1126 | U. S. Court of Appeals for the Ninth Circuit | USCA | William H. Taft | Republican | 12/12/1910 | new | NaN | 01/31/1911 | 02/08/1911 | NaN | NaN | 01/31/1928 | 11/30/1928 | Retirement |
4200 | 1453 | U. S. Court of Appeals for the Second Circuit | USCA | Reassignment | Reassignment | NaN | new | NaN | NaN | 07/01/1929 | NaN | NaN | 09/06/1940 | 09/05/1943 | Death |
4201 | 2689 | U. S. District Court, Western District of Miss... | USDC | Harry S Truman | Democratic | 01/13/1949 | Harper | Roy Winfield | 01/31/1949 | 02/02/1949 | NaN | NaN | 01/05/1971 | 02/13/1994 | Death |
4202 rows × 15 columns
# Parse the commission dates (read in as text) into datetimes so the
# `.dt` accessor can be used on the column.
judges_appointments['commission_date'] = pd.to_datetime(judges_appointments['commission_date'])

# Earliest commission year in the data (notebook display).
judges_appointments['commission_date'].dt.year.min()
1789.0
# Notebook display of the judges_people table.
judges_people
judge_id | name_first | name_middle | name_last | name_suffix | birth_date | birthplace_city | birthplace_state | death_date | death_city | death_state | gender | race | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 3419 | Ronnie | NaN | Abrams | NaN | 1968.0 | New York | NY | NaN | NaN | NaN | F | White |
1 | 1 | Matthew | T. | Abruzzo | NaN | 1889.0 | Brooklyn | NY | 1971.0 | Potomac | MD | M | White |
2 | 2 | Marcus | Wilson | Acheson | NaN | 1828.0 | Washington | PA | 1906.0 | Pittsburgh | PA | M | White |
3 | 3 | William | Marsh | Acker | Jr. | 1927.0 | Birmingham | AL | NaN | NaN | NaN | M | White |
4 | 4 | Harold | Arnold | Ackerman | NaN | 1928.0 | Newark | NJ | 2009.0 | West Orange | NJ | M | White |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
3527 | 3391 | Jennifer | Guerin | Zipps | NaN | 1964.0 | Ashland | OH | NaN | NaN | NaN | F | White |
3528 | 2687 | Alfonso | Joseph | Zirpoli | NaN | 1905.0 | Denver | CO | 1995.0 | San Francisco | CA | M | White |
3529 | 2688 | William | J. | Zloch | NaN | 1944.0 | Fort Lauderdale | FL | NaN | NaN | NaN | M | White |
3530 | 2690 | Rya | Weickert | Zobel | NaN | 1931.0 | Zwickau | Germany | NaN | NaN | NaN | F | White |
3531 | 3106 | Jack | NaN | Zouhary | NaN | 1951.0 | Toledo | OH | NaN | NaN | NaN | M | White |
3532 rows × 13 columns
# Number of rows in judges_people for each gender value (notebook display).
judges_people.groupby(["gender"]).size()
gender
F 389
M 3143
dtype: int64
# Give rows with a missing race an explicit "Others" label so they are kept
# by the groupby below (groupby drops NaN keys by default).
judges_people['race'] = judges_people['race'].fillna("Others")

# Non-null counts of every column per (gender, race) pair, ordered by gender
# and then by descending judge_id count within each gender.
df_grp = (
    judges_people
    .groupby(["gender", "race"])
    .count()
    .sort_values(["gender", "judge_id"], ascending=[True, False])
)

# Notebook display.
df_grp
judge_id | name_first | name_middle | name_last | name_suffix | birth_date | birthplace_city | birthplace_state | death_date | death_city | death_state | ||
---|---|---|---|---|---|---|---|---|---|---|---|---|
gender | race | |||||||||||
F | White | 293 | 293 | 265 | 293 | 0 | 293 | 293 | 293 | 28 | 24 | 24 |
African American | 51 | 51 | 50 | 51 | 0 | 51 | 51 | 51 | 3 | 3 | 3 | |
Hispanic | 29 | 29 | 21 | 29 | 0 | 29 | 29 | 29 | 1 | 1 | 1 | |
Asian American | 9 | 9 | 9 | 9 | 0 | 9 | 9 | 9 | 0 | 0 | 0 | |
Others | 3 | 3 | 3 | 3 | 0 | 3 | 3 | 3 | 0 | 0 | 0 | |
African Am./Hispanic | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | |
American Indian | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | |
Hispanic/Asian Am. | 1 | 1 | 0 | 1 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | |
White/Asian Am. | 1 | 1 | 0 | 1 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | |
M | White | 2871 | 2871 | 2483 | 2871 | 299 | 2870 | 2848 | 2869 | 1906 | 1350 | 1352 |
African American | 150 | 150 | 135 | 150 | 41 | 150 | 150 | 150 | 42 | 38 | 38 | |
Hispanic | 87 | 87 | 70 | 87 | 5 | 87 | 86 | 86 | 15 | 9 | 9 | |
Asian American | 22 | 22 | 18 | 22 | 0 | 22 | 22 | 22 | 6 | 4 | 4 | |
Others | 6 | 6 | 6 | 6 | 1 | 6 | 6 | 6 | 1 | 1 | 1 | |
American Indian | 2 | 2 | 2 | 2 | 0 | 2 | 2 | 2 | 0 | 0 | 0 | |
Pac. Isl./Asian Am. | 2 | 2 | 2 | 2 | 0 | 2 | 2 | 2 | 0 | 0 | 0 | |
African Am./Hispanic | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | |
Hispanic/White | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | |
Pac. Isl./White | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 |
# Keep only the judge_id count and turn the (gender, race) index levels back
# into ordinary columns: gender | race | judge_id.
df1 = df_grp["judge_id"].reset_index()

# Notebook display.
df1
gender | race | judge_id | |
---|---|---|---|
0 | F | White | 293 |
1 | F | African American | 51 |
2 | F | Hispanic | 29 |
3 | F | Asian American | 9 |
4 | F | Others | 3 |
5 | F | African Am./Hispanic | 1 |
6 | F | American Indian | 1 |
7 | F | Hispanic/Asian Am. | 1 |
8 | F | White/Asian Am. | 1 |
9 | M | White | 2871 |
10 | M | African American | 150 |
11 | M | Hispanic | 87 |
12 | M | Asian American | 22 |
13 | M | Others | 6 |
14 | M | American Indian | 2 |
15 | M | Pac. Isl./Asian Am. | 2 |
16 | M | African Am./Hispanic | 1 |
17 | M | Hispanic/White | 1 |
18 | M | Pac. Isl./White | 1 |
def process_group(group, top_n=4):
    """Keep the ``top_n`` largest races of one gender group and lump the rest.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows for a single gender with ``race`` and ``judge_id`` (count)
        columns. When the ``gender`` column is absent (some pandas versions
        withhold the grouping column inside ``groupby.apply``), the value is
        taken from ``group.name``, which ``groupby.apply`` sets.
    top_n : int, default 4
        Number of individual races to keep.

    Returns
    -------
    pandas.DataFrame
        The ``top_n`` rows with the highest ``judge_id`` in descending order.
        If any rows are left over, one extra row labelled ``'Others'`` is
        appended holding the sum of their ``judge_id`` values, and the index
        is renumbered from 0.
    """
    # Sort once and split positionally so the "top" rows and the "remaining"
    # rows always partition the group, even if the caller did not pre-sort it.
    # A stable sort keeps tied rows in their original order.
    ordered = group.sort_values('judge_id', ascending=False, kind='stable')

    # Restore the grouping column if groupby.apply withheld it.
    if 'gender' not in ordered.columns:
        ordered.insert(0, 'gender', group.name)

    top_rows = ordered.iloc[:top_n]
    remaining_rows = ordered.iloc[top_n:]
    if remaining_rows.empty:
        return top_rows

    other_row = pd.DataFrame({
        'gender': [ordered['gender'].iloc[0]],
        'race': ['Others'],
        'judge_id': [remaining_rows['judge_id'].sum()],
    })
    return pd.concat([top_rows, other_row], ignore_index=True)
# Apply function to each group: collapse every gender's races with
# process_group. group_keys=False keeps the gender key out of the result
# index.
df_grouped = df1.groupby('gender', group_keys=False).apply(process_group)

# Notebook display.
df_grouped
gender | race | judge_id | |
---|---|---|---|
0 | F | White | 293 |
1 | F | African American | 51 |
2 | F | Hispanic | 29 |
3 | F | Asian American | 9 |
4 | F | Others | 7 |
0 | M | White | 2871 |
1 | M | African American | 150 |
2 | M | Hispanic | 87 |
3 | M | Asian American | 22 |
4 | M | Others | 13 |
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.offsetbox import OffsetImage, AnnotationBbox
from PIL import Image
import textwrap
# Define race colors as 0-255 RGB tuples, keyed by the race labels that
# appear in df_grouped.
race_colors = {
    'White': (160, 200, 220),
    'African American': (70, 130, 180),
    'Hispanic': (0, 0, 128),
    'Asian American': (30, 144, 255),
    'Others': (0, 0, 0),
}

# {gender: {race: count}}, preserving the row order of df_grouped within
# each gender. Iterating the groups directly avoids groupby.apply and its
# version-dependent handling of the grouping column.
df_grouped_dict = {
    gender: dict(zip(rows["race"], rows["judge_id"]))
    for gender, rows in df_grouped.groupby("gender")
}

# {gender: {race: share of that gender's total, in percent}}.
df_percentages = {}
for gender, race_counts in df_grouped_dict.items():
    gender_total = sum(race_counts.values())  # computed once per gender
    df_percentages[gender] = {
        race: (count / gender_total) * 100
        for race, count in race_counts.items()
    }
# Function to recolor an icon with stacked race colors
def recolor_icon_layers(icon_path, race_counts, race_colors):
    """Recolor an icon in horizontal bands proportional to ``race_counts``.

    Parameters
    ----------
    icon_path : str or path-like
        Image file to open; it is converted to RGBA.
    race_counts : dict
        Mapping of race label -> count. Bands are stacked from the top of the
        image downward in the mapping's iteration order.
    race_colors : dict
        Mapping of race label -> (R, G, B) with 0-255 components. Labels that
        are missing fall back to grey ``(128, 128, 128)``.

    Returns
    -------
    PIL.Image.Image
        RGBA image in which every pixel with non-zero alpha has been painted
        with the color of the band it falls in; alpha is left unchanged. If
        the counts sum to zero or less, the image is returned unpainted.
    """
    # The context manager closes the underlying file once the pixels have
    # been copied into the array.
    with Image.open(icon_path) as img:
        data = np.array(img.convert("RGBA"))

    total_count = sum(race_counts.values())
    if total_count <= 0:
        # Nothing to apportion; avoid dividing by zero.
        return Image.fromarray(data)

    height = data.shape[0]
    y_start = 0
    running_count = 0

    for race, count in race_counts.items():
        # Derive each boundary from the cumulative share so the final band
        # ends exactly at the last row. Truncating every band separately
        # would leave the bottom rows of the icon unpainted.
        running_count += count
        y_end = int(round(running_count / total_count * height))

        band = data[y_start:y_end]      # view into data: writes go through
        opaque = band[:, :, 3] > 0      # keep only non-transparent pixels
        band[opaque, :3] = race_colors.get(race, (128, 128, 128))
        y_start = y_end

    return Image.fromarray(data)
# Pictogram: one recolored person icon per gender, scaled by head-count and
# annotated with the race percentages.
fig, ax = plt.subplots(figsize=(6, 4))

# Calculate total counts for scaling
total_category1 = sum(df_grouped_dict.get("F", {}).values())
total_category2 = sum(df_grouped_dict.get("M", {}).values())
largest_total = max(total_category1, total_category2)

# Matplotlib expects RGB components in the 0-1 range.
race_colors_mpl = {race: (r/255, g/255, b/255) for race, (r, g, b) in race_colors.items()}

base_zoom = 0.8  # Adjust zoom level

for i, (category, race_counts) in enumerate(df_grouped_dict.items()):
    icon_path = "person-dress.png" if category == "F" else "person.png"
    icon_array = recolor_icon_layers(icon_path, race_counts, race_colors)

    # Icon size is proportional to this gender's share of the larger group.
    scale_factor = sum(race_counts.values()) / largest_total
    imagebox = OffsetImage(np.array(icon_array), zoom=base_zoom * scale_factor)

    # Anchor the icon's bottom centre at x = i on the baseline.
    ab = AnnotationBbox(imagebox, (i, 0), frameon=False, xycoords="data", box_alignment=(0.5, 0))
    ax.add_artist(ab)

    percentages = df_percentages[category]
    # Labels are placed bottom-up, so walk the races in reverse order.
    for j, (race, percent) in enumerate(reversed(percentages.items())):
        if category == "F":
            ax.text(i+0.3, j*150, f"{percent:.1f}%", ha='right', fontsize=10, color=race_colors_mpl[race])
        else:
            ax.text(i+0.4, j*250, f"{race}: {percent:.1f}%", ha='left', fontsize=10, color=race_colors_mpl[race])

ax.text(0, 1000, f"Female judges: {total_category1}", ha='center', fontsize=10, color="black")
ax.text(1, 4000, f"Male judges: {total_category2:,}", ha='center', fontsize=10, color="black")

ax.set_xticks([0, 1])
ax.set_xlim(-0.5, 1.5)
ax.set_ylim(0, largest_total + 100)

# Year range of the commission dates, computed once for the title.
commission_years = judges_appointments['commission_date'].dt.year
title = f"Among the US judges appointed from {commission_years.min():.0f} to {commission_years.max():.0f}, there were about 8 male judges for every female judge. The proportion of the top four races is shown in different colors."
wrapped_title = "\n".join(textwrap.wrap(title, width=30))
ax.set_title(wrapped_title, loc='left', pad=75)
ax.axis("off")
fig.patch.set_facecolor("whitesmoke")
fig.savefig("judges_appoint.png", bbox_inches="tight", dpi=300)
plt.show()