The Languages of the World

Distribution of language status for the top 10 countries with the most languages.
ObservablePlot
TidyTuesday
Author

Manish Datt

Published

December 23, 2025

TidyTuesday dataset of December 23, 2025

Language Data Tables

Languages

Endangered Status

Families

Combined Data

Import required libraries

    <link href="https://unpkg.com/tabulator-tables@6.3.1/dist/css/tabulator.min.css" rel="stylesheet">
    <script src="https://cdn.jsdelivr.net/npm/d3@7"></script>
    <script src="https://cdn.jsdelivr.net/npm/@observablehq/plot@0.6.11/dist/plot.umd.min.js"></script>
    <script src="https://cdn.jsdelivr.net/npm/htl@0.3.1/dist/htl.min.js"></script>
    <script type="text/javascript" src="https://unpkg.com/tabulator-tables@6.3.1/dist/js/tabulator.min.js"></script>
    <script src="https://unpkg.com/papaparse@5.4.1/papaparse.min.js"></script>
    <script src="https://cdn.jsdelivr.net/npm/danfojs@1.1.2/lib/bundle.js"></script>

<script type="module">
    // Import the ES module version of i18n-iso-countries
    import countries from 'https://cdn.skypack.dev/i18n-iso-countries';

    /**
     * Installs `window.getCountryName` and loads the English country names.
     *
     * The lookup is installed before the first `await`, so it exists as soon
     * as `init()` has been called, even if other scripts on the page finish
     * their own loading before the locale file arrives. Until the locale has
     * been registered (or if loading it fails) the lookup returns the code it
     * was given unchanged.
     *
     * @returns {Promise<void>} Settles once the locale has been registered or
     *     the load failure has been logged; fetch/parse errors are logged
     *     rather than rejected because the caller does not await this promise.
     */
    async function init() {
        // Flipped only after registerLocale succeeded, so the lookup never
        // queries the library for a locale it does not know yet.
        let localeReady = false;

        // Make the getCountryName function available globally
        window.getCountryName = function(code) {
            // Non-string / empty codes cannot be looked up; hand them back as-is
            // instead of throwing on code.toUpperCase().
            if (!localeReady || typeof code !== 'string' || code === '') {
                return code;
            }
            return countries.getName(code.toUpperCase(), 'en') || code;
        };

        try {
            // Fetch the English language data
            const response = await fetch('https://cdn.jsdelivr.net/npm/i18n-iso-countries@7.11.0/langs/en.json');
            if (!response.ok) {
                throw new Error(`Unexpected HTTP status ${response.status} while loading country names`);
            }
            const data = await response.json();

            // Register the English locale
            countries.registerLocale(data);
            localeReady = true;
        } catch (error) {
            // Best effort: the page still works, it just shows country codes.
            console.error('Error loading country names:', error);
        }
    }

    init();
</script>

Create placeholders

    <!-- Placeholder elements; the plotting script on this page fills them by id. -->
    <div class="container">
        <h3>Languages</h3>
        <!-- Tabulator table built from languages.csv -->
        <div id="languages-table"></div>

        <div class="row">
            <div class="col-md-6">
                <h3>Endangered Status</h3>
                <!-- Tabulator table built from endangered_status.csv -->
                <div id="endangered-table"></div>
            </div>
            <div class="col-md-6">
                <h3>Families</h3>
                <!-- Tabulator table built from families.csv -->
                <div id="families-table"></div>
            </div>
        </div>

        <h3>Combined Data</h3>
        <!-- Tabulator table of languages merged with family and status columns -->
        <div class="mb-4" id="combined-table"></div>

        <div>
            <!-- Receives the Observable Plot stacked bar chart -->
            <div id="stacked-barplot"></div>
            <!-- Receives the "All" and per-status filter checkboxes -->
            <div id="status-filters" style="margin-bottom: 1px;"></div>
        </div>
    </div>

Plotting


    <script>
        window.addEventListener('load', () => {
            Promise.all([
                fetch('endangered_status.csv').then(r => r.text()),
                fetch('families.csv').then(r => r.text()),
                fetch('languages.csv').then(r => r.text())
            ])
            .then(([endText, famText, langText]) => {
                // Parse a CSV text into an array of row objects keyed by the header line.
                // skipEmptyLines drops the blank record Papa Parse otherwise emits when
                // the file ends with a newline; without it every table below gets an
                // empty trailing row. A fresh config object is passed on every call.
                const parseCsv = text => Papa.parse(text, {header: true, skipEmptyLines: true}).data;

                const endData = parseCsv(endText);
                const famData = parseCsv(famText);
                const langData = parseCsv(langText);

                // Create maps for merging (for duplicate ids the last row wins)
                const endangeredMap = new Map(endData.map(row => [row.id, row]));
                const familiesMap = new Map(famData.map(row => [row.id, row]));

                // Left-join every language with its endangered status (by language id)
                // and its family (by family_id); unmatched lookups yield empty strings.
                const combinedData = langData.map(lang => {
                    const end = endangeredMap.get(lang.id);
                    const fam = familiesMap.get(lang.family_id);
                    return {
                        id: lang.id,
                        name: lang.name,
                        latitude: lang.latitude,
                        longitude: lang.longitude,
                        iso639P3code: lang.iso639P3code,
                        countries: lang.countries,
                        glottocode: lang.glottocode,
                        macroarea: lang.macroarea,
                        family_id: lang.family_id,
                        family: fam ? fam.family : '',
                        status_code: end ? end.status_code : '',
                        status_label: end ? end.status_label : ''
                    };
                });

                // Render the four paginated Tabulator tables. Each spec is
                // [target selector, row data, column definitions]; the tables are
                // created in the listed order with identical layout options.

                // Columns shared by the languages and combined tables. Built by a
                // function so that each table receives its own definition objects.
                const languageColumns = () => [
                    {title: "ID", field: "id"},
                    {title: "Name", field: "name"},
                    {title: "Latitude", field: "latitude"},
                    {title: "Longitude", field: "longitude"},
                    {title: "ISO 639-3 Code", field: "iso639P3code"},
                    {title: "Countries", field: "countries"},
                    {title: "Glottocode", field: "glottocode"},
                    {title: "Macroarea", field: "macroarea"},
                    {title: "Family ID", field: "family_id"}
                ];

                const tableSpecs = [
                    ["#endangered-table", endData, [
                        {title: "ID", field: "id"},
                        {title: "Status Code", field: "status_code"},
                        {title: "Status Label", field: "status_label"}
                    ]],
                    ["#families-table", famData, [
                        {title: "ID", field: "id"},
                        {title: "Family", field: "family"}
                    ]],
                    ["#languages-table", langData, languageColumns()],
                    ["#combined-table", combinedData, [
                        ...languageColumns(),
                        {title: "Family", field: "family"},
                        {title: "Status Code", field: "status_code"},
                        {title: "Status Label", field: "status_label"}
                    ]]
                ];

                for (const [selector, rows, columns] of tableSpecs) {
                    new Tabulator(selector, {
                        data: rows,
                        columns: columns,
                        layout: "fitColumns",
                        pagination: "local",
                        paginationSize: 5
                    });
                }

                // Count languages per (country, status code) pair using danfojs.

                // A language may list several countries separated by ';', so emit one
                // row per (language, country) pair; languages without a countries
                // value contribute no rows.
                const explodedData = combinedData.flatMap(row => {
                    const countryCodes = row.countries
                        ? row.countries.split(';').map(c => c.trim()).filter(c => c)
                        : [];
                    return countryCodes.map(country => ({
                        country: country,
                        status_code: row.status_code,
                        status_label: row.status_label,
                        language: row.name
                    }));
                });

                // Create new dataframe with exploded countries
                const explodedDf = new dfd.DataFrame(explodedData);

                // Group by country and status code, then count languages.
                // `result` is deliberately left in the order danfojs returns it: the
                // chart code that follows reads `result.values` by position and does
                // its own sorting, so no rename or re-sort is applied here.
                const grouped = explodedDf.groupby(['country', 'status_code']);
                const result = grouped.count().resetIndex();
                

                // Process data for stacked bar plot.
                // Rows of result.values are read by position: [0] is the country code,
                // [1] the status code and [3] is used as the language count.

                // getCountryName is installed by a separate asynchronous module script;
                // fall back to the raw code if it is not available yet instead of
                // aborting the whole chart with a ReferenceError.
                const toCountryName = typeof window.getCountryName === 'function'
                    ? window.getCountryName
                    : code => code;

                // Status codes as text; null/undefined become '' so comparing never throws.
                const statusText = status => (status === null || status === undefined ? '' : String(status));

                // Exactly the codes that are plotted. Membership is tested instead of a
                // string range check, which would also accept values such as '10'.
                const plottedStatuses = new Set(['0', '1', '2', '3', '4', '5', '6']);

                // Sort grouped data by status code first
                const sortedResult = [...result.values].sort((a, b) => statusText(a[1]).localeCompare(statusText(b[1])));

                // Get top 10 countries by total language count
                const countryTotals = new Map();
                result.values.forEach(row => {
                    const country = row[0];
                    const langCount = row[3]; // language_count is at index 3
                    countryTotals.set(country, (countryTotals.get(country) || 0) + langCount);
                });

                // Get top 10 countries
                const top10Countries = Array.from(countryTotals.entries())
                    .sort((a, b) => b[1] - a[1])
                    .slice(0, 10)
                    .map(([country]) => country);

                // Filter data for top 10 countries and transform for stacked bar plot
                const stackedData = [];
                sortedResult.forEach(row => {
                    const country = row[0];
                    const count = row[3];

                    if (!top10Countries.includes(country)) {
                        return;
                    }

                    // A blank status code is plotted as "0"
                    const rawStatus = statusText(row[1]);
                    const processedStatus = rawStatus === '' ? '0' : rawStatus;

                    // Only include status codes 0-6
                    if (plottedStatuses.has(processedStatus)) {
                        stackedData.push({
                            country: toCountryName(country),
                            status: processedStatus,
                            count: count
                        });
                    }
                });

                // Prepare data for horizontal stacked bar plot.

                // Collect the per-status counts of every country. Counts are added up
                // rather than assigned, so two rows that end up with the same
                // (country, status) pair are summed instead of the later one silently
                // replacing the earlier one.
                const countryData = new Map();
                stackedData.forEach(item => {
                    if (!countryData.has(item.country)) {
                        countryData.set(item.country, {
                            country: item.country,
                            statusCounts: {}
                        });
                    }
                    const statusCounts = countryData.get(item.country).statusCounts;
                    statusCounts[item.status] = (statusCounts[item.status] || 0) + item.count;
                });

                // Total per country, computed once and reused for sorting and labels.
                const totalsByCountry = new Map();
                countryData.forEach(entry => {
                    const total = Object.values(entry.statusCounts).reduce((sum, count) => sum + count, 0);
                    totalsByCountry.set(entry.country, total);
                });

                // Convert to array and sort by total count (descending) for the y-axis
                const sortedCountries = Array.from(countryData.values()).sort(
                    (a, b) => totalsByCountry.get(b.country) - totalsByCountry.get(a.country)
                );

                // Flatten data for plotting and record the totals
                const stackedPlotData = [];
                const barCountryTotals = {};

                sortedCountries.forEach(entry => {
                    barCountryTotals[entry.country] = totalsByCountry.get(entry.country);

                    Object.entries(entry.statusCounts).forEach(([status, count]) => {
                        stackedPlotData.push({
                            country: entry.country,
                            status: status,
                            count: count
                        });
                    });
                });

                // Colour for each status code; the position in the list is the code (0-6).
                const statusColors = {};
                [
                    "#BDBDBD",
                    "#2E7D32",
                    "#9CCC65",
                    "#FBC02D",
                    "#EF6C00",
                    "#C62828",
                    "#6D5C6D"
                ].forEach((hex, code) => {
                    statusColors[code] = hex;
                });

                // Status code -> display label, taken from the endangered-status rows.
                // Rows with an empty code or a code outside 0-6 are skipped, and a
                // missing label falls back to "NA".
                const statusLabels = {};
                for (const row of endData) {
                    const code = row.status_code;
                    const inRange = code >= 0 && code <= 6;
                    if (!code || !inRange) {
                        continue;
                    }
                    statusLabels[code] = row.status_label ? row.status_label : "NA";
                }

                // Status 0 is always labelled "NA", whatever the data says.
                statusLabels["0"] = "NA";

                // Build the filter checkboxes: one "All" toggle plus one checkbox per
                // status code present in the plotted data.

                // Unique status codes in numeric order, so the checkboxes appear in a
                // stable 0..6 order regardless of how the data rows happen to be ordered.
                const uniqueStatuses = [...new Set(stackedPlotData.map(d => d.status))]
                    .sort((a, b) => Number(a) - Number(b));

                // status code -> whether that status is currently shown in the chart
                const statusFilters = {};
                uniqueStatuses.forEach(status => {
                    statusFilters[status] = true;
                });

                const filterContainer = document.getElementById('status-filters');

                // status code -> its checkbox element. Keeping the references avoids a
                // document-wide `input[value="..."]` lookup, which could match unrelated
                // inputs elsewhere on the page.
                const statusCheckboxes = new Map();

                // Create "All" checkbox
                const allLabel = document.createElement('label');
                allLabel.style.marginRight = '15px';
                allLabel.style.display = 'inline-block';
                allLabel.style.fontWeight = 'bold';

                const allCheckbox = document.createElement('input');
                allCheckbox.type = 'checkbox';
                allCheckbox.checked = true;
                allCheckbox.id = 'all-status';

                // Toggling "All" sets every status filter and checkbox to the same state.
                allCheckbox.addEventListener('change', function() {
                    const isChecked = this.checked;
                    uniqueStatuses.forEach(status => {
                        statusFilters[status] = isChecked;
                        const statusCheckbox = statusCheckboxes.get(status);
                        if (statusCheckbox) {
                            statusCheckbox.checked = isChecked;
                        }
                    });
                    updateStackedBarPlot();
                });

                allLabel.appendChild(allCheckbox);
                allLabel.appendChild(document.createTextNode('All'));
                filterContainer.appendChild(allLabel);

                // Create individual status checkboxes
                uniqueStatuses.forEach(status => {
                    const label = document.createElement('label');
                    label.style.marginRight = '15px';
                    label.style.display = 'inline-block';

                    const checkbox = document.createElement('input');
                    checkbox.type = 'checkbox';
                    checkbox.checked = true;
                    checkbox.value = status;
                    statusCheckboxes.set(status, checkbox);

                    checkbox.addEventListener('change', function() {
                        statusFilters[status] = this.checked;

                        // "All" is ticked only while every individual status is ticked
                        allCheckbox.checked = uniqueStatuses.every(s => statusFilters[s]);

                        updateStackedBarPlot();
                    });

                    label.appendChild(checkbox);
                    const statusText = document.createTextNode(statusLabels[status] || `Status ${status}`);
                    label.appendChild(statusText);

                    // Colour the label text like the matching bar segment
                    label.style.color = statusColors[status] || "#000000";

                    filterContainer.appendChild(label);
                });

                /**
                 * Re-renders the stacked bar chart using only the statuses whose
                 * filter is enabled.
                 *
                 * Reads `stackedPlotData`, `statusFilters`, `sortedCountries` and
                 * `statusColors` from the enclosing scope and replaces the content
                 * of the `#stacked-barplot` element with the new plot.
                 */
                function updateStackedBarPlot() {
                    const filteredData = stackedPlotData.filter(d => statusFilters[d.status]);

                    // Recalculate the per-country totals of the visible statuses
                    const filteredTotals = new Map();
                    filteredData.forEach(d => {
                        filteredTotals.set(d.country, (filteredTotals.get(d.country) || 0) + d.count);
                    });

                    // Exactly one total label per country. Deriving the labels from the
                    // data rows instead would draw the same text once per status on top
                    // of itself.
                    const totalLabels = Array.from(filteredTotals, ([country, count]) => ({country, count}));

                    // Update plot
                    const updatedPlot = Plot.plot({
                        title: htl.html`<span class="fs-3">Status distribution for top 10 countries with most languages</span>`,
                        marks: [
                            Plot.barX(filteredData, {x: "count", y: "country", fill: "status"}),
                            Plot.text(totalLabels, {x: "count", y: "country", text: "count", textAnchor: "start", dx: 8, fill: "black", fontWeight: "bold", fontSize: 12})
                        ],
                        x: {label: null, tickFormat: null, ticks: null, tickSize: 0, axis: null},
                        // Fixed domain keeps every country row in place even when its
                        // statuses are all filtered out.
                        y: {label: null, domain: sortedCountries.map(d => d.country), ticks: null, tickSize: 0},
                        color: {
                            type: "ordinal",
                            domain: Object.keys(statusColors),
                            range: Object.values(statusColors)
                        },
                        width: 700,
                        height: 400,
                        marginLeft: 170,  // Increase left margin for longer country names
                        marginRight: 30,   // Reduce right margin to shift plot right
                        style: {
                            fontSize: "14px"
                        }
                    });

                    // Replace old plot with new one
                    const plotContainer = document.getElementById('stacked-barplot');
                    plotContainer.innerHTML = '';
                    plotContainer.appendChild(updatedPlot);
                }

                // Create initial stacked bar plot. Every status filter starts enabled,
                // so rendering through updateStackedBarPlot() shows the full data set
                // and keeps the initial chart and the filtered re-renders on one
                // shared plot definition.
                updateStackedBarPlot();
            })
            .catch(error => console.error('Error loading CSV files:', error));
        });
    </script>