In [45]:
import pandas as pd
import plotly.graph_objects as go
from pathlib import Path
In [35]:
DATA_DIR = Path('data')
In [ ]:
enrollment_data = pd.read_csv('data/combined_enrollment_data.csv')
enrollment_data.rename(columns={'District Name': 'District', 'SchoolYearId': 'Year', 'K-12':'Enrollment'}, inplace=True)
enrollment_data['Enrollment'] = enrollment_data['Enrollment'].str.replace('<', '').astype(int)
In [39]:
# Determine which districts to compare
target_districts = [
    'East Maine SD 63',
    'Winnetka SD 36',
    'Northbrook SD 28',
    'Glencoe SD 35',
    'CCSD 62',
    'Park Ridge CCSD 64',
    'Lincolnwood SD 74',
    'Arlington Heights SD 25',
    'Skokie SD 68',
    'Skokie SD 69',
    'Skokie SD 73-5',
    'Oak Park ESD 97',
    'Northbrook/Glenview SD 30',
    'Glenview CCSD 34',
    'Wilmette SD 39',
    'Wheeling CCSD 21',
    'Palatine CCSD 15',
    'North Shore SD 112',
    'Evanston CCSD 65'
]
In [40]:
# Filter data for target districts  
filtered_enrollment_data = enrollment_data[enrollment_data['District'].isin(target_districts)].copy()
In [41]:
# Use plotly to create a line chart showing enrollment trends for each district
fig = go.Figure()

for district in target_districts:
    # Make 'Evanston CCSD 65' stand out with a thicker line and use black color
    if district == 'Evanston CCSD 65':
        district_data = filtered_enrollment_data[filtered_enrollment_data['District'] == district]
        fig.add_trace(go.Scatter(x=district_data['Year'], y=district_data['Enrollment'], mode='lines+markers', name=district, line=dict(width=4, color='black')))
        continue
    district_data = filtered_enrollment_data[filtered_enrollment_data['District'] == district]
    fig.add_trace(go.Scatter(x=district_data['Year'], y=district_data['Enrollment'], mode='lines+markers', name=district))
fig.update_layout(
    title='Enrollment Totals for Selected Districts',
    xaxis_title='Year',
    yaxis_title='Enrollment',
    legend_title='District',
    height=600
)
fig.show()
In [101]:
fig.write_html('assets/enrollment_totals.html', include_plotlyjs='cdn', div_id='fig_enrollment_totals')
In [42]:
# Plot enrollement trends for each district in percentage change since 2018
fig_pct = go.Figure()

for district in target_districts:
    district_data = filtered_enrollment_data[filtered_enrollment_data['District'] == district]
    # Calculate percentage change since 2018
    pct_change = ((district_data['Enrollment'] - district_data['Enrollment'].iloc[0]) / district_data['Enrollment'].iloc[0]) * 100
    # Make 'Evanston CCSD 65' stand out with a thicker line and use black color
    if district == 'Evanston CCSD 65':
        district_data = filtered_enrollment_data[filtered_enrollment_data['District'] == district]
        fig_pct.add_trace(go.Scatter(x=district_data['Year'], y=pct_change, mode='lines+markers', name=district, line=dict(width=4, color='black')))
        continue
    fig_pct.add_trace(go.Scatter(x=district_data['Year'], y=pct_change, mode='lines+markers', name=district))

fig_pct.update_layout(
    title='Enrollment Percentage Change Since 2018 for Selected Districts',
    xaxis_title='Year',
    yaxis_title='Percentage Change',
    legend_title='District',
    height=600
)
fig_pct.show()
In [102]:
fig_pct.write_html('assets/enrollment_pct_change.html', include_plotlyjs='cdn', div_id='fig_enrollment_pct_change')
In [43]:
# Calcultate the percent change in enrollment since 2018 for each district
enrollment_2018 = filtered_enrollment_data[filtered_enrollment_data['Year'] == 2018][['District', 'Enrollment']].set_index('District')
enrollment_2023 = filtered_enrollment_data[filtered_enrollment_data['Year'] == 2023][['District', 'Enrollment']].set_index('District')
percent_change = ((enrollment_2023 - enrollment_2018) / enrollment_2018 * 100).fillna(0)

# Calculate a Z-score for each district's enrollment percrentage change since 2018
mean_change = percent_change['Enrollment'].mean()
std_change = percent_change['Enrollment'].std()
z_scores = (percent_change['Enrollment'] - mean_change) / std_change if std_change != 0 else 0

# Display the percent change and Z-scores using plotly table
table_data = pandas.DataFrame({
    'District': percent_change.index,
    'Percent Change': percent_change['Enrollment'],
    'Z-Score': z_scores
})

# Round the Percent Change and Z-Score columns to 2 decimal places
table_data['Percent Change'] = table_data['Percent Change'].round(2)
table_data['Z-Score'] = table_data['Z-Score'].round(2)

# Sort the table by Percent Change descending
table_data = table_data.sort_values(by='Percent Change', ascending=False)

fig_table = go.Figure(data=[go.Table(
    header=dict(values=list(table_data.columns),
                fill_color='paleturquoise',
                align='left'),
    cells=dict(values=[table_data.District, table_data['Percent Change'], table_data['Z-Score']],
               fill_color='lavender',
               align='left'))
])
fig_table.update_layout(title='Enrollment Percentage Change and Z-Scores Since 2018', height=600)
fig_table.show()
In [103]:
fig_table.write_html('assets/enrollment_pct_change_table.html', include_plotlyjs='cdn', div_id='fig_enrollment_pct_change_table')
In [67]:
salary_data = pd.read_csv('data/target_combined_salary_data.csv')
In [47]:
# Define mapping from PositionCodeDescription or Position to Role
role_map = {
    'administrator in a bilingual education program': 'Admin',
    'administrator in  a bilingual education program': 'Admin',
    'assistant principal': 'Principal',
    'assistant special education director': 'Admin',
    'assistant/associate district superintendent': 'Admin',
    'bilingual education teacher': 'Teacher',
    'bilingual special education teacher': 'Teacher',
    'career and technical educator (cte)': 'Admin',
    'chief executive officer': 'Admin',
    'chief school business official': 'Admin',
    'citywide administrator': 'Admin',
    'citywide resource teacher': 'Teacher',
    'dean of students admin (admin endorsement held)': 'Admin',
    'dean of students teacher no admin endorsement)': 'Admin',
    'director area voc cent or supervisor or more 1 field in cte': 'Admin',
    'district superintendent': 'Admin',
    'english as a second language teacher': 'Teacher',
    'general administrator or general supervisor': 'Admin',
    'head teacher': 'Teacher',
    'head of gen ed (depart chair admin endorsement held)': 'Admin',
    'head of gen ed (department chair no admin endorsement held)': 'Admin',
    'principal': 'Principal',
    'reading teacher': 'Teacher',
    'resource teacher arts(visual art, music, drama, and theatre)': 'Teacher',
    'resource teacher economics': 'Teacher',
    'resource teacher elementary': 'Teacher',
    'resource teacher english/language arts': 'Teacher',
    'resource teacher foreign language': 'Teacher',
    'resource teacher government/civics/political science': 'Teacher',
    'resource teacher history': 'Teacher',
    'resource teacher math': 'Teacher',
    'resource teacher other': 'Teacher',
    'resource teacher reading': 'Teacher',
    'resource teacher science (all sciences)': 'Teacher',
    'special education director': 'Admin',
    'special education supervisor': 'Admin',
    'special education teacher': 'Teacher',
    'speech language pathology teacher': 'Teacher',
    'supervisor of more than one school support personnel area': 'Admin',
    'supervisor of one field in career and technical education': 'Admin',
    'supervisor of one school support personnel area': 'Admin',
    'supervisory dean': 'Admin',
    'teacher': 'Teacher',
    'visiting international teacher': 'Teacher',
    '1st grade teacher': 'Teacher',
    '2nd grade teacher': 'Teacher',
    '3rd grade teacher': 'Teacher',
    '4th grade teacher': 'Teacher',
    '5th grade teacher': 'Teacher',
    'academic instructional coach': 'Teacher',
    'accelerated ela coordinator': 'Admin',
    'african centered curr 2nd grade t': 'Teacher',
    'african centered curr 3rd grade t': 'Teacher',
    'african centered curr 4th grade t': 'Teacher',
    'african centered curr 5th grade t': 'Teacher',
    'african centered curr k grade tea': 'Teacher',
    'art / media art teacher': 'Teacher',
    'art teacher': 'Teacher',
    'assistant superintendent of acade': 'Admin',
    'assistant superintendent of academics': 'Admin',
    'assistant superintendent of acco': 'Admin',
    'assistant superintendent of accountability': 'Admin',
    'asst director of teaching & learni': 'Admin',
    'asst director of teaching & learning': 'Admin',
    'band teacher': 'Teacher',
    'bilingual arts/reading teacher': 'Teacher',
    'bilingual esl teacher': 'Teacher',
    'bilingual math/science teacher': 'Teacher',
    'bilingual pre-k hs teacher': 'Teacher',
    'bilingual pre-k pfa teacher': 'Teacher',
    'bilingual teacher': 'Teacher',
    'chief financial officer': 'Admin',
    'computer science teacher': 'Teacher',
    'dec president': 'Admin',
    'director of buildings, grounds & t': 'Admin',
    'director of climate & safety': 'Admin',
    'director of early childhood prog': 'Admin',
    'director of early childhood programs': 'Admin',
    'director of finance': 'Admin',
    'director of human relations': 'Admin',
    'director of humanities': 'Admin',
    'director of mtss & sel': 'Admin',
    'director of multilingual services': 'Admin',
    'director of programs & partnersh': 'Admin',
    'director of programs & partnerships': 'Admin',
    'director of schools management': 'Admin',
    'director of steam': 'Admin',
    'director of strategic projects': 'Admin',
    'director of student specialized se': 'Admin',
    'director of student specialized services': 'Admin',
    'drama teacher': 'Teacher',
    'drama/dance teacher': 'Teacher',
    'esl teacher': 'Teacher',
    'executive chief of communications': 'Admin',
    'executive chief of human relations': 'Admin',
    'executive director of raad': 'Admin',
    'executive director of technology': 'Admin',
    'french teacher': 'Teacher',
    'health services director': 'Admin',
    'hearing impaired teacher': 'Teacher',
    'hearing itinerant': 'Teacher',
    'iep interventionist': 'Teacher',
    'ies coordinator': 'Admin',
    'ies educator': 'Teacher',
    'ies educator - adaptive pe': 'Teacher',
    'ies educator bilingual': 'Teacher',
    'interventionist': 'Teacher',
    'interventionist bilingual': 'Teacher',
    'kindergarten teacher': 'Teacher',
    'language arts teacher': 'Teacher',
    'language arts/literature teacher': 'Teacher',
    'language arts/ss teacher': 'Teacher',
    'leave of absence': 'LOA',
    'library media specialist': 'Teacher',
    'manager of stud specialized servic': 'Admin',
    'math and science teacher': 'Teacher',
    'math teacher': 'Teacher',
    'media arts teacher': 'Teacher',
    'multilingual coordinator': 'Admin',
    'music teacher': 'Teacher',
    'occupational therapist': 'Teacher',
    'orchestra teacher': 'Teacher',
    'physical education teacher': 'Teacher',
    'physical therapist': 'Teacher',
    'pre-k hs teacher': 'Teacher',
    'pre-k pfa teacher': 'Teacher',
    'psychologist': 'Teacher',
    'school counselor': 'Teacher',
    'school nurse': 'Teacher',
    'science teacher': 'Teacher',
    'science/social studies teacher': 'Teacher',
    'social studies teacher': 'Teacher',
    'social worker': 'Teacher',
    'spanish & french teacher': 'Teacher',
    'spanish teacher': 'Teacher',
    'speech language pathologist': 'Teacher',
    'superintendent': 'Admin',
    'teacher visually impaired': 'Teacher',
    'twi 1st grade teacher': 'Teacher',
    'twi 2nd grade teacher': 'Teacher',
    'twi 3rd grade teacher': 'Teacher',
    'twi 4th grade teacher': 'Teacher',
    'twi 5th grade teacher': 'Teacher',
    'twi kindergarten teacher': 'Teacher',
    'vocational teacher': 'Teacher'
}
In [68]:
salary_data['Role'] = salary_data['PositionCodeDescription'].str.lower().map(role_map)
In [69]:
# Aggregate salary data by district and role
salary_summary = salary_data.groupby(['SchoolYearId', 'District', 'Role']).agg(
    comp_total=('TotalComp', 'sum'),
    headcount=('TotalComp', 'count'),
    comp_avg=('TotalComp', 'mean'),
    salary_total=('BaseSalary', 'sum'),
    salary_avg=('BaseSalary', 'mean')
).reset_index()

salary_summary['comp_total'] = salary_summary['comp_total'].round(2)
salary_summary['comp_avg'] = salary_summary['comp_avg'].round(2)
salary_summary['salary_total'] = salary_summary['salary_total'].round(2)
salary_summary['salary_avg'] = salary_summary['salary_avg'].round(2)
In [70]:
# Add enrollment data to salary summary
salary_summary = salary_summary.merge(
    enrollment_data[['District', 'Year', 'Enrollment']],
    left_on=['District', 'SchoolYearId'],
    right_on=['District', 'Year'],
    how='left'
)
In [79]:
# Add total salary per 1000 students and headcount per 1000 students
salary_summary['salary_per_1000_students'] = (salary_summary['salary_total'] / salary_summary['Enrollment'] * 1000).round(2)
salary_summary['headcount_per_1000_students'] = (salary_summary['headcount'] / salary_summary['Enrollment'] * 1000).round(2)
salary_summary['total_comp_per_1000_students'] = (salary_summary['comp_total'] / salary_summary['Enrollment'] * 1000).round(2)
In [81]:
admin_data = salary_summary[salary_summary['Role'] == 'Admin']
principal_data = salary_summary[salary_summary['Role'] == 'Principal']
In [73]:
# Plot admin headcount for each district over time
fig_admin = go.Figure()
for district in target_districts:
    district_data = admin_data[admin_data['District'] == district]
    # Make 'Evanston CCSD 65' stand out with a thicker line and use black color
    if district == 'Evanston CCSD 65':
        fig_admin.add_trace(go.Scatter(x=district_data['SchoolYearId'], y=district_data['headcount'], mode='lines+markers', name=district, line=dict(width=4, color='black')))
        continue
    fig_admin.add_trace(go.Scatter(x=district_data['SchoolYearId'], y=district_data['headcount'], mode='lines+markers', name=district))
fig_admin.update_layout(
    title='Admin Headcount for Selected Districts',
    xaxis_title='Year',
    yaxis_title='Headcount',
    legend_title='District',
    height=600
)
fig_admin.show()
In [87]:
fig_admin.write_html('assets/fig_admin.html', include_plotlyjs='cdn', div_id='fig_admin')
In [74]:
# Plot admin headcount per 1000 students over time for each district
fig_admin_per_1000 = go.Figure()
for district in target_districts:
    district_data = admin_data[admin_data['District'] == district]
    # Make 'Evanston CCSD 65' stand out with a thicker line and use black color
    if district == 'Evanston CCSD 65':
        fig_admin_per_1000.add_trace(go.Scatter(x=district_data['SchoolYearId'], y=district_data['headcount_per_1000_students'], mode='lines+markers', name=district, line=dict(width=4, color='black')))
        continue
    fig_admin_per_1000.add_trace(go.Scatter(x=district_data['SchoolYearId'], y=district_data['headcount_per_1000_students'], mode='lines+markers', name=district))
fig_admin_per_1000.update_layout(
    title='Admin Headcount per 1000 Students for Selected Districts',
    xaxis_title='Year',
    yaxis_title='Headcount per 1000 Students',
    legend_title='District',
    height=600
)
fig_admin_per_1000.show()
In [88]:
fig_admin_per_1000.write_html('assets/fig_admin_per_1000.html', include_plotlyjs='cdn', div_id='fig_admin_per_1000')
In [86]:
# Plot admin salary over time for each district
fig_admin_salary = go.Figure()
for district in target_districts:
    district_data = admin_data[admin_data['District'] == district]
    # Make 'Evanston CCSD 65' stand out with a thicker line and use black color
    if district == 'Evanston CCSD 65':
        fig_admin_salary.add_trace(go.Scatter(x=district_data['SchoolYearId'], y=district_data['salary_total'], mode='lines+markers', name=district, line=dict(width=4, color='black')))
        continue
    fig_admin_salary.add_trace(go.Scatter(x=district_data['SchoolYearId'], y=district_data['salary_total'], mode='lines+markers', name=district))
fig_admin_salary.update_layout(
    title='Admin Salary for Selected Districts',
    xaxis_title='Year',
    yaxis_title='Salary',
    legend_title='District',
    height=600
)
fig_admin_salary.show()
In [89]:
fig_admin_salary.write_html('assets/fig_admin_salary.html', include_plotlyjs='cdn', div_id='fig_admin_salary')
In [90]:
# Plot admin salary per 1000 students over time for each district
fig_admin_salary_per_1000 = go.Figure()
for district in target_districts:
    district_data = admin_data[admin_data['District'] == district]
    # Make 'Evanston CCSD 65' stand out with a thicker line and use black color
    if district == 'Evanston CCSD 65':
        fig_admin_salary_per_1000.add_trace(go.Scatter(x=district_data['SchoolYearId'], y=district_data['salary_per_1000_students'], mode='lines+markers', name=district, line=dict(width=4, color='black')))
        continue
    fig_admin_salary_per_1000.add_trace(go.Scatter(x=district_data['SchoolYearId'], y=district_data['salary_per_1000_students'], mode='lines+markers', name=district))
fig_admin_salary_per_1000.update_layout(
    title='Admin Salary per 1000 Students for Selected Districts',
    xaxis_title='Year',
    yaxis_title='Salary per 1000 Students',
    legend_title='District',
    height=600
)
fig_admin_salary_per_1000.show()
In [91]:
fig_admin_salary_per_1000.write_html('assets/fig_admin_salary_per_1000.html', include_plotlyjs='cdn', div_id='fig_admin_salary_per_1000')
In [98]:
# Calculate extra Admin cost based on average headcount per 1k students across all districts multiplied by the admin salary per district
admin_data_2024 = admin_data[admin_data['SchoolYearId'] == 2024].copy()
admin_avg_headcount_per_1000 = admin_data_2024['headcount_per_1000_students'].mean()

# Extra admins per 1000 students times their avg salary times enrollment scale
admin_data_2024['extra_admin_cost'] = (
    (admin_data_2024['headcount_per_1000_students'] - admin_avg_headcount_per_1000) * 
    admin_data_2024['comp_avg'] * 
    (admin_data_2024['Enrollment'] / 1000)
)
admin_data_2024['extra_admin_cost'] = admin_data_2024['extra_admin_cost'].round(2)
admin_data_2024 = admin_data_2024.sort_values('extra_admin_cost', ascending=False)

# Plot Extra Admin Cost by District
# Make Evanston label bold
y_labels = ['<b>' + district + '</b>' if district == 'Evanston CCSD 65' or district == 'Evanston CCSD 65 - 2025' else district 
            for district in admin_data_2024['District']]

fig_extra_admin = go.Figure()

fig_extra_admin.add_trace(go.Bar(
    x=admin_data_2024['extra_admin_cost'],
    y=y_labels,
    orientation='h',
    text=admin_data_2024['extra_admin_cost'].apply(lambda x: f'${x:,.0f}'),
    textposition='auto'
))

fig_extra_admin.update_layout(
    title='Estimated Extra Admin Cost by District',
    xaxis_title='Extra Admin Cost ($)',
    yaxis_title='District',
    height=800,
    annotations=[
        dict(
            text="This is calculated by setting each district's admin headcount per 1,000 students to the average across all districts, then calculating the cost difference based on each district's average admin salary.",
            xref="paper", yref="paper",
            x=0.5, y=-0.1,
            showarrow=False,
            xanchor='center',
            font=dict(size=10, color="gray")
        )
    ]
)

fig_extra_admin.show()
In [99]:
fig_extra_admin.write_html('assets/fig_extra_admin.html', include_plotlyjs='cdn', div_id='fig_extra_admin')