Skip to content

Commit 1a26b8d

Browse files
committed
ruff format
1 parent 9cf23a8 commit 1a26b8d

1 file changed

Lines changed: 88 additions & 43 deletions

File tree

src/notebooks/technical_report.ipynb

Lines changed: 88 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,18 @@
3131
"import matplotlib.pyplot as plt\n",
3232
"import matplotlib.ticker as ticker\n",
3333
"from tqdm import tqdm\n",
34-
"from plotnine import ggplot, aes, geom_line, geom_point, theme_minimal, theme, labs, element_line, element_text, facet_wrap\n",
34+
"from plotnine import (\n",
35+
" ggplot,\n",
36+
" aes,\n",
37+
" geom_line,\n",
38+
" geom_point,\n",
39+
" theme_minimal,\n",
40+
" theme,\n",
41+
" labs,\n",
42+
" element_line,\n",
43+
" element_text,\n",
44+
" facet_wrap,\n",
45+
")\n",
3546
"\n",
3647
"import sqlite3\n",
3748
"import json\n",
@@ -185,7 +196,7 @@
185196
" # : : matches the colon\n",
186197
" # [^>]+ : matches the value and anything else until the closing bracket\n",
187198
" # > : matches the closing bracket\n",
188-
" return re.sub(r'<([^:]+):[^>]+>', r'\\1', s)"
199+
" return re.sub(r\"<([^:]+):[^>]+>\", r\"\\1\", s)"
189200
]
190201
},
191202
{
@@ -1667,11 +1678,11 @@
16671678
" course_df = mtbb_df[mtbb_df[\"Course\"] == course_name]\n",
16681679
"\n",
16691680
" # Plot median\n",
1670-
" line, = plt.plot(\n",
1681+
" (line,) = plt.plot(\n",
16711682
" course_df[\"Term\"],\n",
16721683
" course_df[\"Median time between backups (sec)\"],\n",
1673-
" marker='o',\n",
1674-
" label=f\"{course_name}\"\n",
1684+
" marker=\"o\",\n",
1685+
" label=f\"{course_name}\",\n",
16751686
" )\n",
16761687
"\n",
16771688
" # Plot IQR\n",
@@ -1686,8 +1697,8 @@
16861697
"plt.title(\"Median Duration Between Backups Over Time\")\n",
16871698
"plt.xlabel(\"Term\")\n",
16881699
"plt.ylabel(\"Seconds\")\n",
1689-
"plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')\n",
1690-
"plt.grid(axis='y', linestyle='--', alpha=0.7)\n",
1700+
"plt.legend(bbox_to_anchor=(1.05, 1), loc=\"upper left\")\n",
1701+
"plt.grid(axis=\"y\", linestyle=\"--\", alpha=0.7)\n",
16911702
"plt.show()"
16921703
]
16931704
},
@@ -1978,11 +1989,11 @@
19781989
" course_df = mnnla_df[mnnla_df[\"Course\"] == course_name]\n",
19791990
"\n",
19801991
" # Plot median\n",
1981-
" line, = plt.plot(\n",
1992+
" (line,) = plt.plot(\n",
19821993
" course_df[\"Term\"],\n",
19831994
" course_df[\"Median net number of lines added\"],\n",
1984-
" marker='o',\n",
1985-
" label=f\"{course_name}\"\n",
1995+
" marker=\"o\",\n",
1996+
" label=f\"{course_name}\",\n",
19861997
" )\n",
19871998
"\n",
19881999
" # Plot IQR\n",
@@ -1997,8 +2008,8 @@
19972008
"plt.title(\"Median Net Number of Lines Added\")\n",
19982009
"plt.xlabel(\"Term\")\n",
19992010
"plt.ylabel(\"Lines\")\n",
2000-
"plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')\n",
2001-
"plt.grid(axis='y', linestyle='--', alpha=0.7)\n",
2011+
"plt.legend(bbox_to_anchor=(1.05, 1), loc=\"upper left\")\n",
2012+
"plt.grid(axis=\"y\", linestyle=\"--\", alpha=0.7)\n",
20022013
"plt.show()"
20032014
]
20042015
},
@@ -2400,7 +2411,11 @@
24002411
],
24012412
"source": [
24022413
"ax = sns.lineplot(\n",
2403-
" data=tleab, x=\"Term\", y=\"Total lint errors (all backups and students)\", hue=\"Course\", marker=\"o\"\n",
2414+
" data=tleab,\n",
2415+
" x=\"Term\",\n",
2416+
" y=\"Total lint errors (all backups and students)\",\n",
2417+
" hue=\"Course\",\n",
2418+
" marker=\"o\",\n",
24042419
")\n",
24052420
"plt.title(\"Total number of lint errors across (all backups)\")\n",
24062421
"plt.ylabel(\"Lint errors\")\n",
@@ -2409,7 +2424,7 @@
24092424
"# scientific notation on y-axis since numbers are large\n",
24102425
"formatter = ticker.ScalarFormatter(useMathText=True)\n",
24112426
"formatter.set_scientific(True)\n",
2412-
"formatter.set_powerlimits((-1, 1)) # Forces sci notation for anything >10 or <0.1\n",
2427+
"formatter.set_powerlimits((-1, 1)) # Forces sci notation for anything >10 or <0.1\n",
24132428
"ax.yaxis.set_major_formatter(formatter)\n",
24142429
"\n",
24152430
"plt.show()"
@@ -2615,21 +2630,29 @@
26152630
" else:\n",
26162631
" year = 2025\n",
26172632
"\n",
2618-
" df = results[\"lint_error_freqs_all_backups\"][Course(is_cs61a, Term.FALL, year)].copy()\n",
2633+
" df = results[\"lint_error_freqs_all_backups\"][\n",
2634+
" Course(is_cs61a, Term.FALL, year)\n",
2635+
" ].copy()\n",
26192636
"\n",
2620-
" df['Course'] = 'CS 61A' if is_cs61a else 'DATA C88C'\n",
2637+
" df[\"Course\"] = \"CS 61A\" if is_cs61a else \"DATA C88C\"\n",
26212638
"\n",
26222639
" # Define the standard naming pattern\n",
2623-
" error_str = df['code'] + \": \" + df['url'].str.removeprefix(\"https://docs.astral.sh/ruff/rules/\")\n",
2640+
" error_str = (\n",
2641+
" df[\"code\"]\n",
2642+
" + \": \"\n",
2643+
" + df[\"url\"].str.removeprefix(\"https://docs.astral.sh/ruff/rules/\")\n",
2644+
" )\n",
26242645
"\n",
26252646
" # If code is 'invalid-syntax', use 'invalid-syntax', otherwise use the standard pattern\n",
26262647
" # because otherwise url is NaN\n",
2627-
" df['Error'] = np.where(df['code'] == 'invalid-syntax', 'invalid-syntax', error_str)\n",
2648+
" df[\"Error\"] = np.where(\n",
2649+
" df[\"code\"] == \"invalid-syntax\", \"invalid-syntax\", error_str\n",
2650+
" )\n",
26282651
"\n",
26292652
" if is_fa23:\n",
2630-
" df['Semester'] = 'FA23'\n",
2653+
" df[\"Semester\"] = \"FA23\"\n",
26312654
" else:\n",
2632-
" df['Semester'] = 'FA25'\n",
2655+
" df[\"Semester\"] = \"FA25\"\n",
26332656
" df = df.rename(columns={\"frequency\": \"Frequency\"})\n",
26342657
" df = df.drop(columns=[\"code\", \"url\"])\n",
26352658
" return df\n",
@@ -2650,7 +2673,12 @@
26502673
"metadata": {},
26512674
"outputs": [],
26522675
"source": [
2653-
"cs61a_fa23_lint_all, cs61a_fa25_lint_all, datac88c_fa23_lint_all, datac88c_fa25_lint_all = get_slope_chart_lint_errors_dfs(results)"
2676+
"(\n",
2677+
" cs61a_fa23_lint_all,\n",
2678+
" cs61a_fa25_lint_all,\n",
2679+
" datac88c_fa23_lint_all,\n",
2680+
" datac88c_fa25_lint_all,\n",
2681+
") = get_slope_chart_lint_errors_dfs(results)"
26542682
]
26552683
},
26562684
{
@@ -2922,40 +2950,40 @@
29222950
"metadata": {},
29232951
"outputs": [],
29242952
"source": [
2925-
"def lint_errors_slope_chart(title: str, lint_error_df: pd.DataFrame, figure_size: tuple[int, int] = (6, 4)) -> ggplot:\n",
2926-
" lint_error_df['Semester'] = pd.Categorical(lint_error_df['Semester'], categories=['FA23', 'FA25'])\n",
2953+
"def lint_errors_slope_chart(\n",
2954+
" title: str, lint_error_df: pd.DataFrame, figure_size: tuple[int, int] = (6, 4)\n",
2955+
") -> ggplot:\n",
2956+
" lint_error_df[\"Semester\"] = pd.Categorical(\n",
2957+
" lint_error_df[\"Semester\"], categories=[\"FA23\", \"FA25\"]\n",
2958+
" )\n",
29272959
"\n",
29282960
" # Build the Slope Chart\n",
29292961
" slope_plot = (\n",
2930-
" ggplot(lint_error_df, aes(x='Semester', y='Frequency', group='Error', color='Error'))\n",
2931-
"\n",
2962+
" ggplot(\n",
2963+
" lint_error_df,\n",
2964+
" aes(x=\"Semester\", y=\"Frequency\", group=\"Error\", color=\"Error\"),\n",
2965+
" )\n",
29322966
" + geom_line(size=1.2)\n",
29332967
" + geom_point(size=3)\n",
2934-
"\n",
2935-
" + facet_wrap('~Course') # This creates 1 row, 2 columns automatically\n",
2936-
"\n",
2968+
" + facet_wrap(\"~Course\") # This creates 1 row, 2 columns automatically\n",
29372969
" + theme_minimal()\n",
29382970
" + theme(\n",
29392971
" figure_size=figure_size,\n",
2940-
"\n",
29412972
" # Center the title over the whole figure area\n",
29422973
" plot_title_position=\"plot\",\n",
2943-
"\n",
29442974
" # --- GRID LINE CUSTOMIZATION ---\n",
2945-
" panel_grid_major_y=element_line(color=\"grey\", linetype=\"dashed\", size=0.5, alpha=0.3),\n",
2946-
" panel_grid_minor_y=element_line(color=\"grey\", linetype=\"dashed\", size=0.2, alpha=0.3),\n",
2947-
"\n",
2975+
" panel_grid_major_y=element_line(\n",
2976+
" color=\"grey\", linetype=\"dashed\", size=0.5, alpha=0.3\n",
2977+
" ),\n",
2978+
" panel_grid_minor_y=element_line(\n",
2979+
" color=\"grey\", linetype=\"dashed\", size=0.2, alpha=0.3\n",
2980+
" ),\n",
29482981
" # Vertical lines at the FA23/FA25 positions\n",
29492982
" panel_grid_major_x=element_line(color=\"lightgrey\", size=1),\n",
2950-
"\n",
29512983
" axis_text_y=element_text(),\n",
29522984
" plot_margin=0.05,\n",
29532985
" )\n",
2954-
" + labs(\n",
2955-
" title=f\"{title} (All Backups)\",\n",
2956-
" x=\"\",\n",
2957-
" y=\"Frequency\"\n",
2958-
" )\n",
2986+
" + labs(title=f\"{title} (All Backups)\", x=\"\", y=\"Frequency\")\n",
29592987
" )\n",
29602988
"\n",
29612989
" return slope_plot"
@@ -2985,7 +3013,14 @@
29853013
}
29863014
],
29873015
"source": [
2988-
"lint_all_top3 = pd.concat([cs61a_fa23_lint_all[:3], cs61a_fa25_lint_all[:3], datac88c_fa23_lint_all[:3], datac88c_fa25_lint_all[:3]])\n",
3016+
"lint_all_top3 = pd.concat(\n",
3017+
" [\n",
3018+
" cs61a_fa23_lint_all[:3],\n",
3019+
" cs61a_fa25_lint_all[:3],\n",
3020+
" datac88c_fa23_lint_all[:3],\n",
3021+
" datac88c_fa25_lint_all[:3],\n",
3022+
" ]\n",
3023+
")\n",
29893024
"lint_errors_slope_chart(\"Top 3 Lint Errors\", lint_all_top3)"
29903025
]
29913026
},
@@ -3013,7 +3048,14 @@
30133048
}
30143049
],
30153050
"source": [
3016-
"lint_all_next3 = pd.concat([cs61a_fa23_lint_all[3:6], cs61a_fa25_lint_all[3:6], datac88c_fa23_lint_all[3:6], datac88c_fa25_lint_all[3:6]])\n",
3051+
"lint_all_next3 = pd.concat(\n",
3052+
" [\n",
3053+
" cs61a_fa23_lint_all[3:6],\n",
3054+
" cs61a_fa25_lint_all[3:6],\n",
3055+
" datac88c_fa23_lint_all[3:6],\n",
3056+
" datac88c_fa25_lint_all[3:6],\n",
3057+
" ]\n",
3058+
")\n",
30173059
"lint_errors_slope_chart(\"Top 4-6 Lint Errors\", lint_all_next3)"
30183060
]
30193061
},
@@ -3715,7 +3757,10 @@
37153757
" fig, axes = plt.subplots(1, 2, figsize=(12, 5))\n",
37163758
"\n",
37173759
" for i, course_name in enumerate([\"CS 61A\", \"DATA C88C\"]):\n",
3718-
" course_df = df[(df[\"Course Name\"] == course_name) & (df[\"Problem\"].isin(problem_colors.keys()))]\n",
3760+
" course_df = df[\n",
3761+
" (df[\"Course Name\"] == course_name)\n",
3762+
" & (df[\"Problem\"].isin(problem_colors.keys()))\n",
3763+
" ]\n",
37193764
"\n",
37203765
" # Plot median\n",
37213766
" sns.lineplot(\n",
@@ -3744,7 +3789,7 @@
37443789
" else:\n",
37453790
" axes[i].set_title(f\"(b) {course_name}\")\n",
37463791
"\n",
3747-
" axes[i].set_ylim((0, 45)) # force same y limits for easier comparison\n",
3792+
" axes[i].set_ylim((0, 45)) # force same y limits for easier comparison\n",
37483793
" axes[i].set_ylabel(\"Backups\")\n",
37493794
" axes[i].grid(axis=\"y\", linestyle=\"--\", alpha=0.7)\n",
37503795
"\n",

0 commit comments

Comments
 (0)