|
31 | 31 | "import matplotlib.pyplot as plt\n", |
32 | 32 | "import matplotlib.ticker as ticker\n", |
33 | 33 | "from tqdm import tqdm\n", |
34 | | - "from plotnine import ggplot, aes, geom_line, geom_point, theme_minimal, theme, labs, element_line, element_text, facet_wrap\n", |
| 34 | + "from plotnine import (\n", |
| 35 | + " ggplot,\n", |
| 36 | + " aes,\n", |
| 37 | + " geom_line,\n", |
| 38 | + " geom_point,\n", |
| 39 | + " theme_minimal,\n", |
| 40 | + " theme,\n", |
| 41 | + " labs,\n", |
| 42 | + " element_line,\n", |
| 43 | + " element_text,\n", |
| 44 | + " facet_wrap,\n", |
| 45 | + ")\n", |
35 | 46 | "\n", |
36 | 47 | "import sqlite3\n", |
37 | 48 | "import json\n", |
|
185 | 196 | " # : : matches the colon\n", |
186 | 197 | " # [^>]+ : matches the value and anything else until the closing bracket\n", |
187 | 198 | " # > : matches the closing bracket\n", |
188 | | - " return re.sub(r'<([^:]+):[^>]+>', r'\\1', s)" |
| 199 | + " return re.sub(r\"<([^:]+):[^>]+>\", r\"\\1\", s)" |
189 | 200 | ] |
190 | 201 | }, |
191 | 202 | { |
|
1667 | 1678 | " course_df = mtbb_df[mtbb_df[\"Course\"] == course_name]\n", |
1668 | 1679 | "\n", |
1669 | 1680 | " # Plot median\n", |
1670 | | - " line, = plt.plot(\n", |
| 1681 | + " (line,) = plt.plot(\n", |
1671 | 1682 | " course_df[\"Term\"],\n", |
1672 | 1683 | " course_df[\"Median time between backups (sec)\"],\n", |
1673 | | - " marker='o',\n", |
1674 | | - " label=f\"{course_name}\"\n", |
| 1684 | + " marker=\"o\",\n", |
| 1685 | + " label=f\"{course_name}\",\n", |
1675 | 1686 | " )\n", |
1676 | 1687 | "\n", |
1677 | 1688 | " # Plot IQR\n", |
|
1686 | 1697 | "plt.title(\"Median Duration Between Backups Over Time\")\n", |
1687 | 1698 | "plt.xlabel(\"Term\")\n", |
1688 | 1699 | "plt.ylabel(\"Seconds\")\n", |
1689 | | - "plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')\n", |
1690 | | - "plt.grid(axis='y', linestyle='--', alpha=0.7)\n", |
| 1700 | + "plt.legend(bbox_to_anchor=(1.05, 1), loc=\"upper left\")\n", |
| 1701 | + "plt.grid(axis=\"y\", linestyle=\"--\", alpha=0.7)\n", |
1691 | 1702 | "plt.show()" |
1692 | 1703 | ] |
1693 | 1704 | }, |
|
1978 | 1989 | " course_df = mnnla_df[mnnla_df[\"Course\"] == course_name]\n", |
1979 | 1990 | "\n", |
1980 | 1991 | " # Plot median\n", |
1981 | | - " line, = plt.plot(\n", |
| 1992 | + " (line,) = plt.plot(\n", |
1982 | 1993 | " course_df[\"Term\"],\n", |
1983 | 1994 | " course_df[\"Median net number of lines added\"],\n", |
1984 | | - " marker='o',\n", |
1985 | | - " label=f\"{course_name}\"\n", |
| 1995 | + " marker=\"o\",\n", |
| 1996 | + " label=f\"{course_name}\",\n", |
1986 | 1997 | " )\n", |
1987 | 1998 | "\n", |
1988 | 1999 | " # Plot IQR\n", |
|
1997 | 2008 | "plt.title(\"Median Net Number of Lines Added\")\n", |
1998 | 2009 | "plt.xlabel(\"Term\")\n", |
1999 | 2010 | "plt.ylabel(\"Lines\")\n", |
2000 | | - "plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')\n", |
2001 | | - "plt.grid(axis='y', linestyle='--', alpha=0.7)\n", |
| 2011 | + "plt.legend(bbox_to_anchor=(1.05, 1), loc=\"upper left\")\n", |
| 2012 | + "plt.grid(axis=\"y\", linestyle=\"--\", alpha=0.7)\n", |
2002 | 2013 | "plt.show()" |
2003 | 2014 | ] |
2004 | 2015 | }, |
|
2400 | 2411 | ], |
2401 | 2412 | "source": [ |
2402 | 2413 | "ax = sns.lineplot(\n", |
2403 | | - " data=tleab, x=\"Term\", y=\"Total lint errors (all backups and students)\", hue=\"Course\", marker=\"o\"\n", |
| 2414 | + " data=tleab,\n", |
| 2415 | + " x=\"Term\",\n", |
| 2416 | + " y=\"Total lint errors (all backups and students)\",\n", |
| 2417 | + " hue=\"Course\",\n", |
| 2418 | + " marker=\"o\",\n", |
2404 | 2419 | ")\n", |
2405 | 2420 | "plt.title(\"Total number of lint errors across (all backups)\")\n", |
2406 | 2421 | "plt.ylabel(\"Lint errors\")\n", |
|
2409 | 2424 | "# scientific notation on y-axis since numbers are large\n", |
2410 | 2425 | "formatter = ticker.ScalarFormatter(useMathText=True)\n", |
2411 | 2426 | "formatter.set_scientific(True)\n", |
2412 | | - "formatter.set_powerlimits((-1, 1)) # Forces sci notation for anything >10 or <0.1\n", |
| 2427 | + "formatter.set_powerlimits((-1, 1)) # Forces sci notation for anything >10 or <0.1\n", |
2413 | 2428 | "ax.yaxis.set_major_formatter(formatter)\n", |
2414 | 2429 | "\n", |
2415 | 2430 | "plt.show()" |
|
2615 | 2630 | " else:\n", |
2616 | 2631 | " year = 2025\n", |
2617 | 2632 | "\n", |
2618 | | - " df = results[\"lint_error_freqs_all_backups\"][Course(is_cs61a, Term.FALL, year)].copy()\n", |
| 2633 | + " df = results[\"lint_error_freqs_all_backups\"][\n", |
| 2634 | + " Course(is_cs61a, Term.FALL, year)\n", |
| 2635 | + " ].copy()\n", |
2619 | 2636 | "\n", |
2620 | | - " df['Course'] = 'CS 61A' if is_cs61a else 'DATA C88C'\n", |
| 2637 | + " df[\"Course\"] = \"CS 61A\" if is_cs61a else \"DATA C88C\"\n", |
2621 | 2638 | "\n", |
2622 | 2639 | " # Define the standard naming pattern\n", |
2623 | | - " error_str = df['code'] + \": \" + df['url'].str.removeprefix(\"https://docs.astral.sh/ruff/rules/\")\n", |
| 2640 | + " error_str = (\n", |
| 2641 | + " df[\"code\"]\n", |
| 2642 | + " + \": \"\n", |
| 2643 | + " + df[\"url\"].str.removeprefix(\"https://docs.astral.sh/ruff/rules/\")\n", |
| 2644 | + " )\n", |
2624 | 2645 | "\n", |
2625 | 2646 | " # If code is 'invalid-syntax', use 'invalid-syntax', otherwise use the standard pattern\n", |
2626 | 2647 | " # because otherwise url is NaN\n", |
2627 | | - " df['Error'] = np.where(df['code'] == 'invalid-syntax', 'invalid-syntax', error_str)\n", |
| 2648 | + " df[\"Error\"] = np.where(\n", |
| 2649 | + " df[\"code\"] == \"invalid-syntax\", \"invalid-syntax\", error_str\n", |
| 2650 | + " )\n", |
2628 | 2651 | "\n", |
2629 | 2652 | " if is_fa23:\n", |
2630 | | - " df['Semester'] = 'FA23'\n", |
| 2653 | + " df[\"Semester\"] = \"FA23\"\n", |
2631 | 2654 | " else:\n", |
2632 | | - " df['Semester'] = 'FA25'\n", |
| 2655 | + " df[\"Semester\"] = \"FA25\"\n", |
2633 | 2656 | " df = df.rename(columns={\"frequency\": \"Frequency\"})\n", |
2634 | 2657 | " df = df.drop(columns=[\"code\", \"url\"])\n", |
2635 | 2658 | " return df\n", |
|
2650 | 2673 | "metadata": {}, |
2651 | 2674 | "outputs": [], |
2652 | 2675 | "source": [ |
2653 | | - "cs61a_fa23_lint_all, cs61a_fa25_lint_all, datac88c_fa23_lint_all, datac88c_fa25_lint_all = get_slope_chart_lint_errors_dfs(results)" |
| 2676 | + "(\n", |
| 2677 | + " cs61a_fa23_lint_all,\n", |
| 2678 | + " cs61a_fa25_lint_all,\n", |
| 2679 | + " datac88c_fa23_lint_all,\n", |
| 2680 | + " datac88c_fa25_lint_all,\n", |
| 2681 | + ") = get_slope_chart_lint_errors_dfs(results)" |
2654 | 2682 | ] |
2655 | 2683 | }, |
2656 | 2684 | { |
|
2922 | 2950 | "metadata": {}, |
2923 | 2951 | "outputs": [], |
2924 | 2952 | "source": [ |
2925 | | - "def lint_errors_slope_chart(title: str, lint_error_df: pd.DataFrame, figure_size: tuple[int, int] = (6, 4)) -> ggplot:\n", |
2926 | | - " lint_error_df['Semester'] = pd.Categorical(lint_error_df['Semester'], categories=['FA23', 'FA25'])\n", |
| 2953 | + "def lint_errors_slope_chart(\n", |
| 2954 | + " title: str, lint_error_df: pd.DataFrame, figure_size: tuple[int, int] = (6, 4)\n", |
| 2955 | + ") -> ggplot:\n", |
| 2956 | + " lint_error_df[\"Semester\"] = pd.Categorical(\n", |
| 2957 | + " lint_error_df[\"Semester\"], categories=[\"FA23\", \"FA25\"]\n", |
| 2958 | + " )\n", |
2927 | 2959 | "\n", |
2928 | 2960 | " # Build the Slope Chart\n", |
2929 | 2961 | " slope_plot = (\n", |
2930 | | - " ggplot(lint_error_df, aes(x='Semester', y='Frequency', group='Error', color='Error'))\n", |
2931 | | - "\n", |
| 2962 | + " ggplot(\n", |
| 2963 | + " lint_error_df,\n", |
| 2964 | + " aes(x=\"Semester\", y=\"Frequency\", group=\"Error\", color=\"Error\"),\n", |
| 2965 | + " )\n", |
2932 | 2966 | " + geom_line(size=1.2)\n", |
2933 | 2967 | " + geom_point(size=3)\n", |
2934 | | - "\n", |
2935 | | - " + facet_wrap('~Course') # This creates 1 row, 2 columns automatically\n", |
2936 | | - "\n", |
| 2968 | + " + facet_wrap(\"~Course\") # This creates 1 row, 2 columns automatically\n", |
2937 | 2969 | " + theme_minimal()\n", |
2938 | 2970 | " + theme(\n", |
2939 | 2971 | " figure_size=figure_size,\n", |
2940 | | - "\n", |
2941 | 2972 | " # Center the title over the whole figure area\n", |
2942 | 2973 | " plot_title_position=\"plot\",\n", |
2943 | | - "\n", |
2944 | 2974 | " # --- GRID LINE CUSTOMIZATION ---\n", |
2945 | | - " panel_grid_major_y=element_line(color=\"grey\", linetype=\"dashed\", size=0.5, alpha=0.3),\n", |
2946 | | - " panel_grid_minor_y=element_line(color=\"grey\", linetype=\"dashed\", size=0.2, alpha=0.3),\n", |
2947 | | - "\n", |
| 2975 | + " panel_grid_major_y=element_line(\n", |
| 2976 | + " color=\"grey\", linetype=\"dashed\", size=0.5, alpha=0.3\n", |
| 2977 | + " ),\n", |
| 2978 | + " panel_grid_minor_y=element_line(\n", |
| 2979 | + " color=\"grey\", linetype=\"dashed\", size=0.2, alpha=0.3\n", |
| 2980 | + " ),\n", |
2948 | 2981 | " # Vertical lines at the FA23/FA25 positions\n", |
2949 | 2982 | " panel_grid_major_x=element_line(color=\"lightgrey\", size=1),\n", |
2950 | | - "\n", |
2951 | 2983 | " axis_text_y=element_text(),\n", |
2952 | 2984 | " plot_margin=0.05,\n", |
2953 | 2985 | " )\n", |
2954 | | - " + labs(\n", |
2955 | | - " title=f\"{title} (All Backups)\",\n", |
2956 | | - " x=\"\",\n", |
2957 | | - " y=\"Frequency\"\n", |
2958 | | - " )\n", |
| 2986 | + " + labs(title=f\"{title} (All Backups)\", x=\"\", y=\"Frequency\")\n", |
2959 | 2987 | " )\n", |
2960 | 2988 | "\n", |
2961 | 2989 | " return slope_plot" |
|
2985 | 3013 | } |
2986 | 3014 | ], |
2987 | 3015 | "source": [ |
2988 | | - "lint_all_top3 = pd.concat([cs61a_fa23_lint_all[:3], cs61a_fa25_lint_all[:3], datac88c_fa23_lint_all[:3], datac88c_fa25_lint_all[:3]])\n", |
| 3016 | + "lint_all_top3 = pd.concat(\n", |
| 3017 | + " [\n", |
| 3018 | + " cs61a_fa23_lint_all[:3],\n", |
| 3019 | + " cs61a_fa25_lint_all[:3],\n", |
| 3020 | + " datac88c_fa23_lint_all[:3],\n", |
| 3021 | + " datac88c_fa25_lint_all[:3],\n", |
| 3022 | + " ]\n", |
| 3023 | + ")\n", |
2989 | 3024 | "lint_errors_slope_chart(\"Top 3 Lint Errors\", lint_all_top3)" |
2990 | 3025 | ] |
2991 | 3026 | }, |
|
3013 | 3048 | } |
3014 | 3049 | ], |
3015 | 3050 | "source": [ |
3016 | | - "lint_all_next3 = pd.concat([cs61a_fa23_lint_all[3:6], cs61a_fa25_lint_all[3:6], datac88c_fa23_lint_all[3:6], datac88c_fa25_lint_all[3:6]])\n", |
| 3051 | + "lint_all_next3 = pd.concat(\n", |
| 3052 | + " [\n", |
| 3053 | + " cs61a_fa23_lint_all[3:6],\n", |
| 3054 | + " cs61a_fa25_lint_all[3:6],\n", |
| 3055 | + " datac88c_fa23_lint_all[3:6],\n", |
| 3056 | + " datac88c_fa25_lint_all[3:6],\n", |
| 3057 | + " ]\n", |
| 3058 | + ")\n", |
3017 | 3059 | "lint_errors_slope_chart(\"Top 4-6 Lint Errors\", lint_all_next3)" |
3018 | 3060 | ] |
3019 | 3061 | }, |
|
3715 | 3757 | " fig, axes = plt.subplots(1, 2, figsize=(12, 5))\n", |
3716 | 3758 | "\n", |
3717 | 3759 | " for i, course_name in enumerate([\"CS 61A\", \"DATA C88C\"]):\n", |
3718 | | - " course_df = df[(df[\"Course Name\"] == course_name) & (df[\"Problem\"].isin(problem_colors.keys()))]\n", |
| 3760 | + " course_df = df[\n", |
| 3761 | + " (df[\"Course Name\"] == course_name)\n", |
| 3762 | + " & (df[\"Problem\"].isin(problem_colors.keys()))\n", |
| 3763 | + " ]\n", |
3719 | 3764 | "\n", |
3720 | 3765 | " # Plot median\n", |
3721 | 3766 | " sns.lineplot(\n", |
|
3744 | 3789 | " else:\n", |
3745 | 3790 | " axes[i].set_title(f\"(b) {course_name}\")\n", |
3746 | 3791 | "\n", |
3747 | | - " axes[i].set_ylim((0, 45)) # force same y limits for easier comparison\n", |
| 3792 | + " axes[i].set_ylim((0, 45)) # force same y limits for easier comparison\n", |
3748 | 3793 | " axes[i].set_ylabel(\"Backups\")\n", |
3749 | 3794 | " axes[i].grid(axis=\"y\", linestyle=\"--\", alpha=0.7)\n", |
3750 | 3795 | "\n", |
|
0 commit comments