diff --git a/notebooks/cloc_code_stats.ipynb b/notebooks/cloc_code_stats.ipynb index c0ea934..10219f1 100644 --- a/notebooks/cloc_code_stats.ipynb +++ b/notebooks/cloc_code_stats.ipynb @@ -49,6 +49,7 @@ "from datetime import datetime\n", "from matplotlib import pyplot as plt\n", "from matplotlib import cm as cm\n", + "from collections import OrderedDict\n", "import pandas as pd\n", "%matplotlib inline" ] @@ -80,16 +81,7 @@ "date_range = pd.date_range(\n", " start=NWBGitInfo.NWB2_START_DATE if start_date is None else start_date,\n", " end=datetime.today() if end_date is None else end_date,\n", - " freq=\"D\")\n", - "\n", - "# Select the repos and their order for the summary plot with the lines of code\n", - "summary_plot_repos = [\n", - " 'PyNWB', 'HDMF', 'MatNWB',\n", - " 'NWB_Schema_Language', 'NWB_Schema', \n", - " 'HDMF_Common_Schema', 'HDMF_DocUtils', 'HDMF_Zarr',\n", - " 'NDX_Catalog', 'NDX_Template', 'NDX_Staged_Extensions', 'NDX_Extension_Smithy',\n", - " 'NWBWidgets', 'NWBInspector',\n", - " 'NeuroConv']" + " freq=\"D\")" ] }, { @@ -122,12 +114,51 @@ ")" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"Repository Keys:\")\n", + "print(summary_stats['codes'].keys().values)" + ] + }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## 3. Plot summary of the lines of code across all NWB repos\n", - "### 3.1. Plot version 1: Using default colors for repos" + "## 3. Plot summary of the lines of code across all NWB repos\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Define the grouping of the repos\n", + "summary_plot_repos_grouped = OrderedDict()\n", + "summary_plot_repos_grouped['NWB APIs'] = ['PyNWB', 'MatNWB', 'AqNWB']\n", + "summary_plot_repos_grouped['Data Modeling'] = ['HDMF', 'HDMF_Zarr', 'HDMF_Schema_Language', 'NWB_Schema_Language']\n", + "summary_plot_repos_grouped['Extension Tools'] = ['NDX_Catalog', 'NDX_Template', 'NDX_Extension_Smithy', 'NDX_Staged_Extensions', 'HDMF_DocUtils']\n", + "summary_plot_repos_grouped['Format Schema'] = ['NWB_Schema', 'HDMF_Common_Schema']\n", + "summary_plot_repos_grouped['Data Conversion'] = ['NeuroConv', 'NWBInspector', 'NWB_GUIDE']\n", + "summary_plot_repos_grouped['Cloud'] = ['LINDI', 'NWB_Benchmarks', 'NWBWidgets']\n", + "summary_plot_repos_grouped['Online Resources'] = ['NWB_Overview', 'NWB_Project_Analytics', 'Hackathons']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Create flat list of repos\n", + "summary_plot_repos = [repo \n", + " for repo_type in summary_plot_repos_grouped\n", + " for repo in summary_plot_repos_grouped[repo_type]\n", + " ] " ] }, { @@ -136,11 +167,45 @@ "metadata": {}, "outputs": [], "source": [ + "# Define base colors for each category\n", + "base_colors = {\n", + " 'NWB APIs': (0.121, 0.466, 0.705, 1.0), # Blue\n", + " 'Data Modeling': (1.000, 0.843, 0.000, 1.0), # Gold/Yellow\n", + " 'Data Conversion':(0.200, 0.627, 0.172, 1.0), # Green\n", + " 'Extension Tools':(1.000, 0.498, 0.054, 1.0), # Orange\n", + " 'Format Schema': (0.839, 0.153, 0.157, 1.0), # Red\n", + " 'Cloud': (0.580, 0.404, 0.741, 1.0), # Purple\n", + " 'Online Resources':(0.549, 0.337, 0.294, 1.0), # Brown\n", + "}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 3.1. Version 1: Using default colors for repos" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Create colors such that each repo is assigned a distinct color\n", "evenly_spaced_interval = np.linspace(0, 1, len(summary_plot_repos))\n", - "#colors = [cm.tab20(x) for x in evenly_spaced_interval]\n", - "colors = [cm.Paired(x) for x in evenly_spaced_interval]\n", + "colors = [cm.tab20(x) for x in evenly_spaced_interval]\n", + "#colors = [cm.Paired(x) for x in evenly_spaced_interval]\n", "# mix up colors so that neighbouring areas have more dissimilar colors\n", - "colors = [c for i, c in enumerate(colors) if i % 2 == 0] + [c for i, c in enumerate(colors) if i % 2 == 1]\n", + "colors = [c for i, c in enumerate(colors) if i % 2 == 0] + [c for i, c in enumerate(colors) if i % 2 == 1]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ "ax = summary_stats['sizes'][summary_plot_repos].plot.area(\n", " figsize=(18,10), \n", " stacked=True, \n", @@ -149,13 +214,14 @@ " color=colors)\n", "ax.get_yaxis().set_major_formatter(\n", " mpl.ticker.FuncFormatter(lambda x, p: format(int(x), ',')))\n", - "plt.legend(loc=2, prop={'size': 20})\n", + "plt.legend(loc=2, prop={'size': 16})\n", "plt.ylabel('Lines of Code', fontsize=24)\n", "plt.xlabel('Date', fontsize=24)\n", "plt.grid(color='black', linestyle='--', linewidth=0.7, axis='both')\n", "plt.tight_layout()\n", "if save_figs:\n", " plt.savefig(os.path.join(plot_dir, 'nwb_repo_sizes_all.pdf'))\n", + " plt.savefig(os.path.join(plot_dir, 'nwb_repo_sizes_all.png'), dpi=300)\n", "plt.title('NWB Code Repository Sizes', fontsize=20)\n", "plt.show()" ] @@ -164,9 +230,29 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### 3.1 Plot grouped summary of the lines of code across all NWB repos \n", + "### 3.2 Group by color type but keep repos separate" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "## Generate colors to visually group all repos by color and distinguish repos within\n", + "## each category based on their alpha value\n", + "# Function to generate colors with varying alpha values\n", + "def generate_colors(base_color, num_colors):\n", + " r, g, b, _ = base_color\n", + " alpha_step = 0.7 / (num_colors - 1) if num_colors > 1 else 0.7\n", + " return [(r, g, b, max(0.3, 1.0 - i * alpha_step)) for i in range(num_colors)]\n", "\n", - "For the paper we want to group tools to ease overview." + "# Generate colors for each category\n", + "colors = []\n", + "for category, repos in summary_plot_repos_grouped.items():\n", + " base_color = base_colors[category]\n", + " category_colors = generate_colors(base_color, len(repos))\n", + " colors.extend(category_colors)" ] }, { @@ -175,23 +261,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Sort repos so we can group them category\n", - "summary_plot_repos_accum = [\n", - " 'NWB_Schema', 'HDMF_Common_Schema', 'NWB_Schema_Language', \n", - " 'PyNWB', \n", - " 'HDMF',\n", - " 'MatNWB',\n", - " 'HDMF_DocUtils', 'NWBWidgets', 'NWBInspector',\n", - " 'HDMF_Zarr', 'NeuroConv',\n", - " 'NDX_Catalog', 'NDX_Template', 'NDX_Staged_Extensions', 'NDX_Extension_Smithy']\n", - "colors = [(0.7, 0.0, 0.0, 1.0), (0.7, 0.0, 0.0, 0.6), (0.7, 0.0, 0.0, 0.4),\n", - " (0.0, 0.5, 0.6, 1.0),\n", - " (0.0, 0.75, 0.85, 1.0),\n", - " (0.4, 1.0, 1.0 ,1.0),\n", - " (0.8, 0.4, 0.0, 1.0), (0.8, 0.4, 0.0, 0.7), (0.8, 0.4, 0.0, 0.5),\n", - " (0.8, 0.8, 0.2, 1.0), (0.8, 0.8, 0.2, 0.5),\n", - " (0.0, 0.0, 0.7, 1.0), (0.0, 0.0, 0.7, 0.85), (0.0, 0.0, 0.7, 0.7), (0.0, 0.0, 0.7, 0.55)]\n", - "ax = summary_stats['sizes'][summary_plot_repos_accum].plot.area(\n", + "ax = summary_stats['sizes'][summary_plot_repos].plot.area(\n", " figsize=(18,10), \n", " stacked=True, \n", " linewidth=0,\n", @@ -204,39 +274,50 @@ "plt.xlabel('Date', fontsize=24)\n", "plt.grid(color='black', linestyle='--', linewidth=0.7, axis='both')\n", "plt.tight_layout()\n", - "plt.legend(loc=2, prop={'size': 20,}, facecolor=(1.0, 1.0, 1.0, 1.0), framealpha=1.0)\n", + "plt.legend(loc=2, prop={'size': 16,}, facecolor=(1.0, 1.0, 1.0, 1.0), framealpha=1.0)\n", "if save_figs:\n", " plt.savefig(os.path.join(plot_dir, 'nwb_repo_sizes_all_grouped.pdf'))\n", + " plt.savefig(os.path.join(plot_dir, 'nwb_repo_sizes_all_grouped.png'), dpi=300)\n", "plt.title('NWB Code Repository Sizes', fontsize=20)\n", "plt.show()" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 3.3 Combine repos into broad categories" + ] + }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "repo_sizes_grouped_df = pd.DataFrame.from_dict(\n", - " {'Format Schema': (summary_stats['sizes']['NWB_Schema'] + \n", - " summary_stats['sizes']['HDMF_Common_Schema'] + \n", - " summary_stats['sizes']['NWB_Schema_Language']),\n", - " 'HDMF': summary_stats['sizes']['HDMF'],\n", - " 'PyNWB': summary_stats['sizes']['PyNWB'],\n", - " 'MatNWB': summary_stats['sizes']['MatNWB'],\n", - " 'NWB Tools': (summary_stats['sizes']['HDMF_DocUtils'] + \n", - " summary_stats['sizes']['NWBWidgets'] + \n", - " summary_stats['sizes']['NWBInspector'] + \n", - " summary_stats['sizes']['HDMF_Zarr'] + \n", - " summary_stats['sizes']['NeuroConv']),\n", - " 'NDX Catalog': (summary_stats['sizes']['NDX_Catalog'] +\n", - " summary_stats['sizes']['NDX_Template'] +\n", - " summary_stats['sizes']['NDX_Staged_Extensions'] +\n", - " summary_stats['sizes']['NDX_Extension_Smithy']),\n", - " }\n", - ")\n", - "colors = [(78, 92, 150), (81, 133, 189), (155, 187, 89), (115, 147, 49), (191, 80, 77), (207, 130, 58)]\n", - "colors = [ (c[0]/255.0, c[1]/255.0, c[2]/255.0, 1.0) for c in colors]\n", + "# Create DataFrame with the total lines of code for each category (instead of for each repo)\n", + "repo_sizes_grouped = OrderedDict()\n", + "for category, repos in summary_plot_repos_grouped.items():\n", + " category_size = None\n", + " for repo in repos:\n", + " if category_size is None:\n", + " category_size = summary_stats['sizes'][repo]\n", + " else:\n", + " category_size += summary_stats['sizes'][repo]\n", + " repo_sizes_grouped [category] = category_size\n", + "repo_sizes_grouped_df = pd.DataFrame.from_dict(repo_sizes_grouped)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# colors = [(78, 92, 150), (81, 133, 189), (155, 187, 89), (115, 147, 49), (191, 80, 77), (207, 130, 58)]\n", + "# colors = [ (c[0]/255.0, c[1]/255.0, c[2]/255.0, 1.0) for c in colors]\n", + "colors = [(c[0], c[1], c[2], 0.8) for c in base_colors.values()]\n", + "\n", "ax = repo_sizes_grouped_df.plot.area(\n", " figsize=(18,10), \n", " stacked=True, \n", @@ -253,6 +334,7 @@ "plt.legend(loc=2, prop={'size': 24,}, facecolor=(1.0, 1.0, 1.0, 1.0), framealpha=1.0)\n", "if save_figs:\n", " plt.savefig(os.path.join(plot_dir, 'nwb_repo_sizes_grouped.pdf'))\n", + " plt.savefig(os.path.join(plot_dir, 'nwb_repo_sizes_grouped.png'), dpi=300)\n", " \n", "plt.title('NWB Code Repository Sizes', fontsize=20)\n", "plt.show()" @@ -262,7 +344,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## 4. Plot per-repo total lines of code statistics broken down by: code, blank, comment" + "## 4. Plot per-repo stats\n", + "### 4.1 Plot total lines of code statistics broken down by: code, blank, comment" ] }, { @@ -295,7 +378,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## 4. Per-repo total lines of code statistics broken down by language type" + "## 4.2 Per-repo total lines of code statistics broken down by language type" ] }, { @@ -365,7 +448,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.7" + "version": "3.13.5" } }, "nbformat": 4, diff --git a/notebooks/release_timeline.ipynb b/notebooks/release_timeline.ipynb index 1b58482..c023cf3 100644 --- a/notebooks/release_timeline.ipynb +++ b/notebooks/release_timeline.ipynb @@ -27,7 +27,8 @@ "import time\n", "%matplotlib inline\n", "\n", - "plot_dir = os.path.join(os.getcwd(), 'plots')" + "plot_dir = os.path.join(os.getcwd(), 'plots') # save plots in a plots folder in the same folder as the notebook\n", + "cloc_data_dir = os.path.join(os.getcwd(), '../data') # data directory of the nwb-project-analytics repo" ] }, { @@ -38,9 +39,13 @@ "source": [ "github_repo_infos = NWBGitInfo.GIT_REPOS.get_info_objects()\n", "\n", - "# Select main repos for plotting\n", - "select_github_repo_infos = OrderedDict(\n", - " [(k, github_repo_infos[k]) \n", + "# Load release timelines\n", + "release_timelines = GitHubRepoInfo.releases_from_nwb(\n", + " cache_dir=cloc_data_dir,\n", + " read_cache=True,\n", + " write_cache=False)\n", + "select_release_timelines = OrderedDict(\n", + " [(k, release_timelines[k]) \n", " for k in ['PyNWB', 'HDMF', 'MatNWB', 'NWB_Schema', 'HDMF_Common_Schema']\n", " ]\n", ")" @@ -60,7 +65,7 @@ "outputs": [], "source": [ "RenderReleaseTimeline.plot_multiple_release_timeslines(\n", - " github_repo_infos=select_github_repo_infos,\n", + " release_timelines=select_release_timelines,\n", " add_releases=None, # Use default of NWBGitInfo.MISSING_RELEASE_TAGS,\n", " date_range=None, # Use the default range of\n", " month_intervals=2,\n", @@ -89,7 +94,6 @@ "from nwb_project_analytics.codestats import GitCodeStats\n", "import pandas as pd\n", "\n", - "cloc_data_dir = os.path.join(os.getcwd(), 'data')\n", "if GitCodeStats.cached(cloc_data_dir):\n", " git_code_stats = GitCodeStats.from_cache(cloc_data_dir)\n", " date_range = pd.date_range(start=NWBGitInfo.HDMF_START_DATE, \n", @@ -98,7 +102,7 @@ " code_summary_stats = git_code_stats.compute_summary_stats(date_range=date_range) \n", " # Clean up HDMF stats to ignore data from before HDMF was extracted from PyNWB\n", " for k in code_summary_stats.keys():\n", - " code_summary_stats[k]['HDMF'][:NWBGitInfo.HDMF_START_DATE] = 0\n", + " code_summary_stats[k].loc[:NWBGitInfo.HDMF_START_DATE, 'HDMF'] = 0\n", "else:\n", " git_code_stats = None\n", " print(\"No cached code stats available\")" @@ -162,6 +166,13 @@ " plt.show()\n" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, { "cell_type": "code", "execution_count": null, @@ -186,7 +197,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.7" + "version": "3.13.5" } }, "nbformat": 4, diff --git a/src/nwb_project_analytics/codestats.py b/src/nwb_project_analytics/codestats.py index aeaa0eb..c51236f 100644 --- a/src/nwb_project_analytics/codestats.py +++ b/src/nwb_project_analytics/codestats.py @@ -161,7 +161,7 @@ def from_nwb( if repo_startdate is not None: # Set all LOC values prior to the given date to 0 for k in summary_stats.keys(): - summary_stats[k][repo_key][:repo_startdate] = 0 + summary_stats[k].loc[:repo_startdate, repo_key] = 0 # also update the per-language stats for the repo datemask = (per_repo_lang_stats[repo_key].index < repo_startdate) per_repo_lang_stats[repo_key].loc[datemask] = 0