Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
201 changes: 142 additions & 59 deletions notebooks/cloc_code_stats.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
"from datetime import datetime\n",
"from matplotlib import pyplot as plt\n",
"from matplotlib import cm as cm\n",
"from collections import OrderedDict\n",
"import pandas as pd\n",
"%matplotlib inline"
]
Expand Down Expand Up @@ -80,16 +81,7 @@
"date_range = pd.date_range(\n",
" start=NWBGitInfo.NWB2_START_DATE if start_date is None else start_date,\n",
" end=datetime.today() if end_date is None else end_date,\n",
" freq=\"D\")\n",
"\n",
"# Select the repos and their order for the summary plot with the lines of code\n",
"summary_plot_repos = [\n",
" 'PyNWB', 'HDMF', 'MatNWB',\n",
" 'NWB_Schema_Language', 'NWB_Schema', \n",
" 'HDMF_Common_Schema', 'HDMF_DocUtils', 'HDMF_Zarr',\n",
" 'NDX_Catalog', 'NDX_Template', 'NDX_Staged_Extensions', 'NDX_Extension_Smithy',\n",
" 'NWBWidgets', 'NWBInspector',\n",
" 'NeuroConv']"
" freq=\"D\")"
]
},
{
Expand Down Expand Up @@ -122,12 +114,51 @@
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(\"Repository Keys:\")\n",
"print(summary_stats['codes'].keys().values)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 3. Plot summary of the lines of code across all NWB repos\n",
"### 3.1. Plot version 1: Using default colors for repos"
"## 3. Plot summary of the lines of code across all NWB repos\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Define the grouping of the repos\n",
"summary_plot_repos_grouped = OrderedDict()\n",
"summary_plot_repos_grouped['NWB APIs'] = ['PyNWB', 'MatNWB', 'AqNWB']\n",
"summary_plot_repos_grouped['Data Modeling'] = ['HDMF', 'HDMF_Zarr', 'HDMF_Schema_Language', 'NWB_Schema_Language']\n",
"summary_plot_repos_grouped['Extension Tools'] = ['NDX_Catalog', 'NDX_Template', 'NDX_Extension_Smithy', 'NDX_Staged_Extensions', 'HDMF_DocUtils']\n",
"summary_plot_repos_grouped['Format Schema'] = ['NWB_Schema', 'HDMF_Common_Schema']\n",
"summary_plot_repos_grouped['Data Conversion'] = ['NeuroConv', 'NWBInspector', 'NWB_GUIDE']\n",
"summary_plot_repos_grouped['Cloud'] = ['LINDI', 'NWB_Benchmarks', 'NWBWidgets']\n",
"summary_plot_repos_grouped['Online Resources'] = ['NWB_Overview', 'NWB_Project_Analytics', 'Hackathons']"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Create flat list of repos\n",
"summary_plot_repos = [repo \n",
" for repo_type in summary_plot_repos_grouped\n",
" for repo in summary_plot_repos_grouped[repo_type]\n",
" ] "
]
},
{
Expand All @@ -136,11 +167,45 @@
"metadata": {},
"outputs": [],
"source": [
"# Define base colors for each category\n",
"base_colors = {\n",
" 'NWB APIs': (0.121, 0.466, 0.705, 1.0), # Blue\n",
" 'Data Modeling': (1.000, 0.843, 0.000, 1.0), # Gold/Yellow\n",
" 'Data Conversion':(0.200, 0.627, 0.172, 1.0), # Green\n",
" 'Extension Tools':(1.000, 0.498, 0.054, 1.0), # Orange\n",
" 'Format Schema': (0.839, 0.153, 0.157, 1.0), # Red\n",
" 'Cloud': (0.580, 0.404, 0.741, 1.0), # Purple\n",
" 'Online Resources':(0.549, 0.337, 0.294, 1.0), # Brown\n",
"}"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 3.1. Version 1: Using default colors for repos"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Create colors such that each repo is assigned a distinct color\n",
"evenly_spaced_interval = np.linspace(0, 1, len(summary_plot_repos))\n",
"#colors = [cm.tab20(x) for x in evenly_spaced_interval]\n",
"colors = [cm.Paired(x) for x in evenly_spaced_interval]\n",
"colors = [cm.tab20(x) for x in evenly_spaced_interval]\n",
"#colors = [cm.Paired(x) for x in evenly_spaced_interval]\n",
"# mix up colors so that neighbouring areas have more dissimilar colors\n",
"colors = [c for i, c in enumerate(colors) if i % 2 == 0] + [c for i, c in enumerate(colors) if i % 2 == 1]\n",
"colors = [c for i, c in enumerate(colors) if i % 2 == 0] + [c for i, c in enumerate(colors) if i % 2 == 1]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ax = summary_stats['sizes'][summary_plot_repos].plot.area(\n",
" figsize=(18,10), \n",
" stacked=True, \n",
Expand All @@ -149,13 +214,14 @@
" color=colors)\n",
"ax.get_yaxis().set_major_formatter(\n",
" mpl.ticker.FuncFormatter(lambda x, p: format(int(x), ',')))\n",
"plt.legend(loc=2, prop={'size': 20})\n",
"plt.legend(loc=2, prop={'size': 16})\n",
"plt.ylabel('Lines of Code', fontsize=24)\n",
"plt.xlabel('Date', fontsize=24)\n",
"plt.grid(color='black', linestyle='--', linewidth=0.7, axis='both')\n",
"plt.tight_layout()\n",
"if save_figs:\n",
" plt.savefig(os.path.join(plot_dir, 'nwb_repo_sizes_all.pdf'))\n",
" plt.savefig(os.path.join(plot_dir, 'nwb_repo_sizes_all.png'), dpi=300)\n",
"plt.title('NWB Code Repository Sizes', fontsize=20)\n",
"plt.show()"
]
Expand All @@ -164,9 +230,29 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"### 3.1 Plot grouped summary of the lines of code across all NWB repos \n",
"### 3.2 Color repos by category but keep repos separate"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"## Generate colors to visually group all repos by color and distinguish repos within\n",
"## each category based on their alpha value\n",
"# Function to generate colors with varying alpha values\n",
"def generate_colors(base_color, num_colors):\n",
"    \"\"\"Build `num_colors` RGBA tuples that share the RGB channels of `base_color`.\n",
"\n",
"    The alpha channel of `base_color` is ignored. The first color gets alpha 1.0 and\n",
"    each following color is reduced by an equal step, never dropping below 0.3.\n",
"    \"\"\"\n",
"    red, green, blue, _ = base_color\n",
"    if num_colors > 1:\n",
"        alpha_step = 0.7 / (num_colors - 1)\n",
"    else:\n",
"        alpha_step = 0.7\n",
"    shades = []\n",
"    for idx in range(num_colors):\n",
"        alpha = max(0.3, 1.0 - idx * alpha_step)\n",
"        shades.append((red, green, blue, alpha))\n",
"    return shades\n",
"\n",
"For the paper we want to group tools to ease overview."
"# Generate colors for each category\n",
"colors = []\n",
"for category, repos in summary_plot_repos_grouped.items():\n",
" base_color = base_colors[category]\n",
" category_colors = generate_colors(base_color, len(repos))\n",
" colors.extend(category_colors)"
]
},
{
Expand All @@ -175,23 +261,7 @@
"metadata": {},
"outputs": [],
"source": [
"# Sort repos so we can group them category\n",
"summary_plot_repos_accum = [\n",
" 'NWB_Schema', 'HDMF_Common_Schema', 'NWB_Schema_Language', \n",
" 'PyNWB', \n",
" 'HDMF',\n",
" 'MatNWB',\n",
" 'HDMF_DocUtils', 'NWBWidgets', 'NWBInspector',\n",
" 'HDMF_Zarr', 'NeuroConv',\n",
" 'NDX_Catalog', 'NDX_Template', 'NDX_Staged_Extensions', 'NDX_Extension_Smithy']\n",
"colors = [(0.7, 0.0, 0.0, 1.0), (0.7, 0.0, 0.0, 0.6), (0.7, 0.0, 0.0, 0.4),\n",
" (0.0, 0.5, 0.6, 1.0),\n",
" (0.0, 0.75, 0.85, 1.0),\n",
" (0.4, 1.0, 1.0 ,1.0),\n",
" (0.8, 0.4, 0.0, 1.0), (0.8, 0.4, 0.0, 0.7), (0.8, 0.4, 0.0, 0.5),\n",
" (0.8, 0.8, 0.2, 1.0), (0.8, 0.8, 0.2, 0.5),\n",
" (0.0, 0.0, 0.7, 1.0), (0.0, 0.0, 0.7, 0.85), (0.0, 0.0, 0.7, 0.7), (0.0, 0.0, 0.7, 0.55)]\n",
"ax = summary_stats['sizes'][summary_plot_repos_accum].plot.area(\n",
"ax = summary_stats['sizes'][summary_plot_repos].plot.area(\n",
" figsize=(18,10), \n",
" stacked=True, \n",
" linewidth=0,\n",
Expand All @@ -204,39 +274,50 @@
"plt.xlabel('Date', fontsize=24)\n",
"plt.grid(color='black', linestyle='--', linewidth=0.7, axis='both')\n",
"plt.tight_layout()\n",
"plt.legend(loc=2, prop={'size': 20,}, facecolor=(1.0, 1.0, 1.0, 1.0), framealpha=1.0)\n",
"plt.legend(loc=2, prop={'size': 16,}, facecolor=(1.0, 1.0, 1.0, 1.0), framealpha=1.0)\n",
"if save_figs:\n",
" plt.savefig(os.path.join(plot_dir, 'nwb_repo_sizes_all_grouped.pdf'))\n",
" plt.savefig(os.path.join(plot_dir, 'nwb_repo_sizes_all_grouped.png'), dpi=300)\n",
"plt.title('NWB Code Repository Sizes', fontsize=20)\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 3.3 Combine repos into broad categories"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"repo_sizes_grouped_df = pd.DataFrame.from_dict(\n",
" {'Format Schema': (summary_stats['sizes']['NWB_Schema'] + \n",
" summary_stats['sizes']['HDMF_Common_Schema'] + \n",
" summary_stats['sizes']['NWB_Schema_Language']),\n",
" 'HDMF': summary_stats['sizes']['HDMF'],\n",
" 'PyNWB': summary_stats['sizes']['PyNWB'],\n",
" 'MatNWB': summary_stats['sizes']['MatNWB'],\n",
" 'NWB Tools': (summary_stats['sizes']['HDMF_DocUtils'] + \n",
" summary_stats['sizes']['NWBWidgets'] + \n",
" summary_stats['sizes']['NWBInspector'] + \n",
" summary_stats['sizes']['HDMF_Zarr'] + \n",
" summary_stats['sizes']['NeuroConv']),\n",
" 'NDX Catalog': (summary_stats['sizes']['NDX_Catalog'] +\n",
" summary_stats['sizes']['NDX_Template'] +\n",
" summary_stats['sizes']['NDX_Staged_Extensions'] +\n",
" summary_stats['sizes']['NDX_Extension_Smithy']),\n",
" }\n",
")\n",
"colors = [(78, 92, 150), (81, 133, 189), (155, 187, 89), (115, 147, 49), (191, 80, 77), (207, 130, 58)]\n",
"colors = [ (c[0]/255.0, c[1]/255.0, c[2]/255.0, 1.0) for c in colors]\n",
"# Create DataFrame with the total lines of code for each category (instead of for each repo).\n",
"# Each category total is built as a NEW Series from a column selection + row-wise sum.\n",
"# Accumulating with `+=` on summary_stats['sizes'][repo] operates in place on that column\n",
"# Series and can write the running total back into summary_stats['sizes'], which corrupts\n",
"# the per-repo data for later cells and makes this cell non-idempotent when re-run.\n",
"# skipna=False keeps the NaN propagation that a chain of `+` would have.\n",
"repo_sizes_grouped = OrderedDict()\n",
"for category, repos in summary_plot_repos_grouped.items():\n",
"    repo_sizes_grouped[category] = summary_stats['sizes'][repos].sum(axis=1, skipna=False)\n",
"repo_sizes_grouped_df = pd.DataFrame.from_dict(repo_sizes_grouped)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# colors = [(78, 92, 150), (81, 133, 189), (155, 187, 89), (115, 147, 49), (191, 80, 77), (207, 130, 58)]\n",
"# colors = [ (c[0]/255.0, c[1]/255.0, c[2]/255.0, 1.0) for c in colors]\n",
"colors = [(c[0], c[1], c[2], 0.8) for c in base_colors.values()]\n",
"\n",
"ax = repo_sizes_grouped_df.plot.area(\n",
" figsize=(18,10), \n",
" stacked=True, \n",
Expand All @@ -253,6 +334,7 @@
"plt.legend(loc=2, prop={'size': 24,}, facecolor=(1.0, 1.0, 1.0, 1.0), framealpha=1.0)\n",
"if save_figs:\n",
" plt.savefig(os.path.join(plot_dir, 'nwb_repo_sizes_grouped.pdf'))\n",
" plt.savefig(os.path.join(plot_dir, 'nwb_repo_sizes_grouped.png'), dpi=300)\n",
" \n",
"plt.title('NWB Code Repository Sizes', fontsize=20)\n",
"plt.show()"
Expand All @@ -262,7 +344,8 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## 4. Plot per-repo total lines of code statistics broken down by: code, blank, comment"
"## 4. Plot per-repo stats\n",
"### 4.1 Plot total lines of code statistics broken down by: code, blank, comment"
]
},
{
Expand Down Expand Up @@ -295,7 +378,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## 4. Per-repo total lines of code statistics broken down by language type"
"### 4.2 Per-repo total lines of code statistics broken down by language type"
]
},
{
Expand Down Expand Up @@ -365,7 +448,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
"version": "3.13.5"
}
},
"nbformat": 4,
Expand Down
27 changes: 19 additions & 8 deletions notebooks/release_timeline.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@
"import time\n",
"%matplotlib inline\n",
"\n",
"plot_dir = os.path.join(os.getcwd(), 'plots')"
"plot_dir = os.path.join(os.getcwd(), 'plots') # save plots in a plots folder in the same folder as the notebook\n",
"cloc_data_dir = os.path.join(os.getcwd(), '../data') # data directory of the nwb-project-analytics repo"
]
},
{
Expand All @@ -38,9 +39,13 @@
"source": [
"github_repo_infos = NWBGitInfo.GIT_REPOS.get_info_objects()\n",
"\n",
"# Select main repos for plotting\n",
"select_github_repo_infos = OrderedDict(\n",
" [(k, github_repo_infos[k]) \n",
"# Load release timelines\n",
"release_timelines = GitHubRepoInfo.releases_from_nwb(\n",
" cache_dir=cloc_data_dir,\n",
" read_cache=True,\n",
" write_cache=False)\n",
"select_release_timelines = OrderedDict(\n",
" [(k, release_timelines[k]) \n",
" for k in ['PyNWB', 'HDMF', 'MatNWB', 'NWB_Schema', 'HDMF_Common_Schema']\n",
" ]\n",
")"
Expand All @@ -60,7 +65,7 @@
"outputs": [],
"source": [
"RenderReleaseTimeline.plot_multiple_release_timeslines(\n",
" github_repo_infos=select_github_repo_infos,\n",
" release_timelines=select_release_timelines,\n",
" add_releases=None, # Use default of NWBGitInfo.MISSING_RELEASE_TAGS,\n",
"    date_range=None, # Use the default date range\n",
" month_intervals=2,\n",
Expand Down Expand Up @@ -89,7 +94,6 @@
"from nwb_project_analytics.codestats import GitCodeStats\n",
"import pandas as pd\n",
"\n",
"cloc_data_dir = os.path.join(os.getcwd(), 'data')\n",
"if GitCodeStats.cached(cloc_data_dir):\n",
" git_code_stats = GitCodeStats.from_cache(cloc_data_dir)\n",
" date_range = pd.date_range(start=NWBGitInfo.HDMF_START_DATE, \n",
Expand All @@ -98,7 +102,7 @@
" code_summary_stats = git_code_stats.compute_summary_stats(date_range=date_range) \n",
" # Clean up HDMF stats to ignore data from before HDMF was extracted from PyNWB\n",
" for k in code_summary_stats.keys():\n",
" code_summary_stats[k]['HDMF'][:NWBGitInfo.HDMF_START_DATE] = 0\n",
" code_summary_stats[k].loc[:NWBGitInfo.HDMF_START_DATE, 'HDMF'] = 0\n",
"else:\n",
" git_code_stats = None\n",
" print(\"No cached code stats available\")"
Expand Down Expand Up @@ -162,6 +166,13 @@
" plt.show()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
Expand All @@ -186,7 +197,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
"version": "3.13.5"
}
},
"nbformat": 4,
Expand Down
2 changes: 1 addition & 1 deletion src/nwb_project_analytics/codestats.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ def from_nwb(
if repo_startdate is not None:
# Set all LOC values prior to the given date to 0
for k in summary_stats.keys():
summary_stats[k][repo_key][:repo_startdate] = 0
summary_stats[k].loc[:repo_startdate, repo_key] = 0
# also update the per-language stats for the repo
datemask = (per_repo_lang_stats[repo_key].index < repo_startdate)
per_repo_lang_stats[repo_key].loc[datemask] = 0
Expand Down