Skip to content

Commit a1006b8

Browse files
committed
Update cloc_code_stats notebook
1 parent 4641110 commit a1006b8

File tree

1 file changed

+142
-59
lines changed

1 file changed

+142
-59
lines changed

notebooks/cloc_code_stats.ipynb

Lines changed: 142 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949
"from datetime import datetime\n",
5050
"from matplotlib import pyplot as plt\n",
5151
"from matplotlib import cm as cm\n",
52+
"from collections import OrderedDict\n",
5253
"import pandas as pd\n",
5354
"%matplotlib inline"
5455
]
@@ -80,16 +81,7 @@
8081
"date_range = pd.date_range(\n",
8182
" start=NWBGitInfo.NWB2_START_DATE if start_date is None else start_date,\n",
8283
" end=datetime.today() if end_date is None else end_date,\n",
83-
" freq=\"D\")\n",
84-
"\n",
85-
"# Select the repos and their order for the summary plot with the lines of code\n",
86-
"summary_plot_repos = [\n",
87-
" 'PyNWB', 'HDMF', 'MatNWB',\n",
88-
" 'NWB_Schema_Language', 'NWB_Schema', \n",
89-
" 'HDMF_Common_Schema', 'HDMF_DocUtils', 'HDMF_Zarr',\n",
90-
" 'NDX_Catalog', 'NDX_Template', 'NDX_Staged_Extensions', 'NDX_Extension_Smithy',\n",
91-
" 'NWBWidgets', 'NWBInspector',\n",
92-
" 'NeuroConv']"
84+
" freq=\"D\")"
9385
]
9486
},
9587
{
@@ -122,12 +114,51 @@
122114
")"
123115
]
124116
},
117+
{
118+
"cell_type": "code",
119+
"execution_count": null,
120+
"metadata": {},
121+
"outputs": [],
122+
"source": [
123+
"print(\"Repository Keys:\")\n",
124+
"print(summary_stats['codes'].keys().values)"
125+
]
126+
},
125127
{
126128
"cell_type": "markdown",
127129
"metadata": {},
128130
"source": [
129-
"## 3. Plot summary of the lines of code across all NWB repos\n",
130-
"### 3.1. Plot version 1: Using default colors for repos"
131+
"## 3. Plot summary of the lines of code across all NWB repos\n"
132+
]
133+
},
134+
{
135+
"cell_type": "code",
136+
"execution_count": null,
137+
"metadata": {},
138+
"outputs": [],
139+
"source": [
140+
"# Define the grouping of the repos\n",
141+
"summary_plot_repos_grouped = OrderedDict()\n",
142+
"summary_plot_repos_grouped['NWB APIs'] = ['PyNWB', 'MatNWB', 'AqNWB']\n",
143+
"summary_plot_repos_grouped['Data Modeling'] = ['HDMF', 'HDMF_Zarr', 'HDMF_Schema_Language', 'NWB_Schema_Language']\n",
144+
"summary_plot_repos_grouped['Extension Tools'] = ['NDX_Catalog', 'NDX_Template', 'NDX_Extension_Smithy', 'NDX_Staged_Extensions', 'HDMF_DocUtils']\n",
145+
"summary_plot_repos_grouped['Format Schema'] = ['NWB_Schema', 'HDMF_Common_Schema']\n",
146+
"summary_plot_repos_grouped['Data Conversion'] = ['NeuroConv', 'NWBInspector', 'NWB_GUIDE']\n",
147+
"summary_plot_repos_grouped['Cloud'] = ['LINDI', 'NWB_Benchmarks', 'NWBWidgets']\n",
148+
"summary_plot_repos_grouped['Online Resources'] = ['NWB_Overview', 'NWB_Project_Analytics', 'Hackathons']"
149+
]
150+
},
151+
{
152+
"cell_type": "code",
153+
"execution_count": null,
154+
"metadata": {},
155+
"outputs": [],
156+
"source": [
157+
"# Create flat list of repos\n",
158+
"summary_plot_repos = [repo \n",
159+
" for repo_type in summary_plot_repos_grouped\n",
160+
" for repo in summary_plot_repos_grouped[repo_type]\n",
161+
" ] "
131162
]
132163
},
133164
{
@@ -136,11 +167,45 @@
136167
"metadata": {},
137168
"outputs": [],
138169
"source": [
170+
"# Define base colors for each category\n",
171+
"base_colors = {\n",
172+
" 'NWB APIs': (0.121, 0.466, 0.705, 1.0), # Blue\n",
173+
" 'Data Modeling': (1.000, 0.843, 0.000, 1.0), # Gold/Yellow\n",
174+
" 'Data Conversion':(0.200, 0.627, 0.172, 1.0), # Green\n",
175+
" 'Extension Tools':(1.000, 0.498, 0.054, 1.0), # Orange\n",
176+
" 'Format Schema': (0.839, 0.153, 0.157, 1.0), # Red\n",
177+
" 'Cloud': (0.580, 0.404, 0.741, 1.0), # Purple\n",
178+
" 'Online Resources':(0.549, 0.337, 0.294, 1.0), # Brown\n",
179+
"}"
180+
]
181+
},
182+
{
183+
"cell_type": "markdown",
184+
"metadata": {},
185+
"source": [
186+
"### 3.1. Version 1: Using default colors for repos"
187+
]
188+
},
189+
{
190+
"cell_type": "code",
191+
"execution_count": null,
192+
"metadata": {},
193+
"outputs": [],
194+
"source": [
195+
"# Assign each repo a color sampled from tab20 (tab20 has only 20 entries, so colors repeat when there are more than 20 repos)\n",
139196
"evenly_spaced_interval = np.linspace(0, 1, len(summary_plot_repos))\n",
140-
"#colors = [cm.tab20(x) for x in evenly_spaced_interval]\n",
141-
"colors = [cm.Paired(x) for x in evenly_spaced_interval]\n",
197+
"colors = [cm.tab20(x) for x in evenly_spaced_interval]\n",
198+
"#colors = [cm.Paired(x) for x in evenly_spaced_interval]\n",
142199
"# mix up colors so that neighbouring areas have more dissimilar colors\n",
143-
"colors = [c for i, c in enumerate(colors) if i % 2 == 0] + [c for i, c in enumerate(colors) if i % 2 == 1]\n",
200+
"colors = [c for i, c in enumerate(colors) if i % 2 == 0] + [c for i, c in enumerate(colors) if i % 2 == 1]"
201+
]
202+
},
203+
{
204+
"cell_type": "code",
205+
"execution_count": null,
206+
"metadata": {},
207+
"outputs": [],
208+
"source": [
144209
"ax = summary_stats['sizes'][summary_plot_repos].plot.area(\n",
145210
" figsize=(18,10), \n",
146211
" stacked=True, \n",
@@ -149,13 +214,14 @@
149214
" color=colors)\n",
150215
"ax.get_yaxis().set_major_formatter(\n",
151216
" mpl.ticker.FuncFormatter(lambda x, p: format(int(x), ',')))\n",
152-
"plt.legend(loc=2, prop={'size': 20})\n",
217+
"plt.legend(loc=2, prop={'size': 16})\n",
153218
"plt.ylabel('Lines of Code', fontsize=24)\n",
154219
"plt.xlabel('Date', fontsize=24)\n",
155220
"plt.grid(color='black', linestyle='--', linewidth=0.7, axis='both')\n",
156221
"plt.tight_layout()\n",
157222
"if save_figs:\n",
158223
" plt.savefig(os.path.join(plot_dir, 'nwb_repo_sizes_all.pdf'))\n",
224+
" plt.savefig(os.path.join(plot_dir, 'nwb_repo_sizes_all.png'), dpi=300)\n",
159225
"plt.title('NWB Code Repository Sizes', fontsize=20)\n",
160226
"plt.show()"
161227
]
@@ -164,9 +230,29 @@
164230
"cell_type": "markdown",
165231
"metadata": {},
166232
"source": [
167-
"### 3.1 Plot grouped summary of the lines of code across all NWB repos \n",
233+
"### 3.2 Color repos by category but keep repos separate"
234+
]
235+
},
236+
{
237+
"cell_type": "code",
238+
"execution_count": null,
239+
"metadata": {},
240+
"outputs": [],
241+
"source": [
242+
"## Generate colors to visually group all repos by color and distinguish repos within\n",
243+
"## each category based on their alpha value\n",
244+
"# Function to generate colors with varying alpha values\n",
245+
"def generate_colors(base_color, num_colors):\n",
246+
" r, g, b, _ = base_color\n",
247+
" alpha_step = 0.7 / (num_colors - 1) if num_colors > 1 else 0.7\n",
248+
" return [(r, g, b, max(0.3, 1.0 - i * alpha_step)) for i in range(num_colors)]\n",
168249
"\n",
169-
"For the paper we want to group tools to ease overview."
250+
"# Generate colors for each category\n",
251+
"colors = []\n",
252+
"for category, repos in summary_plot_repos_grouped.items():\n",
253+
" base_color = base_colors[category]\n",
254+
" category_colors = generate_colors(base_color, len(repos))\n",
255+
" colors.extend(category_colors)"
170256
]
171257
},
172258
{
@@ -175,23 +261,7 @@
175261
"metadata": {},
176262
"outputs": [],
177263
"source": [
178-
"# Sort repos so we can group them category\n",
179-
"summary_plot_repos_accum = [\n",
180-
" 'NWB_Schema', 'HDMF_Common_Schema', 'NWB_Schema_Language', \n",
181-
" 'PyNWB', \n",
182-
" 'HDMF',\n",
183-
" 'MatNWB',\n",
184-
" 'HDMF_DocUtils', 'NWBWidgets', 'NWBInspector',\n",
185-
" 'HDMF_Zarr', 'NeuroConv',\n",
186-
" 'NDX_Catalog', 'NDX_Template', 'NDX_Staged_Extensions', 'NDX_Extension_Smithy']\n",
187-
"colors = [(0.7, 0.0, 0.0, 1.0), (0.7, 0.0, 0.0, 0.6), (0.7, 0.0, 0.0, 0.4),\n",
188-
" (0.0, 0.5, 0.6, 1.0),\n",
189-
" (0.0, 0.75, 0.85, 1.0),\n",
190-
" (0.4, 1.0, 1.0 ,1.0),\n",
191-
" (0.8, 0.4, 0.0, 1.0), (0.8, 0.4, 0.0, 0.7), (0.8, 0.4, 0.0, 0.5),\n",
192-
" (0.8, 0.8, 0.2, 1.0), (0.8, 0.8, 0.2, 0.5),\n",
193-
" (0.0, 0.0, 0.7, 1.0), (0.0, 0.0, 0.7, 0.85), (0.0, 0.0, 0.7, 0.7), (0.0, 0.0, 0.7, 0.55)]\n",
194-
"ax = summary_stats['sizes'][summary_plot_repos_accum].plot.area(\n",
264+
"ax = summary_stats['sizes'][summary_plot_repos].plot.area(\n",
195265
" figsize=(18,10), \n",
196266
" stacked=True, \n",
197267
" linewidth=0,\n",
@@ -204,39 +274,50 @@
204274
"plt.xlabel('Date', fontsize=24)\n",
205275
"plt.grid(color='black', linestyle='--', linewidth=0.7, axis='both')\n",
206276
"plt.tight_layout()\n",
207-
"plt.legend(loc=2, prop={'size': 20,}, facecolor=(1.0, 1.0, 1.0, 1.0), framealpha=1.0)\n",
277+
"plt.legend(loc=2, prop={'size': 16,}, facecolor=(1.0, 1.0, 1.0, 1.0), framealpha=1.0)\n",
208278
"if save_figs:\n",
209279
" plt.savefig(os.path.join(plot_dir, 'nwb_repo_sizes_all_grouped.pdf'))\n",
280+
" plt.savefig(os.path.join(plot_dir, 'nwb_repo_sizes_all_grouped.png'), dpi=300)\n",
210281
"plt.title('NWB Code Repository Sizes', fontsize=20)\n",
211282
"plt.show()"
212283
]
213284
},
285+
{
286+
"cell_type": "markdown",
287+
"metadata": {},
288+
"source": [
289+
"### 3.3 Combine repos into broad categories"
290+
]
291+
},
214292
{
215293
"cell_type": "code",
216294
"execution_count": null,
217295
"metadata": {},
218296
"outputs": [],
219297
"source": [
220-
"repo_sizes_grouped_df = pd.DataFrame.from_dict(\n",
221-
" {'Format Schema': (summary_stats['sizes']['NWB_Schema'] + \n",
222-
" summary_stats['sizes']['HDMF_Common_Schema'] + \n",
223-
" summary_stats['sizes']['NWB_Schema_Language']),\n",
224-
" 'HDMF': summary_stats['sizes']['HDMF'],\n",
225-
" 'PyNWB': summary_stats['sizes']['PyNWB'],\n",
226-
" 'MatNWB': summary_stats['sizes']['MatNWB'],\n",
227-
" 'NWB Tools': (summary_stats['sizes']['HDMF_DocUtils'] + \n",
228-
" summary_stats['sizes']['NWBWidgets'] + \n",
229-
" summary_stats['sizes']['NWBInspector'] + \n",
230-
" summary_stats['sizes']['HDMF_Zarr'] + \n",
231-
" summary_stats['sizes']['NeuroConv']),\n",
232-
" 'NDX Catalog': (summary_stats['sizes']['NDX_Catalog'] +\n",
233-
" summary_stats['sizes']['NDX_Template'] +\n",
234-
" summary_stats['sizes']['NDX_Staged_Extensions'] +\n",
235-
" summary_stats['sizes']['NDX_Extension_Smithy']),\n",
236-
" }\n",
237-
")\n",
238-
"colors = [(78, 92, 150), (81, 133, 189), (155, 187, 89), (115, 147, 49), (191, 80, 77), (207, 130, 58)]\n",
239-
"colors = [ (c[0]/255.0, c[1]/255.0, c[2]/255.0, 1.0) for c in colors]\n",
298+
"# Create DataFrame with the total lines of code for each category (instead of for each repo)\n",
299+
"repo_sizes_grouped = OrderedDict()\n",
300+
"for category, repos in summary_plot_repos_grouped.items():\n",
301+
" category_size = None\n",
302+
" for repo in repos:\n",
303+
" if category_size is None:\n",
304+
" category_size = summary_stats['sizes'][repo]\n",
305+
" else:\n",
306+
" category_size += summary_stats['sizes'][repo]\n",
307+
" repo_sizes_grouped [category] = category_size\n",
308+
"repo_sizes_grouped_df = pd.DataFrame.from_dict(repo_sizes_grouped)"
309+
]
310+
},
311+
{
312+
"cell_type": "code",
313+
"execution_count": null,
314+
"metadata": {},
315+
"outputs": [],
316+
"source": [
317+
"# colors = [(78, 92, 150), (81, 133, 189), (155, 187, 89), (115, 147, 49), (191, 80, 77), (207, 130, 58)]\n",
318+
"# colors = [ (c[0]/255.0, c[1]/255.0, c[2]/255.0, 1.0) for c in colors]\n",
319+
"colors = [(c[0], c[1], c[2], 0.8) for c in base_colors.values()]\n",
320+
"\n",
240321
"ax = repo_sizes_grouped_df.plot.area(\n",
241322
" figsize=(18,10), \n",
242323
" stacked=True, \n",
@@ -253,6 +334,7 @@
253334
"plt.legend(loc=2, prop={'size': 24,}, facecolor=(1.0, 1.0, 1.0, 1.0), framealpha=1.0)\n",
254335
"if save_figs:\n",
255336
" plt.savefig(os.path.join(plot_dir, 'nwb_repo_sizes_grouped.pdf'))\n",
337+
" plt.savefig(os.path.join(plot_dir, 'nwb_repo_sizes_grouped.png'), dpi=300)\n",
256338
" \n",
257339
"plt.title('NWB Code Repository Sizes', fontsize=20)\n",
258340
"plt.show()"
@@ -262,7 +344,8 @@
262344
"cell_type": "markdown",
263345
"metadata": {},
264346
"source": [
265-
"## 4. Plot per-repo total lines of code statistics broken down by: code, blank, comment"
347+
"## 4. Plot per-repo stats\n",
348+
"### 4.1 Plot total lines of code statistics broken down by: code, blank, comment"
266349
]
267350
},
268351
{
@@ -295,7 +378,7 @@
295378
"cell_type": "markdown",
296379
"metadata": {},
297380
"source": [
298-
"## 4. Per-repo total lines of code statistics broken down by language type"
381+
"### 4.2 Per-repo total lines of code statistics broken down by language type"
299382
]
300383
},
301384
{
@@ -365,7 +448,7 @@
365448
"name": "python",
366449
"nbconvert_exporter": "python",
367450
"pygments_lexer": "ipython3",
368-
"version": "3.9.7"
451+
"version": "3.13.5"
369452
}
370453
},
371454
"nbformat": 4,

0 commit comments

Comments
 (0)