|
49 | 49 | "from datetime import datetime\n", |
50 | 50 | "from matplotlib import pyplot as plt\n", |
51 | 51 | "from matplotlib import cm as cm\n", |
| 52 | + "from collections import OrderedDict\n", |
52 | 53 | "import pandas as pd\n", |
53 | 54 | "%matplotlib inline" |
54 | 55 | ] |
|
80 | 81 | "date_range = pd.date_range(\n", |
81 | 82 | " start=NWBGitInfo.NWB2_START_DATE if start_date is None else start_date,\n", |
82 | 83 | " end=datetime.today() if end_date is None else end_date,\n", |
83 | | - " freq=\"D\")\n", |
84 | | - "\n", |
85 | | - "# Select the repos and their order for the summary plot with the lines of code\n", |
86 | | - "summary_plot_repos = [\n", |
87 | | - " 'PyNWB', 'HDMF', 'MatNWB',\n", |
88 | | - " 'NWB_Schema_Language', 'NWB_Schema', \n", |
89 | | - " 'HDMF_Common_Schema', 'HDMF_DocUtils', 'HDMF_Zarr',\n", |
90 | | - " 'NDX_Catalog', 'NDX_Template', 'NDX_Staged_Extensions', 'NDX_Extension_Smithy',\n", |
91 | | - " 'NWBWidgets', 'NWBInspector',\n", |
92 | | - " 'NeuroConv']" |
| 84 | + " freq=\"D\")" |
93 | 85 | ] |
94 | 86 | }, |
95 | 87 | { |
|
122 | 114 | ")" |
123 | 115 | ] |
124 | 116 | }, |
| 117 | + { |
| 118 | + "cell_type": "code", |
| 119 | + "execution_count": null, |
| 120 | + "metadata": {}, |
| 121 | + "outputs": [], |
| 122 | + "source": [ |
| 123 | + "# Show the keys of summary_stats['codes'] under a short heading\n", |
| 124 | + "print(\"Repository Keys:\", summary_stats['codes'].keys().values, sep=\"\\n\")" |
| 125 | + ] |
| 126 | + }, |
125 | 127 | { |
126 | 128 | "cell_type": "markdown", |
127 | 129 | "metadata": {}, |
128 | 130 | "source": [ |
129 | | - "## 3. Plot summary of the lines of code across all NWB repos\n", |
130 | | - "### 3.1. Plot version 1: Using default colors for repos" |
| 131 | + "## 3. Plot summary of the lines of code across all NWB repos\n" |
| 132 | + ] |
| 133 | + }, |
| 134 | + { |
| 135 | + "cell_type": "code", |
| 136 | + "execution_count": null, |
| 137 | + "metadata": {}, |
| 138 | + "outputs": [], |
| 139 | + "source": [ |
| 140 | + "# Define the grouping of the repos: category name -> list of repos, kept in insertion order\n", |
| 141 | + "summary_plot_repos_grouped = OrderedDict([\n", |
| 142 | + "    ('NWB APIs', ['PyNWB', 'MatNWB', 'AqNWB']),\n", |
| 143 | + "    ('Data Modeling', ['HDMF', 'HDMF_Zarr', 'HDMF_Schema_Language', 'NWB_Schema_Language']),\n", |
| 144 | + "    ('Extension Tools', ['NDX_Catalog', 'NDX_Template', 'NDX_Extension_Smithy', 'NDX_Staged_Extensions', 'HDMF_DocUtils']),\n", |
| 145 | + "    ('Format Schema', ['NWB_Schema', 'HDMF_Common_Schema']),\n", |
| 146 | + "    ('Data Conversion', ['NeuroConv', 'NWBInspector', 'NWB_GUIDE']),\n", |
| 147 | + "    ('Cloud', ['LINDI', 'NWB_Benchmarks', 'NWBWidgets']),\n", |
| 148 | + "    ('Online Resources', ['NWB_Overview', 'NWB_Project_Analytics', 'Hackathons'])])" |
| 149 | + ] |
| 150 | + }, |
| 151 | + { |
| 152 | + "cell_type": "code", |
| 153 | + "execution_count": null, |
| 154 | + "metadata": {}, |
| 155 | + "outputs": [], |
| 156 | + "source": [ |
| 157 | + "# Create flat list of repos: walk the categories in their defined order and\n", |
| 158 | + "# append each category's repos, so repos of the same category stay adjacent\n", |
| 159 | + "summary_plot_repos = []\n", |
| 160 | + "for category_repos in summary_plot_repos_grouped.values():\n", |
| 161 | + "    summary_plot_repos.extend(category_repos)" |
131 | 162 | ] |
132 | 163 | }, |
133 | 164 | { |
|
136 | 167 | "metadata": {}, |
137 | 168 | "outputs": [], |
138 | 169 | "source": [ |
| 170 | + "# Base RGBA color of each category, looked up by category name\n", |
| 171 | + "base_colors = {\n", |
| 172 | + "    'NWB APIs': (0.121, 0.466, 0.705, 1.0),          # Blue\n", |
| 173 | + "    'Data Modeling': (1.000, 0.843, 0.000, 1.0),     # Gold/Yellow\n", |
| 174 | + "    'Data Conversion': (0.200, 0.627, 0.172, 1.0),   # Green\n", |
| 175 | + "    'Extension Tools': (1.000, 0.498, 0.054, 1.0),   # Orange\n", |
| 176 | + "    'Format Schema': (0.839, 0.153, 0.157, 1.0),     # Red\n", |
| 177 | + "    'Cloud': (0.580, 0.404, 0.741, 1.0),             # Purple\n", |
| 178 | + "    'Online Resources': (0.549, 0.337, 0.294, 1.0),  # Brown\n", |
| 179 | + "}" |
| 180 | + ] |
| 181 | + }, |
| 182 | + { |
| 183 | + "cell_type": "markdown", |
| 184 | + "metadata": {}, |
| 185 | + "source": [ |
| 186 | + "### 3.1 Version 1: Color each repo individually" |
| 187 | + ] |
| 188 | + }, |
| 189 | + { |
| 190 | + "cell_type": "code", |
| 191 | + "execution_count": null, |
| 192 | + "metadata": {}, |
| 193 | + "outputs": [], |
| 194 | + "source": [ |
| 195 | + "# Create colors such that each repo is assigned a distinct color\n", |
139 | 196 | "evenly_spaced_interval = np.linspace(0, 1, len(summary_plot_repos))\n", |
140 | | - "#colors = [cm.tab20(x) for x in evenly_spaced_interval]\n", |
141 | | - "colors = [cm.Paired(x) for x in evenly_spaced_interval]\n", |
| 197 | + "palette = list(cm.tab20.colors) + list(cm.tab20b.colors) + list(cm.tab20c.colors)  # 60 qualitative colors; sampling tab20 alone (20 entries) repeats colors once there are more than 20 repos\n", |
| 198 | + "colors = [palette[i % len(palette)] for i in range(len(summary_plot_repos))]  # one color per repo; repeats only beyond 60 repos\n", |
142 | 199 | "# mix up colors so that neighbouring areas have more dissimilar colors\n", |
143 | | - "colors = [c for i, c in enumerate(colors) if i % 2 == 0] + [c for i, c in enumerate(colors) if i % 2 == 1]\n", |
| 200 | + "colors = [c for i, c in enumerate(colors) if i % 2 == 0] + [c for i, c in enumerate(colors) if i % 2 == 1]" |
| 201 | + ] |
| 202 | + }, |
| 203 | + { |
| 204 | + "cell_type": "code", |
| 205 | + "execution_count": null, |
| 206 | + "metadata": {}, |
| 207 | + "outputs": [], |
| 208 | + "source": [ |
144 | 209 | "ax = summary_stats['sizes'][summary_plot_repos].plot.area(\n", |
145 | 210 | " figsize=(18,10), \n", |
146 | 211 | " stacked=True, \n", |
|
149 | 214 | " color=colors)\n", |
150 | 215 | "ax.get_yaxis().set_major_formatter(\n", |
151 | 216 | " mpl.ticker.FuncFormatter(lambda x, p: format(int(x), ',')))\n", |
152 | | - "plt.legend(loc=2, prop={'size': 20})\n", |
| 217 | + "plt.legend(loc=2, prop={'size': 16})\n", |
153 | 218 | "plt.ylabel('Lines of Code', fontsize=24)\n", |
154 | 219 | "plt.xlabel('Date', fontsize=24)\n", |
155 | 220 | "plt.grid(color='black', linestyle='--', linewidth=0.7, axis='both')\n", |
156 | 221 | "plt.tight_layout()\n", |
157 | 222 | "if save_figs:\n", |
158 | 223 | " plt.savefig(os.path.join(plot_dir, 'nwb_repo_sizes_all.pdf'))\n", |
| 224 | + " plt.savefig(os.path.join(plot_dir, 'nwb_repo_sizes_all.png'), dpi=300)\n", |
159 | 225 | "plt.title('NWB Code Repository Sizes', fontsize=20)\n", |
160 | 226 | "plt.show()" |
161 | 227 | ] |
|
164 | 230 | "cell_type": "markdown", |
165 | 231 | "metadata": {}, |
166 | 232 | "source": [ |
167 | | - "### 3.1 Plot grouped summary of the lines of code across all NWB repos \n", |
| 233 | + "### 3.2 Version 2: Color repos by category but keep repos separate" |
| 234 | + ] |
| 235 | + }, |
| 236 | + { |
| 237 | + "cell_type": "code", |
| 238 | + "execution_count": null, |
| 239 | + "metadata": {}, |
| 240 | + "outputs": [], |
| 241 | + "source": [ |
| 242 | + "# Visually group repos by category color; repos within a category are told apart by alpha\n", |
| 243 | + "def generate_colors(base_color, num_colors):\n", |
| 244 | + "    \"\"\"Return num_colors RGBA tuples with the RGB of base_color and alpha fading linearly from 1.0 to 0.3.\"\"\"\n", |
| 245 | + "    red, green, blue, _ = base_color  # the alpha of base_color itself is ignored\n", |
| 246 | + "    alpha_step = 0.7 / (num_colors - 1) if num_colors > 1 else 0.7  # one color: alpha stays 1.0\n", |
| 247 | + "    alphas = [max(0.3, 1.0 - idx * alpha_step) for idx in range(num_colors)]\n", |
| 248 | + "    return [(red, green, blue, alpha) for alpha in alphas]\n", |
168 | 249 | "\n", |
169 | | - "For the paper we want to group tools to ease overview." |
| 250 | + "# Generate the colors category by category (same order as summary_plot_repos)\n", |
| 251 | + "colors = [\n", |
| 252 | + "    repo_color\n", |
| 253 | + "    for category, category_repos in summary_plot_repos_grouped.items()\n", |
| 254 | + "    for repo_color in generate_colors(base_colors[category], len(category_repos))\n", |
| 255 | + "]" |
170 | 256 | ] |
171 | 257 | }, |
172 | 258 | { |
|
175 | 261 | "metadata": {}, |
176 | 262 | "outputs": [], |
177 | 263 | "source": [ |
178 | | - "# Sort repos so we can group them category\n", |
179 | | - "summary_plot_repos_accum = [\n", |
180 | | - " 'NWB_Schema', 'HDMF_Common_Schema', 'NWB_Schema_Language', \n", |
181 | | - " 'PyNWB', \n", |
182 | | - " 'HDMF',\n", |
183 | | - " 'MatNWB',\n", |
184 | | - " 'HDMF_DocUtils', 'NWBWidgets', 'NWBInspector',\n", |
185 | | - " 'HDMF_Zarr', 'NeuroConv',\n", |
186 | | - " 'NDX_Catalog', 'NDX_Template', 'NDX_Staged_Extensions', 'NDX_Extension_Smithy']\n", |
187 | | - "colors = [(0.7, 0.0, 0.0, 1.0), (0.7, 0.0, 0.0, 0.6), (0.7, 0.0, 0.0, 0.4),\n", |
188 | | - " (0.0, 0.5, 0.6, 1.0),\n", |
189 | | - " (0.0, 0.75, 0.85, 1.0),\n", |
190 | | - " (0.4, 1.0, 1.0 ,1.0),\n", |
191 | | - " (0.8, 0.4, 0.0, 1.0), (0.8, 0.4, 0.0, 0.7), (0.8, 0.4, 0.0, 0.5),\n", |
192 | | - " (0.8, 0.8, 0.2, 1.0), (0.8, 0.8, 0.2, 0.5),\n", |
193 | | - " (0.0, 0.0, 0.7, 1.0), (0.0, 0.0, 0.7, 0.85), (0.0, 0.0, 0.7, 0.7), (0.0, 0.0, 0.7, 0.55)]\n", |
194 | | - "ax = summary_stats['sizes'][summary_plot_repos_accum].plot.area(\n", |
| 264 | + "ax = summary_stats['sizes'][summary_plot_repos].plot.area(\n", |
195 | 265 | " figsize=(18,10), \n", |
196 | 266 | " stacked=True, \n", |
197 | 267 | " linewidth=0,\n", |
|
204 | 274 | "plt.xlabel('Date', fontsize=24)\n", |
205 | 275 | "plt.grid(color='black', linestyle='--', linewidth=0.7, axis='both')\n", |
206 | 276 | "plt.tight_layout()\n", |
207 | | - "plt.legend(loc=2, prop={'size': 20,}, facecolor=(1.0, 1.0, 1.0, 1.0), framealpha=1.0)\n", |
| 277 | + "plt.legend(loc=2, prop={'size': 16,}, facecolor=(1.0, 1.0, 1.0, 1.0), framealpha=1.0)\n", |
208 | 278 | "if save_figs:\n", |
209 | 279 | " plt.savefig(os.path.join(plot_dir, 'nwb_repo_sizes_all_grouped.pdf'))\n", |
| 280 | + " plt.savefig(os.path.join(plot_dir, 'nwb_repo_sizes_all_grouped.png'), dpi=300)\n", |
210 | 281 | "plt.title('NWB Code Repository Sizes', fontsize=20)\n", |
211 | 282 | "plt.show()" |
212 | 283 | ] |
213 | 284 | }, |
| 285 | + { |
| 286 | + "cell_type": "markdown", |
| 287 | + "metadata": {}, |
| 288 | + "source": [ |
| 289 | + "### 3.3 Combine repos into broad categories" |
| 290 | + ] |
| 291 | + }, |
214 | 292 | { |
215 | 293 | "cell_type": "code", |
216 | 294 | "execution_count": null, |
217 | 295 | "metadata": {}, |
218 | 296 | "outputs": [], |
219 | 297 | "source": [ |
220 | | - "repo_sizes_grouped_df = pd.DataFrame.from_dict(\n", |
221 | | - " {'Format Schema': (summary_stats['sizes']['NWB_Schema'] + \n", |
222 | | - " summary_stats['sizes']['HDMF_Common_Schema'] + \n", |
223 | | - " summary_stats['sizes']['NWB_Schema_Language']),\n", |
224 | | - " 'HDMF': summary_stats['sizes']['HDMF'],\n", |
225 | | - " 'PyNWB': summary_stats['sizes']['PyNWB'],\n", |
226 | | - " 'MatNWB': summary_stats['sizes']['MatNWB'],\n", |
227 | | - " 'NWB Tools': (summary_stats['sizes']['HDMF_DocUtils'] + \n", |
228 | | - " summary_stats['sizes']['NWBWidgets'] + \n", |
229 | | - " summary_stats['sizes']['NWBInspector'] + \n", |
230 | | - " summary_stats['sizes']['HDMF_Zarr'] + \n", |
231 | | - " summary_stats['sizes']['NeuroConv']),\n", |
232 | | - " 'NDX Catalog': (summary_stats['sizes']['NDX_Catalog'] +\n", |
233 | | - " summary_stats['sizes']['NDX_Template'] +\n", |
234 | | - " summary_stats['sizes']['NDX_Staged_Extensions'] +\n", |
235 | | - " summary_stats['sizes']['NDX_Extension_Smithy']),\n", |
236 | | - " }\n", |
237 | | - ")\n", |
238 | | - "colors = [(78, 92, 150), (81, 133, 189), (155, 187, 89), (115, 147, 49), (191, 80, 77), (207, 130, 58)]\n", |
239 | | - "colors = [ (c[0]/255.0, c[1]/255.0, c[2]/255.0, 1.0) for c in colors]\n", |
| 298 | + "# Create DataFrame with the total lines of code for each category (instead of for each repo).\n", |
| 299 | + "# Each category total is computed as a NEW Series via a row-wise sum over its repo columns.\n", |
| 300 | + "# Do not accumulate with `+=` on the Series returned by summary_stats['sizes'][repo]: without\n", |
| 301 | + "# pandas copy-on-write that updates the first repo's column of summary_stats['sizes'] in place,\n", |
| 302 | + "# which corrupts the per-repo numbers used by other cells and grows on every re-run.\n", |
| 303 | + "# skipna=False keeps the NaN behavior of adding the columns with `+`.\n", |
| 304 | + "repo_sizes_grouped = OrderedDict(\n", |
| 305 | + "    (category, summary_stats['sizes'][repos].sum(axis=1, skipna=False))\n", |
| 306 | + "    for category, repos in summary_plot_repos_grouped.items()\n", |
| 307 | + ")\n", |
| 308 | + "repo_sizes_grouped_df = pd.DataFrame.from_dict(repo_sizes_grouped)" |
| 309 | + ] |
| 310 | + }, |
| 311 | + { |
| 312 | + "cell_type": "code", |
| 313 | + "execution_count": null, |
| 314 | + "metadata": {}, |
| 315 | + "outputs": [], |
| 316 | + "source": [ |
| 317 | + "# Look up each category's base color by name so the colors follow the column order of\n", |
| 318 | + "# repo_sizes_grouped_df (base_colors lists the categories in a different order); alpha is set to 0.8\n", |
| 319 | + "colors = [base_colors[category][:3] + (0.8,) for category in repo_sizes_grouped_df.columns]\n", |
| 320 | + "\n", |
240 | 321 | "ax = repo_sizes_grouped_df.plot.area(\n", |
241 | 322 | " figsize=(18,10), \n", |
242 | 323 | " stacked=True, \n", |
|
253 | 334 | "plt.legend(loc=2, prop={'size': 24,}, facecolor=(1.0, 1.0, 1.0, 1.0), framealpha=1.0)\n", |
254 | 335 | "if save_figs:\n", |
255 | 336 | " plt.savefig(os.path.join(plot_dir, 'nwb_repo_sizes_grouped.pdf'))\n", |
| 337 | + " plt.savefig(os.path.join(plot_dir, 'nwb_repo_sizes_grouped.png'), dpi=300)\n", |
256 | 338 | " \n", |
257 | 339 | "plt.title('NWB Code Repository Sizes', fontsize=20)\n", |
258 | 340 | "plt.show()" |
|
262 | 344 | "cell_type": "markdown", |
263 | 345 | "metadata": {}, |
264 | 346 | "source": [ |
265 | | - "## 4. Plot per-repo total lines of code statistics broken down by: code, blank, comment" |
| 347 | + "## 4. Plot per-repo stats\n", |
| 348 | + "### 4.1 Plot total lines of code statistics broken down by: code, blank, comment" |
266 | 349 | ] |
267 | 350 | }, |
268 | 351 | { |
|
295 | 378 | "cell_type": "markdown", |
296 | 379 | "metadata": {}, |
297 | 380 | "source": [ |
298 | | - "## 4. Per-repo total lines of code statistics broken down by language type" |
| 381 | + "### 4.2 Per-repo total lines of code statistics broken down by language type" |
299 | 382 | ] |
300 | 383 | }, |
301 | 384 | { |
|
365 | 448 | "name": "python", |
366 | 449 | "nbconvert_exporter": "python", |
367 | 450 | "pygments_lexer": "ipython3", |
368 | | - "version": "3.9.7" |
| 451 | + "version": "3.13.5" |
369 | 452 | } |
370 | 453 | }, |
371 | 454 | "nbformat": 4, |
|
0 commit comments