From d6d6843475c0b04f431a597c434f7e630a587f74 Mon Sep 17 00:00:00 2001 From: Dom Date: Fri, 11 Apr 2025 13:50:32 +0200 Subject: [PATCH 01/10] Properly show all selected attributes in the interactive graph and add option whether to show labels --- vnc_networks/utils/nx_design.py | 53 ++++++++++++++++++++++++--------- 1 file changed, 39 insertions(+), 14 deletions(-) diff --git a/vnc_networks/utils/nx_design.py b/vnc_networks/utils/nx_design.py index c26b3c7..3f4fce2 100644 --- a/vnc_networks/utils/nx_design.py +++ b/vnc_networks/utils/nx_design.py @@ -158,6 +158,7 @@ def display_interactive_graph( output_file: str = "visualisation.html", window_height: int = 1000, additional_attributes: Optional[list[NeuronAttribute]] = None, + show_labels: bool = True, ): """ Display the graph in interactive browser window using pyvis. This saves an HTML file and opens it in the browser. @@ -203,25 +204,49 @@ def display_interactive_graph( attributes_displayed += additional_attributes # Ensure all the attributes are defined in the connections object all_nodes = connections.get_nodes(type="uid") - for attribute in attributes_displayed: - _ = connections.get_node_attribute(all_nodes, attribute) - def node_data_to_string(graph, node): - data_dict = graph.nodes[node] - return f'body_id: {data_dict["body_id"]}\n' + "\n".join( + def node_data_to_string(node_data): + return "\n".join( [ - f"{k}: {v}" - for k, v in data_dict.items() + f"{attribute_name}: {attribute_value}" + for attribute_name, attribute_value in node_data.items() ] ) - - # This is the text that is shown when hovering over a node (more detailed) - node_data = { - node: node_data_to_string(g,node) - for node in g.nodes + + node_attributes_lists = [ + connections.get_node_attribute(all_nodes, attribute) + for attribute in attributes_displayed + ] + # {node: {attribute: value}} + node_attributes = { + node_and_attributes[0]: dict(zip(attributes_displayed, node_and_attributes[1:])) + for node_and_attributes in zip(g.nodes, *node_attributes_lists) } - - nx.set_node_attributes(g, node_data, "title") + + nx.set_node_attributes(g, node_attributes) + # This is the text that is shown when hovering over a node (more detailed) + nx.set_node_attributes( + g, + { + node: node_data_to_string(node_data) + for node, node_data in node_attributes.items() + }, + "title", + ) + if show_labels: + nx.set_node_attributes( + g, + { + node: str(node_data["name"]) + + ( + f' [{node_data["side"][0].upper()}]' + if isinstance(node_data["side"], str) and len(node_data["side"]) > 0 + else "" + ) + for node, node_data in node_attributes.items() + }, + "label", + ) # set node colour based on neuron class nx.set_node_attributes( From e5242ae68afaec14df8329c4869e1d6a463f3b2c Mon Sep 17 00:00:00 2001 From: Dom Date: Fri, 11 Apr 2025 13:59:27 +0200 Subject: [PATCH 02/10] Fix `ConnectomeReader.get_neurons_from_class` bug It should call `get_neuron_bodyids` with the `NeuronAttribute` "class_1" which gets converted to a connectome-specific attribute later --- vnc_networks/connectome_reader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vnc_networks/connectome_reader.py b/vnc_networks/connectome_reader.py index 8b5fd19..7b6c611 100644 --- a/vnc_networks/connectome_reader.py +++ b/vnc_networks/connectome_reader.py @@ -367,7 +367,7 @@ def get_neurons_from_class(self, class_: NeuronClass) -> list[BodyId]: """ # verify if the class is indeed a neuron class for this dataset specific_class = self.specific_neuron_class(class_) - return 
self.get_neuron_bodyids({self.class_1: specific_class}) + return self.get_neuron_bodyids({"class_1": specific_class}) def specific_selection_dict(self, selection_dict: SelectionDict): """ From 8d720e0b3012c8f0db2d9b06f1b28873522784e1 Mon Sep 17 00:00:00 2001 From: Dom Date: Fri, 11 Apr 2025 14:19:00 +0200 Subject: [PATCH 03/10] Fix bug in `FAFBReader.get_neuron_bodyids` If `nodes` was supplied, they would be returned even if they weren't valid BodyIds --- vnc_networks/connectome_reader.py | 50 +++++++++++++++++++------------ 1 file changed, 31 insertions(+), 19 deletions(-) diff --git a/vnc_networks/connectome_reader.py b/vnc_networks/connectome_reader.py index 7b6c611..84219bd 100644 --- a/vnc_networks/connectome_reader.py +++ b/vnc_networks/connectome_reader.py @@ -168,12 +168,16 @@ def get_neuron_bodyids( nodes: Optional[list[BodyId] | list[int]] = None, ) -> list[BodyId]: """ - Get the Ids of the neurons in the dataset. - Select (keep) according to the selection_dict. - Different criteria are treated as 'and' conditions. + Get the BodyIds of the neurons in the dataset that fulfil the conditions in the selection_dict. - For the specific case of "class_1" that refers to the NeuronClass, - we need to verify both the generic and the specific names. + For the specific case of "class_1" that refers to the NeuronClass, we need to verify both the generic and the specific names. + + Args: + selection_dict (SelectionDict, optional): Criteria that the returned neurons need to fulfil. Different criteria are treated as 'and' conditions. Defaults to {}. + nodes (Optional[list[BodyId] | list[int]], optional): If not None, only return BodyIds which are contained in this list. Defaults to None. + + Returns: + list[BodyId]: list of the BodyIds of neurons that fulfilled all supplied conditions. """ ... @@ -693,12 +697,16 @@ def get_neuron_bodyids( nodes: Optional[list[BodyId] | list[int]] = None, ) -> list[BodyId]: """ - Get the Ids of the neurons in the dataset. - Select (keep) according to the selection_dict. - Different criteria are treated as 'and' conditions. + Get the BodyIds of the neurons in the dataset that fulfil the conditions in the selection_dict. + + For the specific case of "class_1" that refers to the NeuronClass, we need to verify both the generic and the specific names. - For the specific case of "class_1" that refers to the NeuronClass, - we need to verify both the generic and the specific names. + Args: + selection_dict (SelectionDict, optional): Criteria that the returned neurons need to fulfil. Different criteria are treated as 'and' conditions. Defaults to {}. + nodes (Optional[list[BodyId] | list[int]], optional): If not None, only return BodyIds which are contained in this list. Defaults to None. + + Returns: + list[BodyId]: list of the BodyIds of neurons that fulfilled all supplied conditions. """ s_dict = self.specific_selection_dict(selection_dict) @@ -1683,20 +1691,24 @@ def get_neuron_bodyids( nodes: Optional[list[BodyId] | list[int]] = None, ) -> list[BodyId]: """ - Get the Ids of the neurons in the dataset. - Select (keep) according to the selection_dict. - Different criteria are treated as 'and' conditions. + Get the BodyIds of the neurons in the dataset that fulfil the conditions in the selection_dict. + + For the specific case of "class_1" that refers to the NeuronClass, we need to verify both the generic and the specific names. - For the specific case of "class_1" that refers to the NeuronClass, - we need to verify both the generic and the specific names. 
+ Args: + selection_dict (SelectionDict, optional): Criteria that the returned neurons need to fulfil. Different criteria are treated as 'and' conditions. Defaults to {}. + nodes (Optional[list[BodyId] | list[int]], optional): If not None, only return BodyIds which are contained in this list. Defaults to None. + + Returns: + list[BodyId]: list of the BodyIds of neurons that fulfilled all supplied conditions. """ + # get all neurons in the dataset that are also in the nodes list + valid_nodes = set(self.list_all_nodes()) + if nodes is not None: + valid_nodes = valid_nodes.intersection(nodes) # Treat each attribute in the selection dict independently: # get the nodes that satisfy each condition, and return the intersection of all - if nodes is not None: - valid_nodes = set(nodes) - else: - valid_nodes = set(self.list_all_nodes()) for key, value in selection_dict.items(): specific_valid_nodes = self._filter_neurons( attribute=key, From ea871f5c7c3a50137f5bf9feba18421f1cdc5f61 Mon Sep 17 00:00:00 2001 From: Dom Date: Fri, 11 Apr 2025 14:19:26 +0200 Subject: [PATCH 04/10] Fix some tiny type hinting problems in connectome_reader --- vnc_networks/connectome_reader.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vnc_networks/connectome_reader.py b/vnc_networks/connectome_reader.py index 84219bd..ef92f43 100644 --- a/vnc_networks/connectome_reader.py +++ b/vnc_networks/connectome_reader.py @@ -507,7 +507,7 @@ def _load_connections(self) -> pd.DataFrame: Needs to gather the columns ['start_bid', 'end_bid', 'syn_count', 'nt_type']. """ # Loading data in the connections file - columns = ["start_bid", "end_bid", "syn_count"] + columns: list[NeuronAttribute] = ["start_bid", "end_bid", "syn_count"] columns_to_read = [self.sna(a) for a in columns] connections = pd.read_feather(self._connections_file, columns=columns_to_read) read_columns = ( @@ -726,7 +726,7 @@ def get_neuron_bodyids( key ] # can be 'sensory' or 'sensory neuron' try: # will work if a generic NeuronClass is given - specific_value = self.specific_neuron_class(requested_value) + specific_value = self.specific_neuron_class(requested_value) # type: ignore requested_value might be a generic NeuronClass, or if not a specific class already except KeyError: # will work if a specific NeuronClass is given specific_value = requested_value neurons = neurons[neurons[self._class_1] == specific_value] From e291cc46bcb91dfe01b5657853ebff2fe4af2762 Mon Sep 17 00:00:00 2001 From: Dom Date: Fri, 11 Apr 2025 15:10:42 +0200 Subject: [PATCH 05/10] Fix bug with `Connections.get_neuron_ids` not working when providing `None` as selection_dict --- tests/test_local/test_data_loading.py | 30 +++++++++++++++++++++++++++ vnc_networks/connections.py | 5 +++-- 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/tests/test_local/test_data_loading.py b/tests/test_local/test_data_loading.py index 559dd50..c9182d7 100644 --- a/tests/test_local/test_data_loading.py +++ b/tests/test_local/test_data_loading.py @@ -3,6 +3,9 @@ """ +from vnc_networks.params import SelectionDict + + class TestDataLoading: """ Test the data loading functions. @@ -58,3 +61,30 @@ def test_connections_instantiation_MANCv1_2(self): df_2 = df[(df["start_bid"] == 10725) & (df["end_bid"] == 10439)] assert df_2["eff_weight"].values[0] == -1080, "Incorrect nt_type handling" + + def test_connections_getting_neuron_ids_MANCv1_2(self): + """ + Test that we get the same results if we get uids or bodyids and convert between the two. 
+ """ + import vnc_networks + from vnc_networks.connections import Connections + + # Instantiate a Connections object + connections = Connections(vnc_networks.connectome_reader.MANC("v1.2")) + + # test a few different selection_dicts + selection_dicts: list[SelectionDict | None] = [ + None, + {}, + {"class_1": "descending"}, + {"class_1": "ascending", "nt_type": "GABA"}, + ] + for selection_dict in selection_dicts: + body_ids = connections.get_neuron_bodyids(selection_dict) + uids = connections.get_neuron_ids(selection_dict) + assert ( + set(connections.get_uids_from_bodyids(body_ids)) == set(uids) + ), f"Getting bodyids and converting to uids doesn't match with selection_dict {selection_dict}." + assert ( + set(connections.get_bodyids_from_uids(uids)) == set(body_ids) + ), f"Getting uids and converting to bodyids doesn't match with selection_dict {selection_dict}." diff --git a/vnc_networks/connections.py b/vnc_networks/connections.py index f2ec465..53a1d60 100644 --- a/vnc_networks/connections.py +++ b/vnc_networks/connections.py @@ -977,8 +977,9 @@ def get_neuron_ids( Get the neuron IDs from the nodes dataframe as loaded in the initial dataset, based on a selection dictionary. """ - nodes = self.get_nodes(type="body_id") - body_ids = self.CR.get_neuron_bodyids(selection_dict, nodes) + body_ids = self.get_nodes(type="body_id") + if selection_dict is not None: + body_ids = self.CR.get_neuron_bodyids(selection_dict, body_ids) return self.__get_uids_from_bodyids(body_ids) def get_neurons_pre(self): From 60d1db4574b16515896ab33fe224160da160f32a Mon Sep 17 00:00:00 2001 From: Dom Date: Fri, 11 Apr 2025 15:20:34 +0200 Subject: [PATCH 06/10] Define the `nt_weights` dict in the base `ConnectomeReader` class to fix type hinting error --- vnc_networks/connectome_reader.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/vnc_networks/connectome_reader.py b/vnc_networks/connectome_reader.py index ef92f43..d72155d 100644 --- a/vnc_networks/connectome_reader.py +++ b/vnc_networks/connectome_reader.py @@ -10,7 +10,8 @@ import os import typing from abc import ABC, abstractmethod -from typing import Optional +from collections.abc import Mapping +from typing import Any, Optional import numpy as np import pandas as pd @@ -54,6 +55,9 @@ class ConnectomeReader(ABC): _ascending = "ascending" _descending = "descending" + # connectomes need to implement weight assignment for their neurotransmitters + nt_weights: Mapping[Any, int] + def __init__( self, connectome_name: str, From 669f9d007bd92667950318cb1b29ec3607718c7f Mon Sep 17 00:00:00 2001 From: Dom Date: Fri, 11 Apr 2025 15:42:42 +0200 Subject: [PATCH 07/10] Make `specific_neuron_class` throw a `KeyError` instead of a `ValueError` because that's what `get_neuron_bodyids` catches --- vnc_networks/connectome_reader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vnc_networks/connectome_reader.py b/vnc_networks/connectome_reader.py index d72155d..8a3b25e 100644 --- a/vnc_networks/connectome_reader.py +++ b/vnc_networks/connectome_reader.py @@ -652,7 +652,7 @@ def specific_neuron_class(self, generic_n_c: NeuronClass): if converted_type is None: raise KeyError except KeyError: - raise ValueError( + raise KeyError( f"ConnectomeReader::specific_neuron_class().\ The class {generic_n_c} is not defined in {self.connectome_name}." 
) From ab01e746d785f46bf2ee8cd701ffe01e9d92faf4 Mon Sep 17 00:00:00 2001 From: Dom Date: Fri, 11 Apr 2025 16:09:38 +0200 Subject: [PATCH 08/10] Add `root_side` parameter to MANC connectome reader Fixes #52 --- vnc_networks/connectome_reader.py | 6 ++++++ vnc_networks/params.py | 1 + 2 files changed, 7 insertions(+) diff --git a/vnc_networks/connectome_reader.py b/vnc_networks/connectome_reader.py index 8a3b25e..11d23bc 100644 --- a/vnc_networks/connectome_reader.py +++ b/vnc_networks/connectome_reader.py @@ -455,6 +455,7 @@ class MANCReader(ConnectomeReader): _nb_post_synapses: str _nb_pre_neurons: str _nb_post_neurons: str + _root_side: str def __init__( self, @@ -548,6 +549,7 @@ def list_possible_attributes(self) -> list[str]: "name", "type", "side", + "root_side", "neuropil", "hemilineage", "size", @@ -584,6 +586,7 @@ def sna( # specific_neuron_attribute, abbreviated due to frequent use "nb_pre_neurons": self._nb_pre_neurons, "nb_post_neurons": self._nb_post_neurons, "location": self._location, # synapse position + "root_side": self._root_side, } try: converted_type = mapping.get(generic_n_a) @@ -617,6 +620,7 @@ def decode_neuron_attribute(self, specific_attribute: str) -> NeuronAttribute: self._nb_pre_neurons: "nb_pre_neurons", self._nb_post_neurons: "nb_post_neurons", self._location: "location", # synapse position + self._root_side: "root_side", } try: converted_attr = mapping.get(specific_attribute) @@ -854,6 +858,7 @@ def _load_specific_namefields(self): self._nb_post_synapses = "post:int" self._nb_pre_neurons = "upstream:int" self._nb_post_neurons = "downstream:int" + self._root_side = "rootSide:string" # Synapse specific self._start_synset_id = ":START_ID(SynSet-ID)" self._end_synset_id = ":END_ID(SynSet-ID)" @@ -1182,6 +1187,7 @@ def _load_specific_namefields(self): self._nb_post_synapses = "post" self._nb_pre_neurons = "upstream" self._nb_post_neurons = "downstream" + self._root_side = "rootSide" # Synapse specific self._syn_id = "synapse_id" diff --git a/vnc_networks/params.py b/vnc_networks/params.py index e474acd..aab262c 100644 --- a/vnc_networks/params.py +++ b/vnc_networks/params.py @@ -129,6 +129,7 @@ "type", # morphology "side", # common to all + "root_side", # only MANC "neuropil", # common to all "size", # common to all "area", From 25a53e75d0d8ac4364aae1d8c46edefc473b2bfb Mon Sep 17 00:00:00 2001 From: Dom Date: Thu, 24 Apr 2025 17:19:46 +0200 Subject: [PATCH 09/10] Add notebook investigating missing synapses in FAFB graph --- scripts/fafb_missing_some_synapses.ipynb | 595 +++++++++++++++++++++++ 1 file changed, 595 insertions(+) create mode 100644 scripts/fafb_missing_some_synapses.ipynb diff --git a/scripts/fafb_missing_some_synapses.ipynb b/scripts/fafb_missing_some_synapses.ipynb new file mode 100644 index 0000000..be3b825 --- /dev/null +++ b/scripts/fafb_missing_some_synapses.ipynb @@ -0,0 +1,595 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "946dc5c7", + "metadata": {}, + "source": [ + "# It looks like the FAFB graph doesn't have all the connections that the connections table does" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "4e1fc82e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Attribute class_1 not found in the graph. 
Adding it.\n" + ] + } + ], + "source": [ + "import polars as pl\n", + "\n", + "import vnc_networks\n", + "\n", + "connections = vnc_networks.connections.Connections(\n", + " CR=vnc_networks.connectome_reader.FAFB_v783()\n", + ")\n", + "connections_table = pl.from_pandas(connections.connections)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "15eb1f75", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "connections table has 2709829 connections\n", + "connections graph has 2484163 connections\n" + ] + } + ], + "source": [ + "print(f\"connections table has {len(connections_table)} connections\")\n", + "print(f\"connections graph has {len(connections.graph.edges)} connections\")" + ] + }, + { + "cell_type": "markdown", + "id": "a484a36f", + "metadata": {}, + "source": [ + "It turns out that the FAFB connections table sometimes has multiple entries between the same pair of neurons.\n", + "\n", + "If we check the documentation [here](https://codex.flywire.ai/api/download) it says \"More than one row can be present for the same pair of cells if they synapse in multiple neuropils (regions).\"" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "f2166d15", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (169_972, 3)
[HTML table rows elided — columns: start_bid, end_bid, number_of_connections; the full table is rendered in the text/plain output below]
" + ], + "text/plain": [ + "shape: (169_972, 3)\n", + "┌────────────────────┬────────────────────┬───────────────────────┐\n", + "│ start_bid ┆ end_bid ┆ number_of_connections │\n", + "│ --- ┆ --- ┆ --- │\n", + "│ i64 ┆ i64 ┆ u32 │\n", + "╞════════════════════╪════════════════════╪═══════════════════════╡\n", + "│ 720575940624402173 ┆ 720575940622160705 ┆ 12 │\n", + "│ 720575940620540507 ┆ 720575940618932763 ┆ 11 │\n", + "│ 720575940612718563 ┆ 720575940623122125 ┆ 11 │\n", + "│ 720575940639242303 ┆ 720575940622160705 ┆ 11 │\n", + "│ 720575940622915060 ┆ 720575940616551029 ┆ 10 │\n", + "│ … ┆ … ┆ … │\n", + "│ 720575940613455986 ┆ 720575940637085503 ┆ 2 │\n", + "│ 720575940620795784 ┆ 720575940607689394 ┆ 2 │\n", + "│ 720575940609658377 ┆ 720575940630758418 ┆ 2 │\n", + "│ 720575940630403269 ┆ 720575940636250292 ┆ 2 │\n", + "│ 720575940633593548 ┆ 720575940633923479 ┆ 2 │\n", + "└────────────────────┴────────────────────┴───────────────────────┘" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "connections_table.group_by([\"start_bid\", \"end_bid\"]).agg(\n", + " pl.col(\"syn_count\").len().alias(\"number_of_connections\")\n", + ").filter(pl.col(\"number_of_connections\") > 1).sort(\n", + " \"number_of_connections\", descending=True\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "57b25dcd", + "metadata": {}, + "source": [ + "Looking at all the connections between the first pair of neurons:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "e574e2d7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (12, 11)
[HTML table rows elided — columns: start_bid, end_bid, syn_count, nt_type, eff_weight, subdivision_start, subdivision_end, syn_count_norm, eff_weight_norm, start_uid, end_uid; the same rows are rendered in the text/plain output below]
" + ], + "text/plain": [ + "shape: (12, 11)\n", + "┌────────────┬────────────┬───────────┬─────────┬───┬────────────┬───────────┬───────────┬─────────┐\n", + "│ start_bid ┆ end_bid ┆ syn_count ┆ nt_type ┆ … ┆ syn_count_ ┆ eff_weigh ┆ start_uid ┆ end_uid │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ norm ┆ t_norm ┆ --- ┆ --- │\n", + "│ i64 ┆ i64 ┆ i64 ┆ str ┆ ┆ --- ┆ --- ┆ i64 ┆ i64 │\n", + "│ ┆ ┆ ┆ ┆ ┆ f64 ┆ f64 ┆ ┆ │\n", + "╞════════════╪════════════╪═══════════╪═════════╪═══╪════════════╪═══════════╪═══════════╪═════════╡\n", + "│ 7205759406 ┆ 7205759406 ┆ 45 ┆ ACH ┆ … ┆ 0.034695 ┆ 0.034695 ┆ 131982 ┆ 72220 │\n", + "│ 24402173 ┆ 22160705 ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ 7205759406 ┆ 7205759406 ┆ 12 ┆ ACH ┆ … ┆ 0.009252 ┆ 0.009252 ┆ 131982 ┆ 72220 │\n", + "│ 24402173 ┆ 22160705 ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ 7205759406 ┆ 7205759406 ┆ 11 ┆ ACH ┆ … ┆ 0.008481 ┆ 0.008481 ┆ 131982 ┆ 72220 │\n", + "│ 24402173 ┆ 22160705 ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ 7205759406 ┆ 7205759406 ┆ 6 ┆ ACH ┆ … ┆ 0.004626 ┆ 0.004626 ┆ 131982 ┆ 72220 │\n", + "│ 24402173 ┆ 22160705 ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ 7205759406 ┆ 7205759406 ┆ 13 ┆ ACH ┆ … ┆ 0.010023 ┆ 0.010023 ┆ 131982 ┆ 72220 │\n", + "│ 24402173 ┆ 22160705 ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", + "│ 7205759406 ┆ 7205759406 ┆ 6 ┆ ACH ┆ … ┆ 0.004626 ┆ 0.004626 ┆ 131982 ┆ 72220 │\n", + "│ 24402173 ┆ 22160705 ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ 7205759406 ┆ 7205759406 ┆ 25 ┆ ACH ┆ … ┆ 0.019275 ┆ 0.019275 ┆ 131982 ┆ 72220 │\n", + "│ 24402173 ┆ 22160705 ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ 7205759406 ┆ 7205759406 ┆ 35 ┆ ACH ┆ … ┆ 0.026985 ┆ 0.026985 ┆ 131982 ┆ 72220 │\n", + "│ 24402173 ┆ 22160705 ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ 7205759406 ┆ 7205759406 ┆ 61 ┆ ACH ┆ … ┆ 0.047032 ┆ 0.047032 ┆ 131982 ┆ 72220 │\n", + "│ 24402173 ┆ 22160705 ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ 7205759406 ┆ 7205759406 ┆ 33 ┆ ACH ┆ … ┆ 0.025443 ┆ 0.025443 ┆ 131982 ┆ 72220 │\n", + "│ 24402173 ┆ 22160705 ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "└────────────┴────────────┴───────────┴─────────┴───┴────────────┴───────────┴───────────┴─────────┘" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "connections_table.filter(\n", + " (pl.col(\"start_bid\") == 720575940624402173)\n", + " & (pl.col(\"end_bid\") == 720575940622160705)\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "fd5bb318", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'syn_count': 33,\n", + " 'nt_type': 'ACH',\n", + " 'eff_weight': 33,\n", + " 'syn_count_norm': 0.025443330763299923,\n", + " 'eff_weight_norm': 0.025443330763299923,\n", + " 'weight': 33}" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "connections.graph.edges[131982, 72220]\n" + ] + }, + { + "cell_type": "markdown", + "id": "e5de8e7c", + "metadata": {}, + "source": [ + "The graph just remembered that last connection in the table - the rest were dropped by networkx" + ] + }, + { + "cell_type": "markdown", + "id": "d7fa48e6", + "metadata": {}, + "source": [ + "## Quick check for MANC" + ] + }, + { + "cell_type": "markdown", + "id": "d91b9b1f", + "metadata": {}, + "source": [ + "MANC v1.2 must have already aggregated synapses over pairs of neurons (actually I think I did this when downloading the data from neuprint), so here there's no problem.\n", + "\n", + "This is because the neurotransmitter predictions are only applied at the level of neurons, so we first aggregate and then label the synapse type according to the neuron's neurotransmitter type." 
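    ,
    "\n",
    "\n",
    "As a rough, hypothetical sketch of that \"aggregate, then label\" step (names are assumptions: `connections_df` would be a per-row connections table with the columns used above, and `neuron_nt` an assumed per-neuron neurotransmitter lookup with columns `body_id` and `nt_type` — this is not the exact code used to build the dataset):\n",
    "\n",
    "```python\n",
    "import pandas as pd\n",
    "\n",
    "def aggregate_then_label(connections_df: pd.DataFrame, neuron_nt: pd.DataFrame) -> pd.DataFrame:\n",
    "    # sum synapse counts over duplicate (start_bid, end_bid) pairs\n",
    "    edges = connections_df.groupby([\"start_bid\", \"end_bid\"], as_index=False)[\"syn_count\"].sum()\n",
    "    # label each aggregated edge with the presynaptic neuron's neuron-level nt_type\n",
    "    return edges.merge(neuron_nt.rename(columns={\"body_id\": \"start_bid\"}), on=\"start_bid\", how=\"left\")\n",
    "```"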
+ ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "676a805a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Attribute class_1 not found in the graph. Adding it.\n", + "connections table has 1372588 connections with 24151003 synapses\n", + "connections graph has 1372588 connections with 24151003 synapses\n" + ] + } + ], + "source": [ + "def check_connections_table_and_graph_count(\n", + " connectome_reader: vnc_networks.connectome_reader.ConnectomeReader,\n", + "):\n", + " c = vnc_networks.connections.Connections(connectome_reader)\n", + " print(\n", + " f\"connections table has {len(c.connections)} connections with {c.connections[\"syn_count\"].sum()} synapses\"\n", + " )\n", + " print(\n", + " f\"connections graph has {len(c.graph.edges)} connections with {sum(\n", + " c.graph.edges[e][\"syn_count\"] for e in c.graph.edges\n", + " )} synapses\"\n", + " )\n", + "\n", + "check_connections_table_and_graph_count(vnc_networks.connectome_reader.MANC_v_1_2())" + ] + }, + { + "cell_type": "markdown", + "id": "b87f95d5", + "metadata": {}, + "source": [ + "MANC v1.0 is all good too" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "819775ef", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Attribute class_1 not found in the graph. Adding it.\n", + "connections table has 1548657 connections with 27387970 synapses\n", + "connections graph has 1548657 connections with 27387970 synapses\n" + ] + } + ], + "source": [ + "check_connections_table_and_graph_count(vnc_networks.connectome_reader.MANC_v_1_0())" + ] + }, + { + "cell_type": "markdown", + "id": "d5823402", + "metadata": {}, + "source": [ + "## How many synapses are affected?" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "d064083e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "We lost 3549543 out of 31574890 synapses, which is 11.2%\n" + ] + } + ], + "source": [ + "connections_table_synapses = connections_table[\"syn_count\"].sum()\n", + "graph_synapses = sum(\n", + " connections.graph.edges[e][\"syn_count\"] for e in connections.graph.edges\n", + ")\n", + "\n", + "print(\n", + " f\"We lost {(diff := connections_table_synapses - graph_synapses)} out of {connections_table_synapses} synapses, which is {diff/connections_table_synapses*100:.1f}%\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "20f974c7", + "metadata": {}, + "source": [ + "Weirdly, there are 12285 neuron pairs that have multiple different neurotransmitters" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "id": "9647a987", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (12_285, 5)
[HTML table rows elided — columns: start_bid, end_bid, different_neurotransmitters, num_different_neurotransmitters, num_synapses; the full table is rendered in the text/plain output below]
" + ], + "text/plain": [ + "shape: (12_285, 5)\n", + "┌────────────────────┬────────────────────┬────────────────────┬────────────────────┬──────────────┐\n", + "│ start_bid ┆ end_bid ┆ different_neurotra ┆ num_different_neur ┆ num_synapses │\n", + "│ --- ┆ --- ┆ nsmitters ┆ otransmitter… ┆ --- │\n", + "│ i64 ┆ i64 ┆ --- ┆ --- ┆ i64 │\n", + "│ ┆ ┆ list[struct[2]] ┆ u32 ┆ │\n", + "╞════════════════════╪════════════════════╪════════════════════╪════════════════════╪══════════════╡\n", + "│ 720575940623317321 ┆ 720575940639278399 ┆ [{\"GLUT\",2}, ┆ 4 ┆ 156 │\n", + "│ ┆ ┆ {\"GABA\",3}, … {\"A… ┆ ┆ │\n", + "│ 720575940641501648 ┆ 720575940621268651 ┆ [{\"SER\",1}, ┆ 4 ┆ 57 │\n", + "│ ┆ ┆ {\"GABA\",1}, … ┆ ┆ │\n", + "│ ┆ ┆ {\"AC… ┆ ┆ │\n", + "│ 720575940631763909 ┆ 720575940613096089 ┆ [{\"GABA\",1}, ┆ 4 ┆ 49 │\n", + "│ ┆ ┆ {\"GLUT\",1}, … {\"A… ┆ ┆ │\n", + "│ 720575940641501648 ┆ 720575940629399370 ┆ [{\"GABA\",2}, ┆ 3 ┆ 76 │\n", + "│ ┆ ┆ {\"ACH\",1}, {\"GLUT… ┆ ┆ │\n", + "│ 720575940622263066 ┆ 720575940607384242 ┆ [{\"GLUT\",1}, ┆ 3 ┆ 25 │\n", + "│ ┆ ┆ {\"ACH\",1}, {\"GABA… ┆ ┆ │\n", + "│ … ┆ … ┆ … ┆ … ┆ … │\n", + "│ 720575940610384082 ┆ 720575940623682195 ┆ [{\"ACH\",4}, ┆ 2 ┆ 39 │\n", + "│ ┆ ┆ {\"SER\",1}] ┆ ┆ │\n", + "│ 720575940627209990 ┆ 720575940624202424 ┆ [{\"GABA\",1}, ┆ 2 ┆ 10 │\n", + "│ ┆ ┆ {\"GLUT\",1}] ┆ ┆ │\n", + "│ 720575940617505629 ┆ 720575940618392912 ┆ [{\"GLUT\",1}, ┆ 2 ┆ 48 │\n", + "│ ┆ ┆ {\"GABA\",2}] ┆ ┆ │\n", + "│ 720575940621801866 ┆ 720575940609819512 ┆ [{\"GLUT\",2}, ┆ 2 ┆ 27 │\n", + "│ ┆ ┆ {\"GABA\",1}] ┆ ┆ │\n", + "│ 720575940632032460 ┆ 720575940624705514 ┆ [{\"GLUT\",1}, ┆ 2 ┆ 51 │\n", + "│ ┆ ┆ {\"GABA\",1}] ┆ ┆ │\n", + "└────────────────────┴────────────────────┴────────────────────┴────────────────────┴──────────────┘" + ] + }, + "execution_count": 60, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "connections_table.group_by([\"start_bid\", \"end_bid\"]).agg(\n", + " pl.col(\"nt_type\").value_counts().alias(\"different_neurotransmitters\"),\n", + " pl.col(\"nt_type\").n_unique().alias(\"num_different_neurotransmitters\"),\n", + " pl.col(\"syn_count\").sum().alias(\"num_synapses\"),\n", + ").filter(pl.col(\"num_different_neurotransmitters\") > 1).sort(\n", + " pl.col(\"num_different_neurotransmitters\"), descending=True\n", + ")\n" + ] + }, + { + "cell_type": "markdown", + "id": "0ff833e3", + "metadata": {}, + "source": [ + "If we count the total number of synapses per neurotransmitter type, things look kind of messy. We even have a lot of neurons that have multiple of acetylcholine, glutamate and GABA, which shouldn't really be possible? Maybe it's just mislabelling? Would need to check the [neurotransmitter paper](https://doi.org/10.1016/j.cell.2024.03.016)...\n", + "\n", + "I think it could be important to know that a neuron which primarily uses a fast acting neurotransmitter can also have neuromodulatory connections, and it would be nice to keep these." + ] + }, + { + "cell_type": "code", + "execution_count": 145, + "id": "1824ca1f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (12_285, 9)
[HTML table rows elided — columns: start_bid, end_bid, num_connections_per_nt, ACH, GLUT, GABA, SER, DA, OCT; the same rows are rendered in the text/plain output below]
" + ], + "text/plain": [ + "shape: (12_285, 9)\n", + "┌───────────────────┬───────────────────┬───────────────────┬──────┬───┬──────┬──────┬──────┬──────┐\n", + "│ start_bid ┆ end_bid ┆ num_connections_p ┆ ACH ┆ … ┆ GABA ┆ SER ┆ DA ┆ OCT │\n", + "│ --- ┆ --- ┆ er_nt ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ i64 ┆ i64 ┆ --- ┆ i64 ┆ ┆ i64 ┆ i64 ┆ i64 ┆ i64 │\n", + "│ ┆ ┆ list[struct[2]] ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "╞═══════════════════╪═══════════════════╪═══════════════════╪══════╪═══╪══════╪══════╪══════╪══════╡\n", + "│ 72057594063176390 ┆ 72057594061309608 ┆ [{\"GABA\",1}, ┆ 33 ┆ … ┆ 6 ┆ 5 ┆ null ┆ null │\n", + "│ 9 ┆ 9 ┆ {\"GLUT\",1}, … ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ ┆ ┆ {\"S… ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ 72057594064150164 ┆ 72057594062126865 ┆ [{\"ACH\",1}, ┆ 5 ┆ … ┆ 39 ┆ 5 ┆ null ┆ null │\n", + "│ 8 ┆ 1 ┆ {\"SER\",1}, … ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ ┆ ┆ {\"GLU… ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ 72057594062331732 ┆ 72057594063927839 ┆ [{\"GLUT\",1}, ┆ 11 ┆ … ┆ 86 ┆ null ┆ 6 ┆ null │\n", + "│ 1 ┆ 9 ┆ {\"GABA\",1}, … ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ ┆ ┆ {\"A… ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ 72057594063091199 ┆ 72057594062531001 ┆ [{\"GLUT\",1}, ┆ 7 ┆ … ┆ 7 ┆ null ┆ null ┆ null │\n", + "│ 1 ┆ 4 ┆ {\"GABA\",1}, ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ ┆ ┆ {\"ACH… ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ 72057594062226306 ┆ 72057594062054302 ┆ [{\"GABA\",1}, ┆ 8 ┆ … ┆ 31 ┆ null ┆ null ┆ null │\n", + "│ 6 ┆ 5 ┆ {\"GLUT\",1}, ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ ┆ ┆ {\"ACH… ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", + "│ 72057594063335688 ┆ 72057594063631602 ┆ [{\"GLUT\",1}, ┆ null ┆ … ┆ 23 ┆ null ┆ null ┆ null │\n", + "│ 3 ┆ 3 ┆ {\"GABA\",1}] ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ 72057594063907197 ┆ 72057594062081801 ┆ [{\"GABA\",1}, ┆ 7 ┆ … ┆ 6 ┆ null ┆ null ┆ null │\n", + "│ 7 ┆ 7 ┆ {\"ACH\",1}] ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ 72057594063002674 ┆ 72057594061719036 ┆ [{\"GLUT\",1}, ┆ null ┆ … ┆ 16 ┆ null ┆ null ┆ null │\n", + "│ 5 ┆ 1 ┆ {\"GABA\",1}] ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ 72057594062699588 ┆ 72057594062901035 ┆ [{\"ACH\",1}, ┆ 23 ┆ … ┆ 25 ┆ null ┆ null ┆ null │\n", + "│ 0 ┆ 6 ┆ {\"GABA\",1}] ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ 72057594062343372 ┆ 72057594060408828 ┆ [{\"ACH\",1}, ┆ 36 ┆ … ┆ null ┆ 5 ┆ null ┆ null │\n", + "│ 5 ┆ 8 ┆ {\"SER\",1}] ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "└───────────────────┴───────────────────┴───────────────────┴──────┴───┴──────┴──────┴──────┴──────┘" + ] + }, + "execution_count": 145, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "connections_table.group_by([\"start_bid\", \"end_bid\", \"nt_type\"]).agg(\n", + " pl.col(\"syn_count\").sum().alias(\"num_nt_synapses\"),\n", + ").group_by([\"start_bid\", \"end_bid\"]).agg(\n", + " pl.struct(\"nt_type\", \"num_nt_synapses\").alias(\"num_synapses_per_nt\"),\n", + " pl.col(\"nt_type\").value_counts().alias(\"num_connections_per_nt\"),\n", + ").filter(pl.col(\"num_synapses_per_nt\").list.len() > 1).explode(\n", + " \"num_synapses_per_nt\"\n", + ").unnest(\"num_synapses_per_nt\").pivot(\n", + " on=\"nt_type\",\n", + " index=[\"start_bid\", \"end_bid\", \"num_connections_per_nt\"],\n", + " values=\"num_nt_synapses\",\n", + ").sort(pl.col(\"num_connections_per_nt\").list.len(), descending=True)\n" + ] + }, + { + "cell_type": "markdown", + "id": "c95ff1ba", + "metadata": {}, + "source": [ + "## What can we do?\n", + "\n", + "If all the neurotransmitter types for the same neuron pair were the same we could just aggregate the synapse counts, but this isn't the case...\n", + "\n", + "One easy thing we can do that maybe works is to use the networkx `MultiDiGraph` - a directed graph that can have multiple edges between pairs of 
nodes. Maybe this works with some things and not others though?" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "d3d60a29", + "metadata": {}, + "outputs": [], + "source": [ + "import networkx as nx\n", + "\n", + "fafb_multigraph = nx.from_pandas_edgelist(\n", + " connections.connections,\n", + " source=\"start_uid\",\n", + " target=\"end_uid\",\n", + " edge_attr=[\n", + " \"syn_count\", # absolute synapse count\n", + " \"nt_type\",\n", + " \"eff_weight\", # signed synapse count (nt weighted)\n", + " \"syn_count_norm\", # input normalized synapse count\n", + " \"eff_weight_norm\", # input normalized signed synapse count\n", + " ],\n", + " create_using=nx.MultiDiGraph,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "4ee1b8eb", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "connections table has 2709829 connections with 31574890 synapses\n", + "connections graph has 2709829 connections with 31574890 synapses\n" + ] + } + ], + "source": [ + "print(\n", + " f\"connections table has {len(connections.connections)} connections with {connections.connections[\"syn_count\"].sum()} synapses\"\n", + " )\n", + "print(\n", + " f\"connections graph has {len(fafb_multigraph.edges)} connections with {sum(\n", + " fafb_multigraph.edges[e][\"syn_count\"] for e in fafb_multigraph.edges\n", + ")} synapses\"\n", + ")\n" + ] + }, + { + "cell_type": "markdown", + "id": "7c2c16fe", + "metadata": {}, + "source": [ + "Whether this works for all the methods that operate on the graph, I'm not sure" + ] + }, + { + "cell_type": "markdown", + "id": "25bccb83", + "metadata": {}, + "source": [ + "Alternatively, we can reuse the existing functionality of splitting neurons to split each neuron with multiple different neurotransmitter outputs into \"virtual neurons\", each with only one type of neurotransmitter.\n", + "\n", + "Then everything still fits in a normal digraph, and we can still represent it as an adjacency matrix" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 1cf21a33e9b4ec9f2f06f81daf06aed30d8ebaf4 Mon Sep 17 00:00:00 2001 From: Dom Date: Thu, 24 Apr 2025 20:07:15 +0200 Subject: [PATCH 10/10] Add function to get neuron and synapse counts by neuropil --- tests/test_local/test_data_loading.py | 162 ++++++++++++++++++ vnc_networks/connectome_reader.py | 227 ++++++++++++++++++++++++++ 2 files changed, 389 insertions(+) diff --git a/tests/test_local/test_data_loading.py b/tests/test_local/test_data_loading.py index c9182d7..c107e32 100644 --- a/tests/test_local/test_data_loading.py +++ b/tests/test_local/test_data_loading.py @@ -88,3 +88,165 @@ def test_connections_getting_neuron_ids_MANCv1_2(self): assert ( set(connections.get_bodyids_from_uids(uids)) == set(body_ids) ), f"Getting uids and converting to bodyids doesn't match with selection_dict {selection_dict}." 
+ + def test_getting_counts_by_neuropil_MANCv1_2(self): + """ + Test that we can get neuron and synapse counts by neuropil + """ + import pandas as pd + + import vnc_networks + + # Instantiate a Connections object + connectome_reader = vnc_networks.connectome_reader.MANC("v1.2") + + # check that this matches what we expect + pd.testing.assert_frame_equal( + connectome_reader.get_synapse_counts_by_neuropil( + "downstream", [10000, 23458] + ), + pd.DataFrame( + { + "body_id": [10000, 23458], + "CV": [703, 0], + "IntTct": [313, 0], + "LTct": [3181, 0], + "LegNp(T3)(R)": [0, 1688], + } + ), + ) + pd.testing.assert_frame_equal( + connectome_reader.get_synapse_counts_by_neuropil( + "upstream", [10000, 23458] + ), + pd.DataFrame( + { + "body_id": [10000, 23458], + "CV": [224, 0], + "IntTct": [185, 0], + "LTct": [1462, 0], + "LegNp(T3)(R)": [0, 685], + } + ), + ) + pd.testing.assert_frame_equal( + connectome_reader.get_synapse_counts_by_neuropil("pre", [10000, 23458]), + pd.DataFrame( + { + "body_id": [10000, 23458], + "CV": [138, 0], + "IntTct": [73, 0], + "LTct": [752, 0], + "LegNp(T3)(R)": [0, 207], + } + ), + ) + pd.testing.assert_frame_equal( + connectome_reader.get_synapse_counts_by_neuropil("post", [10000, 23458]), + pd.DataFrame( + { + "body_id": [10000, 23458], + "CV": [224, 0], + "IntTct": [185, 0], + "LTct": [1462, 0], + "LegNp(T3)(R)": [0, 685], + } + ), + ) + pd.testing.assert_frame_equal( + connectome_reader.get_synapse_counts_by_neuropil( + "total_synapses", [10000, 23458] + ), + pd.DataFrame( + { + "body_id": [10000, 23458], + "CV": [927, 0], + "IntTct": [498, 0], + "LTct": [4643, 0], + "LegNp(T3)(R)": [0, 2373], + } + ), + ) + + def test_getting_counts_by_neuropil_FAFBv783(self): + """ + Test that we can get neuron and synapse counts by neuropil + """ + import pandas as pd + + import vnc_networks + + # Instantiate a Connections object + connectome_reader = vnc_networks.connectome_reader.FAFB_v783() + + # check that this matches what we expect + pd.testing.assert_frame_equal( + connectome_reader.get_synapse_counts_by_neuropil( + "downstream", [720575940627036426, 720575940633587552] + ), + pd.DataFrame( + { + "body_id": [720575940627036426, 720575940633587552], + "LOP_L": [9, 0], + "LO_L": [2, 0], + "SLP_R": [0, 2], + "SMP_R": [0, 31], + } + ), + ) + pd.testing.assert_frame_equal( + connectome_reader.get_synapse_counts_by_neuropil( + "upstream", [720575940627036426, 720575940633587552] + ), + pd.DataFrame( + { + "body_id": [720575940627036426, 720575940633587552], + "LOP_L": [3, 0], + "LO_L": [9, 0], + "SLP_R": [0, 11], + "SMP_R": [0, 13], + } + ), + ) + pd.testing.assert_frame_equal( + connectome_reader.get_synapse_counts_by_neuropil( + "pre", [720575940627036426, 720575940633587552] + ), + pd.DataFrame( + { + "body_id": [720575940627036426, 720575940633587552], + "LOP_L": [14, 0], + "LO_L": [83, 0], + "SLP_R": [0, 33], + "SMP_R": [0, 75], + } + ), + ) + pd.testing.assert_frame_equal( + connectome_reader.get_synapse_counts_by_neuropil( + "post", [720575940627036426, 720575940633587552] + ), + pd.DataFrame( + { + "body_id": [720575940627036426, 720575940633587552], + "LOP_L": [100, 0], + "LO_L": [8, 0], + "SLP_R": [0, 4], + "SMP_R": [0, 284], + } + ), + ) + pd.testing.assert_frame_equal( + connectome_reader.get_synapse_counts_by_neuropil( + "total_synapses", [720575940627036426, 720575940633587552] + ), + pd.DataFrame( + { + "body_id": [720575940627036426, 720575940633587552], + "LOP_L": [114, 0], + "LO_L": [91, 0], + "SLP_R": [0, 37], + "SMP_R": [0, 359], + } + ), + ) \ No newline 
at end of file diff --git a/vnc_networks/connectome_reader.py b/vnc_networks/connectome_reader.py index 11d23bc..a15b174 100644 --- a/vnc_networks/connectome_reader.py +++ b/vnc_networks/connectome_reader.py @@ -7,6 +7,7 @@ data types to the specific ones of the connectome. """ +import ast import os import typing from abc import ABC, abstractmethod @@ -151,6 +152,35 @@ def get_synapse_neuropil( """ ... + @abstractmethod + def get_synapse_counts_by_neuropil( + self, + synapse_count_type: typing.Literal[ + "downstream", "upstream", "pre", "post", "total_synapses" + ], + body_id_subset: list[BodyId] | list[int] | None = None, + ) -> pd.DataFrame: + """ + Get neuron or synapse counts for each neuron in each neuropil + + Args: + synapse_count_type (typing.Literal[ "downstream", "upstream", "pre", "post", "total_synapses"]): which count to get + * `"downstream"` number of downstream neurons + * `"upstream"` number of upstream neurons + * `"pre"` number of presynaptic synapses (ie. synapses to upstream neurons) + * `"post"` number of postsynaptic synapses (ie. synapses to downstream neurons) + * `"total_synapses"` total number of synapses, sum of pre and post + + body_id_subset (list[BodyId] | list[int] | None, optional): Only return counts for a certain set of neurons. + If None, return counts for all. Defaults to None. + + Returns: + pd.DataFrame: a table [body_id, rois...] with the counts for each neuropil for each body_id. + **Note:** ROI columns won't be returned if no neurons have a count in that column (ie. if specifying + a small number of neurons for `body_id_subset`). + """ + ... + @abstractmethod def list_possible_attributes(self) -> list[str]: """ @@ -1152,6 +1182,34 @@ def get_synapse_neuropil( data.loc[data["synapse_id"].isin(synapses_in_roi), "neuropil"] = roi return data + def get_synapse_counts_by_neuropil( + self, + synapse_count_type: typing.Literal[ + "downstream", "upstream", "pre", "post", "total_synapses" + ], + body_id_subset: list[BodyId] | list[int] | None = None, + ) -> pd.DataFrame: + """ + Get neuron or synapse counts for each neuron in each neuropil + + Args: + synapse_count_type (typing.Literal[ "downstream", "upstream", "pre", "post", "total_synapses"]): which count to get + * `"downstream"` number of downstream neurons + * `"upstream"` number of upstream neurons + * `"pre"` number of presynaptic synapses (ie. synapses to upstream neurons) + * `"post"` number of postsynaptic synapses (ie. synapses to downstream neurons) + * `"total_synapses"` total number of synapses, sum of pre and post + + body_id_subset (list[BodyId] | list[int] | None, optional): Only return counts for a certain set of neurons. + If None, return counts for all. Defaults to None. + + Returns: + pd.DataFrame: a table [body_id, rois...] with the counts for each neuropil for each body_id. + **Note:** ROI columns won't be returned if no neurons have a count in that column (ie. if specifying + a small number of neurons for `body_id_subset`). 
+ """ + raise NotImplementedError("Not sure how to get this for MANCv1.0 yet...") + class MANC_v_1_2(MANCReader): def __init__( @@ -1188,6 +1246,7 @@ def _load_specific_namefields(self): self._nb_pre_neurons = "upstream" self._nb_post_neurons = "downstream" self._root_side = "rootSide" + self._roi_info = "roiInfo" # Synapse specific self._syn_id = "synapse_id" @@ -1311,6 +1370,69 @@ def get_synapse_neuropil( synapses.columns = ["synapse_id", "neuropil"] return synapses + def get_synapse_counts_by_neuropil( + self, + synapse_count_type: typing.Literal[ + "downstream", "upstream", "pre", "post", "total_synapses" + ], + body_id_subset: list[BodyId] | list[int] | None = None, + ) -> pd.DataFrame: + """ + Get neuron or synapse counts for each neuron in each neuropil + + Args: + synapse_count_type (typing.Literal[ "downstream", "upstream", "pre", "post", "total_synapses"]): which count to get + * `"downstream"` number of downstream neurons + * `"upstream"` number of upstream neurons + * `"pre"` number of presynaptic synapses (ie. synapses to upstream neurons) + * `"post"` number of postsynaptic synapses (ie. synapses to downstream neurons) + * `"total_synapses"` total number of synapses, sum of pre and post + + body_id_subset (list[BodyId] | list[int] | None, optional): Only return counts for a certain set of neurons. + If None, return counts for all. Defaults to None. + + Returns: + pd.DataFrame: a table [body_id, rois...] with the counts for each neuropil for each body_id. + **Note:** ROI columns won't be returned if no neurons have a count in that column (ie. if specifying + a small number of neurons for `body_id_subset`). + """ + # the total synapses count is called "synweight" in MANC. Renamed it in the arguments so it's more intuitive + synapse_count_type_name = ( + "synweight" + if synapse_count_type == "total_synapses" + else synapse_count_type + ) + + roi_info_table = pd.read_feather( + self._nodes_file, [self._body_id, self._roi_info] + ) + if body_id_subset is not None: + # Note: this removes some ROIs from the columns... + roi_info_table = roi_info_table[ + roi_info_table[self._body_id].isin(body_id_subset) + ] + return ( + pd.DataFrame( + { + body_id: { + roi: ( + roi_connections[synapse_count_type_name] + if synapse_count_type_name in roi_connections + else 0 + ) + for roi, roi_connections in connections.items() + } + for body_id, connections in zip( + roi_info_table[self._body_id], + roi_info_table[self._roi_info].apply(ast.literal_eval).values, + ) + } + ) + .T.reset_index(names="body_id") + .fillna(0) + .astype(int) + ) + @typing.overload def MANC( @@ -1557,6 +1679,111 @@ def get_synapse_neuropil( For that, load "neuropil_synapse_table.csv"' ) + def get_synapse_counts_by_neuropil( + self, + synapse_count_type: typing.Literal[ + "downstream", "upstream", "pre", "post", "total_synapses" + ], + body_id_subset: list[BodyId] | list[int] | None = None, + ): + """ + Get neuron or synapse counts for each neuron in each neuropil + + Args: + synapse_count_type (typing.Literal[ "downstream", "upstream", "pre", "post", "total_synapses"]): which count to get + * `"downstream"` number of downstream neurons + * `"upstream"` number of upstream neurons + * `"pre"` number of presynaptic synapses (ie. synapses to upstream neurons) + * `"post"` number of postsynaptic synapses (ie. synapses to downstream neurons) + * `"total_synapses"` total number of synapses, sum of pre and post + + body_id_subset (list[BodyId] | list[int] | None, optional): Only return counts for a certain set of neurons. 
+ If None, return counts for all. Defaults to None. + + Returns: + pd.DataFrame: a table [body_id, rois...] with the counts for each neuropil for each body_id. + **Note:** ROI columns won't be returned if no neurons have a count in that column (ie. if specifying + a small number of neurons for `body_id_subset`). + """ + connections_table = pd.read_csv( + self._connections_file, + ) + + if synapse_count_type == "total_synapses": + if body_id_subset is not None: + # Note: this removes some ROIs from the columns... + connections_table = connections_table[ + connections_table[self._start_bid].isin(body_id_subset) + | connections_table[self._end_bid].isin(body_id_subset) + ] + # separately get the pre and post synapse counts then sum them + synapse_counts = ( + pd.concat( + [ + connections_table[ + [neuron_body_id_we_care_about, "neuropil", "syn_count"] + ] + .groupby([neuron_body_id_we_care_about, "neuropil"]) + .sum() + .reset_index() + .pivot( + index=neuron_body_id_we_care_about, + columns="neuropil", + values="syn_count", + ) + .reset_index(names="body_id") + .fillna(0) + .astype(int) + for neuron_body_id_we_care_about in [ + self._start_bid, + self._end_bid, + ] + ] + ) + .groupby("body_id") + .sum() + .reset_index() + .rename_axis(None, axis=1) # otherwise the index has a name + ) + if body_id_subset is not None: + synapse_counts = synapse_counts[ + synapse_counts["body_id"].isin(body_id_subset) + ].reset_index(drop=True) + return synapse_counts + elif synapse_count_type in ["downstream", "post"]: + neuron_body_id_we_care_about = self._start_bid + else: + neuron_body_id_we_care_about = self._end_bid + + if body_id_subset is not None: + # Note: this removes some ROIs from the columns... + connections_table = connections_table[ + connections_table[neuron_body_id_we_care_about].isin(body_id_subset) + ] + + if synapse_count_type in ["downstream", "upstream"]: + aggregate_group_function = lambda group: group.count() + else: + aggregate_group_function = lambda group: group.sum() + + return ( + aggregate_group_function( + connections_table[ + [neuron_body_id_we_care_about, "neuropil", "syn_count"] + ].groupby([neuron_body_id_we_care_about, "neuropil"]) + ) + .reset_index() + .pivot( + index=neuron_body_id_we_care_about, + columns="neuropil", + values="syn_count", + ) + .reset_index(names="body_id") + .rename_axis(None, axis=1) # otherwise the index has a name + .fillna(0) + .astype(int) + ) + def sna(self, generic_n_a: NeuronAttribute) -> str: """ Converts the generic Neuron Attribute to the specific one.