From d6d6843475c0b04f431a597c434f7e630a587f74 Mon Sep 17 00:00:00 2001 From: Dom Date: Fri, 11 Apr 2025 13:50:32 +0200 Subject: [PATCH 01/10] Properly show all selected attributes in the interactive graph and add option whether to show labels --- vnc_networks/utils/nx_design.py | 53 ++++++++++++++++++++++++--------- 1 file changed, 39 insertions(+), 14 deletions(-) diff --git a/vnc_networks/utils/nx_design.py b/vnc_networks/utils/nx_design.py index c26b3c7..3f4fce2 100644 --- a/vnc_networks/utils/nx_design.py +++ b/vnc_networks/utils/nx_design.py @@ -158,6 +158,7 @@ def display_interactive_graph( output_file: str = "visualisation.html", window_height: int = 1000, additional_attributes: Optional[list[NeuronAttribute]] = None, + show_labels: bool = True, ): """ Display the graph in interactive browser window using pyvis. This saves an HTML file and opens it in the browser. @@ -203,25 +204,49 @@ def display_interactive_graph( attributes_displayed += additional_attributes # Ensure all the attributes are defined in the connections object all_nodes = connections.get_nodes(type="uid") - for attribute in attributes_displayed: - _ = connections.get_node_attribute(all_nodes, attribute) - def node_data_to_string(graph, node): - data_dict = graph.nodes[node] - return f'body_id: {data_dict["body_id"]}\n' + "\n".join( + def node_data_to_string(node_data): + return "\n".join( [ - f"{k}: {v}" - for k, v in data_dict.items() + f"{attribute_name}: {attribute_value}" + for attribute_name, attribute_value in node_data.items() ] ) - - # This is the text that is shown when hovering over a node (more detailed) - node_data = { - node: node_data_to_string(g,node) - for node in g.nodes + + node_attributes_lists = [ + connections.get_node_attribute(all_nodes, attribute) + for attribute in attributes_displayed + ] + # {node: {attribute: value}} + node_attributes = { + node_and_attributes[0]: dict(zip(attributes_displayed, node_and_attributes[1:])) + for node_and_attributes in zip(g.nodes, *node_attributes_lists) } - - nx.set_node_attributes(g, node_data, "title") + + nx.set_node_attributes(g, node_attributes) + # This is the text that is shown when hovering over a node (more detailed) + nx.set_node_attributes( + g, + { + node: node_data_to_string(node_data) + for node, node_data in node_attributes.items() + }, + "title", + ) + if show_labels: + nx.set_node_attributes( + g, + { + node: str(node_data["name"]) + + ( + f' [{node_data["side"][0].upper()}]' + if isinstance(node_data["side"], str) and len(node_data["side"]) > 0 + else "" + ) + for node, node_data in node_attributes.items() + }, + "label", + ) # set node colour based on neuron class nx.set_node_attributes( From e5242ae68afaec14df8329c4869e1d6a463f3b2c Mon Sep 17 00:00:00 2001 From: Dom Date: Fri, 11 Apr 2025 13:59:27 +0200 Subject: [PATCH 02/10] Fix `ConnectomeReader.get_neurons_from_class` bug It should call `get_neuron_bodyids` with the `NeuronAttribute` "class_1" which gets converted to a connectome-specific attribute later --- vnc_networks/connectome_reader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vnc_networks/connectome_reader.py b/vnc_networks/connectome_reader.py index 8b5fd19..7b6c611 100644 --- a/vnc_networks/connectome_reader.py +++ b/vnc_networks/connectome_reader.py @@ -367,7 +367,7 @@ def get_neurons_from_class(self, class_: NeuronClass) -> list[BodyId]: """ # verify if the class is indeed a neuron class for this dataset specific_class = self.specific_neuron_class(class_) - return 
self.get_neuron_bodyids({self.class_1: specific_class}) + return self.get_neuron_bodyids({"class_1": specific_class}) def specific_selection_dict(self, selection_dict: SelectionDict): """ From 8d720e0b3012c8f0db2d9b06f1b28873522784e1 Mon Sep 17 00:00:00 2001 From: Dom Date: Fri, 11 Apr 2025 14:19:00 +0200 Subject: [PATCH 03/10] Fix bug in `FAFBReader.get_neuron_bodyids` If `nodes` was supplied, they would be returned even if they weren't valid BodyIds --- vnc_networks/connectome_reader.py | 50 +++++++++++++++++++------------ 1 file changed, 31 insertions(+), 19 deletions(-) diff --git a/vnc_networks/connectome_reader.py b/vnc_networks/connectome_reader.py index 7b6c611..84219bd 100644 --- a/vnc_networks/connectome_reader.py +++ b/vnc_networks/connectome_reader.py @@ -168,12 +168,16 @@ def get_neuron_bodyids( nodes: Optional[list[BodyId] | list[int]] = None, ) -> list[BodyId]: """ - Get the Ids of the neurons in the dataset. - Select (keep) according to the selection_dict. - Different criteria are treated as 'and' conditions. + Get the BodyIds of the neurons in the dataset that fulfil the conditions in the selection_dict. - For the specific case of "class_1" that refers to the NeuronClass, - we need to verify both the generic and the specific names. + For the specific case of "class_1" that refers to the NeuronClass, we need to verify both the generic and the specific names. + + Args: + selection_dict (SelectionDict, optional): Criteria that the returned neurons need to fulfil. Different criteria are treated as 'and' conditions. Defaults to {}. + nodes (Optional[list[BodyId] | list[int]], optional): If not None, only return BodyIds which are contained in this list. Defaults to None. + + Returns: + list[BodyId]: list of the BodyIds of neurons that fulfilled all supplied conditions. """ ... @@ -693,12 +697,16 @@ def get_neuron_bodyids( nodes: Optional[list[BodyId] | list[int]] = None, ) -> list[BodyId]: """ - Get the Ids of the neurons in the dataset. - Select (keep) according to the selection_dict. - Different criteria are treated as 'and' conditions. + Get the BodyIds of the neurons in the dataset that fulfil the conditions in the selection_dict. + + For the specific case of "class_1" that refers to the NeuronClass, we need to verify both the generic and the specific names. - For the specific case of "class_1" that refers to the NeuronClass, - we need to verify both the generic and the specific names. + Args: + selection_dict (SelectionDict, optional): Criteria that the returned neurons need to fulfil. Different criteria are treated as 'and' conditions. Defaults to {}. + nodes (Optional[list[BodyId] | list[int]], optional): If not None, only return BodyIds which are contained in this list. Defaults to None. + + Returns: + list[BodyId]: list of the BodyIds of neurons that fulfilled all supplied conditions. """ s_dict = self.specific_selection_dict(selection_dict) @@ -1683,20 +1691,24 @@ def get_neuron_bodyids( nodes: Optional[list[BodyId] | list[int]] = None, ) -> list[BodyId]: """ - Get the Ids of the neurons in the dataset. - Select (keep) according to the selection_dict. - Different criteria are treated as 'and' conditions. + Get the BodyIds of the neurons in the dataset that fulfil the conditions in the selection_dict. + + For the specific case of "class_1" that refers to the NeuronClass, we need to verify both the generic and the specific names. - For the specific case of "class_1" that refers to the NeuronClass, - we need to verify both the generic and the specific names. 
+ Args: + selection_dict (SelectionDict, optional): Criteria that the returned neurons need to fulfil. Different criteria are treated as 'and' conditions. Defaults to {}. + nodes (Optional[list[BodyId] | list[int]], optional): If not None, only return BodyIds which are contained in this list. Defaults to None. + + Returns: + list[BodyId]: list of the BodyIds of neurons that fulfilled all supplied conditions. """ + # get all neurons in the dataset that are also in the nodes list + valid_nodes = set(self.list_all_nodes()) + if nodes is not None: + valid_nodes = valid_nodes.intersection(nodes) # Treat each attribute in the selection dict independently: # get the nodes that satisfy each condition, and return the intersection of all - if nodes is not None: - valid_nodes = set(nodes) - else: - valid_nodes = set(self.list_all_nodes()) for key, value in selection_dict.items(): specific_valid_nodes = self._filter_neurons( attribute=key, From ea871f5c7c3a50137f5bf9feba18421f1cdc5f61 Mon Sep 17 00:00:00 2001 From: Dom Date: Fri, 11 Apr 2025 14:19:26 +0200 Subject: [PATCH 04/10] Fix some tiny type hinting problems in connectome_reader --- vnc_networks/connectome_reader.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vnc_networks/connectome_reader.py b/vnc_networks/connectome_reader.py index 84219bd..ef92f43 100644 --- a/vnc_networks/connectome_reader.py +++ b/vnc_networks/connectome_reader.py @@ -507,7 +507,7 @@ def _load_connections(self) -> pd.DataFrame: Needs to gather the columns ['start_bid', 'end_bid', 'syn_count', 'nt_type']. """ # Loading data in the connections file - columns = ["start_bid", "end_bid", "syn_count"] + columns: list[NeuronAttribute] = ["start_bid", "end_bid", "syn_count"] columns_to_read = [self.sna(a) for a in columns] connections = pd.read_feather(self._connections_file, columns=columns_to_read) read_columns = ( @@ -726,7 +726,7 @@ def get_neuron_bodyids( key ] # can be 'sensory' or 'sensory neuron' try: # will work if a generic NeuronClass is given - specific_value = self.specific_neuron_class(requested_value) + specific_value = self.specific_neuron_class(requested_value) # type: ignore requested_value might be a generic NeuronClass, or if not a specific class already except KeyError: # will work if a specific NeuronClass is given specific_value = requested_value neurons = neurons[neurons[self._class_1] == specific_value] From e291cc46bcb91dfe01b5657853ebff2fe4af2762 Mon Sep 17 00:00:00 2001 From: Dom Date: Fri, 11 Apr 2025 15:10:42 +0200 Subject: [PATCH 05/10] Fix bug with `Connections.get_neuron_ids` not working when providing `None` as selection_dict --- tests/test_local/test_data_loading.py | 30 +++++++++++++++++++++++++++ vnc_networks/connections.py | 5 +++-- 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/tests/test_local/test_data_loading.py b/tests/test_local/test_data_loading.py index 559dd50..c9182d7 100644 --- a/tests/test_local/test_data_loading.py +++ b/tests/test_local/test_data_loading.py @@ -3,6 +3,9 @@ """ +from vnc_networks.params import SelectionDict + + class TestDataLoading: """ Test the data loading functions. @@ -58,3 +61,30 @@ def test_connections_instantiation_MANCv1_2(self): df_2 = df[(df["start_bid"] == 10725) & (df["end_bid"] == 10439)] assert df_2["eff_weight"].values[0] == -1080, "Incorrect nt_type handling" + + def test_connections_getting_neuron_ids_MANCv1_2(self): + """ + Test that we get the same results if we get uids or bodyids and convert between the two. 
+ """ + import vnc_networks + from vnc_networks.connections import Connections + + # Instantiate a Connections object + connections = Connections(vnc_networks.connectome_reader.MANC("v1.2")) + + # test a few different selection_dicts + selection_dicts: list[SelectionDict | None] = [ + None, + {}, + {"class_1": "descending"}, + {"class_1": "ascending", "nt_type": "GABA"}, + ] + for selection_dict in selection_dicts: + body_ids = connections.get_neuron_bodyids(selection_dict) + uids = connections.get_neuron_ids(selection_dict) + assert ( + set(connections.get_uids_from_bodyids(body_ids)) == set(uids) + ), f"Getting bodyids and converting to uids doesn't match with selection_dict {selection_dict}." + assert ( + set(connections.get_bodyids_from_uids(uids)) == set(body_ids) + ), f"Getting uids and converting to bodyids doesn't match with selection_dict {selection_dict}." diff --git a/vnc_networks/connections.py b/vnc_networks/connections.py index f2ec465..53a1d60 100644 --- a/vnc_networks/connections.py +++ b/vnc_networks/connections.py @@ -977,8 +977,9 @@ def get_neuron_ids( Get the neuron IDs from the nodes dataframe as loaded in the initial dataset, based on a selection dictionary. """ - nodes = self.get_nodes(type="body_id") - body_ids = self.CR.get_neuron_bodyids(selection_dict, nodes) + body_ids = self.get_nodes(type="body_id") + if selection_dict is not None: + body_ids = self.CR.get_neuron_bodyids(selection_dict, body_ids) return self.__get_uids_from_bodyids(body_ids) def get_neurons_pre(self): From 60d1db4574b16515896ab33fe224160da160f32a Mon Sep 17 00:00:00 2001 From: Dom Date: Fri, 11 Apr 2025 15:20:34 +0200 Subject: [PATCH 06/10] Define the `nt_weights` dict in the base `ConnectomeReader` class to fix type hinting error --- vnc_networks/connectome_reader.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/vnc_networks/connectome_reader.py b/vnc_networks/connectome_reader.py index ef92f43..d72155d 100644 --- a/vnc_networks/connectome_reader.py +++ b/vnc_networks/connectome_reader.py @@ -10,7 +10,8 @@ import os import typing from abc import ABC, abstractmethod -from typing import Optional +from collections.abc import Mapping +from typing import Any, Optional import numpy as np import pandas as pd @@ -54,6 +55,9 @@ class ConnectomeReader(ABC): _ascending = "ascending" _descending = "descending" + # connectomes need to implement weight assignment for their neurotransmitters + nt_weights: Mapping[Any, int] + def __init__( self, connectome_name: str, From 669f9d007bd92667950318cb1b29ec3607718c7f Mon Sep 17 00:00:00 2001 From: Dom Date: Fri, 11 Apr 2025 15:42:42 +0200 Subject: [PATCH 07/10] Make `specific_neuron_class` throw a `KeyError` instead of a `ValueError` because that's what `get_neuron_bodyids` catches --- vnc_networks/connectome_reader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vnc_networks/connectome_reader.py b/vnc_networks/connectome_reader.py index d72155d..8a3b25e 100644 --- a/vnc_networks/connectome_reader.py +++ b/vnc_networks/connectome_reader.py @@ -652,7 +652,7 @@ def specific_neuron_class(self, generic_n_c: NeuronClass): if converted_type is None: raise KeyError except KeyError: - raise ValueError( + raise KeyError( f"ConnectomeReader::specific_neuron_class().\ The class {generic_n_c} is not defined in {self.connectome_name}." 
) From ab01e746d785f46bf2ee8cd701ffe01e9d92faf4 Mon Sep 17 00:00:00 2001 From: Dom Date: Fri, 11 Apr 2025 16:09:38 +0200 Subject: [PATCH 08/10] Add `root_side` parameter to MANC connectome reader Fixes #52 --- vnc_networks/connectome_reader.py | 6 ++++++ vnc_networks/params.py | 1 + 2 files changed, 7 insertions(+) diff --git a/vnc_networks/connectome_reader.py b/vnc_networks/connectome_reader.py index 8a3b25e..11d23bc 100644 --- a/vnc_networks/connectome_reader.py +++ b/vnc_networks/connectome_reader.py @@ -455,6 +455,7 @@ class MANCReader(ConnectomeReader): _nb_post_synapses: str _nb_pre_neurons: str _nb_post_neurons: str + _root_side: str def __init__( self, @@ -548,6 +549,7 @@ def list_possible_attributes(self) -> list[str]: "name", "type", "side", + "root_side", "neuropil", "hemilineage", "size", @@ -584,6 +586,7 @@ def sna( # specific_neuron_attribute, abbreviated due to frequent use "nb_pre_neurons": self._nb_pre_neurons, "nb_post_neurons": self._nb_post_neurons, "location": self._location, # synapse position + "root_side": self._root_side, } try: converted_type = mapping.get(generic_n_a) @@ -617,6 +620,7 @@ def decode_neuron_attribute(self, specific_attribute: str) -> NeuronAttribute: self._nb_pre_neurons: "nb_pre_neurons", self._nb_post_neurons: "nb_post_neurons", self._location: "location", # synapse position + self._root_side: "root_side", } try: converted_attr = mapping.get(specific_attribute) @@ -854,6 +858,7 @@ def _load_specific_namefields(self): self._nb_post_synapses = "post:int" self._nb_pre_neurons = "upstream:int" self._nb_post_neurons = "downstream:int" + self._root_side = "rootSide:string" # Synapse specific self._start_synset_id = ":START_ID(SynSet-ID)" self._end_synset_id = ":END_ID(SynSet-ID)" @@ -1182,6 +1187,7 @@ def _load_specific_namefields(self): self._nb_post_synapses = "post" self._nb_pre_neurons = "upstream" self._nb_post_neurons = "downstream" + self._root_side = "rootSide" # Synapse specific self._syn_id = "synapse_id" diff --git a/vnc_networks/params.py b/vnc_networks/params.py index e474acd..aab262c 100644 --- a/vnc_networks/params.py +++ b/vnc_networks/params.py @@ -129,6 +129,7 @@ "type", # morphology "side", # common to all + "root_side", # only MANC "neuropil", # common to all "size", # common to all "area", From 25a53e75d0d8ac4364aae1d8c46edefc473b2bfb Mon Sep 17 00:00:00 2001 From: Dom Date: Thu, 24 Apr 2025 17:19:46 +0200 Subject: [PATCH 09/10] Add notebook investigating missing synapses in FAFB graph --- scripts/fafb_missing_some_synapses.ipynb | 595 +++++++++++++++++++++++ 1 file changed, 595 insertions(+) create mode 100644 scripts/fafb_missing_some_synapses.ipynb diff --git a/scripts/fafb_missing_some_synapses.ipynb b/scripts/fafb_missing_some_synapses.ipynb new file mode 100644 index 0000000..be3b825 --- /dev/null +++ b/scripts/fafb_missing_some_synapses.ipynb @@ -0,0 +1,595 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "946dc5c7", + "metadata": {}, + "source": [ + "# It looks like the FAFB graph doesn't have all the connections that the connections table does" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "4e1fc82e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Attribute class_1 not found in the graph. 
Adding it.\n" + ] + } + ], + "source": [ + "import polars as pl\n", + "\n", + "import vnc_networks\n", + "\n", + "connections = vnc_networks.connections.Connections(\n", + " CR=vnc_networks.connectome_reader.FAFB_v783()\n", + ")\n", + "connections_table = pl.from_pandas(connections.connections)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "15eb1f75", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "connections table has 2709829 connections\n", + "connections graph has 2484163 connections\n" + ] + } + ], + "source": [ + "print(f\"connections table has {len(connections_table)} connections\")\n", + "print(f\"connections graph has {len(connections.graph.edges)} connections\")" + ] + }, + { + "cell_type": "markdown", + "id": "a484a36f", + "metadata": {}, + "source": [ + "It turns out that the FAFB connections table sometimes has multiple entries between the same pair of neurons.\n", + "\n", + "If we check the documentation [here](https://codex.flywire.ai/api/download) it says \"More than one row can be present for the same pair of cells if they synapse in multiple neuropils (regions).\"" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "f2166d15", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (169_972, 3)
[HTML table rows elided — columns: start_bid, end_bid, number_of_connections; the full table is rendered in the text/plain output below]
" + ], + "text/plain": [ + "shape: (169_972, 3)\n", + "┌────────────────────┬────────────────────┬───────────────────────┐\n", + "│ start_bid ┆ end_bid ┆ number_of_connections │\n", + "│ --- ┆ --- ┆ --- │\n", + "│ i64 ┆ i64 ┆ u32 │\n", + "╞════════════════════╪════════════════════╪═══════════════════════╡\n", + "│ 720575940624402173 ┆ 720575940622160705 ┆ 12 │\n", + "│ 720575940620540507 ┆ 720575940618932763 ┆ 11 │\n", + "│ 720575940612718563 ┆ 720575940623122125 ┆ 11 │\n", + "│ 720575940639242303 ┆ 720575940622160705 ┆ 11 │\n", + "│ 720575940622915060 ┆ 720575940616551029 ┆ 10 │\n", + "│ … ┆ … ┆ … │\n", + "│ 720575940613455986 ┆ 720575940637085503 ┆ 2 │\n", + "│ 720575940620795784 ┆ 720575940607689394 ┆ 2 │\n", + "│ 720575940609658377 ┆ 720575940630758418 ┆ 2 │\n", + "│ 720575940630403269 ┆ 720575940636250292 ┆ 2 │\n", + "│ 720575940633593548 ┆ 720575940633923479 ┆ 2 │\n", + "└────────────────────┴────────────────────┴───────────────────────┘" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "connections_table.group_by([\"start_bid\", \"end_bid\"]).agg(\n", + " pl.col(\"syn_count\").len().alias(\"number_of_connections\")\n", + ").filter(pl.col(\"number_of_connections\") > 1).sort(\n", + " \"number_of_connections\", descending=True\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "57b25dcd", + "metadata": {}, + "source": [ + "Looking at all the connections between the first pair of neurons:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "e574e2d7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (12, 11)
[HTML table rows elided — columns: start_bid, end_bid, syn_count, nt_type, eff_weight, subdivision_start, subdivision_end, syn_count_norm, eff_weight_norm, start_uid, end_uid; the same rows are rendered in the text/plain output below]
" + ], + "text/plain": [ + "shape: (12, 11)\n", + "┌────────────┬────────────┬───────────┬─────────┬───┬────────────┬───────────┬───────────┬─────────┐\n", + "│ start_bid ┆ end_bid ┆ syn_count ┆ nt_type ┆ … ┆ syn_count_ ┆ eff_weigh ┆ start_uid ┆ end_uid │\n", + "│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ norm ┆ t_norm ┆ --- ┆ --- │\n", + "│ i64 ┆ i64 ┆ i64 ┆ str ┆ ┆ --- ┆ --- ┆ i64 ┆ i64 │\n", + "│ ┆ ┆ ┆ ┆ ┆ f64 ┆ f64 ┆ ┆ │\n", + "╞════════════╪════════════╪═══════════╪═════════╪═══╪════════════╪═══════════╪═══════════╪═════════╡\n", + "│ 7205759406 ┆ 7205759406 ┆ 45 ┆ ACH ┆ … ┆ 0.034695 ┆ 0.034695 ┆ 131982 ┆ 72220 │\n", + "│ 24402173 ┆ 22160705 ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ 7205759406 ┆ 7205759406 ┆ 12 ┆ ACH ┆ … ┆ 0.009252 ┆ 0.009252 ┆ 131982 ┆ 72220 │\n", + "│ 24402173 ┆ 22160705 ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ 7205759406 ┆ 7205759406 ┆ 11 ┆ ACH ┆ … ┆ 0.008481 ┆ 0.008481 ┆ 131982 ┆ 72220 │\n", + "│ 24402173 ┆ 22160705 ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ 7205759406 ┆ 7205759406 ┆ 6 ┆ ACH ┆ … ┆ 0.004626 ┆ 0.004626 ┆ 131982 ┆ 72220 │\n", + "│ 24402173 ┆ 22160705 ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ 7205759406 ┆ 7205759406 ┆ 13 ┆ ACH ┆ … ┆ 0.010023 ┆ 0.010023 ┆ 131982 ┆ 72220 │\n", + "│ 24402173 ┆ 22160705 ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", + "│ 7205759406 ┆ 7205759406 ┆ 6 ┆ ACH ┆ … ┆ 0.004626 ┆ 0.004626 ┆ 131982 ┆ 72220 │\n", + "│ 24402173 ┆ 22160705 ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ 7205759406 ┆ 7205759406 ┆ 25 ┆ ACH ┆ … ┆ 0.019275 ┆ 0.019275 ┆ 131982 ┆ 72220 │\n", + "│ 24402173 ┆ 22160705 ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ 7205759406 ┆ 7205759406 ┆ 35 ┆ ACH ┆ … ┆ 0.026985 ┆ 0.026985 ┆ 131982 ┆ 72220 │\n", + "│ 24402173 ┆ 22160705 ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ 7205759406 ┆ 7205759406 ┆ 61 ┆ ACH ┆ … ┆ 0.047032 ┆ 0.047032 ┆ 131982 ┆ 72220 │\n", + "│ 24402173 ┆ 22160705 ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ 7205759406 ┆ 7205759406 ┆ 33 ┆ ACH ┆ … ┆ 0.025443 ┆ 0.025443 ┆ 131982 ┆ 72220 │\n", + "│ 24402173 ┆ 22160705 ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "└────────────┴────────────┴───────────┴─────────┴───┴────────────┴───────────┴───────────┴─────────┘" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "connections_table.filter(\n", + " (pl.col(\"start_bid\") == 720575940624402173)\n", + " & (pl.col(\"end_bid\") == 720575940622160705)\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "fd5bb318", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'syn_count': 33,\n", + " 'nt_type': 'ACH',\n", + " 'eff_weight': 33,\n", + " 'syn_count_norm': 0.025443330763299923,\n", + " 'eff_weight_norm': 0.025443330763299923,\n", + " 'weight': 33}" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "connections.graph.edges[131982, 72220]\n" + ] + }, + { + "cell_type": "markdown", + "id": "e5de8e7c", + "metadata": {}, + "source": [ + "The graph just remembered that last connection in the table - the rest were dropped by networkx" + ] + }, + { + "cell_type": "markdown", + "id": "d7fa48e6", + "metadata": {}, + "source": [ + "## Quick check for MANC" + ] + }, + { + "cell_type": "markdown", + "id": "d91b9b1f", + "metadata": {}, + "source": [ + "MANC v1.2 must have already aggregated synapses over pairs of neurons (actually I think I did this when downloading the data from neuprint), so here there's no problem.\n", + "\n", + "This is because the neurotransmitter predictions are only applied at the level of neurons, so we first aggregate and then label the synapse type according to the neuron's neurotransmitter type." 
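    ,
    "\n",
    "\n",
    "As a rough, hypothetical sketch of that \"aggregate, then label\" step (names are assumptions: `connections_df` would be a per-row connections table with the columns used above, and `neuron_nt` an assumed per-neuron neurotransmitter lookup with columns `body_id` and `nt_type` — this is not the exact code used to build the dataset):\n",
    "\n",
    "```python\n",
    "import pandas as pd\n",
    "\n",
    "def aggregate_then_label(connections_df: pd.DataFrame, neuron_nt: pd.DataFrame) -> pd.DataFrame:\n",
    "    # sum synapse counts over duplicate (start_bid, end_bid) pairs\n",
    "    edges = connections_df.groupby([\"start_bid\", \"end_bid\"], as_index=False)[\"syn_count\"].sum()\n",
    "    # label each aggregated edge with the presynaptic neuron's neuron-level nt_type\n",
    "    return edges.merge(neuron_nt.rename(columns={\"body_id\": \"start_bid\"}), on=\"start_bid\", how=\"left\")\n",
    "```"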
+ ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "676a805a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Attribute class_1 not found in the graph. Adding it.\n", + "connections table has 1372588 connections with 24151003 synapses\n", + "connections graph has 1372588 connections with 24151003 synapses\n" + ] + } + ], + "source": [ + "def check_connections_table_and_graph_count(\n", + " connectome_reader: vnc_networks.connectome_reader.ConnectomeReader,\n", + "):\n", + " c = vnc_networks.connections.Connections(connectome_reader)\n", + " print(\n", + " f\"connections table has {len(c.connections)} connections with {c.connections[\"syn_count\"].sum()} synapses\"\n", + " )\n", + " print(\n", + " f\"connections graph has {len(c.graph.edges)} connections with {sum(\n", + " c.graph.edges[e][\"syn_count\"] for e in c.graph.edges\n", + " )} synapses\"\n", + " )\n", + "\n", + "check_connections_table_and_graph_count(vnc_networks.connectome_reader.MANC_v_1_2())" + ] + }, + { + "cell_type": "markdown", + "id": "b87f95d5", + "metadata": {}, + "source": [ + "MANC v1.0 is all good too" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "819775ef", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Attribute class_1 not found in the graph. Adding it.\n", + "connections table has 1548657 connections with 27387970 synapses\n", + "connections graph has 1548657 connections with 27387970 synapses\n" + ] + } + ], + "source": [ + "check_connections_table_and_graph_count(vnc_networks.connectome_reader.MANC_v_1_0())" + ] + }, + { + "cell_type": "markdown", + "id": "d5823402", + "metadata": {}, + "source": [ + "## How many synapses are affected?" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "d064083e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "We lost 3549543 out of 31574890 synapses, which is 11.2%\n" + ] + } + ], + "source": [ + "connections_table_synapses = connections_table[\"syn_count\"].sum()\n", + "graph_synapses = sum(\n", + " connections.graph.edges[e][\"syn_count\"] for e in connections.graph.edges\n", + ")\n", + "\n", + "print(\n", + " f\"We lost {(diff := connections_table_synapses - graph_synapses)} out of {connections_table_synapses} synapses, which is {diff/connections_table_synapses*100:.1f}%\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "20f974c7", + "metadata": {}, + "source": [ + "Weirdly, there are 12285 neuron pairs that have multiple different neurotransmitters" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "id": "9647a987", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (12_285, 5)
[HTML table rows elided — columns: start_bid, end_bid, different_neurotransmitters, num_different_neurotransmitters, num_synapses; the full table is rendered in the text/plain output below]
" + ], + "text/plain": [ + "shape: (12_285, 5)\n", + "┌────────────────────┬────────────────────┬────────────────────┬────────────────────┬──────────────┐\n", + "│ start_bid ┆ end_bid ┆ different_neurotra ┆ num_different_neur ┆ num_synapses │\n", + "│ --- ┆ --- ┆ nsmitters ┆ otransmitter… ┆ --- │\n", + "│ i64 ┆ i64 ┆ --- ┆ --- ┆ i64 │\n", + "│ ┆ ┆ list[struct[2]] ┆ u32 ┆ │\n", + "╞════════════════════╪════════════════════╪════════════════════╪════════════════════╪══════════════╡\n", + "│ 720575940623317321 ┆ 720575940639278399 ┆ [{\"GLUT\",2}, ┆ 4 ┆ 156 │\n", + "│ ┆ ┆ {\"GABA\",3}, … {\"A… ┆ ┆ │\n", + "│ 720575940641501648 ┆ 720575940621268651 ┆ [{\"SER\",1}, ┆ 4 ┆ 57 │\n", + "│ ┆ ┆ {\"GABA\",1}, … ┆ ┆ │\n", + "│ ┆ ┆ {\"AC… ┆ ┆ │\n", + "│ 720575940631763909 ┆ 720575940613096089 ┆ [{\"GABA\",1}, ┆ 4 ┆ 49 │\n", + "│ ┆ ┆ {\"GLUT\",1}, … {\"A… ┆ ┆ │\n", + "│ 720575940641501648 ┆ 720575940629399370 ┆ [{\"GABA\",2}, ┆ 3 ┆ 76 │\n", + "│ ┆ ┆ {\"ACH\",1}, {\"GLUT… ┆ ┆ │\n", + "│ 720575940622263066 ┆ 720575940607384242 ┆ [{\"GLUT\",1}, ┆ 3 ┆ 25 │\n", + "│ ┆ ┆ {\"ACH\",1}, {\"GABA… ┆ ┆ │\n", + "│ … ┆ … ┆ … ┆ … ┆ … │\n", + "│ 720575940610384082 ┆ 720575940623682195 ┆ [{\"ACH\",4}, ┆ 2 ┆ 39 │\n", + "│ ┆ ┆ {\"SER\",1}] ┆ ┆ │\n", + "│ 720575940627209990 ┆ 720575940624202424 ┆ [{\"GABA\",1}, ┆ 2 ┆ 10 │\n", + "│ ┆ ┆ {\"GLUT\",1}] ┆ ┆ │\n", + "│ 720575940617505629 ┆ 720575940618392912 ┆ [{\"GLUT\",1}, ┆ 2 ┆ 48 │\n", + "│ ┆ ┆ {\"GABA\",2}] ┆ ┆ │\n", + "│ 720575940621801866 ┆ 720575940609819512 ┆ [{\"GLUT\",2}, ┆ 2 ┆ 27 │\n", + "│ ┆ ┆ {\"GABA\",1}] ┆ ┆ │\n", + "│ 720575940632032460 ┆ 720575940624705514 ┆ [{\"GLUT\",1}, ┆ 2 ┆ 51 │\n", + "│ ┆ ┆ {\"GABA\",1}] ┆ ┆ │\n", + "└────────────────────┴────────────────────┴────────────────────┴────────────────────┴──────────────┘" + ] + }, + "execution_count": 60, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "connections_table.group_by([\"start_bid\", \"end_bid\"]).agg(\n", + " pl.col(\"nt_type\").value_counts().alias(\"different_neurotransmitters\"),\n", + " pl.col(\"nt_type\").n_unique().alias(\"num_different_neurotransmitters\"),\n", + " pl.col(\"syn_count\").sum().alias(\"num_synapses\"),\n", + ").filter(pl.col(\"num_different_neurotransmitters\") > 1).sort(\n", + " pl.col(\"num_different_neurotransmitters\"), descending=True\n", + ")\n" + ] + }, + { + "cell_type": "markdown", + "id": "0ff833e3", + "metadata": {}, + "source": [ + "If we count the total number of synapses per neurotransmitter type, things look kind of messy. We even have a lot of neurons that have multiple of acetylcholine, glutamate and GABA, which shouldn't really be possible? Maybe it's just mislabelling? Would need to check the [neurotransmitter paper](https://doi.org/10.1016/j.cell.2024.03.016)...\n", + "\n", + "I think it could be important to know that a neuron which primarily uses a fast acting neurotransmitter can also have neuromodulatory connections, and it would be nice to keep these." + ] + }, + { + "cell_type": "code", + "execution_count": 145, + "id": "1824ca1f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (12_285, 9)
[HTML table rows elided — columns: start_bid, end_bid, num_connections_per_nt, ACH, GLUT, GABA, SER, DA, OCT; the same rows are rendered in the text/plain output below]
" + ], + "text/plain": [ + "shape: (12_285, 9)\n", + "┌───────────────────┬───────────────────┬───────────────────┬──────┬───┬──────┬──────┬──────┬──────┐\n", + "│ start_bid ┆ end_bid ┆ num_connections_p ┆ ACH ┆ … ┆ GABA ┆ SER ┆ DA ┆ OCT │\n", + "│ --- ┆ --- ┆ er_nt ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │\n", + "│ i64 ┆ i64 ┆ --- ┆ i64 ┆ ┆ i64 ┆ i64 ┆ i64 ┆ i64 │\n", + "│ ┆ ┆ list[struct[2]] ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "╞═══════════════════╪═══════════════════╪═══════════════════╪══════╪═══╪══════╪══════╪══════╪══════╡\n", + "│ 72057594063176390 ┆ 72057594061309608 ┆ [{\"GABA\",1}, ┆ 33 ┆ … ┆ 6 ┆ 5 ┆ null ┆ null │\n", + "│ 9 ┆ 9 ┆ {\"GLUT\",1}, … ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ ┆ ┆ {\"S… ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ 72057594064150164 ┆ 72057594062126865 ┆ [{\"ACH\",1}, ┆ 5 ┆ … ┆ 39 ┆ 5 ┆ null ┆ null │\n", + "│ 8 ┆ 1 ┆ {\"SER\",1}, … ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ ┆ ┆ {\"GLU… ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ 72057594062331732 ┆ 72057594063927839 ┆ [{\"GLUT\",1}, ┆ 11 ┆ … ┆ 86 ┆ null ┆ 6 ┆ null │\n", + "│ 1 ┆ 9 ┆ {\"GABA\",1}, … ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ ┆ ┆ {\"A… ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ 72057594063091199 ┆ 72057594062531001 ┆ [{\"GLUT\",1}, ┆ 7 ┆ … ┆ 7 ┆ null ┆ null ┆ null │\n", + "│ 1 ┆ 4 ┆ {\"GABA\",1}, ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ ┆ ┆ {\"ACH… ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ 72057594062226306 ┆ 72057594062054302 ┆ [{\"GABA\",1}, ┆ 8 ┆ … ┆ 31 ┆ null ┆ null ┆ null │\n", + "│ 6 ┆ 5 ┆ {\"GLUT\",1}, ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ ┆ ┆ {\"ACH… ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", + "│ 72057594063335688 ┆ 72057594063631602 ┆ [{\"GLUT\",1}, ┆ null ┆ … ┆ 23 ┆ null ┆ null ┆ null │\n", + "│ 3 ┆ 3 ┆ {\"GABA\",1}] ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ 72057594063907197 ┆ 72057594062081801 ┆ [{\"GABA\",1}, ┆ 7 ┆ … ┆ 6 ┆ null ┆ null ┆ null │\n", + "│ 7 ┆ 7 ┆ {\"ACH\",1}] ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ 72057594063002674 ┆ 72057594061719036 ┆ [{\"GLUT\",1}, ┆ null ┆ … ┆ 16 ┆ null ┆ null ┆ null │\n", + "│ 5 ┆ 1 ┆ {\"GABA\",1}] ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ 72057594062699588 ┆ 72057594062901035 ┆ [{\"ACH\",1}, ┆ 23 ┆ … ┆ 25 ┆ null ┆ null ┆ null │\n", + "│ 0 ┆ 6 ┆ {\"GABA\",1}] ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "│ 72057594062343372 ┆ 72057594060408828 ┆ [{\"ACH\",1}, ┆ 36 ┆ … ┆ null ┆ 5 ┆ null ┆ null │\n", + "│ 5 ┆ 8 ┆ {\"SER\",1}] ┆ ┆ ┆ ┆ ┆ ┆ │\n", + "└───────────────────┴───────────────────┴───────────────────┴──────┴───┴──────┴──────┴──────┴──────┘" + ] + }, + "execution_count": 145, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "connections_table.group_by([\"start_bid\", \"end_bid\", \"nt_type\"]).agg(\n", + " pl.col(\"syn_count\").sum().alias(\"num_nt_synapses\"),\n", + ").group_by([\"start_bid\", \"end_bid\"]).agg(\n", + " pl.struct(\"nt_type\", \"num_nt_synapses\").alias(\"num_synapses_per_nt\"),\n", + " pl.col(\"nt_type\").value_counts().alias(\"num_connections_per_nt\"),\n", + ").filter(pl.col(\"num_synapses_per_nt\").list.len() > 1).explode(\n", + " \"num_synapses_per_nt\"\n", + ").unnest(\"num_synapses_per_nt\").pivot(\n", + " on=\"nt_type\",\n", + " index=[\"start_bid\", \"end_bid\", \"num_connections_per_nt\"],\n", + " values=\"num_nt_synapses\",\n", + ").sort(pl.col(\"num_connections_per_nt\").list.len(), descending=True)\n" + ] + }, + { + "cell_type": "markdown", + "id": "c95ff1ba", + "metadata": {}, + "source": [ + "## What can we do?\n", + "\n", + "If all the neurotransmitter types for the same neuron pair were the same we could just aggregate the synapse counts, but this isn't the case...\n", + "\n", + "One easy thing we can do that maybe works is to use the networkx `MultiDiGraph` - a directed graph that can have multiple edges between pairs of 
nodes. Maybe this works with some things and not others though?" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "d3d60a29", + "metadata": {}, + "outputs": [], + "source": [ + "import networkx as nx\n", + "\n", + "fafb_multigraph = nx.from_pandas_edgelist(\n", + " connections.connections,\n", + " source=\"start_uid\",\n", + " target=\"end_uid\",\n", + " edge_attr=[\n", + " \"syn_count\", # absolute synapse count\n", + " \"nt_type\",\n", + " \"eff_weight\", # signed synapse count (nt weighted)\n", + " \"syn_count_norm\", # input normalized synapse count\n", + " \"eff_weight_norm\", # input normalized signed synapse count\n", + " ],\n", + " create_using=nx.MultiDiGraph,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "4ee1b8eb", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "connections table has 2709829 connections with 31574890 synapses\n", + "connections graph has 2709829 connections with 31574890 synapses\n" + ] + } + ], + "source": [ + "print(\n", + " f\"connections table has {len(connections.connections)} connections with {connections.connections[\"syn_count\"].sum()} synapses\"\n", + " )\n", + "print(\n", + " f\"connections graph has {len(fafb_multigraph.edges)} connections with {sum(\n", + " fafb_multigraph.edges[e][\"syn_count\"] for e in fafb_multigraph.edges\n", + ")} synapses\"\n", + ")\n" + ] + }, + { + "cell_type": "markdown", + "id": "7c2c16fe", + "metadata": {}, + "source": [ + "Whether this works for all the methods that operate on the graph, I'm not sure" + ] + }, + { + "cell_type": "markdown", + "id": "25bccb83", + "metadata": {}, + "source": [ + "Alternatively, we can reuse the existing functionality of splitting neurons to split each neuron with multiple different neurotransmitter outputs into \"virtual neurons\", each with only one type of neurotransmitter.\n", + "\n", + "Then everything still fits in a normal digraph, and we can still represent it as an adjacency matrix" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 1cf21a33e9b4ec9f2f06f81daf06aed30d8ebaf4 Mon Sep 17 00:00:00 2001 From: Dom Date: Thu, 24 Apr 2025 20:07:15 +0200 Subject: [PATCH 10/10] Add function to get neuron and synapse counts by neuropil --- tests/test_local/test_data_loading.py | 162 ++++++++++++++++++ vnc_networks/connectome_reader.py | 227 ++++++++++++++++++++++++++ 2 files changed, 389 insertions(+) diff --git a/tests/test_local/test_data_loading.py b/tests/test_local/test_data_loading.py index c9182d7..c107e32 100644 --- a/tests/test_local/test_data_loading.py +++ b/tests/test_local/test_data_loading.py @@ -88,3 +88,165 @@ def test_connections_getting_neuron_ids_MANCv1_2(self): assert ( set(connections.get_bodyids_from_uids(uids)) == set(body_ids) ), f"Getting uids and converting to bodyids doesn't match with selection_dict {selection_dict}." 
+ + def test_getting_counts_by_neuropil_MANCv1_2(self): + """ + Test that we can get neuron and synapse counts by neuropil + """ + import pandas as pd + + import vnc_networks + + # Instantiate a Connections object + connectome_reader = vnc_networks.connectome_reader.MANC("v1.2") + + # check that this matches what we expect + pd.testing.assert_frame_equal( + connectome_reader.get_synapse_counts_by_neuropil( + "downstream", [10000, 23458] + ), + pd.DataFrame( + { + "body_id": [10000, 23458], + "CV": [703, 0], + "IntTct": [313, 0], + "LTct": [3181, 0], + "LegNp(T3)(R)": [0, 1688], + } + ), + ) + pd.testing.assert_frame_equal( + connectome_reader.get_synapse_counts_by_neuropil( + "upstream", [10000, 23458] + ), + pd.DataFrame( + { + "body_id": [10000, 23458], + "CV": [224, 0], + "IntTct": [185, 0], + "LTct": [1462, 0], + "LegNp(T3)(R)": [0, 685], + } + ), + ) + pd.testing.assert_frame_equal( + connectome_reader.get_synapse_counts_by_neuropil("pre", [10000, 23458]), + pd.DataFrame( + { + "body_id": [10000, 23458], + "CV": [138, 0], + "IntTct": [73, 0], + "LTct": [752, 0], + "LegNp(T3)(R)": [0, 207], + } + ), + ) + pd.testing.assert_frame_equal( + connectome_reader.get_synapse_counts_by_neuropil("post", [10000, 23458]), + pd.DataFrame( + { + "body_id": [10000, 23458], + "CV": [224, 0], + "IntTct": [185, 0], + "LTct": [1462, 0], + "LegNp(T3)(R)": [0, 685], + } + ), + ) + pd.testing.assert_frame_equal( + connectome_reader.get_synapse_counts_by_neuropil( + "total_synapses", [10000, 23458] + ), + pd.DataFrame( + { + "body_id": [10000, 23458], + "CV": [927, 0], + "IntTct": [498, 0], + "LTct": [4643, 0], + "LegNp(T3)(R)": [0, 2373], + } + ), + ) + + def test_getting_counts_by_neuropil_FAFBv783(self): + """ + Test that we can get neuron and synapse counts by neuropil + """ + import pandas as pd + + import vnc_networks + + # Instantiate a Connections object + connectome_reader = vnc_networks.connectome_reader.FAFB_v783() + + # check that this matches what we expect + pd.testing.assert_frame_equal( + connectome_reader.get_synapse_counts_by_neuropil( + "downstream", [720575940627036426, 720575940633587552] + ), + pd.DataFrame( + { + "body_id": [720575940627036426, 720575940633587552], + "LOP_L": [9, 0], + "LO_L": [2, 0], + "SLP_R": [0, 2], + "SMP_R": [0, 31], + } + ), + ) + pd.testing.assert_frame_equal( + connectome_reader.get_synapse_counts_by_neuropil( + "upstream", [720575940627036426, 720575940633587552] + ), + pd.DataFrame( + { + "body_id": [720575940627036426, 720575940633587552], + "LOP_L": [3, 0], + "LO_L": [9, 0], + "SLP_R": [0, 11], + "SMP_R": [0, 13], + } + ), + ) + pd.testing.assert_frame_equal( + connectome_reader.get_synapse_counts_by_neuropil( + "pre", [720575940627036426, 720575940633587552] + ), + pd.DataFrame( + { + "body_id": [720575940627036426, 720575940633587552], + "LOP_L": [14, 0], + "LO_L": [83, 0], + "SLP_R": [0, 33], + "SMP_R": [0, 75], + } + ), + ) + pd.testing.assert_frame_equal( + connectome_reader.get_synapse_counts_by_neuropil( + "post", [720575940627036426, 720575940633587552] + ), + pd.DataFrame( + { + "body_id": [720575940627036426, 720575940633587552], + "LOP_L": [100, 0], + "LO_L": [8, 0], + "SLP_R": [0, 4], + "SMP_R": [0, 284], + } + ), + ) + pd.testing.assert_frame_equal( + connectome_reader.get_synapse_counts_by_neuropil( + "total_synapses", [720575940627036426, 720575940633587552] + ), + pd.DataFrame( + { + "body_id": [720575940627036426, 720575940633587552], + "LOP_L": [114, 0], + "LO_L": [91, 0], + "SLP_R": [0, 37], + "SMP_R": [0, 359], + } + ), + ) \ No newline 
at end of file diff --git a/vnc_networks/connectome_reader.py b/vnc_networks/connectome_reader.py index 11d23bc..a15b174 100644 --- a/vnc_networks/connectome_reader.py +++ b/vnc_networks/connectome_reader.py @@ -7,6 +7,7 @@ data types to the specific ones of the connectome. """ +import ast import os import typing from abc import ABC, abstractmethod @@ -151,6 +152,35 @@ def get_synapse_neuropil( """ ... + @abstractmethod + def get_synapse_counts_by_neuropil( + self, + synapse_count_type: typing.Literal[ + "downstream", "upstream", "pre", "post", "total_synapses" + ], + body_id_subset: list[BodyId] | list[int] | None = None, + ) -> pd.DataFrame: + """ + Get neuron or synapse counts for each neuron in each neuropil + + Args: + synapse_count_type (typing.Literal[ "downstream", "upstream", "pre", "post", "total_synapses"]): which count to get + * `"downstream"` number of downstream neurons + * `"upstream"` number of upstream neurons + * `"pre"` number of presynaptic synapses (ie. synapses to upstream neurons) + * `"post"` number of postsynaptic synapses (ie. synapses to downstream neurons) + * `"total_synapses"` total number of synapses, sum of pre and post + + body_id_subset (list[BodyId] | list[int] | None, optional): Only return counts for a certain set of neurons. + If None, return counts for all. Defaults to None. + + Returns: + pd.DataFrame: a table [body_id, rois...] with the counts for each neuropil for each body_id. + **Note:** ROI columns won't be returned if no neurons have a count in that column (ie. if specifying + a small number of neurons for `body_id_subset`). + """ + ... + @abstractmethod def list_possible_attributes(self) -> list[str]: """ @@ -1152,6 +1182,34 @@ def get_synapse_neuropil( data.loc[data["synapse_id"].isin(synapses_in_roi), "neuropil"] = roi return data + def get_synapse_counts_by_neuropil( + self, + synapse_count_type: typing.Literal[ + "downstream", "upstream", "pre", "post", "total_synapses" + ], + body_id_subset: list[BodyId] | list[int] | None = None, + ) -> pd.DataFrame: + """ + Get neuron or synapse counts for each neuron in each neuropil + + Args: + synapse_count_type (typing.Literal[ "downstream", "upstream", "pre", "post", "total_synapses"]): which count to get + * `"downstream"` number of downstream neurons + * `"upstream"` number of upstream neurons + * `"pre"` number of presynaptic synapses (ie. synapses to upstream neurons) + * `"post"` number of postsynaptic synapses (ie. synapses to downstream neurons) + * `"total_synapses"` total number of synapses, sum of pre and post + + body_id_subset (list[BodyId] | list[int] | None, optional): Only return counts for a certain set of neurons. + If None, return counts for all. Defaults to None. + + Returns: + pd.DataFrame: a table [body_id, rois...] with the counts for each neuropil for each body_id. + **Note:** ROI columns won't be returned if no neurons have a count in that column (ie. if specifying + a small number of neurons for `body_id_subset`). 
+ """ + raise NotImplementedError("Not sure how to get this for MANCv1.0 yet...") + class MANC_v_1_2(MANCReader): def __init__( @@ -1188,6 +1246,7 @@ def _load_specific_namefields(self): self._nb_pre_neurons = "upstream" self._nb_post_neurons = "downstream" self._root_side = "rootSide" + self._roi_info = "roiInfo" # Synapse specific self._syn_id = "synapse_id" @@ -1311,6 +1370,69 @@ def get_synapse_neuropil( synapses.columns = ["synapse_id", "neuropil"] return synapses + def get_synapse_counts_by_neuropil( + self, + synapse_count_type: typing.Literal[ + "downstream", "upstream", "pre", "post", "total_synapses" + ], + body_id_subset: list[BodyId] | list[int] | None = None, + ) -> pd.DataFrame: + """ + Get neuron or synapse counts for each neuron in each neuropil + + Args: + synapse_count_type (typing.Literal[ "downstream", "upstream", "pre", "post", "total_synapses"]): which count to get + * `"downstream"` number of downstream neurons + * `"upstream"` number of upstream neurons + * `"pre"` number of presynaptic synapses (ie. synapses to upstream neurons) + * `"post"` number of postsynaptic synapses (ie. synapses to downstream neurons) + * `"total_synapses"` total number of synapses, sum of pre and post + + body_id_subset (list[BodyId] | list[int] | None, optional): Only return counts for a certain set of neurons. + If None, return counts for all. Defaults to None. + + Returns: + pd.DataFrame: a table [body_id, rois...] with the counts for each neuropil for each body_id. + **Note:** ROI columns won't be returned if no neurons have a count in that column (ie. if specifying + a small number of neurons for `body_id_subset`). + """ + # the total synapses count is called "synweight" in MANC. Renamed it in the arguments so it's more intuitive + synapse_count_type_name = ( + "synweight" + if synapse_count_type == "total_synapses" + else synapse_count_type + ) + + roi_info_table = pd.read_feather( + self._nodes_file, [self._body_id, self._roi_info] + ) + if body_id_subset is not None: + # Note: this removes some ROIs from the columns... + roi_info_table = roi_info_table[ + roi_info_table[self._body_id].isin(body_id_subset) + ] + return ( + pd.DataFrame( + { + body_id: { + roi: ( + roi_connections[synapse_count_type_name] + if synapse_count_type_name in roi_connections + else 0 + ) + for roi, roi_connections in connections.items() + } + for body_id, connections in zip( + roi_info_table[self._body_id], + roi_info_table[self._roi_info].apply(ast.literal_eval).values, + ) + } + ) + .T.reset_index(names="body_id") + .fillna(0) + .astype(int) + ) + @typing.overload def MANC( @@ -1557,6 +1679,111 @@ def get_synapse_neuropil( For that, load "neuropil_synapse_table.csv"' ) + def get_synapse_counts_by_neuropil( + self, + synapse_count_type: typing.Literal[ + "downstream", "upstream", "pre", "post", "total_synapses" + ], + body_id_subset: list[BodyId] | list[int] | None = None, + ): + """ + Get neuron or synapse counts for each neuron in each neuropil + + Args: + synapse_count_type (typing.Literal[ "downstream", "upstream", "pre", "post", "total_synapses"]): which count to get + * `"downstream"` number of downstream neurons + * `"upstream"` number of upstream neurons + * `"pre"` number of presynaptic synapses (ie. synapses to upstream neurons) + * `"post"` number of postsynaptic synapses (ie. synapses to downstream neurons) + * `"total_synapses"` total number of synapses, sum of pre and post + + body_id_subset (list[BodyId] | list[int] | None, optional): Only return counts for a certain set of neurons. 
+ If None, return counts for all. Defaults to None. + + Returns: + pd.DataFrame: a table [body_id, rois...] with the counts for each neuropil for each body_id. + **Note:** ROI columns won't be returned if no neurons have a count in that column (ie. if specifying + a small number of neurons for `body_id_subset`). + """ + connections_table = pd.read_csv( + self._connections_file, + ) + + if synapse_count_type == "total_synapses": + if body_id_subset is not None: + # Note: this removes some ROIs from the columns... + connections_table = connections_table[ + connections_table[self._start_bid].isin(body_id_subset) + | connections_table[self._end_bid].isin(body_id_subset) + ] + # separately get the pre and post synapse counts then sum them + synapse_counts = ( + pd.concat( + [ + connections_table[ + [neuron_body_id_we_care_about, "neuropil", "syn_count"] + ] + .groupby([neuron_body_id_we_care_about, "neuropil"]) + .sum() + .reset_index() + .pivot( + index=neuron_body_id_we_care_about, + columns="neuropil", + values="syn_count", + ) + .reset_index(names="body_id") + .fillna(0) + .astype(int) + for neuron_body_id_we_care_about in [ + self._start_bid, + self._end_bid, + ] + ] + ) + .groupby("body_id") + .sum() + .reset_index() + .rename_axis(None, axis=1) # otherwise the index has a name + ) + if body_id_subset is not None: + synapse_counts = synapse_counts[ + synapse_counts["body_id"].isin(body_id_subset) + ].reset_index(drop=True) + return synapse_counts + elif synapse_count_type in ["downstream", "post"]: + neuron_body_id_we_care_about = self._start_bid + else: + neuron_body_id_we_care_about = self._end_bid + + if body_id_subset is not None: + # Note: this removes some ROIs from the columns... + connections_table = connections_table[ + connections_table[neuron_body_id_we_care_about].isin(body_id_subset) + ] + + if synapse_count_type in ["downstream", "upstream"]: + aggregate_group_function = lambda group: group.count() + else: + aggregate_group_function = lambda group: group.sum() + + return ( + aggregate_group_function( + connections_table[ + [neuron_body_id_we_care_about, "neuropil", "syn_count"] + ].groupby([neuron_body_id_we_care_about, "neuropil"]) + ) + .reset_index() + .pivot( + index=neuron_body_id_we_care_about, + columns="neuropil", + values="syn_count", + ) + .reset_index(names="body_id") + .rename_axis(None, axis=1) # otherwise the index has a name + .fillna(0) + .astype(int) + ) + def sna(self, generic_n_a: NeuronAttribute) -> str: """ Converts the generic Neuron Attribute to the specific one.