Fix the black version to be the same as the one used in the workflow

seanzhangkx8 · seanzhangkx8 · commit e1b6ac2aa42e · 2025-02-28T11:53:21.000-05:00
diff --git a/convokit/model/backendMapper.py b/convokit/model/backendMapper.py
@@ -66,7 +66,12 @@ def get_data(
 
     @abstractmethod
     def update_data(
-        self, component_type: str, component_id: str, property_name: str, new_value, index=None,
+        self,
+        component_type: str,
+        component_id: str,
+        property_name: str,
+        new_value,
+        index=None,
     ):
         """
         Set or update the property data for the component of type component_type
@@ -174,7 +179,12 @@ def get_data(
             return collection[component_id][property_name]
 
     def update_data(
-        self, component_type: str, component_id: str, property_name: str, new_value, index=None,
+        self,
+        component_type: str,
+        component_id: str,
+        property_name: str,
+        new_value,
+        index=None,
     ):
         collection = self.get_collection(component_type)
         # don't create new collections if the ID is not found; this is supposed to be handled in the
@@ -282,7 +292,12 @@ def get_data(
             return result
 
     def update_data(
-        self, component_type: str, component_id: str, property_name: str, new_value, index=None,
+        self,
+        component_type: str,
+        component_id: str,
+        property_name: str,
+        new_value,
+        index=None,
     ):
         data = self.get_data(component_type, component_id)
         if index is not None and index.get(property_name, None) == ["bin"]:
diff --git a/convokit/model/corpus_helpers.py b/convokit/model/corpus_helpers.py
@@ -272,21 +272,37 @@ def unpack_all_binary_data(
 ):
     # unpack binary data for utterances
     unpack_binary_data_for_utts(
-        utterances, filename, meta_index.utterances_index, exclude_utterance_meta, KeyMeta,
+        utterances,
+        filename,
+        meta_index.utterances_index,
+        exclude_utterance_meta,
+        KeyMeta,
     )
     # unpack binary data for speakers
     unpack_binary_data(
-        filename, speakers_data, meta_index.speakers_index, "speaker", exclude_speaker_meta,
+        filename,
+        speakers_data,
+        meta_index.speakers_index,
+        "speaker",
+        exclude_speaker_meta,
     )
 
     # unpack binary data for conversations
     unpack_binary_data(
-        filename, convos_data, meta_index.conversations_index, "convo", exclude_conversation_meta,
+        filename,
+        convos_data,
+        meta_index.conversations_index,
+        "convo",
+        exclude_conversation_meta,
     )
 
     # unpack binary data for overall corpus
     unpack_binary_data(
-        filename, meta, meta_index.overall_index, "overall", exclude_overall_meta,
+        filename,
+        meta,
+        meta_index.overall_index,
+        "overall",
+        exclude_overall_meta,
     )
 
 
diff --git a/convokit/model/speaker.py b/convokit/model/speaker.py
@@ -24,7 +24,12 @@ class Speaker(CorpusComponent):
     """
 
     def __init__(
-        self, owner=None, id: str = None, utts=None, convos=None, meta: Optional[Dict] = None,
+        self,
+        owner=None,
+        id: str = None,
+        utts=None,
+        convos=None,
+        meta: Optional[Dict] = None,
     ):
         super().__init__(obj_type="speaker", owner=owner, id=id, meta=meta)
         self.utterances = utts if utts is not None else dict()
diff --git a/convokit/redirection/config.py b/convokit/redirection/config.py
@@ -3,7 +3,9 @@
 import torch
 
 DEFAULT_BNB_CONFIG = BitsAndBytesConfig(
-    load_in_4bit=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.bfloat16,
+    load_in_4bit=True,
+    bnb_4bit_quant_type="nf4",
+    bnb_4bit_compute_dtype=torch.bfloat16,
 )
 
 DEFAULT_LORA_CONFIG = LoraConfig(
diff --git a/convokit/redirection/gemmaLikelihoodModel.py b/convokit/redirection/gemmaLikelihoodModel.py
@@ -98,10 +98,16 @@ def _calculate_likelihood_prob(self, past_context, future_context):
         future_context = "\n\n".join(future_context)
 
         context_ids = self.tokenizer.encode(
-            past_context, truncation=True, max_length=self.max_length, return_tensors="pt",
+            past_context,
+            truncation=True,
+            max_length=self.max_length,
+            return_tensors="pt",
         )
         future_ids = self.tokenizer.encode(
-            future_context, truncation=True, max_length=self.max_length, return_tensors="pt",
+            future_context,
+            truncation=True,
+            max_length=self.max_length,
+            return_tensors="pt",
         )
         input_ids = torch.cat([context_ids, future_ids], dim=1)
         if input_ids.shape[1] > self.max_length:
diff --git a/convokit/redirection/preprocessing.py b/convokit/redirection/preprocessing.py
@@ -52,7 +52,10 @@ def get_chunk_dataset(tokenizer, convos, max_tokens=512, overlap_tokens=50):
     chunks = []
     for convo in convos:
         convo_chunks = chunk_text_with_overlap(
-            tokenizer, convo, max_tokens=max_tokens, overlap_tokens=overlap_tokens,
+            tokenizer,
+            convo,
+            max_tokens=max_tokens,
+            overlap_tokens=overlap_tokens,
         )
         chunks += convo_chunks
 
diff --git a/convokit/tests/general/fill_missing_convo_ids/fill_missing_convo_ids_helpers.py b/convokit/tests/general/fill_missing_convo_ids/fill_missing_convo_ids_helpers.py
@@ -7,27 +7,72 @@ def construct_missing_convo_ids_corpus() -> Corpus:
     # test broken convo where there are multiple conversation_ids
     corpus = Corpus(
         utterances=[
-            Utterance(id="0", reply_to=None, speaker=Speaker(id="alice"), timestamp=0,),
-            Utterance(id="1", reply_to="0", speaker=Speaker(id="bob"), timestamp=2,),
-            Utterance(id="2", reply_to="1", speaker=Speaker(id="charlie"), timestamp=1,),
-            Utterance(id="3", reply_to=None, speaker=Speaker(id="alice2"), timestamp=0,),
+            Utterance(
+                id="0",
+                reply_to=None,
+                speaker=Speaker(id="alice"),
+                timestamp=0,
+            ),
+            Utterance(
+                id="1",
+                reply_to="0",
+                speaker=Speaker(id="bob"),
+                timestamp=2,
+            ),
+            Utterance(
+                id="2",
+                reply_to="1",
+                speaker=Speaker(id="charlie"),
+                timestamp=1,
+            ),
+            Utterance(
+                id="3",
+                reply_to=None,
+                speaker=Speaker(id="alice2"),
+                timestamp=0,
+            ),
         ]
     )
     return corpus
 
 
 def get_new_utterances_without_convo_ids() -> List[Utterance]:
     return [
-        Utterance(id="a", reply_to=None, speaker=Speaker(id="alice"), timestamp=0,),
-        Utterance(id="b", reply_to="a", speaker=Speaker(id="bob"), timestamp=0,),
-        Utterance(id="c", reply_to=None, speaker=Speaker(id="bob"), timestamp=0,),
+        Utterance(
+            id="a",
+            reply_to=None,
+            speaker=Speaker(id="alice"),
+            timestamp=0,
+        ),
+        Utterance(
+            id="b",
+            reply_to="a",
+            speaker=Speaker(id="bob"),
+            timestamp=0,
+        ),
+        Utterance(
+            id="c",
+            reply_to=None,
+            speaker=Speaker(id="bob"),
+            timestamp=0,
+        ),
     ]
 
 
 def get_new_utterances_without_existing_convo_ids():
     # i.e. they belong to existing convos
     # one responds to root utt, the other responds to leaf utt
     return [
-        Utterance(id="z", reply_to="0", speaker=Speaker(id="alice"), timestamp=0,),
-        Utterance(id="zz", reply_to="2", speaker=Speaker(id="charlie"), timestamp=0,),
+        Utterance(
+            id="z",
+            reply_to="0",
+            speaker=Speaker(id="alice"),
+            timestamp=0,
+        ),
+        Utterance(
+            id="zz",
+            reply_to="2",
+            speaker=Speaker(id="charlie"),
+            timestamp=0,
+        ),
     ]
diff --git a/convokit/tests/general/merge_corpus/merge_corpus_helpers.py b/convokit/tests/general/merge_corpus/merge_corpus_helpers.py
@@ -43,8 +43,16 @@ def construct_non_overlapping_corpus():
 def construct_overlapping_corpus():
     return Corpus(
         utterances=[
-            Utterance(id="2", text="this is a test", speaker=Speaker(id="charlie"),),
-            Utterance(id="4", text="this is a sentence", speaker=Speaker(id="echo"),),
+            Utterance(
+                id="2",
+                text="this is a test",
+                speaker=Speaker(id="charlie"),
+            ),
+            Utterance(
+                id="4",
+                text="this is a sentence",
+                speaker=Speaker(id="echo"),
+            ),
             Utterance(id="5", text="goodbye", speaker=Speaker(id="foxtrot")),
         ]
     )
diff --git a/convokit/tests/general/traverse_convo/traverse_convo_helpers.py b/convokit/tests/general/traverse_convo/traverse_convo_helpers.py
@@ -93,31 +93,67 @@ def construct_tree_corpus():
                 timestamp=0,
             ),
             Utterance(
-                id="2", reply_to="0", conversation_id="0", speaker=Speaker(id="alice"), timestamp=2,
+                id="2",
+                reply_to="0",
+                conversation_id="0",
+                speaker=Speaker(id="alice"),
+                timestamp=2,
             ),
             Utterance(
-                id="1", reply_to="0", conversation_id="0", speaker=Speaker(id="alice"), timestamp=1,
+                id="1",
+                reply_to="0",
+                conversation_id="0",
+                speaker=Speaker(id="alice"),
+                timestamp=1,
             ),
             Utterance(
-                id="3", reply_to="0", conversation_id="0", speaker=Speaker(id="alice"), timestamp=3,
+                id="3",
+                reply_to="0",
+                conversation_id="0",
+                speaker=Speaker(id="alice"),
+                timestamp=3,
             ),
             Utterance(
-                id="4", reply_to="1", conversation_id="0", speaker=Speaker(id="alice"), timestamp=4,
+                id="4",
+                reply_to="1",
+                conversation_id="0",
+                speaker=Speaker(id="alice"),
+                timestamp=4,
             ),
             Utterance(
-                id="5", reply_to="1", conversation_id="0", speaker=Speaker(id="alice"), timestamp=5,
+                id="5",
+                reply_to="1",
+                conversation_id="0",
+                speaker=Speaker(id="alice"),
+                timestamp=5,
             ),
             Utterance(
-                id="6", reply_to="1", conversation_id="0", speaker=Speaker(id="alice"), timestamp=6,
+                id="6",
+                reply_to="1",
+                conversation_id="0",
+                speaker=Speaker(id="alice"),
+                timestamp=6,
             ),
             Utterance(
-                id="7", reply_to="2", conversation_id="0", speaker=Speaker(id="alice"), timestamp=4,
+                id="7",
+                reply_to="2",
+                conversation_id="0",
+                speaker=Speaker(id="alice"),
+                timestamp=4,
             ),
             Utterance(
-                id="8", reply_to="2", conversation_id="0", speaker=Speaker(id="alice"), timestamp=5,
+                id="8",
+                reply_to="2",
+                conversation_id="0",
+                speaker=Speaker(id="alice"),
+                timestamp=5,
             ),
             Utterance(
-                id="9", reply_to="3", conversation_id="0", speaker=Speaker(id="alice"), timestamp=4,
+                id="9",
+                reply_to="3",
+                conversation_id="0",
+                speaker=Speaker(id="alice"),
+                timestamp=4,
             ),
             Utterance(
                 id="10",

Original file line number	Diff line number	Diff line change
`@@ -3,7 +3,9 @@`
`3`	`3`	`import torch`
`4`	`4`
`5`	`5`	`DEFAULT_BNB_CONFIG = BitsAndBytesConfig(`
`6`		`- load_in_4bit=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.bfloat16,`
	`6`	`+ load_in_4bit=True,`
	`7`	`+ bnb_4bit_quant_type="nf4",`
	`8`	`+ bnb_4bit_compute_dtype=torch.bfloat16,`
`7`	`9`	`)`
`8`	`10`
`9`	`11`	`DEFAULT_LORA_CONFIG = LoraConfig(`