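Fix style issues: strip trailing whitespace, drop stray blank lines, normalize end-of-file newlines, and remove redundant else-after-return.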

Adibvafa · Adibvafa · commit a671e5190362 · 2025-01-18T12:24:51.000-05:00
diff --git a/openpmcvl/granular/README.md b/openpmcvl/granular/README.md
@@ -55,7 +55,7 @@ When a subfigure is successfully detected and separated:
 When subfigure extraction fails:
 - `id`: Generated ID that would have been used
 - `source_fig_id`: ID of the original figure
-- `PMC_ID`: PMC ID of the source article  
+- `PMC_ID`: PMC ID of the source article
 - `media_name`: Original filename
 
 This script saves extracted subfigures as .jpg files in the target directory. Metadata for each subfigure is stored in separate JSONL files, with unique IDs that link back to the original figure-caption pairs in the source JSONL files.
@@ -106,4 +106,3 @@ The non biomedical subfigures will be removed. The following fields are added to
 
 The outputs from steps 3 and 5 contain labeled subcaptions and labeled subfigures respectively. By matching these labels (e.g. "Subfigure-A"), we can create the final subfigure-subcaption pairs. Any cases where labels are missing or captions couldn't be split will be handled in subsequent steps. Refer to notebook for more details.
 <br><br>
-
diff --git a/openpmcvl/granular/models/network.py b/openpmcvl/granular/models/network.py
@@ -141,7 +141,6 @@ def forward(self, x):
 
 
 class ResNet(nn.Module):
-
     def __init__(
         self,
         block,
diff --git a/openpmcvl/granular/models/subfigure_ocr.py b/openpmcvl/granular/models/subfigure_ocr.py
@@ -16,7 +16,6 @@
 
 class classifier:
     def __init__(self):
-
         self.current_dir = os.path.dirname(os.path.abspath(__file__))
         configuration_file = os.path.join(
             self.current_dir, "..", "config", "yolov3_default_subfig.cfg"
diff --git a/openpmcvl/granular/models/transformer_module.py b/openpmcvl/granular/models/transformer_module.py
@@ -28,7 +28,6 @@ def __init__(self, n_head, d_model, d_k, d_v, dropout=0.1):
         self.layer_norm = nn.LayerNorm(d_model, eps=1e-6)
 
     def forward(self, q, k, v, mask=None):
-
         residual = q
 
         q = self.layer_norm(q)
@@ -70,7 +69,6 @@ def __init__(self, temperature, attn_dropout=0.1):
         self.dropout = nn.Dropout(attn_dropout)
 
     def forward(self, q, k, v, mask=None):
-
         attn = torch.matmul(q / self.temperature, k.transpose(2, 3))
 
         if mask is not None:
@@ -83,7 +81,6 @@ def forward(self, q, k, v, mask=None):
 
 
 class TransformerEncoderLayer(nn.Module):
-
     def __init__(
         self, d_model, nhead, dim_feedforward=2048, dropout=0.1, activation="relu"
     ):
@@ -104,7 +101,6 @@ def __init__(
         self.norm = nn.LayerNorm(d_model)
 
     def forward(self, src):
-
         q = k = src
         src = self.self_attn(q, k, src)[0]
 
@@ -116,7 +112,6 @@ def forward(self, src):
 
 
 class TransformerDecoderLayer(nn.Module):
-
     def __init__(
         self, d_model, nhead, dim_feedforward=2048, dropout=0.1, activation="relu"
     ):
@@ -141,7 +136,6 @@ def __init__(
         self.norm = nn.LayerNorm(d_model)
 
     def forward(self, tgt, memory):
-
         tgt = self.cross_attn(tgt, memory, memory)[0]
 
         tgt = self.self_attn(tgt, tgt, tgt)[0]
diff --git a/openpmcvl/granular/models/yolov3.py b/openpmcvl/granular/models/yolov3.py
@@ -48,7 +48,6 @@ class resblock(nn.Module):
     """
 
     def __init__(self, ch, nblocks=1, shortcut=True):
-
         super().__init__()
         self.shortcut = shortcut
         self.module_list = nn.ModuleList()
@@ -194,8 +193,7 @@ def forward(self, x, targets=None):
                 x = torch.cat((x, route_layers[0]), 1)
         if train:
             return sum(output)
-        else:
-            return torch.cat(output, 1)
+        return torch.cat(output, 1)
 
 
 def create_yolov3img_modules(config_model, ignore_thre):
@@ -327,5 +325,4 @@ def forward(self, x, targets=None):
                 x = torch.cat((x, route_layers[0]), 1)
         if train:
             return sum(output)
-        else:
-            return output
+        return output
diff --git a/openpmcvl/granular/pipeline/align.sh b/openpmcvl/granular/pipeline/align.sh
@@ -19,7 +19,7 @@
 # sbatch openpmcvl/granular/pipeline/align.sh 0 1 2 3 4 5 6 7 8 9 10 11
 
 
-# Activate virtual environment 
+# Activate virtual environment
 source $VENV_PATH/bin/activate
 
 # Set working directory
@@ -44,6 +44,6 @@ for num in $JSONL_NUMBERS; do
         --root_dir "$PMC_ROOT" \
         --dataset_path "$input_file" \
         --save_path "$output_file"
-    
+
     echo "Finished aligning ${num}"
 done
diff --git a/openpmcvl/granular/pipeline/classify.sh b/openpmcvl/granular/pipeline/classify.sh
@@ -45,6 +45,6 @@ for num in $JSONL_NUMBERS; do
       --output_file "$output_file" \
       --batch_size 256 \
       --num_workers 8 \
-    
+
     echo "Finished classifying ${num}"
-done
+done
diff --git a/openpmcvl/granular/pipeline/preprocess.sh b/openpmcvl/granular/pipeline/preprocess.sh
@@ -49,4 +49,4 @@ stdbuf -oL -eL srun python3 openpmcvl/granular/pipeline/preprocess.py \
   --output_file $OUTPUT_FILE \
   --figure_root $FIGURE_ROOT \
   --keywords MRI fMRI CT CAT PET PET-MRI MEG EEG ultrasound X-ray Xray nuclear imaging tracer isotope scan positron EKG spectroscopy radiograph tomography endoscope endoscopy colonoscopy elastography ultrasonic ultrasonography echocardiogram endomicroscopy pancreatoscopy cholangioscopy enteroscopy retroscopy chromoendoscopy sigmoidoscopy cholangiography pancreatography cholangio-pancreatography esophagogastroduodenoscopy radiology pathology histopathology \
-  2>&1 | tee -a %x-%j.out
+  2>&1 | tee -a %x-%j.out
diff --git a/openpmcvl/granular/pipeline/subcaption.ipynb b/openpmcvl/granular/pipeline/subcaption.ipynb
@@ -105,7 +105,7 @@
     "\n",
     "def generate_jsonl(dataset, requests_file):\n",
     "    \"\"\"Generate JSONL file with API requests.\n",
-    "    \n",
+    "\n",
     "    Args:\n",
     "        dataset: List of metadata containing captions and IDs\n",
     "        requests_file: Path to output requests JSONL file\n",
@@ -123,7 +123,6 @@
     "\n",
     "            # Only process captions under 400 words\n",
     "            if len(data[\"caption\"].split()) <= 400:\n",
-    "\n",
     "                # Generate API request for this caption\n",
     "                request = generate_api_request(\n",
     "                    custom_id=f\"{data['id']}\",\n",
diff --git a/openpmcvl/granular/pipeline/subcaption.py b/openpmcvl/granular/pipeline/subcaption.py
@@ -11,9 +11,9 @@
 
 PROMPT = """
 Subfigure labels are letters referring to individual subfigures within a larger figure.
-Check if the caption contains explicit subfigure label. 
-If not, output "NO" and end the generation. 
-If yes, output "YES", then generate the subcaption of the subfigures according to the caption. 
+Check if the caption contains explicit subfigure label.
+If not, output "NO" and end the generation.
+If yes, output "YES", then generate the subcaption of the subfigures according to the caption.
 The output should use the template:
     YES
     Subfigure-A: ...
diff --git a/openpmcvl/granular/pipeline/subcaption.sh b/openpmcvl/granular/pipeline/subcaption.sh
@@ -39,6 +39,6 @@ for num in $JSONL_NUMBERS; do
       --output-file "$PMC_ROOT/${num}_subcaptions.jsonl" \
       --max-tokens 500 \
       2>&1 | tee -a %x-%j.out
-    
+
     echo "Finished processing ${num}"
-done
+done
diff --git a/openpmcvl/granular/pipeline/subfigure.sh b/openpmcvl/granular/pipeline/subfigure.sh
@@ -37,7 +37,7 @@ for num in $JSONL_NUMBERS; do
     # Define the paths for the evaluation file and the record file
     eval_file="$PMC_ROOT/${num}_meta.jsonl"
     rcd_file="$PMC_ROOT/${num}_subfigures.jsonl"
-    
+
     # Run the subfigure separation script
     stdbuf -oL -eL srun python3 openpmcvl/granular/pipeline/subfigure.py \
       --separation_model openpmcvl/granular/checkpoints/subfigure_detector.pth \
@@ -49,7 +49,7 @@ for num in $JSONL_NUMBERS; do
       --batch_size 128 \
       --num_workers 8 \
       --gpu 0
-    
+
     # Print a message indicating the completion of processing for the current JSONL number
     echo "Finished processing ${num}"
 done