Skip to content

Commit a671e51

Browse files
committed
Fix style issues.
1 parent 70a9eb1 commit a671e51

12 files changed

+16
-29
lines changed

openpmcvl/granular/README.md

+1-2
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ When a subfigure is successfully detected and separated:
5555
When subfigure extraction fails:
5656
- `id`: Generated ID that would have been used
5757
- `source_fig_id`: ID of the original figure
58-
- `PMC_ID`: PMC ID of the source article
58+
- `PMC_ID`: PMC ID of the source article
5959
- `media_name`: Original filename
6060

6161
This script saves extracted subfigures as .jpg files in the target directory. Metadata for each subfigure is stored in separate JSONL files, with unique IDs that link back to the original figure-caption pairs in the source JSONL files.
@@ -106,4 +106,3 @@ The non biomedical subfigures will be removed. The following fields are added to
106106

107107
The outputs from steps 3 and 5 contain labeled subcaptions and labeled subfigures, respectively. By matching these labels (e.g. "Subfigure-A"), we can create the final subfigure-subcaption pairs. Any cases where labels are missing or captions couldn't be split will be handled in subsequent steps. Refer to the notebook for more details.
108108
<br><br>
109-

openpmcvl/granular/models/network.py

-1
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,6 @@ def forward(self, x):
141141

142142

143143
class ResNet(nn.Module):
144-
145144
def __init__(
146145
self,
147146
block,

openpmcvl/granular/models/subfigure_ocr.py

-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616

1717
class classifier:
1818
def __init__(self):
19-
2019
self.current_dir = os.path.dirname(os.path.abspath(__file__))
2120
configuration_file = os.path.join(
2221
self.current_dir, "..", "config", "yolov3_default_subfig.cfg"

openpmcvl/granular/models/transformer_module.py

-6
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@ def __init__(self, n_head, d_model, d_k, d_v, dropout=0.1):
2828
self.layer_norm = nn.LayerNorm(d_model, eps=1e-6)
2929

3030
def forward(self, q, k, v, mask=None):
31-
3231
residual = q
3332

3433
q = self.layer_norm(q)
@@ -70,7 +69,6 @@ def __init__(self, temperature, attn_dropout=0.1):
7069
self.dropout = nn.Dropout(attn_dropout)
7170

7271
def forward(self, q, k, v, mask=None):
73-
7472
attn = torch.matmul(q / self.temperature, k.transpose(2, 3))
7573

7674
if mask is not None:
@@ -83,7 +81,6 @@ def forward(self, q, k, v, mask=None):
8381

8482

8583
class TransformerEncoderLayer(nn.Module):
86-
8784
def __init__(
8885
self, d_model, nhead, dim_feedforward=2048, dropout=0.1, activation="relu"
8986
):
@@ -104,7 +101,6 @@ def __init__(
104101
self.norm = nn.LayerNorm(d_model)
105102

106103
def forward(self, src):
107-
108104
q = k = src
109105
src = self.self_attn(q, k, src)[0]
110106

@@ -116,7 +112,6 @@ def forward(self, src):
116112

117113

118114
class TransformerDecoderLayer(nn.Module):
119-
120115
def __init__(
121116
self, d_model, nhead, dim_feedforward=2048, dropout=0.1, activation="relu"
122117
):
@@ -141,7 +136,6 @@ def __init__(
141136
self.norm = nn.LayerNorm(d_model)
142137

143138
def forward(self, tgt, memory):
144-
145139
tgt = self.cross_attn(tgt, memory, memory)[0]
146140

147141
tgt = self.self_attn(tgt, tgt, tgt)[0]

openpmcvl/granular/models/yolov3.py

+2-5
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,6 @@ class resblock(nn.Module):
4848
"""
4949

5050
def __init__(self, ch, nblocks=1, shortcut=True):
51-
5251
super().__init__()
5352
self.shortcut = shortcut
5453
self.module_list = nn.ModuleList()
@@ -194,8 +193,7 @@ def forward(self, x, targets=None):
194193
x = torch.cat((x, route_layers[0]), 1)
195194
if train:
196195
return sum(output)
197-
else:
198-
return torch.cat(output, 1)
196+
return torch.cat(output, 1)
199197

200198

201199
def create_yolov3img_modules(config_model, ignore_thre):
@@ -327,5 +325,4 @@ def forward(self, x, targets=None):
327325
x = torch.cat((x, route_layers[0]), 1)
328326
if train:
329327
return sum(output)
330-
else:
331-
return output
328+
return output

openpmcvl/granular/pipeline/align.sh

+2-2
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
# sbatch openpmcvl/granular/pipeline/align.sh 0 1 2 3 4 5 6 7 8 9 10 11
2020

2121

22-
# Activate virtual environment
22+
# Activate virtual environment
2323
source $VENV_PATH/bin/activate
2424

2525
# Set working directory
@@ -44,6 +44,6 @@ for num in $JSONL_NUMBERS; do
4444
--root_dir "$PMC_ROOT" \
4545
--dataset_path "$input_file" \
4646
--save_path "$output_file"
47-
47+
4848
echo "Finished aligning ${num}"
4949
done

openpmcvl/granular/pipeline/classify.sh

+2-2
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,6 @@ for num in $JSONL_NUMBERS; do
4545
--output_file "$output_file" \
4646
--batch_size 256 \
4747
--num_workers 8 \
48-
48+
4949
echo "Finished classifying ${num}"
50-
done
50+
done

openpmcvl/granular/pipeline/preprocess.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -49,4 +49,4 @@ stdbuf -oL -eL srun python3 openpmcvl/granular/pipeline/preprocess.py \
4949
--output_file $OUTPUT_FILE \
5050
--figure_root $FIGURE_ROOT \
5151
--keywords MRI fMRI CT CAT PET PET-MRI MEG EEG ultrasound X-ray Xray nuclear imaging tracer isotope scan positron EKG spectroscopy radiograph tomography endoscope endoscopy colonoscopy elastography ultrasonic ultrasonography echocardiogram endomicroscopy pancreatoscopy cholangioscopy enteroscopy retroscopy chromoendoscopy sigmoidoscopy cholangiography pancreatography cholangio-pancreatography esophagogastroduodenoscopy radiology pathology histopathology \
52-
2>&1 | tee -a %x-%j.out
52+
2>&1 | tee -a %x-%j.out

openpmcvl/granular/pipeline/subcaption.ipynb

+1-2
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@
105105
"\n",
106106
"def generate_jsonl(dataset, requests_file):\n",
107107
" \"\"\"Generate JSONL file with API requests.\n",
108-
" \n",
108+
"\n",
109109
" Args:\n",
110110
" dataset: List of metadata containing captions and IDs\n",
111111
" requests_file: Path to output requests JSONL file\n",
@@ -123,7 +123,6 @@
123123
"\n",
124124
" # Only process captions under 400 words\n",
125125
" if len(data[\"caption\"].split()) <= 400:\n",
126-
"\n",
127126
" # Generate API request for this caption\n",
128127
" request = generate_api_request(\n",
129128
" custom_id=f\"{data['id']}\",\n",

openpmcvl/granular/pipeline/subcaption.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,9 @@
1111

1212
PROMPT = """
1313
Subfigure labels are letters referring to individual subfigures within a larger figure.
14-
Check if the caption contains explicit subfigure label.
15-
If not, output "NO" and end the generation.
16-
If yes, output "YES", then generate the subcaption of the subfigures according to the caption.
14+
Check if the caption contains explicit subfigure label.
15+
If not, output "NO" and end the generation.
16+
If yes, output "YES", then generate the subcaption of the subfigures according to the caption.
1717
The output should use the template:
1818
YES
1919
Subfigure-A: ...

openpmcvl/granular/pipeline/subcaption.sh

+2-2
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,6 @@ for num in $JSONL_NUMBERS; do
3939
--output-file "$PMC_ROOT/${num}_subcaptions.jsonl" \
4040
--max-tokens 500 \
4141
2>&1 | tee -a %x-%j.out
42-
42+
4343
echo "Finished processing ${num}"
44-
done
44+
done

openpmcvl/granular/pipeline/subfigure.sh

+2-2
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ for num in $JSONL_NUMBERS; do
3737
# Define the paths for the evaluation file and the record file
3838
eval_file="$PMC_ROOT/${num}_meta.jsonl"
3939
rcd_file="$PMC_ROOT/${num}_subfigures.jsonl"
40-
40+
4141
# Run the subfigure separation script
4242
stdbuf -oL -eL srun python3 openpmcvl/granular/pipeline/subfigure.py \
4343
--separation_model openpmcvl/granular/checkpoints/subfigure_detector.pth \
@@ -49,7 +49,7 @@ for num in $JSONL_NUMBERS; do
4949
--batch_size 128 \
5050
--num_workers 8 \
5151
--gpu 0
52-
52+
5353
# Print a message indicating the completion of processing for the current JSONL number
5454
echo "Finished processing ${num}"
5555
done

0 commit comments

Comments
 (0)