Skip to content

Commit e2ac458

Browse files
committed
Deployed bfe0b7c with MkDocs version: 1.6.1
1 parent 6bc4c34 commit e2ac458

File tree

17 files changed

+1556
-933
lines changed

17 files changed

+1556
-933
lines changed

404.html

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -67,11 +67,7 @@
6767
</li>
6868

6969
<li>
70-
<a href="/user/illumination-correction/" class="dropdown-item">Illumination Correction</a>
71-
</li>
72-
73-
<li>
74-
<a href="/user/segmentation-check/" class="dropdown-item">Segmentation Check</a>
70+
<a href="/user/example-pipeline-cli/" class="dropdown-item">Example CLI Pipeline</a>
7571
</li>
7672
</ul>
7773
</li>

assets/create_starrynight_example.sh

Lines changed: 39 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -4,123 +4,137 @@ PROJECT=XXXXXX
44
BATCH=XXXXXX
55

66
export S3_PATH="s3://${BUCKET}/projects/${PROJECT}/${BATCH}"
7+
export INPUT_DIR='./scratch/starrynight_example_input'
8+
export OUTPUT_BASELINE_DIR='./scratch/starrynight_example_output_baseline'
79

810
# Inputs
911

1012
## SBS images
1113

12-
parallel mkdir -p scratch/starrynight_example/Source1/Batch1/images/Plate1/20X_c{1}_SBS-{1}/ ::: 1 2 3 4 5 6 7 8 9 10
14+
parallel mkdir -p ${INPUT_DIR}/Source1/Batch1/images/Plate1/20X_c{1}_SBS-{1}/ ::: 1 2 3
1315

1416
parallel --match '.*' --match '(.*) (.*) (.*)' \
1517
aws s3 cp "${S3_PATH}/images/Plate1/20X_c{1}_SBS-{1}/Well{2.1}_Point{2.1}_{2.2}_ChannelC,A,T,G,DAPI_Seq{2.3}.ome.tiff" \
16-
"scratch/starrynight_example/Source1/Batch1/images/Plate1/20X_c{1}_SBS-{1}/" ::: \
18+
"${INPUT_DIR}/Source1/Batch1/images/Plate1/20X_c{1}_SBS-{1}/" ::: \
1719
1 2 3 ::: \
1820
"A1 0000 0000" "A1 0001 0001" "A2 0000 1025" "A2 0001 1026" "B1 0000 3075" "B1 0001 3076"
1921

2022
## Cell Painting images
2123

22-
mkdir -p scratch/starrynight_example/Source1/Batch1/images/20X_CP_Plate1_20240319_122800_179
24+
mkdir -p ${INPUT_DIR}/Source1/Batch1/images/20X_CP_Plate1_20240319_122800_179
2325

2426
parallel --match '(.*) (.*) (.*)' \
2527
aws s3 cp "${S3_PATH}/images/Plate1/20X_CP_Plate1_20240319_122800_179/Well{1.1}_Point{1.1}_{1.2}_ChannelPhalloAF750,ZO1-AF488,DAPI_Seq{1.3}.ome.tiff" \
26-
"scratch/starrynight_example/Source1/Batch1/images/Plate1/20X_CP_Plate1_20240319_122800_179/" ::: \
28+
"${INPUT_DIR}/Source1/Batch1/images/Plate1/20X_CP_Plate1_20240319_122800_179/" ::: \
2729
"A1 0000 0000" "A1 0001 0001" "A2 0000 1025" "A2 0001 1026" "B1 0000 3075" "B1 0001 3076"
2830

2931
# Outputs
3032

3133
## Illumination correction images
3234

33-
mkdir -p scratch/starrynight_example/Source1/Batch1/illum/Plate1
35+
mkdir -p ${OUTPUT_BASELINE_DIR}/Source1/Batch1/illum/Plate1
3436
parallel \
35-
aws s3 cp "${S3_PATH}/illum/Plate1/Plate1_Cycle{1}_Illum{2}.npy" "scratch/starrynight_example/Source1/Batch1/illum/Plate1/" ::: \
37+
aws s3 cp "${S3_PATH}/illum/Plate1/Plate1_Cycle{1}_Illum{2}.npy" "${OUTPUT_BASELINE_DIR}/Source1/Batch1/illum/Plate1/" ::: \
3638
1 2 3 ::: \
3739
DNA A T G C
3840

41+
parallel \
42+
aws s3 cp "${S3_PATH}/illum/Plate1/Plate1_Illum{1}.npy" "${OUTPUT_BASELINE_DIR}/Source1/Batch1/illum/Plate1/" ::: \
43+
DNA Phalloidin ZO1
3944

4045
## Cell Painting images: Illumination corrected
4146

4247
parallel \
4348
aws s3 cp "${S3_PATH}/images_corrected/painting/Plate1-Well{1}/Plate_Plate1_Well_Well{1}_Site_{2}_Corr{3}.tiff" \
44-
"scratch/starrynight_example/Source1/Batch1/images_corrected/painting/Plate1-Well{1}/" ::: \
49+
"${OUTPUT_BASELINE_DIR}/Source1/Batch1/images_corrected/painting/Plate1-Well{1}/" ::: \
4550
A1 A2 B1 ::: 0 1 ::: DNA Phalloidin ZO1
4651

47-
4852
parallel \
4953
aws s3 cp "${S3_PATH}/images_corrected/painting/Plate1-Well{1}/PaintingIllumApplication_{2}.csv" \
50-
"scratch/starrynight_example/Source1/Batch1/images_corrected/painting/Plate1-Well{1}/" ::: \
54+
"${OUTPUT_BASELINE_DIR}/Source1/Batch1/images_corrected/painting/Plate1-Well{1}/" ::: \
5155
A1 A2 B1 ::: Cells ConfluentRegions Experiment Image Nuclei
5256

5357
# SBS images: Illumination aligned
5458

5559
parallel \
5660
aws s3 cp "${S3_PATH}/images_aligned/barcoding/Plate1-Well{1}-{2}/Plate_Plate1_Well_{1}_Site_{2}_Cycle0{3}_{4}.tiff" \
57-
"scratch/starrynight_example/Source1/Batch1/images_aligned/barcoding/Plate1-Well{1}-{2}/" ::: \
61+
"${OUTPUT_BASELINE_DIR}/Source1/Batch1/images_aligned/barcoding/Plate1-Well{1}-{2}/" ::: \
5862
A1 A2 B1 ::: 0 1 ::: 1 2 3 ::: A T G C DAPI
5963

6064

6165
parallel \
6266
aws s3 cp "${S3_PATH}/images_aligned/barcoding/Plate1-Well{1}-{2}/BarcodingApplication_{3}.csv" \
63-
"scratch/starrynight_example/Source1/Batch1/images_aligned/barcoding/Plate1-Well{1}-{2}/" ::: \
67+
"${OUTPUT_BASELINE_DIR}/Source1/Batch1/images_aligned/barcoding/Plate1-Well{1}-{2}/" ::: \
6468
A1 A2 B1 ::: 0 1 ::: Experiment Image
6569

6670
## SBS images: Illumination corrected
6771

6872
parallel \
6973
aws s3 cp "${S3_PATH}/images_corrected/barcoding/Plate1-Well{1}-{2}/Plate_Plate1_Well_{1}_Site_{2}_Cycle0{3}_{4}.tiff" \
70-
"scratch/starrynight_example/Source1/Batch1/images_corrected/barcoding/Plate1-Well{1}-{2}/" ::: \
74+
"${OUTPUT_BASELINE_DIR}/Source1/Batch1/images_corrected/barcoding/Plate1-Well{1}-{2}/" ::: \
7175
A1 A2 B1 ::: 0 1 ::: 1 2 3 ::: A T G C
7276

7377
parallel \
7478
aws s3 cp "${S3_PATH}/images_corrected/barcoding/Plate1-Well{1}-{2}/Plate_Plate1_Well_{1}_Site_{2}_Cycle0{3}_{4}.tiff" \
75-
"scratch/starrynight_example/Source1/Batch1/images_corrected/barcoding/Plate1-Well{1}-{2}/" ::: \
79+
"${OUTPUT_BASELINE_DIR}/Source1/Batch1/images_corrected/barcoding/Plate1-Well{1}-{2}/" ::: \
7680
A1 A2 B1 ::: 0 1 ::: 1 ::: DAPI
77-
# DAPI is only present in the first cycle
81+
# DAPI is present only in the first cycle
7882

7983

8084
parallel \
8185
aws s3 cp "${S3_PATH}/images_corrected/barcoding/Plate1-Well{1}-{2}/BarcodePreprocessing_{3}.csv" \
82-
"scratch/starrynight_example/Source1/Batch1/images_corrected/barcoding/Plate1-Well{1}-{2}/" ::: \
86+
"${OUTPUT_BASELINE_DIR}/Source1/Batch1/images_corrected/barcoding/Plate1-Well{1}-{2}/" ::: \
8387
A1 A2 B1 ::: 0 1 ::: BarcodeFoci PreFoci Experiment Image Nuclei
8488

8589
## Segmentation images
8690

8791
parallel \
8892
aws s3 cp "${S3_PATH}/images_segmentation/Plate1/Plate_Plate1_Well_Well{1}_Site_{2}_Corr{3}_SegmentCheck.png" \
89-
"scratch/starrynight_example/Source1/Batch1/images_segmentation/Plate1/" ::: \
93+
"${OUTPUT_BASELINE_DIR}/Source1/Batch1/images_segmentation/Plate1/" ::: \
9094
A1 A2 B1 ::: 0 ::: DNA
91-
# Notice the odd naming of Well
9295

9396
parallel \
9497
aws s3 cp "${S3_PATH}/images_segmentation/Plate1/Plate_Plate1_Well_Well{1}_Site_{2}_Corr{3}_SegmentCheck.png" \
95-
"scratch/starrynight_example/Source1/Batch1/images_segmentation/Plate1/" ::: \
98+
"${OUTPUT_BASELINE_DIR}/Source1/Batch1/images_segmentation/Plate1/" ::: \
9699
A1 A2 B1 ::: 0 ::: DNA
97100

98101
parallel \
99102
aws s3 cp "${S3_PATH}/images_segmentation/Plate1/SegmentationCheck_{1}.csv" \
100-
"scratch/starrynight_example/Source1/Batch1/images_segmentation/Plate1/" ::: \
103+
"${OUTPUT_BASELINE_DIR}/Source1/Batch1/images_segmentation/Plate1/" ::: \
101104
Experiment Image Nuclei Cells PreCells ConfluentRegions
102105

103-
104106
## Load Data CSVs
105107

106108
export S3_PATH_WORKSPACE="s3://${BUCKET}/projects/${PROJECT}/workspace"
107109

108110
aws s3 sync \
109111
"${S3_PATH_WORKSPACE}/load_data_csv/${BATCH}/Plate1/" \
110-
"scratch/starrynight_example/Source1/workspace_example/load_data_csv/Batch1/Plate1/"
111-
112+
"${OUTPUT_BASELINE_DIR}/Source1/workspace_example/load_data_csv/Batch1/Plate1/"
112113

113114
## Analysis CSVs
114115

115116
parallel \
116117
aws s3 sync \
117118
"${S3_PATH_WORKSPACE}/analysis/${BATCH}/Plate1-Well{1}-{2}/" \
118-
"scratch/starrynight_example/Source1/workspace_example/analysis/${BATCH}/Plate1-Well{1}-{2}/" \
119+
"${OUTPUT_BASELINE_DIR}/Source1/workspace_example/analysis/${BATCH}/Plate1-Well{1}-{2}/" \
119120
--exclude \""*.csv\"" ::: \
120121
A1 A2 B1 ::: 0 1
121122

122123

123124
parallel \
124-
aws s3 sync "${S3_PATH_WORKSPACE}/analysisfix/${BATCH}/Plate1-Well{1}-{2}/" \
125-
"scratch/starrynight_example/Source1/workspace_example/analysis/${BATCH}/Plate1-Well{1}-{2}/" ::: \
125+
aws s3 sync \
126+
"${S3_PATH_WORKSPACE}/analysisfix/${BATCH}/Plate1-Well{1}-{2}/" \
127+
"${OUTPUT_BASELINE_DIR}/Source1/workspace_example/analysis/${BATCH}/Plate1-Well{1}-{2}/" ::: \
126128
A1 A2 B1 ::: 0 1
129+
# Note that the analysis files are synced from two different locations: analysis and analysisfix.
130+
# analysisfix was a rerun of analysis
131+
132+
# Compress files to reduce disk usage after downloading
133+
echo "Compressing files to reduce disk usage..."
134+
135+
## Compress all TIFF files
136+
find ${OUTPUT_BASELINE_DIR} -type f -name "*.tiff" | parallel 'magick {} -compress jpeg -quality 80 {}'
137+
find ${INPUT_DIR} -type f -name "*.tiff" | parallel 'magick {} -compress jpeg -quality 80 {}'
138+
139+
# Compress CSV files
140+
find ${OUTPUT_BASELINE_DIR} -type f -name "*.csv" | parallel 'gzip -9 {}'

assets/parse_yaml.py

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
import yaml
2+
import os
3+
import csv
4+
5+
6+
def get_file_size(path):
    """Return the size of *path* in bytes, or ``None`` if unavailable.

    Parameters
    ----------
    path : str or os.PathLike
        File whose size should be reported.

    Returns
    -------
    int or None
        Size in bytes, or ``None`` when the file is missing or unreadable.
    """
    # EAFP: call getsize() directly instead of exists()+getsize().
    # The two-call form stats the file twice and is racy -- the file
    # could disappear between the existence check and the size read.
    try:
        return os.path.getsize(path)
    except FileNotFoundError:
        # Missing file is the expected "no size" case; stay silent,
        # matching the original exists()-check behavior.
        return None
    except OSError as e:
        # Anything else (permissions, broken symlink, ...) is reported
        # but never propagated -- callers treat None as "size unknown".
        print(f"Error getting file size for {path}: {e}")
        return None
13+
14+
15+
def read_csv_headers(file_path, max_headers=20):
    """Return the header row of a CSV file, truncated to *max_headers* items.

    If the file has more than ``max_headers`` columns, the first
    ``max_headers - 1`` names are returned and the final entry is a
    summary string such as ``"6 more columns"``.

    Parameters
    ----------
    file_path : str or os.PathLike
        CSV file to read.
    max_headers : int
        Maximum length of the returned list.

    Returns
    -------
    list[str]
        Header names (possibly truncated), or ``[]`` on any read error.
    """
    # Keep the try body minimal: only the I/O that can actually fail.
    # Narrow exceptions -- a broad `except Exception` would also hide
    # programming errors (e.g. a bad argument type).
    try:
        with open(file_path, "r", newline="") as csvfile:
            headers = next(csv.reader(csvfile), [])
    except (OSError, csv.Error, UnicodeDecodeError) as e:
        # Best-effort: a missing or corrupt file yields an empty header
        # list so the caller can keep processing remaining entries.
        print(f"Error reading CSV headers from {file_path}: {e}")
        return []

    if len(headers) <= max_headers:
        return headers
    # Keep max_headers - 1 real names; the last slot summarizes the rest.
    remaining = len(headers) - (max_headers - 1)
    return headers[: max_headers - 1] + [f"{remaining} more columns"]
32+
33+
34+
def _describe_file(folder_path, file_name, is_csv):
    """Build a {path, size[, headers]} record for one file."""
    full_path = os.path.join(folder_path, file_name)
    info = {"path": full_path, "size": get_file_size(full_path)}
    if is_csv:
        # Only CSVs carry a "headers" key, to keep the output compact.
        info["headers"] = read_csv_headers(full_path)
    return info


def _process_folder(base_path, folder_item):
    """Resolve one folder entry into full paths with sizes/headers."""
    folder_path = os.path.join(base_path, folder_item["folder"])
    processed = {"folder": folder_item["folder"], "files": []}
    for file_item in folder_item["files"]:
        if isinstance(file_item, dict):
            # Files grouped by type, e.g. {"csv": [...], "tiff": [...]}.
            processed["files"].append(
                {
                    file_type: [
                        _describe_file(
                            folder_path,
                            name,
                            file_type.lower() == "csv"
                            or name.lower().endswith(".csv"),
                        )
                        for name in file_list
                    ]
                    for file_type, file_list in file_item.items()
                }
            )
        else:
            # A bare file-name string.
            processed["files"].append(
                _describe_file(
                    folder_path,
                    file_item,
                    file_item.lower().endswith(".csv"),
                )
            )
    return processed


def build_file_paths(yaml_data):
    """Process YAML and return a structure with full paths and file sizes.

    For every section, each listed file is resolved against the section's
    ``path`` and annotated with its on-disk size; CSV files additionally
    get a (truncated) list of their header columns.

    Parameters
    ----------
    yaml_data : dict
        Parsed YAML mapping section names to ``{"path": ..., "files": ...}``
        entries. The input is NOT modified.

    Returns
    -------
    dict
        New mapping with the same sections, where every file entry is a
        ``{"path": ..., "size": ...}`` (plus ``"headers"`` for CSVs) dict.
    """
    result = {}
    for section_name, section_data in yaml_data.items():
        # Build a fresh section dict instead of section_data.copy():
        # a shallow copy aliases the nested "files" dict, so rebinding
        # result[...]["files"][set_name] would also rewrite the caller's
        # yaml_data in place.
        section = {k: v for k, v in section_data.items() if k != "files"}
        base_path = section_data["path"]

        if "files" in section_data:
            section["files"] = {
                set_name: [
                    _process_folder(base_path, folder_item)
                    for folder_item in folders
                ]
                for set_name, folders in section_data["files"].items()
            }

        result[section_name] = section
    return result
92+
93+
94+
def main():
    """Parse ``sample_files.yaml`` and write the enriched result back out.

    Loads the sample-file description from the working directory,
    resolves every entry to a full path with size (and CSV headers),
    then dumps the result to ``sample_files_parsed.yaml``.
    """
    # Load the raw description of the sample file layout.
    with open("sample_files.yaml", "r") as source:
        raw_layout = yaml.safe_load(source)

    enriched = build_file_paths(raw_layout)

    # Persist, preserving insertion order to keep diffs readable.
    with open("sample_files_parsed.yaml", "w") as sink:
        yaml.dump(enriched, sink, default_flow_style=False, sort_keys=False)

    print("Processed YAML file has been saved to: sample_files_parsed.yaml")


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)