Skip to content

Commit 951a707

Browse files
authored
Merge pull request #311 from pachterlab/development
Development
2 parents 0391b45 + 5d25130 commit 951a707

File tree

16 files changed

+342
-239
lines changed

16 files changed

+342
-239
lines changed

.github/workflows/ci.yml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@ jobs:
1212
- name: Setup python
1313
uses: actions/setup-python@v1
1414
with:
15-
python-version: '3.9.22'
15+
python-version: '3.10.19'
1616
architecture: x64
1717
- name: Install dependencies
1818
run: pip install -r dev-requirements.txt
@@ -22,7 +22,7 @@ jobs:
2222
runs-on: ubuntu-latest
2323
strategy:
2424
matrix:
25-
python: [3.9.22, 3.10.17 ]
25+
python: [3.10.19 ]
2626
os: [ubuntu-20.04]
2727
name: Test on Python ${{ matrix.python }}
2828
steps:

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -65,6 +65,7 @@ instance/
6565

6666
# Mac stuff:
6767
.DS_Store
68+
*.swp
6869

6970
# Sphinx documentation
7071
docs/_build/
@@ -114,4 +115,4 @@ venv.bak/
114115
/.idea/
115116

116117
# Temp files
117-
/scratch/
118+
/scratch/

dev-requirements.txt

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
bumpversion==0.6.0
2-
coverage==5.2.1
2+
coverage==7.6.1
33
flake8==3.8.2
44
pytest==8.2.2
55
pytest-cov==5.0.0
@@ -8,5 +8,5 @@ sphinx>=3.3.1
88
sphinx-autoapi>=1.5.1
99
sphinx_rtd_theme>=0.5.0
1010
twine>=2.0.0
11-
wheel==0.38.1
11+
wheel==0.46.2
1212
yapf==0.30.0
55.6 KB
Binary file not shown.
63.7 KB
Binary file not shown.
108 KB
Binary file not shown.
109 KB
Binary file not shown.

kb_python/bins/linux/kallisto/license.txt

100755 → 100644
File mode changed.

kb_python/count.py

Lines changed: 66 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -658,8 +658,12 @@ def bustools_whitelist(
658658

659659

660660
def matrix_to_cellranger(
661-
matrix_path: str, barcodes_path: str, genes_path: str, t2g_path: str,
662-
out_dir: str, gzip: bool = False
661+
matrix_path: str,
662+
barcodes_path: str,
663+
genes_path: str,
664+
t2g_path: str,
665+
out_dir: str,
666+
gzip: bool = False
663667
) -> Dict[str, str]:
664668
"""Convert bustools count matrix to cellranger-format matrix.
665669
@@ -1065,8 +1069,10 @@ def filter_with_bustools(
10651069
if cellranger:
10661070
if not tcc:
10671071
cr_result = matrix_to_cellranger(
1068-
count_result['mtx'], count_result['barcodes'],
1069-
count_result['genes'], t2g_path,
1072+
count_result['mtx'],
1073+
count_result['barcodes'],
1074+
count_result['genes'],
1075+
t2g_path,
10701076
os.path.join(counts_dir, CELLRANGER_DIR),
10711077
gzip=gzip
10721078
)
@@ -1290,7 +1296,7 @@ def count(
12901296
by_name: Aggregate counts by name instead of ID.
12911297
cellranger: Whether to convert the final count matrix into a
12921298
cellranger-compatible matrix, defaults to `False`
1293-
gzip: Whether to gzip compress cellranger output matrices,
1299+
gzip: Whether to gzip compress cellranger output matrices,
12941300
defaults to `False`
12951301
delete_bus: Whether to delete intermediate BUS files after successful count,
12961302
defaults to `False`
@@ -1649,8 +1655,10 @@ def update_results_with_suffix(current_results, new_results, suffix):
16491655
final_result = quant_result if quant else count_result
16501656
if cellranger:
16511657
cr_result = matrix_to_cellranger(
1652-
count_result['mtx'], count_result['barcodes'],
1653-
count_result['genes'], t2g_path,
1658+
count_result['mtx'],
1659+
count_result['barcodes'],
1660+
count_result['genes'],
1661+
t2g_path,
16541662
os.path.join(counts_dir, f'{CELLRANGER_DIR}{suffix}'),
16551663
gzip=gzip
16561664
)
@@ -1760,24 +1768,26 @@ def update_results_with_suffix(current_results, new_results, suffix):
17601768
if delete_bus:
17611769
logger.info('Deleting intermediate BUS files to save disk space')
17621770
bus_files_to_delete = []
1763-
1771+
17641772
# Collect all .bus files from results
17651773
if 'bus' in unfiltered_results:
17661774
bus_files_to_delete.append(unfiltered_results['bus'])
17671775
if 'bus_scs' in unfiltered_results:
17681776
bus_files_to_delete.append(unfiltered_results['bus_scs'])
1769-
1777+
17701778
# For smartseq3, delete suffix versions too
17711779
for suffix in ['', INTERNAL_SUFFIX, UMI_SUFFIX]:
17721780
if f'bus{suffix}' in unfiltered_results:
17731781
bus_files_to_delete.append(unfiltered_results[f'bus{suffix}'])
17741782
if f'bus_scs{suffix}' in unfiltered_results:
1775-
bus_files_to_delete.append(unfiltered_results[f'bus_scs{suffix}'])
1776-
1783+
bus_files_to_delete.append(
1784+
unfiltered_results[f'bus_scs{suffix}']
1785+
)
1786+
17771787
# Delete filtered bus if exists
17781788
if 'filtered' in results and 'bus_scs' in results['filtered']:
17791789
bus_files_to_delete.append(results['filtered']['bus_scs'])
1780-
1790+
17811791
# Delete each BUS file
17821792
for bus_file in bus_files_to_delete:
17831793
if bus_file and os.path.exists(bus_file):
@@ -1875,7 +1885,7 @@ def count_nac(
18751885
by_name: Aggregate counts by name instead of ID.
18761886
cellranger: Whether to convert the final count matrix into a
18771887
cellranger-compatible matrix, defaults to `False`
1878-
gzip: Whether to gzip compress cellranger output matrices,
1888+
gzip: Whether to gzip compress cellranger output matrices,
18791889
defaults to `False`
18801890
cellranger_style: Whether to organize output in CellRanger-style directories
18811891
(spliced/ and unspliced/ subdirectories), defaults to `False`
@@ -2181,13 +2191,19 @@ def update_results_with_suffix(current_results, new_results, suffix):
21812191
elif i == 1: # unprocessed/unspliced
21822192
cr_dir = os.path.join(counts_dir, 'unspliced')
21832193
else: # ambiguous
2184-
cr_dir = os.path.join(counts_dir, f'{CELLRANGER_DIR}_{prefix}{suffix}')
2194+
cr_dir = os.path.join(
2195+
counts_dir, f'{CELLRANGER_DIR}_{prefix}{suffix}'
2196+
)
21852197
else:
2186-
cr_dir = os.path.join(counts_dir, f'{CELLRANGER_DIR}_{prefix}{suffix}')
2187-
2198+
cr_dir = os.path.join(
2199+
counts_dir, f'{CELLRANGER_DIR}_{prefix}{suffix}'
2200+
)
2201+
21882202
cr_result = matrix_to_cellranger(
2189-
count_result[i]['mtx'], count_result[i]['barcodes'],
2190-
count_result[i]['genes'], t2g_path,
2203+
count_result[i]['mtx'],
2204+
count_result[i]['barcodes'],
2205+
count_result[i]['genes'],
2206+
t2g_path,
21912207
cr_dir,
21922208
gzip=gzip
21932209
)
@@ -2225,7 +2241,10 @@ def update_results_with_suffix(current_results, new_results, suffix):
22252241
update_results_with_suffix(prefix_results, res, suffix)
22262242
if cellranger:
22272243
cr_result = matrix_to_cellranger(
2228-
res['mtx'], res['barcodes'], res['genes'], t2g_path,
2244+
res['mtx'],
2245+
res['barcodes'],
2246+
res['genes'],
2247+
t2g_path,
22292248
os.path.join(
22302249
counts_dir, f'{CELLRANGER_DIR}_{prefix}{suffix}'
22312250
),
@@ -2352,17 +2371,28 @@ def update_results_with_suffix(current_results, new_results, suffix):
23522371
if cellranger_style:
23532372
# Create spliced/unspliced subdirectories for CellRanger style
23542373
if i == 0: # processed/spliced
2355-
cr_dir = os.path.join(filtered_counts_dir, 'spliced')
2374+
cr_dir = os.path.join(
2375+
filtered_counts_dir, 'spliced'
2376+
)
23562377
elif i == 1: # unprocessed/unspliced
2357-
cr_dir = os.path.join(filtered_counts_dir, 'unspliced')
2378+
cr_dir = os.path.join(
2379+
filtered_counts_dir, 'unspliced'
2380+
)
23582381
else: # ambiguous
2359-
cr_dir = os.path.join(filtered_counts_dir, f'{CELLRANGER_DIR}_{prefix}')
2382+
cr_dir = os.path.join(
2383+
filtered_counts_dir,
2384+
f'{CELLRANGER_DIR}_{prefix}'
2385+
)
23602386
else:
2361-
cr_dir = os.path.join(filtered_counts_dir, f'{CELLRANGER_DIR}_{prefix}')
2362-
2387+
cr_dir = os.path.join(
2388+
filtered_counts_dir, f'{CELLRANGER_DIR}_{prefix}'
2389+
)
2390+
23632391
cr_result = matrix_to_cellranger(
2364-
count_result[i]['mtx'], count_result[i]['barcodes'],
2365-
count_result[i]['genes'], t2g_path,
2392+
count_result[i]['mtx'],
2393+
count_result[i]['barcodes'],
2394+
count_result[i]['genes'],
2395+
t2g_path,
23662396
cr_dir,
23672397
gzip=gzip
23682398
)
@@ -2396,7 +2426,10 @@ def update_results_with_suffix(current_results, new_results, suffix):
23962426
filtered_results[prefix] = {}
23972427
if cellranger:
23982428
cr_result = matrix_to_cellranger(
2399-
res['mtx'], res['barcodes'], res['genes'], t2g_path,
2429+
res['mtx'],
2430+
res['barcodes'],
2431+
res['genes'],
2432+
t2g_path,
24002433
os.path.join(
24012434
filtered_counts_dir,
24022435
f'{CELLRANGER_DIR}_{prefix}'
@@ -2488,19 +2521,21 @@ def update_results_with_suffix(current_results, new_results, suffix):
24882521
if delete_bus:
24892522
logger.info('Deleting intermediate BUS files to save disk space')
24902523
bus_files_to_delete = []
2491-
2524+
24922525
# Collect all .bus files from results
24932526
prefixes = ['processed', 'unprocessed', 'ambiguous']
24942527
for prefix in prefixes:
24952528
if prefix in unfiltered_results:
24962529
for suffix in ['', INTERNAL_SUFFIX, UMI_SUFFIX]:
24972530
if f'bus{suffix}' in unfiltered_results[prefix]:
2498-
bus_files_to_delete.append(unfiltered_results[prefix][f'bus{suffix}'])
2499-
2531+
bus_files_to_delete.append(
2532+
unfiltered_results[prefix][f'bus{suffix}']
2533+
)
2534+
25002535
# Delete filtered bus files if they exist
25012536
if 'filtered' in results and 'bus_scs' in results['filtered']:
25022537
bus_files_to_delete.append(results['filtered']['bus_scs'])
2503-
2538+
25042539
# Delete each BUS file
25052540
for bus_file in bus_files_to_delete:
25062541
if bus_file and os.path.exists(bus_file):

kb_python/main.py

Lines changed: 10 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -408,7 +408,7 @@ def parse_count(
408408
'Plots for TCC matrices have not yet been implemented. '
409409
'The HTML report will not contain any plots.'
410410
)
411-
# Note: We are currently not supporting --genomebam
411+
412412
if args.genomebam:
413413
parser.error('--genomebam is not currently supported')
414414
if args.genomebam and not args.gtf:
@@ -591,11 +591,11 @@ def parse_count(
591591
parser.error(
592592
f'Option `--aa` cannot be used with workflow {args.workflow}.'
593593
)
594-
594+
595595
# Auto-enable gzip and cellranger-style when --cellranger is used
596596
use_gzip = args.cellranger and not args.no_gzip or args.gzip
597597
use_cellranger_style = args.cellranger
598-
598+
599599
from .count import count_nac
600600
count_nac(
601601
args.i,
@@ -1462,7 +1462,10 @@ def setup_count_args(
14621462
)
14631463
parser_count.add_argument(
14641464
'--gzip',
1465-
help='Gzip compress output matrices (matrix.mtx.gz, barcodes.tsv.gz, genes.tsv.gz). Automatically enabled with --cellranger',
1465+
help=(
1466+
'Gzip compress output matrices (matrix.mtx.gz, barcodes.tsv.gz, genes.tsv.gz). '
1467+
'Automatically enabled with --cellranger. '
1468+
),
14661469
action='store_true'
14671470
)
14681471
parser_count.add_argument(
@@ -1472,7 +1475,9 @@ def setup_count_args(
14721475
)
14731476
parser_count.add_argument(
14741477
'--delete-bus',
1475-
help='Delete intermediate BUS files after successful count to save disk space',
1478+
help=(
1479+
'Delete intermediate BUS files after successful count to save disk space'
1480+
),
14761481
action='store_true'
14771482
)
14781483
parser_count.add_argument(

0 commit comments

Comments
 (0)