Skip to content

Commit 9c541bc

Browse files
committed
Merge branch 'main' of github.com:sigven/pcgr
2 parents 027e687 + 395ec99 commit 9c541bc

28 files changed

+177
-51
lines changed

README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,9 @@ PCGR originates from the [Norwegian Cancer Genomics Consortium (NCGC)](https://c
2828

2929
### Top News
3030

31+
- *July 15th 2025:* **2.2.2 release**
32+
- [CHANGELOG](https://sigven.github.io/pcgr/articles/CHANGELOG.html)
33+
3134
- *March 23rd 2025:* **2.2.1 release**
3235
- fix bug in CPSR for ClinVar variants with non-standard significance levels
3336

pcgr/annoutils.py

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,7 @@ def assign_cds_exon_intron_annotations(csq_record, grantham_scores, logger):
230230
csq_record['CDS_CHANGE'] = '.'
231231
csq_record['HGVSp_short'] = '.'
232232
csq_record['PROTEIN_CHANGE'] = '.'
233-
csq_record['GRANTHAM_DISTANCE'] = '.'
233+
csq_record['GRANTHAM_DISTANCE'] = -1
234234
csq_record['ALTERATION'] = '.'
235235
csq_record['EXON_AFFECTED'] = '.'
236236
csq_record['CDS_RELATIVE_POSITION'] = '.'
@@ -264,8 +264,14 @@ def assign_cds_exon_intron_annotations(csq_record, grantham_scores, logger):
264264
csq_record['NULL_VARIANT'] = True
265265

266266
if not csq_record['MaxEntScan_diff'] is None and not csq_record['MaxEntScan_ref'] is None and not csq_record['MaxEntScan_alt'] is None:
267+
fraction_drop = 0.0
268+
#if float(csq_record['MaxEntScan_ref']) > 0:
269+
# fraction_drop = float(csq_record['MaxEntScan_diff']) / float(csq_record['MaxEntScan_ref']).round(4)
270+
#else:
271+
# fraction_drop = 0.0
267272
csq_record['MAXENTSCAN'] = 'MES|' + str(csq_record['MaxEntScan_diff']) + '|' + \
268-
str(csq_record['MaxEntScan_ref']) + '|' + str(csq_record['MaxEntScan_alt'])
273+
str(csq_record['MaxEntScan_ref']) + '|' + str(csq_record['MaxEntScan_alt']) #+ \
274+
#'|' + str(fraction_drop)
269275

270276
if re.search(pcgr_vars.CSQ_SPLICE_DONOR_PATTERN, str(csq_record['Consequence'])) is not None:
271277
if re.search(r'(\+3(A|G)>|\+4A>|\+5G>)', str(csq_record['HGVSc'])) is not None:
@@ -435,10 +441,14 @@ def assign_cds_exon_intron_annotations(csq_record, grantham_scores, logger):
435441
if 'Amino_acids' in csq_record.keys():
436442
if not csq_record['Amino_acids'] is None:
437443
if '/' in str(csq_record['Amino_acids']):
438-
key = str(csq_record['Amino_acids']).split('/')[0] + '_' + str(csq_record['Amino_acids']).split('/')[1]
439-
if key in grantham_scores.keys():
440-
#print('GRANTHAM\t' + str(key) + ' ' + str(grantham_scores[key]))
441-
csq_record['GRANTHAM_DISTANCE'] = grantham_scores[key]
444+
aaref = str(csq_record['Amino_acids']).split('/')[0]
445+
aalt = str(csq_record['Amino_acids']).split('/')[1]
446+
key = str(aaref) + '_' + str(aalt)
447+
## check cases for double amino acid changes in 'Amino_acids', e.g. GQ/GY - only consider the second amino acid change
448+
if len(aaref) == 2 and len(aalt) == 2 and aaref[0] == aalt[0]:
449+
key = aaref[1] + '_' + aalt[1]
450+
if key in grantham_scores.keys():
451+
csq_record['GRANTHAM_DISTANCE'] = grantham_scores[key]
442452

443453

444454
csq_record['PROTEIN_CHANGE'] = protein_change_VEP

pcgr/cna.py

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ def annotate_cna_segments(output_segment_gene_fname: str,
145145
## load copy-number biomarker evidence
146146

147147
cna_query_segment_df['segment_length_mb'] = \
148-
((cna_query_segment_df['segment_end'] - cna_query_segment_df['segment_start']) / 1e6).astype(float).round(5)
148+
((cna_query_segment_df['segment_end'] - cna_query_segment_df['segment_start']) / 1e6).astype(float).round(4)
149149

150150
biomarkers = {}
151151
cna_actionable_dict = {}
@@ -170,29 +170,32 @@ def annotate_cna_segments(output_segment_gene_fname: str,
170170

171171
## Mark copy number amplifications (threshold defined by user) in input
172172
cna_query_segment_df['aberration_key'] = 'nan'
173-
cna_query_segment_df['amp_cond'] = True
174-
cna_query_segment_df.loc[cna_query_segment_df['n_major'] + cna_query_segment_df['n_minor'] < n_copy_amplifications,"amp_cond"] = False
173+
cna_query_segment_df['amp_cond'] = False
175174
cna_query_segment_df.loc[cna_query_segment_df['n_major'] + cna_query_segment_df['n_minor'] >= n_copy_amplifications,"amp_cond"] = True
176175

177176
cna_query_segment_df.loc[cna_query_segment_df.amp_cond, 'aberration_key'] = \
178177
cna_query_segment_df.loc[cna_query_segment_df.amp_cond, 'entrezgene'].astype(str) + '_amplification'
179178

180179
## Mark homozygous deletions in input
181-
cna_query_segment_df['loss_cond'] = True
182-
cna_query_segment_df.loc[cna_query_segment_df['n_major'] + cna_query_segment_df['n_minor'] > 0,"loss_cond"] = False
183-
cna_query_segment_df.loc[cna_query_segment_df['n_major'] + cna_query_segment_df['n_minor'] == 0,"loss_cond"] = True
180+
cna_query_segment_df['homloss_cond'] = False
181+
cna_query_segment_df.loc[cna_query_segment_df['n_major'] + cna_query_segment_df['n_minor'] == 0,"homloss_cond"] = True
182+
183+
## Mark heterozygous deletions in input
184+
cna_query_segment_df['hetloss_cond'] = False
185+
cna_query_segment_df.loc[cna_query_segment_df['n_major'] + cna_query_segment_df['n_minor'] == 1,"hetloss_cond"] = True
184186

185187
cna_query_segment_df['variant_class'] = 'undefined'
186188
cna_query_segment_df.loc[cna_query_segment_df.amp_cond, 'variant_class'] = 'gain'
187-
cna_query_segment_df.loc[cna_query_segment_df.loss_cond, 'variant_class'] = 'homdel'
189+
cna_query_segment_df.loc[cna_query_segment_df.homloss_cond, 'variant_class'] = 'homdel'
190+
cna_query_segment_df.loc[cna_query_segment_df.hetloss_cond, 'variant_class'] = 'hetdel'
188191

189-
cna_query_segment_df.loc[cna_query_segment_df.loss_cond, 'aberration_key'] = \
190-
cna_query_segment_df.loc[cna_query_segment_df.loss_cond, 'entrezgene'].astype(str) + '_ablation'
192+
cna_query_segment_df.loc[cna_query_segment_df.homloss_cond, 'aberration_key'] = \
193+
cna_query_segment_df.loc[cna_query_segment_df.homloss_cond, 'entrezgene'].astype(str) + '_ablation'
191194

192195
## Append actionability evidence to input amplifications (column 'biomarker_match')
193196
cna_query_segment_df = cna_query_segment_df.merge(
194197
cna_actionable_df, left_on=["aberration_key"], right_on=["aberration_key"], how="left")
195-
cna_query_segment_df.drop(['amp_cond', 'loss_cond', 'aberration_key'], axis=1, inplace=True)
198+
cna_query_segment_df.drop(['amp_cond', 'hetloss_cond', 'homloss_cond','aberration_key'], axis=1, inplace=True)
196199
cna_query_segment_df.loc[cna_query_segment_df['biomarker_match'].isnull(),"biomarker_match"] = '.'
197200

198201
## remove all temporary files

pcgr/oncogenicity.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -460,7 +460,12 @@ def load_oncogenic_variants(oncogenic_variants_fname: str, logger: Logger):
460460
gene = str(row['entrezgene'])
461461
if not 'oncogenic' in str(row['oncogenicity']).lower():
462462
continue
463-
oncogenic_variants[str(gene) + '-' + str(row['var_id'])] = row
463+
oncogenic_variants[str(gene) + '-' + str(row['var_id'])] = row
464+
if 'grantham_distance' in row.keys():
465+
if row['grantham_distance'] == '':
466+
row['grantham_distance'] = -1
467+
else:
468+
row['grantham_distance'] = float(row['grantham_distance'])
464469
if not len(row['hgvsp']) == 0:
465470
oncogenic_variants[str(gene) + '-' + str(row['hgvsp'])] = row
466471
if not len(row['hgvs_c']) == 0:
@@ -530,10 +535,11 @@ def match_oncogenic_variants(transcript_csq_elements, oncogenic_variants, rec, p
530535
if oncogenic_varkeys[oncogenic_varkey].startswith('by_codon'):
531536
grantham_distance = rec.INFO.get('GRANTHAM_DISTANCE')
532537
if not grantham_distance is None:
533-
if float(grantham_distance / float(oncogenic_variants[oncogenic_varkey]['grantham_distance'])) > 0.8:
534-
if not oncogenic_info in known_oncogenic_sites:
535-
known_oncogenic_sites[oncogenic_info] = []
536-
known_oncogenic_sites[oncogenic_info].append(oncogenic_varkeys[oncogenic_varkey])
538+
if grantham_distance > 0 and oncogenic_variants[oncogenic_varkey]['grantham_distance'] > 0:
539+
if float(grantham_distance / float(oncogenic_variants[oncogenic_varkey]['grantham_distance'])) >= 0.8:
540+
if not oncogenic_info in known_oncogenic_sites:
541+
known_oncogenic_sites[oncogenic_info] = []
542+
known_oncogenic_sites[oncogenic_info].append(oncogenic_varkeys[oncogenic_varkey])
537543
else:
538544
if not oncogenic_info in known_oncogenic_matches:
539545
known_oncogenic_matches[oncogenic_info] = []

pcgrr/R/cna.R

Lines changed: 61 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ plot_cna_segments <- function(chrom_coordinates = NULL,
8686

8787

8888
## Identify segments that involve oncogene gain or
89-
## tumor suppressor loss
89+
## tumor suppressor loss (homozygous or heterozygous)
9090
onc_gain_tsg_loss <- cna_gene |>
9191
dplyr::select(
9292
c("CHROM", "SEGMENT_START", "SEGMENT_END",
@@ -97,9 +97,11 @@ plot_cna_segments <- function(chrom_coordinates = NULL,
9797
(.data$ONCOGENE == TRUE &
9898
.data$VARIANT_CLASS == "gain") |
9999
(.data$TUMOR_SUPPRESSOR == TRUE &
100-
.data$VARIANT_CLASS == "homdel"))
100+
(.data$VARIANT_CLASS == "homdel" |
101+
.data$VARIANT_CLASS == "hetdel")))
101102

102103
tsg_loss <- data.frame()
104+
tsg_het_loss <- data.frame()
103105
onc_gain <- data.frame()
104106

105107
## If there are oncogene gains or tumor suppressor losses,
@@ -146,6 +148,27 @@ plot_cna_segments <- function(chrom_coordinates = NULL,
146148
.groups = "drop")
147149
}
148150

151+
tsg_het_loss <- onc_gain_tsg_loss |>
152+
dplyr::filter(
153+
.data$TUMOR_SUPPRESSOR == TRUE &
154+
.data$VARIANT_CLASS == "hetdel")
155+
156+
## For now, if multiple TSGs are involved in a lost segment, we will only
157+
## show the top three in the plot (hover)
158+
if(NROW(tsg_het_loss) > 0){
159+
tsg_het_loss <- tsg_het_loss |>
160+
dplyr::arrange(
161+
dplyr::desc(.data$TUMOR_SUPPRESSOR_RANK)) |>
162+
dplyr::group_by(
163+
.data$CHROM,
164+
.data$SEGMENT_START,
165+
.data$SEGMENT_END) |>
166+
dplyr::summarise(
167+
TSG_HET_LOSS = paste(
168+
utils::head(.data$SYMBOL, 3), collapse = ", "),
169+
.groups = "drop")
170+
}
171+
149172
}
150173

151174
## Prepare data for plotting
@@ -199,13 +222,29 @@ plot_cna_segments <- function(chrom_coordinates = NULL,
199222
!is.na(.data$TSG_LOSS),
200223
paste0(
201224
.data$SegmentInfo,
202-
"<br> - Tumor suppressor loss: ",
225+
"<br> - Tumor suppressor loss (homozygous del): ",
203226
.data$TSG_LOSS),
204227
.data$SegmentInfo))
205228
}else{
206229
cna_segments_global$TSG_LOSS <- as.character(NA)
207230
}
208231

232+
if(NROW(tsg_het_loss) > 0){
233+
cna_segments_global <- cna_segments_global |>
234+
dplyr::left_join(
235+
tsg_het_loss,
236+
by = c("CHROM", "SEGMENT_START", "SEGMENT_END")
237+
) |>
238+
dplyr::mutate(SegmentInfo = dplyr::if_else(
239+
!is.na(.data$TSG_HET_LOSS),
240+
paste0(.data$SegmentInfo,
241+
"<br> - Tumor suppressor loss (heterozygous del): ",
242+
.data$TSG_HET_LOSS),
243+
.data$SegmentInfo))
244+
}else{
245+
cna_segments_global$TSG_HET_LOSS <- as.character(NA)
246+
}
247+
209248
if(NROW(onc_gain) > 0){
210249
cna_segments_global <- cna_segments_global |>
211250
dplyr::left_join(
@@ -379,10 +418,22 @@ get_oncogenic_cna_events <- function(cna_df_display = NULL){
379418
)
380419
)
381420

421+
tsgene_hetloss_variants <-
422+
dplyr::filter(
423+
cna_df_display,
424+
.data$TUMOR_SUPPRESSOR == TRUE &
425+
.data$VARIANT_CLASS == "hetdel") |>
426+
dplyr::select(
427+
dplyr::any_of(
428+
pcgrr::dt_display$cna_other_oncogenic
429+
)
430+
)
431+
382432
cna_oncogenic_events <-
383433
dplyr::bind_rows(
384434
oncogene_gain_variants,
385-
tsgene_loss_variants
435+
tsgene_loss_variants,
436+
tsgene_hetloss_variants
386437
) |>
387438
dplyr::select(
388439
dplyr::any_of(
@@ -394,6 +445,12 @@ get_oncogenic_cna_events <- function(cna_df_display = NULL){
394445
dplyr::desc(.data$GLOBAL_ASSOC_RANK),
395446
)
396447

448+
if("SEGMENT_LENGTH_MB" %in% colnames(cna_oncogenic_events)){
449+
cna_oncogenic_events <- cna_oncogenic_events |>
450+
dplyr::mutate(SEGMENT_LENGTH_MB = round(
451+
.data$SEGMENT_LENGTH_MB, digits = 2))
452+
}
453+
397454
return(cna_oncogenic_events)
398455

399456

pcgrr/R/input_data.R

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,9 @@ load_somatic_cna <- function(
6565
)) |>
6666
tidyr::separate(
6767
col = "SEGMENT_NAME",
68-
into = c("SEGMENT_ID", "N_MAJOR","N_MINOR","ARM","CYTOBAND","EVENT_TYPE"),
68+
into = c("SEGMENT_ID", "N_MAJOR",
69+
"N_MINOR","ARM",
70+
"CYTOBAND","EVENT_TYPE"),
6971
sep = "\\|",
7072
remove = T
7173
) |>
@@ -90,7 +92,6 @@ load_somatic_cna <- function(
9092
variant_origin = "Somatic")
9193

9294
callset_cna[['segment']] <- segments
93-
9495
if (NROW(callset_cna$variant) > 0) {
9596
callset_cna[['variant']] <- callset_cna[['variant']] |>
9697
dplyr::mutate(CN_TOTAL =

pcgrr/R/report.R

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -220,9 +220,11 @@ init_cna_vstats <- function() {
220220

221221
vstats <- list()
222222
for (t in c("n_tsg_loss",
223+
"n_tsg_hetloss",
223224
"n_oncogene_gain",
224225
"n_other_drugtarget_gain",
225226
"n_segments_loss",
227+
"n_segments_hetloss",
226228
"n_segments_gain",
227229
"n_actionable_tier1",
228230
"n_actionable_tier2")) {

pcgrr/R/utils.R

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -661,6 +661,7 @@ variant_stats_report <- function(
661661
"VAR_ID" %in% colnames(callset$variant) &
662662
"VARIANT_CLASS" %in% colnames(callset$variant)){
663663
for (n in c("n_tsg_loss",
664+
"n_tsg_hetloss",
664665
"n_oncogene_gain",
665666
"n_other_drugtarget_gain")) {
666667
call_stats[[name]][[n]] <- 0
@@ -672,6 +673,13 @@ variant_stats_report <- function(
672673
.data$ACTIONABILITY_TIER == 3 &
673674
.data$VARIANT_CLASS == "homdel") |>
674675
nrow()
676+
call_stats[[name]][["n_tsg_hetloss"]] <-
677+
callset$variant |>
678+
dplyr::filter(
679+
!is.na(.data$ACTIONABILITY_TIER) &
680+
.data$ACTIONABILITY_TIER == 3 &
681+
.data$VARIANT_CLASS == "hetdel") |>
682+
nrow()
675683
call_stats[[name]][["n_oncogene_gain"]] <-
676684
callset$variant |>
677685
dplyr::filter(
@@ -711,6 +719,13 @@ variant_stats_report <- function(
711719
dplyr::select(.data$VAR_ID) |>
712720
dplyr::distinct() |>
713721
NROW()
722+
723+
call_stats[[name]][["n_segments_hetloss"]] <-
724+
callset$variant |>
725+
dplyr::filter(.data$VARIANT_CLASS == "hetdel") |>
726+
dplyr::select(.data$VAR_ID) |>
727+
dplyr::distinct() |>
728+
NROW()
714729
}
715730

716731
if(vartype == 'snv_indel' &

pcgrr/R/variant_classification.R

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,8 @@ assign_amp_asco_tiers <- function(
173173
dplyr::mutate(AMP_ASCO_TIER_OTHER_VARS = dplyr::if_else(
174174
(!is.na(.data$TUMOR_SUPPRESSOR) &
175175
.data$TUMOR_SUPPRESSOR == TRUE &
176-
.data$VARIANT_CLASS == "homdel") |
176+
(.data$VARIANT_CLASS == "homdel" |
177+
.data$VARIANT_CLASS == "hetdel")) |
177178
(!is.na(.data$ONCOGENE) &
178179
.data$ONCOGENE == TRUE &
179180
.data$VARIANT_CLASS == "gain"),
@@ -226,7 +227,8 @@ assign_amp_asco_tiers <- function(
226227
dplyr::mutate(AMP_ASCO_TIER = dplyr::if_else(
227228
(!is.na(.data$TUMOR_SUPPRESSOR) &
228229
.data$TUMOR_SUPPRESSOR == TRUE &
229-
.data$VARIANT_CLASS == "homdel") |
230+
(.data$VARIANT_CLASS == "homdel" |
231+
.data$VARIANT_CLASS == "hetdel")) |
230232
(!is.na(.data$ONCOGENE) &
231233
.data$ONCOGENE == TRUE &
232234
.data$VARIANT_CLASS == "gain"),

pcgrr/data-raw/data-raw.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -101,8 +101,8 @@ for (c in c("pathogenicity",
101101
color_palette[[c]][["values"]] <- c("#9B3297", "#0073C2")
102102
}
103103
if (c == "cna_variant_class") {
104-
color_palette[[c]][["levels"]] <- c("gain", "homdel")
105-
color_palette[[c]][["values"]] <- c("#00a65a", "#CD534C")
104+
color_palette[[c]][["levels"]] <- c("gain", "hetdel", "homdel")
105+
color_palette[[c]][["values"]] <- c("#00a65a", "#E58B85", "#CD534C")
106106
}
107107
if (c == "warning") {
108108
color_palette[[c]] <- "#ff7518"

0 commit comments

Comments
 (0)