Skip to content

All-vs-all PAF adapter #4985

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 1 addition & 6 deletions packages/core/configuration/configurationSchema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,6 @@ function makeConfigurationSchemaModel<
completeModel = completeModel.postProcessSnapshot(snap => {
const newSnap: SnapshotOut<typeof completeModel> = {}
let matchesDefault = true
// let keyCount = 0
for (const [key, value] of Object.entries(snap)) {
if (matchesDefault) {
if (typeof defaultSnap[key] === 'object' && typeof value === 'object') {
Expand All @@ -221,14 +220,10 @@ function makeConfigurationSchemaModel<
!isEmptyObject(value) &&
!isEmptyArray(value)
) {
// keyCount += 1
newSnap[key] = value
}
}
if (matchesDefault) {
return {}
}
return newSnap
return matchesDefault ? {} : newSnap
})

if (options.preProcessSnapshot) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
import { readConfObject } from '@jbrowse/core/configuration'
import { BaseFeatureDataAdapter } from '@jbrowse/core/data_adapters/BaseAdapter'
import { fetchAndMaybeUnzip } from '@jbrowse/core/util'
import { openLocation } from '@jbrowse/core/util/io'
import { doesIntersect2 } from '@jbrowse/core/util/range'
import { ObservableCreate } from '@jbrowse/core/util/rxjs'
import { MismatchParser } from '@jbrowse/plugin-alignments'

import SyntenyFeature from '../SyntenyFeature'
import {
flipCigar,
parseLineByLine,
parsePAFLine,
swapIndelCigar,
} from '../util'
import { getWeightedMeans } from './util'

import type { PAFRecord } from './util'
import type { AnyConfigurationModel } from '@jbrowse/core/configuration'
import type { BaseOptions } from '@jbrowse/core/data_adapters/BaseAdapter'
import type { Feature } from '@jbrowse/core/util'
import type { Region } from '@jbrowse/core/util/types'

const { parseCigar } = MismatchParser

interface PAFOptions extends BaseOptions {
config?: AnyConfigurationModel
}

export default class AllVsAllPAFAdapter extends BaseFeatureDataAdapter {
private setupP?: Promise<PAFRecord[]>

public static capabilities = ['getFeatures', 'getRefNames']

async setup(opts?: BaseOptions) {
if (!this.setupP) {
this.setupP = this.setupPre(opts).catch((e: unknown) => {
this.setupP = undefined
throw e
})
}
return this.setupP
}

async setupPre(opts?: BaseOptions) {
return parseLineByLine(
await fetchAndMaybeUnzip(
openLocation(this.getConf('pafLocation'), this.pluginManager),
opts,
),
parsePAFLine,
opts,
)
}

async hasDataForRefName() {
// determining this properly is basically a call to getFeatures so is not
// really that important, and has to be true or else getFeatures is never
// called (BaseAdapter filters it out)
return true
}

getAssemblyNames() {
return this.getConf('assemblyNames') as string[]
}

async getRefNames(opts: BaseOptions = {}) {
// @ts-expect-error
const r1 = opts.regions?.[0].assemblyName
const feats = await this.setup(opts)

const idx = this.getAssemblyNames().indexOf(r1)
if (idx !== -1) {
const set = new Set<string>()
for (const feat of feats) {
set.add(idx === 0 ? feat.qname : feat.tname)
}
return [...set]
}
console.warn('Unable to do ref renaming on adapter')
return []
}

getFeatures(query: Region, opts: PAFOptions = {}) {
return ObservableCreate<Feature>(async observer => {
let pafRecords = await this.setup(opts)

// note: this is not the adapter config, it is responding to a display
// setting passed in via the opts parameter
if (
opts.config &&
readConfObject(opts.config, 'colorBy') === 'meanQueryIdentity'
) {
pafRecords = getWeightedMeans(pafRecords)
}
const assemblyNames = this.getAssemblyNames()

// The index of the assembly name in the query list corresponds to the
// adapter in the subadapters list
const index = assemblyNames.indexOf(query.assemblyName)

// if the getFeatures::query is on the query assembly, flip orientation
// of data
const flip = index === 0
if (index === -1) {
console.warn(`${query.assemblyName} not found in this adapter`)
observer.complete()
}

const len = pafRecords.length
for (let i = 0; i < len; i++) {
const currentPafRecord = pafRecords[i]!
let start = 0
let end = 0
let refName = ''
let mateName = ''
let mateStart = 0
let mateEnd = 0

if (flip) {
start = currentPafRecord.qstart
end = currentPafRecord.qend
refName = currentPafRecord.qname
mateName = currentPafRecord.tname
mateStart = currentPafRecord.tstart
mateEnd = currentPafRecord.tend
} else {
start = currentPafRecord.tstart
end = currentPafRecord.tend
refName = currentPafRecord.tname
mateName = currentPafRecord.qname
mateStart = currentPafRecord.qstart
mateEnd = currentPafRecord.qend
}
const { extra, strand } = currentPafRecord
const refName2 = refName.replace(`${query.assemblyName}#1#`, '')
if (
refName2 === query.refName &&
doesIntersect2(query.start, query.end, start, end)
) {
const { numMatches = 0, blockLen = 1, cg, ...rest } = extra

let CIGAR = extra.cg
if (extra.cg) {
if (flip && strand === -1) {
CIGAR = flipCigar(parseCigar(extra.cg)).join('')
} else if (flip) {
CIGAR = swapIndelCigar(extra.cg)
}
}

observer.next(
new SyntenyFeature({
uniqueId: i + query.assemblyName,
assemblyName: query.assemblyName,
start,
end,
type: 'match',
refName: refName2,
strand,
...rest,
CIGAR,
syntenyId: i,
identity: numMatches / blockLen,
numMatches,
blockLen,
mate: {
start: mateStart,
end: mateEnd,
refName: mateName,
assemblyName: assemblyNames[+flip],
},
}),
)
}
}

observer.complete()
})
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
import { ConfigurationSchema } from '@jbrowse/core/configuration'

/**
* #config AllVsAllPAFAdapter
*/
function x() {} // eslint-disable-line @typescript-eslint/no-unused-vars

const AllVsAllPAFAdapter = ConfigurationSchema(
'AllVsAllPAFAdapter',
{
/**
* #slot
*/
assemblyNames: {
type: 'stringArray',
defaultValue: [],
description:
'Array of assembly names to use for this file. The query assembly name is the first value in the array, target assembly name is the second',
},
/**
* #slot
* can be optionally gzipped
*/
pafLocation: {
type: 'fileLocation',
defaultValue: {
uri: '/path/to/file.paf',
locationType: 'UriLocation',
},
},
},
{
explicitlyTyped: true,

/**
* #preProcessSnapshot
*
*
* preprocessor to allow minimal config:
* ```json
* {
* "type": "AllVsAllPAFAdapter",
* "uri": "file.paf.gz"
* }
* ```
*/
preProcessSnapshot: snap => {
return snap.uri
? {
...snap,
pafLocation: {
uri: snap.uri,
baseUri: snap.baseUri,
},
}
: snap
},
},
)

export default AllVsAllPAFAdapter
21 changes: 21 additions & 0 deletions plugins/comparative-adapters/src/AllVsAllPAFAdapter/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import AdapterType from '@jbrowse/core/pluggableElementTypes/AdapterType'

import configSchema from './configSchema'

import type PluginManager from '@jbrowse/core/PluginManager'

export default function AllVsAllPAFAdapterF(pluginManager: PluginManager) {
pluginManager.addAdapterType(
() =>
new AdapterType({
name: 'AllVsAllPAFAdapter',
displayName: 'AllVsAllPAF adapter',
configSchema,
adapterMetadata: {
category: 'Synteny adapters',
},
getAdapterClass: () =>
import('./AllVsAllPAFAdapter').then(r => r.default),
}),
)
}
108 changes: 108 additions & 0 deletions plugins/comparative-adapters/src/AllVsAllPAFAdapter/util.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
import { zip } from '../util'

export interface PAFRecord {
qname: string
qstart: number
qend: number
tname: string
tstart: number
tend: number
strand: number
extra: {
cg?: string
blockLen?: number
mappingQual?: number
numMatches?: number
meanScore?: number
}
}
// based on "weighted mean" method from https://github.com/tpoorten/dotPlotly
// License reproduced here
//
// MIT License

// Copyright (c) 2017 Tom Poorten

// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:

// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.

// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
//
// Notes: in the weighted mean longer alignments factor in more heavily of all
// the fragments of a query vs the reference that it mapped to
//
// this uses a combined key query+'-'+ref to iteratively map all the alignments
// that match a particular ref from a particular query (so 1d array of what
// could be a 2d map)
//
// the result is a single number that says e.g. chr5 from human mapped to chr5
// on mouse with 0.8 quality, and that0.8 is then attached to all the pieces of
// chr5 on human that mapped to chr5 on mouse. if chr5 on human also more
// weakly mapped to chr6 on mouse, then it would have another value e.g. 0.6.
// this can show strong and weak levels of synteny, especially in polyploidy
// situations

export function getWeightedMeans(ret: PAFRecord[]) {
const scoreMap: Record<string, { quals: number[]; len: number[] }> = {}
for (const entry of ret) {
const query = entry.qname
const target = entry.tname
const key = `${query}-${target}`
if (!scoreMap[key]) {
scoreMap[key] = { quals: [], len: [] }
}
scoreMap[key].quals.push(entry.extra.mappingQual || 1)
scoreMap[key].len.push(entry.extra.blockLen || 1)
}

const meanScoreMap = Object.fromEntries(
Object.entries(scoreMap).map(([key, val]) => {
const vals = zip(val.quals, val.len)
return [key, weightedMean(vals)]
}),
)
for (const entry of ret) {
const query = entry.qname
const target = entry.tname
const key = `${query}-${target}`
entry.extra.meanScore = meanScoreMap[key]
}

let min = 10000
let max = 0
for (const entry of ret) {
min = Math.min(entry.extra.meanScore || 0, min)
max = Math.max(entry.extra.meanScore || 0, max)
}
for (const entry of ret) {
const b = entry.extra.meanScore || 0
entry.extra.meanScore = (b - min) / (max - min)
}

return ret
}

// https://gist.github.com/stekhn/a12ed417e91f90ecec14bcfa4c2ae16a
function weightedMean(tuples: [number, number][]) {
const [valueSum, weightSum] = tuples.reduce(
([valueSum, weightSum], [value, weight]) => [
valueSum + value * weight,
weightSum + weight,
],
[0, 0],
)
return valueSum / weightSum
}
Loading