-
Notifications
You must be signed in to change notification settings - Fork 1
Open
Labels
enhancementNew feature or requestNew feature or request
Description
Below is some prototype code.
This convert_gt runs without error but is untested
"""
convert_gt(b::Bgen, T=Float32)
Imports dosage information and chr/pos/snpID/ref/alt into numeric arrays.
# Input
- `b`: a `Bgen` object
- `T`: Type for genotype array
# Output
- `G`: a `p × n` matrix of type `T`. Each column is a genotype
- `Gchr`: Vector of `String`s holding chromosome number for each variant
- `Gpos`: Vector of `Int` holding each variant's position
- `GsnpID`: Vector of `String`s holding variant ID for each variant
- `Gref`: Vector of `String`s holding reference allele for each variant
- `Galt`: Vector of `String`s holding alterante allele for each variant
"""
function convert_gt(b::Bgen, T=Float32)
n = n_samples(b)
p = n_variants(b)
# return arrays
G = Matrix{T}(undef, p, n)
Gchr = Vector{String}(undef, p)
Gpos = Vector{Int}(undef, p)
GsnpID = Vector{String}(undef, p)
Gref = Vector{String}(undef, p)
Galt = Vector{String}(undef, p)
# loop over each variant
i = 1
for v in iterator(b; from_bgen_start=true)
dose = minor_allele_dosage!(b, v; T=T)
copyto!(@view(G[i, :]), dose)
Gchr[i], Gpos[i], GsnpID[i], Gref[i], Galt[i] =
chrom(v), pos(v), rsid(v), major_allele(v), minor_allele(v)
i += 1
clear!(v)
end
return G, Gchr, Gpos, GsnpID, Gref, Galt
endThis convert_ht does not work due to major_allele and minor_allele not working.
"""
convert_ht(b::Bgen)
Import phased haplotypes as a `BitMatrix`, and store chr/pos/snpID/ref/alt.
# Input
- `b`: a `Bgen` object. Each variant must be phased and samples must be diploid
# Output
- `H`: a `p × 2n` matrix of type `T`. Each column is a haplotype.
- `Hchr`: Vector of `String`s holding chromosome number for each variant
- `Hpos`: Vector of `Int` holding each variant's position
- `HsnpID`: Vector of `String`s holding variant ID for each variant
- `Href`: Vector of `String`s holding reference allele for each variant
- `Halt`: Vector of `String`s holding alterante allele for each variant
"""
function convert_ht(b::Bgen)
n = 2n_samples(b)
p = n_variants(b)
# return arrays
H = BitMatrix(undef, p, n)
Hchr = Vector{String}(undef, p)
Hpos = Vector{Int}(undef, p)
HsnpID = Vector{String}(undef, p)
Href = Vector{String}(undef, p)
Halt = Vector{String}(undef, p)
# loop over each variant
i = 1
for v in iterator(b; from_bgen_start=true)
dose = probabilities!(b, v)
phased(v) || error("variant $(rsid(v)) at position $(pos(v)) not phased!")
for j in 1:n_samples(b)
Hi = @view(dose[:, j])
H[i, 2j - 1] = read_haplotype1(Hi)
H[i, 2j] = read_haplotype2(Hi)
end
Hchr[i], Hpos[i], HsnpID[i], Href[i], Halt[i] =
chrom(v), pos(v), rsid(v), major_allele(v), minor_allele(v)
i += 1
clear!(v)
end
return H, Hchr, Hpos, HsnpID, Href, Halt
end
read_haplotype1(Hi::AbstractVector) = Hi[2] ≥ 0.5 ? true : false
read_haplotype2(Hi::AbstractVector) = Hi[4] ≥ 0.5 ? true : falseDo you know of a dataset that is in both VCF and BGEN format? That would be great for testing. Also, I'm guessing probabilities! and minor_allele_dosage! are allocating for every variant. If so, probably we need to modify it so it accepts preallocated vectors.
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
enhancementNew feature or requestNew feature or request