Skip to content

Commit 8a5b383

Browse files
committed
Add high level wrapper runMacTS
1 parent c76a45d commit 8a5b383

5 files changed

Lines changed: 1841 additions & 0 deletions

File tree

R/runMacTs.R

Lines changed: 287 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,3 +156,290 @@ finalizeInbredTs <- function(x, inbred = FALSE, ploidy = 2L) {
156156
out$stage <- "finalizeInbredTs"
157157
out
158158
}
159+
160+
#' Build runMacs-style MaCS Command for TS Workflow
161+
#'
162+
#' @param nInd Integer number of individuals.
163+
#' @param inbred Logical.
164+
#' @param species Character species preset name.
165+
#' @param split Optional split time in generations.
166+
#' @param ploidy Integer ploidy.
167+
#' @param manualCommand Optional user-provided MaCS command tail.
168+
#' @param manualGenLen Optional user-provided chromosome genetic length(s) in Morgan.
169+
#' @param nChr Integer number of chromosomes.
170+
#'
171+
#' @return List with `command`, `genLen`, and `seqLen`.
172+
#' @keywords internal
173+
#' @noRd
174+
.runMacTS_build_command <- function(nInd, inbred, species, split, ploidy,
175+
manualCommand, manualGenLen, nChr) {
176+
popSize <- ifelse(inbred, nInd, ploidy * nInd)
177+
if (!is.null(manualCommand)) {
178+
if (is.null(manualGenLen)) {
179+
stop("You must define manualGenLen when using manualCommand")
180+
}
181+
command <- paste0(popSize, " ", manualCommand, " -s ")
182+
genLen <- manualGenLen
183+
} else {
184+
species <- toupper(species)
185+
if (species == "GENERIC") {
186+
genLen <- 1.0
187+
Ne <- 100
188+
speciesParams <- "1E8 -t 1E-5 -r 4E-6"
189+
speciesHist <- "-eN 0.25 5.0 -eN 2.50 15.0 -eN 25.00 60.0 -eN 250.00 120.0 -eN 2500.00 1000.0"
190+
} else if (species == "CATTLE") {
191+
cattleChrSum <- 2.8e9
192+
cattleChrBp <- cattleChrSum / 30
193+
recRate <- 9.26e-9
194+
genLen <- recRate * cattleChrBp
195+
mutRate <- 9.4e-9
196+
Ne <- 90
197+
histNe <- c(120, 250, 350, 1000, 1500, 2000, 2500, 3500, 7000, 10000, 17000, 62000)
198+
histGen <- c(3, 6, 12, 18, 24, 154, 454, 654, 1754, 2354, 3354, 33154)
199+
speciesParams <- paste(c(round(cattleChrBp), "-t", mutRate * 4 * Ne, "-r", recRate * 4 * Ne),
200+
collapse = " ")
201+
histNe <- histNe / Ne
202+
histGen <- histGen / (4 * Ne)
203+
speciesHist <- NULL
204+
for (i in seq_len(length(histNe))) {
205+
speciesHist <- paste(speciesHist, "-eN", histGen[i], histNe[i])
206+
}
207+
} else if (species == "WHEAT") {
208+
genLen <- 1.43
209+
Ne <- 50
210+
speciesParams <- "8E8 -t 4E-7 -r 3.6E-7"
211+
speciesHist <- "-eN 0.03 1 -eN 0.05 2 -eN 0.10 4 -eN 0.15 6 -eN 0.20 8 -eN 0.25 10 -eN 0.30 12 -eN 0.35 14 -eN 0.40 16 -eN 0.45 18 -eN 0.50 20 -eN 1.00 40 -eN 2.00 60 -eN 3.00 80 -eN 4.00 100 -eN 5.00 120 -eN 10.00 140 -eN 20.00 160 -eN 30.00 180 -eN 40.00 200 -eN 50.00 240 -eN 100.00 320 -eN 200.00 400 -eN 300.00 480 -eN 400.00 560 -eN 500.00 640"
212+
} else if (species == "MAIZE") {
213+
genLen <- 2.0
214+
Ne <- 100
215+
speciesParams <- "2E8 -t 5E-6 -r 4E-6"
216+
speciesHist <- "-eN 0.03 1 -eN 0.05 2 -eN 0.10 4 -eN 0.15 6 -eN 0.20 8 -eN 0.25 10 -eN 0.30 12 -eN 0.35 14 -eN 0.40 16 -eN 0.45 18 -eN 0.50 20 -eN 2.00 40 -eN 3.00 60 -eN 4.00 80 -eN 5.00 100"
217+
} else {
218+
stop("No rules for species ", species)
219+
}
220+
if (is.null(split)) {
221+
splitI <- ""
222+
splitJ <- ""
223+
} else {
224+
stopifnot(popSize %% 2 == 0)
225+
splitI <- paste(" -I 2", popSize %/% 2, popSize %/% 2)
226+
splitJ <- paste(" -ej", split / (4 * Ne) + 0.000001, "2 1")
227+
}
228+
command <- paste0(popSize, " ", speciesParams, splitI, " ", speciesHist, splitJ, " -s ")
229+
}
230+
if (!is.null(manualGenLen)) {
231+
genLen <- manualGenLen
232+
}
233+
if (length(genLen) == 1L) {
234+
genLen <- rep(genLen, nChr)
235+
}
236+
if (length(genLen) != nChr) {
237+
stop("genLen must have length 1 or nChr")
238+
}
239+
tokens <- strsplit(command, "[,[:space:]]+", perl = TRUE)[[1L]]
240+
tokens <- tokens[nzchar(tokens)]
241+
if (length(tokens) < 2L) {
242+
stop("Failed to parse sequence length from command")
243+
}
244+
seqLen <- suppressWarnings(as.numeric(tokens[2L]))
245+
if (!is.finite(seqLen) || seqLen <= 0) {
246+
stop("Invalid sequence length parsed from command")
247+
}
248+
list(command = command, genLen = as.numeric(genLen), seqLen = seqLen)
249+
}
250+
251+
#' High-level TS wrapper parallel to runMacs
252+
#'
253+
#' @param nInd Integer number of individuals to simulate.
254+
#' @param nChr Integer number of chromosomes.
255+
#' @param segSites Optional site-count cap per chromosome (scalar or vector).
256+
#' @param inbred Logical.
257+
#' @param species Species preset used by `runMacs`.
258+
#' @param split Optional population split time in generations.
259+
#' @param ploidy Integer ploidy.
260+
#' @param manualCommand Optional MaCS command tail (advanced users).
261+
#' @param manualGenLen Optional genetic length(s) in Morgan.
262+
#' @param nThreads Optional thread count.
263+
#' @param mutationMode One of `"postTs"`, `"macs"`, `"none"`.
264+
#' @param usePhysicalPositions Logical; TS coordinates in bp if `TRUE`.
265+
#' @param Nref Optional reference `Ne` for time scaling.
266+
#' @param seed Optional integer vector (length 1 or `nChr`) for ancestry.
267+
#' @param mutSeed Optional integer vector (length 1 or `nChr`) for post-TS mutation.
268+
#' @param mutSeedOffset Integer offset used when deriving post-TS mutation seeds.
269+
#' @param siteSamplingSeed Integer seed for `asMapPop` site sampling.
270+
#' @param expandInbredTs Logical; whether to expand inbred TS sample leaves before conversion.
271+
#' @param returnTs Logical; return TS tables and metadata alongside `MapPop`.
272+
#'
273+
#' @return `MapPop` by default; otherwise a list with `pop`, `tables`, and metadata.
274+
#' @keywords internal
275+
#' @noRd
276+
runMacTS <- function(nInd, nChr = 1, segSites = NULL, inbred = FALSE,
277+
species = "GENERIC", split = NULL, ploidy = 2L,
278+
manualCommand = NULL, manualGenLen = NULL, nThreads = NULL,
279+
mutationMode = c("postTs", "macs", "none"),
280+
usePhysicalPositions = FALSE, Nref = NA_real_,
281+
seed = NULL, mutSeed = NULL, mutSeedOffset = 104729L,
282+
siteSamplingSeed = 42L, expandInbredTs = FALSE,
283+
returnTs = FALSE) {
284+
mutationMode <- match.arg(mutationMode)
285+
nInd <- as.integer(nInd)
286+
nChr <- as.integer(nChr)
287+
ploidy <- as.integer(ploidy)
288+
if (is.null(nThreads)) {
289+
nThreads <- getNumThreads()
290+
}
291+
nThreads <- as.integer(nThreads)
292+
if (nChr < nThreads) {
293+
nThreads <- nChr
294+
}
295+
if (nInd <= 0L || nChr <= 0L || ploidy <= 0L) {
296+
stop("nInd, nChr, and ploidy must be positive integers")
297+
}
298+
if (!is.null(segSites)) {
299+
segSites <- as.integer(segSites)
300+
if (length(segSites) == 1L) {
301+
segSites <- rep(segSites, nChr)
302+
}
303+
if (length(segSites) != nChr) {
304+
stop("segSites must have length 1 or nChr")
305+
}
306+
}
307+
308+
setup <- .runMacTS_build_command(
309+
nInd = nInd,
310+
inbred = inbred,
311+
species = species,
312+
split = split,
313+
ploidy = ploidy,
314+
manualCommand = manualCommand,
315+
manualGenLen = manualGenLen,
316+
nChr = nChr
317+
)
318+
args <- setup$command
319+
genLen <- setup$genLen
320+
seqLen <- setup$seqLen
321+
322+
if (is.null(seed)) {
323+
seed <- sample.int(n = 1e8, size = nChr)
324+
}
325+
seed <- as.integer(seed)
326+
if (length(seed) == 1L) {
327+
seed <- rep(seed, nChr)
328+
}
329+
if (length(seed) != nChr) {
330+
stop("seed must have length 1 or nChr")
331+
}
332+
333+
runOut <- NULL
334+
if (mutationMode == "macs") {
335+
runOut <- MaCSTS(
336+
args = args,
337+
nChr = nChr,
338+
inbred = inbred,
339+
ploidy = ploidy,
340+
nThreads = nThreads,
341+
seed = seed,
342+
usePhysicalPositions = usePhysicalPositions,
343+
useMacsMut = TRUE,
344+
Nref = Nref,
345+
expandInbredSamples = FALSE
346+
)
347+
} else {
348+
runOut <- simAnc(
349+
args = args,
350+
nChr = nChr,
351+
inbred = inbred,
352+
ploidy = ploidy,
353+
nThreads = nThreads,
354+
seed = seed,
355+
usePhysicalPositions = usePhysicalPositions,
356+
Nref = Nref
357+
)
358+
if (mutationMode == "postTs") {
359+
if (is.null(mutSeed)) {
360+
mutSeed <- as.integer(seed + as.integer(mutSeedOffset))
361+
}
362+
mutSeed <- as.integer(mutSeed)
363+
if (length(mutSeed) == 1L) {
364+
mutSeed <- rep(mutSeed, nChr)
365+
}
366+
if (length(mutSeed) != nChr) {
367+
stop("mutSeed must have length 1 or nChr")
368+
}
369+
timeScale <- if (!is.null(runOut$timeScale)) as.numeric(runOut$timeScale) else 1
370+
dThetaPost <- as.numeric(runOut$dTheta) / timeScale
371+
runOut <- simMut(runOut, dTheta = dThetaPost, seed = mutSeed)
372+
}
373+
}
374+
375+
if (isTRUE(expandInbredTs) && isTRUE(inbred) && ploidy > 1L) {
376+
runOut <- finalizeInbredTs(runOut, inbred = inbred, ploidy = ploidy)
377+
}
378+
379+
if (mutationMode == "none") {
380+
if (!isTRUE(returnTs)) {
381+
stop("mutationMode='none' produces ancestry-only TS with zero sites; set returnTs=TRUE or use mutationMode='postTs'/'macs'.")
382+
}
383+
return(list(
384+
pop = NULL,
385+
tables = runOut$tables,
386+
args = args,
387+
seed = seed,
388+
mutationMode = mutationMode,
389+
mutSeed = NA_integer_,
390+
usePhysicalPositions = usePhysicalPositions,
391+
timeScale = if (!is.null(runOut$timeScale)) runOut$timeScale else 1,
392+
Nref = if (!is.null(runOut$Nref)) runOut$Nref else NA_real_
393+
))
394+
}
395+
396+
siteCounts <- vapply(runOut$tables, function(tc_xptr) {
397+
as.integer(rtsk_table_collection_summary2(tc_xptr)$num_sites)
398+
}, integer(1))
399+
if (any(siteCounts <= 0L)) {
400+
badChr <- which(siteCounts <= 0L)
401+
stop("No segregating sites on chromosome(s): ",
402+
paste(badChr, collapse = ", "),
403+
". Increase mutation rate or inspect TS via returnTs=TRUE.")
404+
}
405+
406+
breaks <- if (usePhysicalPositions) {
407+
rep(list(c(0, seqLen)), nChr)
408+
} else {
409+
rep(list(c(0, 1)), nChr)
410+
}
411+
rates <- if (usePhysicalPositions) {
412+
lapply(genLen, function(g) c(g / seqLen))
413+
} else {
414+
lapply(genLen, function(g) c(g))
415+
}
416+
417+
popOut <- asMapPop(
418+
chr_info = list(
419+
tables = runOut$tables,
420+
breaks = breaks,
421+
rates = rates
422+
),
423+
ploidy = ploidy,
424+
inbred = inbred,
425+
segSites = segSites,
426+
site_sampling_seed = as.integer(siteSamplingSeed),
427+
nThreads = nThreads,
428+
returnMeta = FALSE
429+
)
430+
431+
if (!isTRUE(returnTs)) {
432+
return(popOut)
433+
}
434+
list(
435+
pop = popOut,
436+
tables = runOut$tables,
437+
args = args,
438+
seed = seed,
439+
mutationMode = mutationMode,
440+
mutSeed = if (!is.null(runOut$mutationSeed)) runOut$mutationSeed else NA_integer_,
441+
usePhysicalPositions = usePhysicalPositions,
442+
timeScale = if (!is.null(runOut$timeScale)) runOut$timeScale else 1,
443+
Nref = if (!is.null(runOut$Nref)) runOut$Nref else NA_real_
444+
)
445+
}

dev/testData/inbred_test.trees

688 KB
Binary file not shown.

0 commit comments

Comments
 (0)