@@ -156,3 +156,290 @@ finalizeInbredTs <- function(x, inbred = FALSE, ploidy = 2L) {
156156 out $ stage <- " finalizeInbredTs"
157157 out
158158}
159+
160+ # ' Build runMacs-style MaCS Command for TS Workflow
161+ # '
162+ # ' @param nInd Integer number of individuals.
163+ # ' @param inbred Logical.
164+ # ' @param species Character species preset name.
165+ # ' @param split Optional split time in generations.
166+ # ' @param ploidy Integer ploidy.
167+ # ' @param manualCommand Optional user-provided MaCS command tail.
168+ # ' @param manualGenLen Optional user-provided chromosome genetic length(s) in Morgan.
169+ # ' @param nChr Integer number of chromosomes.
170+ # '
171+ # ' @return List with `command`, `genLen`, and `seqLen`.
172+ # ' @keywords internal
173+ # ' @noRd
174+ .runMacTS_build_command <- function (nInd , inbred , species , split , ploidy ,
175+ manualCommand , manualGenLen , nChr ) {
176+ popSize <- ifelse(inbred , nInd , ploidy * nInd )
177+ if (! is.null(manualCommand )) {
178+ if (is.null(manualGenLen )) {
179+ stop(" You must define manualGenLen when using manualCommand" )
180+ }
181+ command <- paste0(popSize , " " , manualCommand , " -s " )
182+ genLen <- manualGenLen
183+ } else {
184+ species <- toupper(species )
185+ if (species == " GENERIC" ) {
186+ genLen <- 1.0
187+ Ne <- 100
188+ speciesParams <- " 1E8 -t 1E-5 -r 4E-6"
189+ speciesHist <- " -eN 0.25 5.0 -eN 2.50 15.0 -eN 25.00 60.0 -eN 250.00 120.0 -eN 2500.00 1000.0"
190+ } else if (species == " CATTLE" ) {
191+ cattleChrSum <- 2.8e9
192+ cattleChrBp <- cattleChrSum / 30
193+ recRate <- 9.26e-9
194+ genLen <- recRate * cattleChrBp
195+ mutRate <- 9.4e-9
196+ Ne <- 90
197+ histNe <- c(120 , 250 , 350 , 1000 , 1500 , 2000 , 2500 , 3500 , 7000 , 10000 , 17000 , 62000 )
198+ histGen <- c(3 , 6 , 12 , 18 , 24 , 154 , 454 , 654 , 1754 , 2354 , 3354 , 33154 )
199+ speciesParams <- paste(c(round(cattleChrBp ), " -t" , mutRate * 4 * Ne , " -r" , recRate * 4 * Ne ),
200+ collapse = " " )
201+ histNe <- histNe / Ne
202+ histGen <- histGen / (4 * Ne )
203+ speciesHist <- NULL
204+ for (i in seq_len(length(histNe ))) {
205+ speciesHist <- paste(speciesHist , " -eN" , histGen [i ], histNe [i ])
206+ }
207+ } else if (species == " WHEAT" ) {
208+ genLen <- 1.43
209+ Ne <- 50
210+ speciesParams <- " 8E8 -t 4E-7 -r 3.6E-7"
211+ speciesHist <- " -eN 0.03 1 -eN 0.05 2 -eN 0.10 4 -eN 0.15 6 -eN 0.20 8 -eN 0.25 10 -eN 0.30 12 -eN 0.35 14 -eN 0.40 16 -eN 0.45 18 -eN 0.50 20 -eN 1.00 40 -eN 2.00 60 -eN 3.00 80 -eN 4.00 100 -eN 5.00 120 -eN 10.00 140 -eN 20.00 160 -eN 30.00 180 -eN 40.00 200 -eN 50.00 240 -eN 100.00 320 -eN 200.00 400 -eN 300.00 480 -eN 400.00 560 -eN 500.00 640"
212+ } else if (species == " MAIZE" ) {
213+ genLen <- 2.0
214+ Ne <- 100
215+ speciesParams <- " 2E8 -t 5E-6 -r 4E-6"
216+ speciesHist <- " -eN 0.03 1 -eN 0.05 2 -eN 0.10 4 -eN 0.15 6 -eN 0.20 8 -eN 0.25 10 -eN 0.30 12 -eN 0.35 14 -eN 0.40 16 -eN 0.45 18 -eN 0.50 20 -eN 2.00 40 -eN 3.00 60 -eN 4.00 80 -eN 5.00 100"
217+ } else {
218+ stop(" No rules for species " , species )
219+ }
220+ if (is.null(split )) {
221+ splitI <- " "
222+ splitJ <- " "
223+ } else {
224+ stopifnot(popSize %% 2 == 0 )
225+ splitI <- paste(" -I 2" , popSize %/% 2 , popSize %/% 2 )
226+ splitJ <- paste(" -ej" , split / (4 * Ne ) + 0.000001 , " 2 1" )
227+ }
228+ command <- paste0(popSize , " " , speciesParams , splitI , " " , speciesHist , splitJ , " -s " )
229+ }
230+ if (! is.null(manualGenLen )) {
231+ genLen <- manualGenLen
232+ }
233+ if (length(genLen ) == 1L ) {
234+ genLen <- rep(genLen , nChr )
235+ }
236+ if (length(genLen ) != nChr ) {
237+ stop(" genLen must have length 1 or nChr" )
238+ }
239+ tokens <- strsplit(command , " [,[:space:]]+" , perl = TRUE )[[1L ]]
240+ tokens <- tokens [nzchar(tokens )]
241+ if (length(tokens ) < 2L ) {
242+ stop(" Failed to parse sequence length from command" )
243+ }
244+ seqLen <- suppressWarnings(as.numeric(tokens [2L ]))
245+ if (! is.finite(seqLen ) || seqLen < = 0 ) {
246+ stop(" Invalid sequence length parsed from command" )
247+ }
248+ list (command = command , genLen = as.numeric(genLen ), seqLen = seqLen )
249+ }
250+
251+ # ' High-level TS wrapper parallel to runMacs
252+ # '
253+ # ' @param nInd Integer number of individuals to simulate.
254+ # ' @param nChr Integer number of chromosomes.
255+ # ' @param segSites Optional site-count cap per chromosome (scalar or vector).
256+ # ' @param inbred Logical.
257+ # ' @param species Species preset used by `runMacs`.
258+ # ' @param split Optional population split time in generations.
259+ # ' @param ploidy Integer ploidy.
260+ # ' @param manualCommand Optional MaCS command tail (advanced users).
261+ # ' @param manualGenLen Optional genetic length(s) in Morgan.
262+ # ' @param nThreads Optional thread count.
263+ # ' @param mutationMode One of `"postTs"`, `"macs"`, `"none"`.
264+ # ' @param usePhysicalPositions Logical; TS coordinates in bp if `TRUE`.
265+ # ' @param Nref Optional reference `Ne` for time scaling.
266+ # ' @param seed Optional integer vector (length 1 or `nChr`) for ancestry.
267+ # ' @param mutSeed Optional integer vector (length 1 or `nChr`) for post-TS mutation.
268+ # ' @param mutSeedOffset Integer offset used when deriving post-TS mutation seeds.
269+ # ' @param siteSamplingSeed Integer seed for `asMapPop` site sampling.
270+ # ' @param expandInbredTs Logical; whether to expand inbred TS sample leaves before conversion.
271+ # ' @param returnTs Logical; return TS tables and metadata alongside `MapPop`.
272+ # '
273+ # ' @return `MapPop` by default; otherwise a list with `pop`, `tables`, and metadata.
274+ # ' @keywords internal
275+ # ' @noRd
276+ runMacTS <- function (nInd , nChr = 1 , segSites = NULL , inbred = FALSE ,
277+ species = " GENERIC" , split = NULL , ploidy = 2L ,
278+ manualCommand = NULL , manualGenLen = NULL , nThreads = NULL ,
279+ mutationMode = c(" postTs" , " macs" , " none" ),
280+ usePhysicalPositions = FALSE , Nref = NA_real_ ,
281+ seed = NULL , mutSeed = NULL , mutSeedOffset = 104729L ,
282+ siteSamplingSeed = 42L , expandInbredTs = FALSE ,
283+ returnTs = FALSE ) {
284+ mutationMode <- match.arg(mutationMode )
285+ nInd <- as.integer(nInd )
286+ nChr <- as.integer(nChr )
287+ ploidy <- as.integer(ploidy )
288+ if (is.null(nThreads )) {
289+ nThreads <- getNumThreads()
290+ }
291+ nThreads <- as.integer(nThreads )
292+ if (nChr < nThreads ) {
293+ nThreads <- nChr
294+ }
295+ if (nInd < = 0L || nChr < = 0L || ploidy < = 0L ) {
296+ stop(" nInd, nChr, and ploidy must be positive integers" )
297+ }
298+ if (! is.null(segSites )) {
299+ segSites <- as.integer(segSites )
300+ if (length(segSites ) == 1L ) {
301+ segSites <- rep(segSites , nChr )
302+ }
303+ if (length(segSites ) != nChr ) {
304+ stop(" segSites must have length 1 or nChr" )
305+ }
306+ }
307+
308+ setup <- .runMacTS_build_command(
309+ nInd = nInd ,
310+ inbred = inbred ,
311+ species = species ,
312+ split = split ,
313+ ploidy = ploidy ,
314+ manualCommand = manualCommand ,
315+ manualGenLen = manualGenLen ,
316+ nChr = nChr
317+ )
318+ args <- setup $ command
319+ genLen <- setup $ genLen
320+ seqLen <- setup $ seqLen
321+
322+ if (is.null(seed )) {
323+ seed <- sample.int(n = 1e8 , size = nChr )
324+ }
325+ seed <- as.integer(seed )
326+ if (length(seed ) == 1L ) {
327+ seed <- rep(seed , nChr )
328+ }
329+ if (length(seed ) != nChr ) {
330+ stop(" seed must have length 1 or nChr" )
331+ }
332+
333+ runOut <- NULL
334+ if (mutationMode == " macs" ) {
335+ runOut <- MaCSTS(
336+ args = args ,
337+ nChr = nChr ,
338+ inbred = inbred ,
339+ ploidy = ploidy ,
340+ nThreads = nThreads ,
341+ seed = seed ,
342+ usePhysicalPositions = usePhysicalPositions ,
343+ useMacsMut = TRUE ,
344+ Nref = Nref ,
345+ expandInbredSamples = FALSE
346+ )
347+ } else {
348+ runOut <- simAnc(
349+ args = args ,
350+ nChr = nChr ,
351+ inbred = inbred ,
352+ ploidy = ploidy ,
353+ nThreads = nThreads ,
354+ seed = seed ,
355+ usePhysicalPositions = usePhysicalPositions ,
356+ Nref = Nref
357+ )
358+ if (mutationMode == " postTs" ) {
359+ if (is.null(mutSeed )) {
360+ mutSeed <- as.integer(seed + as.integer(mutSeedOffset ))
361+ }
362+ mutSeed <- as.integer(mutSeed )
363+ if (length(mutSeed ) == 1L ) {
364+ mutSeed <- rep(mutSeed , nChr )
365+ }
366+ if (length(mutSeed ) != nChr ) {
367+ stop(" mutSeed must have length 1 or nChr" )
368+ }
369+ timeScale <- if (! is.null(runOut $ timeScale )) as.numeric(runOut $ timeScale ) else 1
370+ dThetaPost <- as.numeric(runOut $ dTheta ) / timeScale
371+ runOut <- simMut(runOut , dTheta = dThetaPost , seed = mutSeed )
372+ }
373+ }
374+
375+ if (isTRUE(expandInbredTs ) && isTRUE(inbred ) && ploidy > 1L ) {
376+ runOut <- finalizeInbredTs(runOut , inbred = inbred , ploidy = ploidy )
377+ }
378+
379+ if (mutationMode == " none" ) {
380+ if (! isTRUE(returnTs )) {
381+ stop(" mutationMode='none' produces ancestry-only TS with zero sites; set returnTs=TRUE or use mutationMode='postTs'/'macs'." )
382+ }
383+ return (list (
384+ pop = NULL ,
385+ tables = runOut $ tables ,
386+ args = args ,
387+ seed = seed ,
388+ mutationMode = mutationMode ,
389+ mutSeed = NA_integer_ ,
390+ usePhysicalPositions = usePhysicalPositions ,
391+ timeScale = if (! is.null(runOut $ timeScale )) runOut $ timeScale else 1 ,
392+ Nref = if (! is.null(runOut $ Nref )) runOut $ Nref else NA_real_
393+ ))
394+ }
395+
396+ siteCounts <- vapply(runOut $ tables , function (tc_xptr ) {
397+ as.integer(rtsk_table_collection_summary2(tc_xptr )$ num_sites )
398+ }, integer(1 ))
399+ if (any(siteCounts < = 0L )) {
400+ badChr <- which(siteCounts < = 0L )
401+ stop(" No segregating sites on chromosome(s): " ,
402+ paste(badChr , collapse = " , " ),
403+ " . Increase mutation rate or inspect TS via returnTs=TRUE." )
404+ }
405+
406+ breaks <- if (usePhysicalPositions ) {
407+ rep(list (c(0 , seqLen )), nChr )
408+ } else {
409+ rep(list (c(0 , 1 )), nChr )
410+ }
411+ rates <- if (usePhysicalPositions ) {
412+ lapply(genLen , function (g ) c(g / seqLen ))
413+ } else {
414+ lapply(genLen , function (g ) c(g ))
415+ }
416+
417+ popOut <- asMapPop(
418+ chr_info = list (
419+ tables = runOut $ tables ,
420+ breaks = breaks ,
421+ rates = rates
422+ ),
423+ ploidy = ploidy ,
424+ inbred = inbred ,
425+ segSites = segSites ,
426+ site_sampling_seed = as.integer(siteSamplingSeed ),
427+ nThreads = nThreads ,
428+ returnMeta = FALSE
429+ )
430+
431+ if (! isTRUE(returnTs )) {
432+ return (popOut )
433+ }
434+ list (
435+ pop = popOut ,
436+ tables = runOut $ tables ,
437+ args = args ,
438+ seed = seed ,
439+ mutationMode = mutationMode ,
440+ mutSeed = if (! is.null(runOut $ mutationSeed )) runOut $ mutationSeed else NA_integer_ ,
441+ usePhysicalPositions = usePhysicalPositions ,
442+ timeScale = if (! is.null(runOut $ timeScale )) runOut $ timeScale else 1 ,
443+ Nref = if (! is.null(runOut $ Nref )) runOut $ Nref else NA_real_
444+ )
445+ }
0 commit comments