Skip to content

Commit 0c10424

Browse files
Run coroutine BCF demo directly on VCF
1 parent 07d4d6c commit 0c10424

4 files changed

Lines changed: 56 additions & 52 deletions

File tree

README.Rmd

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -780,7 +780,15 @@ R's C API. htslib owns the file/header/record state, the coroutine only yields
780780
status codes, and R objects are created after control has returned to the normal
781781
R call stack.
782782

783-
The README displays the actual demo source below, rather than a shortened
783+
The README runs the demo when htslib is available on the build machine. The
784+
example input is plain VCF text because htslib can stream VCF and BCF through
785+
the same API; no `bcftools` conversion step is needed.
786+
787+
```{r ffi-bcf-coroutine-run, eval=.Platform$OS.type != "windows" && nzchar(Sys.which("pkg-config")) && system2("pkg-config", c("--exists", "htslib")) == 0L}
788+
cat(system2(R.home("bin/Rscript"), "scripts/demo-streaming-bcf-reader-ffi.R", stdout = TRUE), sep = "\n")
789+
```
790+
791+
The README also displays the actual demo source below, rather than a shortened
784792
pseudo-example. The full R script is foldable so the page stays readable.
785793

786794
<details>

README.md

Lines changed: 44 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -178,7 +178,7 @@ tcc_read_cstring(ptr)
178178
tcc_read_bytes(ptr, 5)
179179
#> [1] 68 65 6c 6c 6f
180180
tcc_ptr_addr(ptr, hex = TRUE)
181-
#> [1] "0x5f2009633c00"
181+
#> [1] "0x5dafff18bed0"
182182
tcc_ptr_is_null(ptr)
183183
#> [1] FALSE
184184
tcc_free(ptr)
@@ -209,11 +209,11 @@ through output parameters.
209209
ptr_ref <- tcc_malloc(.Machine$sizeof.pointer %||% 8L)
210210
target <- tcc_malloc(8)
211211
tcc_ptr_set(ptr_ref, target)
212-
#> <pointer: 0x5f2008dab1e0>
212+
#> <pointer: 0x5dafff262a50>
213213
tcc_data_ptr(ptr_ref)
214-
#> <pointer: 0x5f200c96caa0>
214+
#> <pointer: 0x5db00198a030>
215215
tcc_ptr_set(ptr_ref, tcc_null_ptr())
216-
#> <pointer: 0x5f2008dab1e0>
216+
#> <pointer: 0x5dafff262a50>
217217
tcc_free(target)
218218
#> NULL
219219
tcc_free(ptr_ref)
@@ -441,7 +441,7 @@ ffi <- tcc_ffi() |>
441441

442442
x <- as.integer(1:100) # to avoid ALTREP
443443
.Internal(inspect(x))
444-
#> @5f200aab3a60 13 INTSXP g0c0 [REF(65535)] 1 : 100 (compact)
444+
#> @5db002c5b160 13 INTSXP g0c0 [REF(65535)] 1 : 100 (compact)
445445
ffi$sum_array(x, length(x))
446446
#> [1] 5050
447447

@@ -457,7 +457,7 @@ y[1]
457457
#> [1] 11
458458

459459
.Internal(inspect(x))
460-
#> @5f200aab3a60 13 INTSXP g0c0 [REF(65535)] 11 : 110 (expanded)
460+
#> @5db002c5b160 13 INTSXP g0c0 [REF(65535)] 11 : 110 (expanded)
461461
```
462462

463463
## Advanced FFI features
@@ -484,15 +484,15 @@ ffi <- tcc_ffi() |>
484484

485485
p1 <- ffi$struct_point_new()
486486
ffi$struct_point_set_x(p1, 0.0)
487-
#> <pointer: 0x5f20071ddec0>
487+
#> <pointer: 0x5db001d8b2f0>
488488
ffi$struct_point_set_y(p1, 0.0)
489-
#> <pointer: 0x5f20071ddec0>
489+
#> <pointer: 0x5db001d8b2f0>
490490

491491
p2 <- ffi$struct_point_new()
492492
ffi$struct_point_set_x(p2, 3.0)
493-
#> <pointer: 0x5f2007874610>
493+
#> <pointer: 0x5db0024ee190>
494494
ffi$struct_point_set_y(p2, 4.0)
495-
#> <pointer: 0x5f2007874610>
495+
#> <pointer: 0x5db0024ee190>
496496

497497
ffi$distance(p1, p2)
498498
#> [1] 5
@@ -537,9 +537,9 @@ ffi <- tcc_ffi() |>
537537

538538
s <- ffi$struct_flags_new()
539539
ffi$struct_flags_set_active(s, 1L)
540-
#> <pointer: 0x5f2007a731f0>
540+
#> <pointer: 0x5dafffa48a90>
541541
ffi$struct_flags_set_level(s, 9L)
542-
#> <pointer: 0x5f2007a731f0>
542+
#> <pointer: 0x5dafffa48a90>
543543
ffi$struct_flags_get_active(s)
544544
#> [1] 1
545545
ffi$struct_flags_get_level(s)
@@ -906,7 +906,27 @@ call R’s C API. htslib owns the file/header/record state, the coroutine
906906
only yields status codes, and R objects are created after control has
907907
returned to the normal R call stack.
908908

909-
The README displays the actual demo source below, rather than a
909+
The README runs the demo when htslib is available on the build machine.
910+
The example input is plain VCF text because htslib can stream VCF and
911+
BCF through the same API; no `bcftools` conversion step is needed.
912+
913+
``` r
914+
cat(system2(R.home("bin/Rscript"), "scripts/demo-streaming-bcf-reader-ffi.R", stdout = TRUE), sep = "\n")
915+
#> Rtinycc version: 0.1.10
916+
#> Demo: stackful coroutine + htslib BCF/VCF API streaming reader
917+
#> Note: htslib reads run on the alternate coroutine stack; R objects are built only after each yield.
918+
#>
919+
#> Input: generated VCF text (opened directly by htslib)
920+
#> Samples: sample1
921+
#>
922+
#> == Streaming records one resume at a time ==
923+
#> record 1: chr1:10 id=rs1 ref=A alt=C qual=50 alleles=[A,C]
924+
#> record 2: chr1:20 id=. ref=G alt=A,T qual=99 alleles=[G,A,T]
925+
#> record 3: chr1:30 id=rs3 ref=TT alt=T qual=. alleles=[TT,T]
926+
#> done_after_collect=TRUE
927+
```
928+
929+
The README also displays the actual demo source below, rather than a
910930
shortened pseudo-example. The full R script is foldable so the page
911931
stays readable.
912932

@@ -1358,21 +1378,9 @@ write_demo_vcf <- function(path) {
13581378
invisible(path)
13591379
}
13601380

1361-
make_demo_bcf <- function() {
1381+
make_demo_vcf <- function() {
13621382
vcf <- tempfile(fileext = ".vcf")
1363-
bcf <- tempfile(fileext = ".bcf")
13641383
write_demo_vcf(vcf)
1365-
1366-
if (nzchar(Sys.which("bcftools"))) {
1367-
status <- system2("bcftools", c("view", "-Ob", "-o", bcf, vcf), stdout = TRUE, stderr = TRUE)
1368-
if (identical(attr(status, "status"), NULL) && file.exists(bcf)) {
1369-
return(bcf)
1370-
}
1371-
warning("bcftools failed; falling back to streaming the VCF text with htslib")
1372-
} else {
1373-
warning("bcftools not found; falling back to streaming the VCF text with htslib")
1374-
}
1375-
13761384
vcf
13771385
}
13781386

@@ -1382,10 +1390,10 @@ if (identical(sys.nframe(), 0L)) {
13821390
say("Note: htslib reads run on the alternate coroutine stack; R objects are built only after each yield.")
13831391

13841392
ffi <- build_streaming_bcf_ffi()
1385-
path <- make_demo_bcf()
1393+
path <- make_demo_vcf()
13861394

13871395
say("")
1388-
say("Input: ", path)
1396+
say("Input: generated VCF text (opened directly by htslib)")
13891397

13901398
reader <- new_bcf_reader(path, ffi)
13911399
on.exit(close(reader), add = TRUE)
@@ -1766,7 +1774,7 @@ ffi <- tcc_ffi() |>
17661774
tcc_compile()
17671775
17681776
ffi$struct_point_new()
1769-
#> <pointer: 0x5f2008670da0>
1777+
#> <pointer: 0x5db00115f320>
17701778
ffi$enum_status_OK()
17711779
#> [1] 0
17721780
ffi$global_global_counter_get()
@@ -1883,11 +1891,11 @@ if (Sys.info()[["sysname"]] == "Linux") {
18831891
#> # A tibble: 5 × 13
18841892
#> expression min median `itr/sec` mem_alloc `gc/sec` n_itr n_gc total_time
18851893
#> <bch:expr> <bch:t> <bch:t> <dbl> <bch:byt> <dbl> <int> <dbl> <bch:tm>
1886-
#> 1 read_tabl… 55ms 55ms 18.2 6.33MB 18.2 1 1 55ms
1887-
#> 2 vroom_df_… 6.29ms 6.39ms 156. 1.22MB 0 2 0 12.8ms
1888-
#> 3 vroom_df_… 6.67ms 6.94ms 144. 2.44MB 0 2 0 13.9ms
1889-
#> 4 c_read_df 21.43ms 21.47ms 46.6 1.22MB 0 2 0 42.9ms
1890-
#> 5 io_uring_… 21.25ms 21.32ms 46.9 1.22MB 0 2 0 42.6ms
1894+
#> 1 read_tabl… 52.26ms 52.26ms 19.1 6.33MB 19.1 1 1 52.3ms
1895+
#> 2 vroom_df_… 6.41ms 6.56ms 152. 1.22MB 0 2 0 13.1ms
1896+
#> 3 vroom_df_… 6.51ms 6.61ms 151. 2.44MB 0 2 0 13.2ms
1897+
#> 4 c_read_df 21.07ms 21.16ms 47.2 1.22MB 0 2 0 42.3ms
1898+
#> 5 io_uring_… 20.18ms 20.66ms 48.4 1.22MB 0 2 0 41.3ms
18911899
#> # ℹ 4 more variables: result <list>, memory <list>, time <list>, gc <list>
18921900
```
18931901

@@ -1989,9 +1997,9 @@ ffi <- tcc_ffi() |>
19891997

19901998
b <- ffi$struct_buf_new()
19911999
ffi$struct_buf_set_data_elt(b, 0L, 0xCAL)
1992-
#> <pointer: 0x5f200d6c0360>
2000+
#> <pointer: 0x5db00ab77c60>
19932001
ffi$struct_buf_set_data_elt(b, 1L, 0xFEL)
1994-
#> <pointer: 0x5f200d6c0360>
2002+
#> <pointer: 0x5db00ab77c60>
19952003
ffi$struct_buf_get_data_elt(b, 0L)
19962004
#> [1] 202
19972005
ffi$struct_buf_get_data_elt(b, 1L)
-749 Bytes
Loading

scripts/demo-streaming-bcf-reader-ffi.R

Lines changed: 3 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -439,21 +439,9 @@ write_demo_vcf <- function(path) {
439439
invisible(path)
440440
}
441441

442-
make_demo_bcf <- function() {
442+
make_demo_vcf <- function() {
443443
vcf <- tempfile(fileext = ".vcf")
444-
bcf <- tempfile(fileext = ".bcf")
445444
write_demo_vcf(vcf)
446-
447-
if (nzchar(Sys.which("bcftools"))) {
448-
status <- system2("bcftools", c("view", "-Ob", "-o", bcf, vcf), stdout = TRUE, stderr = TRUE)
449-
if (identical(attr(status, "status"), NULL) && file.exists(bcf)) {
450-
return(bcf)
451-
}
452-
warning("bcftools failed; falling back to streaming the VCF text with htslib")
453-
} else {
454-
warning("bcftools not found; falling back to streaming the VCF text with htslib")
455-
}
456-
457445
vcf
458446
}
459447

@@ -463,10 +451,10 @@ if (identical(sys.nframe(), 0L)) {
463451
say("Note: htslib reads run on the alternate coroutine stack; R objects are built only after each yield.")
464452

465453
ffi <- build_streaming_bcf_ffi()
466-
path <- make_demo_bcf()
454+
path <- make_demo_vcf()
467455

468456
say("")
469-
say("Input: ", path)
457+
say("Input: generated VCF text (opened directly by htslib)")
470458

471459
reader <- new_bcf_reader(path, ffi)
472460
on.exit(close(reader), add = TRUE)

0 commit comments

Comments
 (0)