Skip to content

Commit f48aaf1

Browse files
authored
Merge pull request #4 from jorgeMFS/main
Enhance VCFX with bug fixes, CI, Docker support & utilities
2 parents 59d48e2 + 63db39b commit f48aaf1

461 files changed

Lines changed: 1893 additions & 202806 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

.clang-format

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
BasedOnStyle: LLVM
2+
IndentWidth: 4
3+
ColumnLimit: 120

.dockerignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,4 +29,4 @@ LICENSE
2929
# Docker files (not needed in the build context)
3030
Dockerfile
3131
docker-compose.yml
32-
.dockerignore
32+
.dockerignore

.github/workflows/build-test.yml

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
name: Build and Test
2+
3+
on:
4+
push:
5+
branches: [ main ]
6+
pull_request:
7+
branches: [ main ]
8+
9+
jobs:
10+
build-and-test:
11+
strategy:
12+
matrix:
13+
os: [ubuntu-latest, macos-latest]
14+
runs-on: ${{ matrix.os }}
15+
steps:
16+
- name: Checkout repository
17+
uses: actions/checkout@v3
18+
19+
- name: Install dependencies (Linux)
20+
if: runner.os == 'Linux'
21+
run: |
22+
sudo apt-get update
23+
sudo apt-get install -y build-essential cmake libz-dev
24+
25+
- name: Install dependencies (macOS)
26+
if: runner.os == 'macOS'
27+
run: |
28+
brew update
29+
brew install cmake zlib bash
30+
echo "$(brew --prefix)/bin" >> $GITHUB_PATH
31+
32+
- name: Configure
33+
run: cmake -S . -B build
34+
shell: bash
35+
- name: Build
36+
run: cmake --build build --parallel
37+
shell: bash
38+
39+
- name: Run tests
40+
run: |
41+
cd build
42+
ctest --output-on-failure
43+
shell: bash
44+
45+
python-wheels:
46+
needs: build-and-test
47+
strategy:
48+
matrix:
49+
os: [ubuntu-latest, macos-latest]
50+
runs-on: ${{ matrix.os }}
51+
steps:
52+
- name: Checkout repository
53+
uses: actions/checkout@v3
54+
55+
- name: Install dependencies (Linux)
56+
if: runner.os == 'Linux'
57+
run: |
58+
sudo apt-get update
59+
sudo apt-get install -y build-essential cmake libz-dev
60+
61+
- name: Install dependencies (macOS)
62+
if: runner.os == 'macOS'
63+
run: |
64+
brew update
65+
brew install cmake zlib bash
66+
echo "$(brew --prefix)/bin" >> $GITHUB_PATH
67+
68+
- name: Build project
69+
run: |
70+
cmake -S . -B build -DPYTHON_BINDINGS=ON
71+
cmake --build build --parallel
72+
cmake --install build --prefix $PWD/install
73+
shell: bash
74+
75+
- name: Set up Python
76+
uses: actions/setup-python@v4
77+
with:
78+
python-version: '3.x'
79+
80+
- name: Build wheel
81+
run: |
82+
python -m pip install --upgrade pip wheel
83+
python -m pip wheel ./python -w dist
84+
shell: bash
85+
86+
- name: Test Python wheel
87+
run: |
88+
python -m pip install dist/*.whl
89+
export PATH="$PWD/install/bin:$PATH"
90+
echo "$PWD/install/bin" >> $GITHUB_PATH
91+
cat <<'EOF' | python -
92+
import vcfx
93+
print('version:', vcfx.get_version())
94+
tools = vcfx.available_tools()
95+
print('tools:', len(tools))
96+
if tools:
97+
vcfx.run_tool(tools[0], '--help', check=False)
98+
EOF
99+
shell: bash

.github/workflows/docker-publish.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,4 +57,4 @@ jobs:
5757
labels: ${{ steps.meta.outputs.labels }}
5858
platforms: linux/amd64,linux/arm64
5959
cache-from: type=gha
60-
cache-to: type=gha,mode=max
60+
cache-to: type=gha,mode=max

.github/workflows/docs.yml

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -29,40 +29,40 @@ jobs:
2929
uses: actions/checkout@v3
3030
with:
3131
fetch-depth: 0
32-
32+
3333
- name: Set up Python
3434
uses: actions/setup-python@v4
3535
with:
3636
python-version: '3.x'
37-
37+
3838
- name: Install dependencies
3939
run: |
4040
python -m pip install --upgrade pip
4141
pip install mkdocs-material pymdown-extensions
42-
42+
4343
- name: Deploy to GitHub Pages
4444
run: |
4545
git config --global user.name "${GITHUB_ACTOR}"
4646
git config --global user.email "${GITHUB_ACTOR}@users.noreply.github.com"
4747
mkdocs gh-deploy --force
48-
48+
4949
# Only for pull requests - just build to validate
5050
build:
5151
runs-on: ubuntu-latest
5252
if: github.event_name == 'pull_request'
5353
steps:
5454
- name: Checkout repository
5555
uses: actions/checkout@v3
56-
56+
5757
- name: Set up Python
5858
uses: actions/setup-python@v4
5959
with:
6060
python-version: '3.x'
61-
61+
6262
- name: Install dependencies
6363
run: |
6464
python -m pip install --upgrade pip
6565
pip install mkdocs-material pymdown-extensions
66-
66+
6767
- name: Build documentation
68-
run: mkdocs build
68+
run: mkdocs build

.gitignore

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,4 +41,22 @@ Thumbs.db
4141
# Other
4242
tools.md
4343
prompt.md
44-
names.md
44+
names.md
45+
46+
# Temporary outputs from genotype_query tests
47+
tests/tmp/genotype_query/
48+
tests/data/genotype_query/missing_malformed.vcf
49+
tests/data/genotype_query/multi_sample.vcf
50+
tests/data/genotype_query/single_sample.vcf
51+
tests/expected/genotype_query/missing_malformed_01.vcf
52+
tests/expected/genotype_query/multi_11_flexible.vcf
53+
tests/expected/genotype_query/multi_11_strict.vcf
54+
tests/expected/genotype_query/multi_12_flexible.vcf
55+
tests/expected/genotype_query/no_match.vcf
56+
tests/expected/genotype_query/single_sample_flex_01.vcf
57+
tests/expected/genotype_query/single_sample_strict_01.vcf
58+
59+
# General temporary test output directories
60+
tests/tmp/
61+
tests/out/
62+
tmp/

CMakeLists.txt

Lines changed: 31 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,17 +6,40 @@ set(VCFX_VERSION_MINOR 0)
66
set(VCFX_VERSION_PATCH 2)
77
set(VCFX_VERSION "${VCFX_VERSION_MAJOR}.${VCFX_VERSION_MINOR}.${VCFX_VERSION_PATCH}")
88

9-
project(VCFX
9+
add_compile_definitions(VCFX_VERSION="${VCFX_VERSION}")
10+
11+
project(VCFX
1012
VERSION ${VCFX_VERSION}
1113
DESCRIPTION "A Comprehensive VCF Manipulation Toolkit"
1214
LANGUAGES CXX
1315
)
1416

17+
# Set a user-friendly default install prefix when none is provided.
18+
# This prevents installation attempts into system directories when
19+
# running without root privileges.
20+
if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
21+
set(CMAKE_INSTALL_PREFIX "$ENV{HOME}/.local" CACHE PATH "Install prefix" FORCE)
22+
endif()
23+
24+
include(GNUInstallDirs)
25+
1526
# Optionally allow building for WebAssembly via Emscripten
1627
option(BUILD_WASM "Build with emscripten toolchain" OFF)
28+
option(PYTHON_BINDINGS "Build Python bindings" ON)
1729

1830
if(BUILD_WASM)
19-
set(CMAKE_TOOLCHAIN_FILE "/path/to/emscripten.cmake" CACHE FILEPATH "Emscripten toolchain" FORCE)
31+
if(NOT CMAKE_TOOLCHAIN_FILE)
32+
if(DEFINED ENV{EMSDK} AND EXISTS "$ENV{EMSDK}/upstream/emscripten/cmake/Modules/Platform/Emscripten.cmake")
33+
set(CMAKE_TOOLCHAIN_FILE "$ENV{EMSDK}/upstream/emscripten/cmake/Modules/Platform/Emscripten.cmake" CACHE FILEPATH "Emscripten toolchain" FORCE)
34+
elseif(DEFINED ENV{EMSCRIPTEN} AND EXISTS "$ENV{EMSCRIPTEN}/cmake/Modules/Platform/Emscripten.cmake")
35+
set(CMAKE_TOOLCHAIN_FILE "$ENV{EMSCRIPTEN}/cmake/Modules/Platform/Emscripten.cmake" CACHE FILEPATH "Emscripten toolchain" FORCE)
36+
endif()
37+
endif()
38+
39+
if(NOT EXISTS "${CMAKE_TOOLCHAIN_FILE}")
40+
message(FATAL_ERROR "Emscripten toolchain file not found. Please set CMAKE_TOOLCHAIN_FILE or EMSDK.")
41+
endif()
42+
2043
message(STATUS "Building for WebAssembly (Emscripten).")
2144
endif()
2245

@@ -42,12 +65,14 @@ enable_testing()
4265
# Add top-level 'src' subdirectory, which in turn references each tool subdirectory
4366
add_subdirectory(src)
4467

45-
# Add a tests subdir if you have tests
46-
# Comment out this line since we don't have a CMakeLists.txt file in the tests directory
47-
# add_subdirectory(tests)
68+
if(PYTHON_BINDINGS)
69+
add_subdirectory(python)
70+
endif()
71+
72+
# Add the test suite
73+
add_subdirectory(tests)
4874

4975
# Installation configuration
50-
include(GNUInstallDirs)
5176

5277
# Install header files
5378
install(FILES

DOCKER.md

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -8,16 +8,16 @@ VCFX is available as a pre-built Docker image on GitHub Container Registry:
88

99
```bash
1010
# Pull the image (only needed once)
11-
docker pull ghcr.io/ieeta-pt/vcfx:latest
11+
docker pull ghcr.io/jorgemfs/vcfx:latest
1212

1313
# Run a VCFX tool
14-
docker run --rm ghcr.io/ieeta-pt/vcfx:latest VCFX_tool_name [options]
14+
docker run --rm ghcr.io/jorgemfs/vcfx:latest VCFX_tool_name [options]
1515

1616
# Mount a directory with your data
17-
docker run --rm -v /path/to/your/data:/data ghcr.io/ieeta-pt/vcfx:latest VCFX_tool_name [options]
17+
docker run --rm -v /path/to/your/data:/data ghcr.io/jorgemfs/vcfx:latest VCFX_tool_name [options]
1818

1919
# Example: process a VCF file (here, tests/data/valid.vcf)
20-
docker run --rm -v $(pwd)/tests/data:/data ghcr.io/ieeta-pt/vcfx:latest 'cat /data/valid.vcf | VCFX_allele_freq_calc > /data/output.tsv'
20+
docker run --rm -v $(pwd)/tests/data:/data ghcr.io/jorgemfs/vcfx:latest 'cat /data/valid.vcf | VCFX_allele_freq_calc > /data/output.tsv'
2121
```
2222

2323
Using the pre-built image is recommended for most users as it:
@@ -65,19 +65,19 @@ There are several ways to run VCFX tools with Docker:
6565

6666
```bash
6767
# With the pre-built image
68-
docker run --rm ghcr.io/ieeta-pt/vcfx:latest VCFX_tool_name [options]
68+
docker run --rm ghcr.io/jorgemfs/vcfx:latest VCFX_tool_name [options]
6969

7070
# With a locally built image
7171
docker run --rm vcfx:local VCFX_tool_name [options]
7272

7373
# Mount the tests/data directory to access test files
74-
docker run --rm -v $(pwd)/tests/data:/data ghcr.io/ieeta-pt/vcfx:latest VCFX_tool_name [options]
74+
docker run --rm -v $(pwd)/tests/data:/data ghcr.io/jorgemfs/vcfx:latest VCFX_tool_name [options]
7575

7676
# Process files in the tests/data directory
77-
docker run --rm -v $(pwd)/tests/data:/data ghcr.io/ieeta-pt/vcfx:latest 'cat /data/valid.vcf | VCFX_validator'
77+
docker run --rm -v $(pwd)/tests/data:/data ghcr.io/jorgemfs/vcfx:latest 'cat /data/valid.vcf | VCFX_validator'
7878

7979
# Example: Calculate allele frequencies for a VCF file
80-
docker run --rm -v $(pwd)/tests/data:/data ghcr.io/ieeta-pt/vcfx:latest 'cat /data/valid.vcf | VCFX_allele_freq_calc > /data/output.tsv'
80+
docker run --rm -v $(pwd)/tests/data:/data ghcr.io/jorgemfs/vcfx:latest 'cat /data/valid.vcf | VCFX_allele_freq_calc > /data/output.tsv'
8181
```
8282

8383
### Using Docker Compose
@@ -98,7 +98,7 @@ docker-compose run --rm vcfx 'cat /data/valid.vcf | VCFX_allele_freq_calc > /dat
9898
When using Docker directly, you need to mount a directory to access your files:
9999

100100
```bash
101-
docker run --rm -v $(pwd)/tests/data:/data ghcr.io/ieeta-pt/vcfx:latest VCFX_tool_name [options]
101+
docker run --rm -v $(pwd)/tests/data:/data ghcr.io/jorgemfs/vcfx:latest VCFX_tool_name [options]
102102
```
103103

104104
When using Docker Compose, the `tests/data` directory is mounted by default:
@@ -115,7 +115,7 @@ You can modify the docker-compose.yml file to mount a different directory if nee
115115
You can create complex pipelines by chaining VCFX tools:
116116

117117
```bash
118-
docker run --rm -v $(pwd)/tests/data:/data ghcr.io/ieeta-pt/vcfx:latest 'cat /data/classifier_mixed.vcf | VCFX_variant_classifier --append-info | grep "VCF_CLASS=SNP" | VCFX_allele_freq_calc > /data/snp_frequencies.tsv'
118+
docker run --rm -v $(pwd)/tests/data:/data ghcr.io/jorgemfs/vcfx:latest 'cat /data/classifier_mixed.vcf | VCFX_variant_classifier --append-info | grep "VCF_CLASS=SNP" | VCFX_allele_freq_calc > /data/snp_frequencies.tsv'
119119
```
120120

121121
### Creating Shell Scripts
@@ -126,7 +126,7 @@ For complex workflows, consider creating a shell script:
126126
#!/bin/bash
127127
# save as vcfx_workflow.sh
128128

129-
docker run --rm -v $(pwd)/tests/data:/data ghcr.io/ieeta-pt/vcfx:latest 'cat /data/valid.vcf | \
129+
docker run --rm -v $(pwd)/tests/data:/data ghcr.io/jorgemfs/vcfx:latest 'cat /data/valid.vcf | \
130130
VCFX_validator | \
131131
VCFX_variant_classifier --append-info | \
132132
VCFX_allele_freq_calc > /data/pipeline_output.tsv'
@@ -147,7 +147,7 @@ If you encounter permission issues with files created in the container:
147147

148148
```bash
149149
# Run the container with your user ID
150-
docker run --rm -v $(pwd)/tests/data:/data -u $(id -u):$(id -g) ghcr.io/ieeta-pt/vcfx:latest VCFX_tool_name [options]
150+
docker run --rm -v $(pwd)/tests/data:/data -u $(id -u):$(id -g) ghcr.io/jorgemfs/vcfx:latest VCFX_tool_name [options]
151151
```
152152

153153
### Container Not Finding Commands
@@ -156,5 +156,5 @@ If the container can't find VCFX commands, ensure they were properly built in th
156156

157157
```bash
158158
# List available VCFX tools in the container
159-
docker run --rm ghcr.io/ieeta-pt/vcfx:latest 'ls -1 /usr/local/bin/VCFX_*'
159+
docker run --rm ghcr.io/jorgemfs/vcfx:latest 'ls -1 /usr/local/bin/VCFX_*'
160160
```

Dockerfile

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ RUN apt-get update && apt-get install -y \
99
cmake \
1010
git \
1111
libz-dev \
12+
python3 \
13+
python3-dev \
1214
&& rm -rf /var/lib/apt/lists/*
1315

1416
# Create a working directory
@@ -46,14 +48,15 @@ COPY --from=builder /app/build/src /usr/local/bin/
4648
# Create a directory for data
4749
WORKDIR /data
4850

49-
# Add the script that adds tools to PATH
51+
# Add the helper scripts
5052
COPY add_vcfx_tools_to_path.sh /usr/local/bin/
53+
COPY docker_entrypoint.sh /usr/local/bin/
5154

52-
# Make the script executable
53-
RUN chmod +x /usr/local/bin/add_vcfx_tools_to_path.sh
55+
# Make them executable
56+
RUN chmod +x /usr/local/bin/add_vcfx_tools_to_path.sh /usr/local/bin/docker_entrypoint.sh
5457

55-
# Set the entry point
56-
ENTRYPOINT ["/bin/bash", "-c"]
58+
# Use a custom entrypoint that sets up PATH for the tools
59+
ENTRYPOINT ["/usr/local/bin/docker_entrypoint.sh"]
5760

5861
# Default command shows available tools
59-
CMD ["echo 'VCFX Toolkit is ready. Run any VCFX tool by name, for example:' && ls -1 /usr/local/bin/VCFX_* | xargs -n1 basename"]
62+
CMD ["bash", "-c", "echo 'VCFX Toolkit is ready. Run any VCFX tool by name, for example:' && ls -1 /usr/local/bin/VCFX_* | xargs -n1 basename"]

PUBLISHING.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,11 @@ This setup only needs to be done once. After this, the GitHub Actions workflow w
2626

2727
Before pushing any changes, you should test the documentation site locally:
2828

29-
1. Install the required dependencies:
29+
1. Install the required dependencies. It's best to use a Python virtual
30+
environment because some systems mark the system Python as
31+
"externally managed" (PEP 668), which makes pip refuse to install packages into it:
3032
```bash
33+
python3 -m venv venv && source venv/bin/activate
3134
pip install mkdocs-material pymdown-extensions
3235
```
3336

0 commit comments

Comments
 (0)