nf-core · edmundmiller · Jan 13, 2026 · Jan 13, 2026 · Jan 13, 2026 · Jan 13, 2026
@@ -0,0 +1,106 @@
+# Runs the nf_core_stats `strict-syntax` command on pushes that touch this file,
+# on a daily schedule, and on manual dispatch.
+name: Lint nf-core Pipelines (Strict Syntax)
+
+on:
+  # Push runs are limited to changes of this workflow file itself
+  push:
+    paths:
+      - ".github/workflows/lint_strict_syntax.yml"
+  schedule:
+    # Run daily at 1 AM UTC (after other pipelines finish)
+    - cron: "0 1 * * *"
+  # Manual runs can restrict the lint to a subset of pipelines
+  workflow_dispatch:
+    inputs:
+      pipelines:
+        description: "Specific pipeline(s) to lint (comma-separated, leave empty for all)"
+        type: string
+        default: ""
+
+# NOTE(review): confirm this job really needs write access to repository contents
+permissions:
+  contents: write
+
+# Workflow-level environment, visible to every step of every job
+env:
+  SOURCES__GITHUB_PIPELINE__GITHUB__API_TOKEN: ${{ secrets.GH_TOKEN_STATS_PAGE }}
+  # The two MotherDuck values are blanked on `pull_request` events. This workflow
+  # declares no `pull_request` trigger, so the guards currently always pass.
+  DESTINATION__MOTHERDUCK__CREDENTIALS__DATABASE: ${{ github.event_name != 'pull_request' && 'nf_core_stats_bot' || '' }}
+  DESTINATION__MOTHERDUCK__CREDENTIALS__PASSWORD: ${{ github.event_name != 'pull_request' && secrets.MOTHERDUCK_TOKEN || '' }}
+  PYTHONUNBUFFERED: "1"
+
+jobs:
+  # Single job: obtain a Nextflow build, then run the strict-syntax lint
+  lint:
+    runs-on: ubuntu-latest
+    defaults:
+      run:
+        # Applies to every `run:` step in this job; `uses:` steps are unaffected
+        working-directory: pipeline
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      # TODO: Switch to latest release tag instead of HEAD for better caching
+      # Currently HEAD changes frequently, invalidating pipeline-level caching
+      #
+      # Resolves the commit that upstream HEAD points at and exposes it as the
+      # `sha` step output, which the cache steps use as their key suffix.
+      - name: Get Nextflow latest commit
+        id: nf-commit
+        run: |
+          # Without an explicit `shell:`, run steps do not enable pipefail, so a
+          # failed `git ls-remote` would be masked by `cut` and yield an empty SHA.
+          set -euo pipefail
+          COMMIT=$(git ls-remote https://github.com/nextflow-io/nextflow.git HEAD | cut -f1)
+          # An empty SHA would collapse the cache key to "nextflow-build-"; fail instead.
+          if [[ -z "$COMMIT" ]]; then
+            echo "::error::Could not resolve HEAD of nextflow-io/nextflow"
+            exit 1
+          fi
+          echo "sha=$COMMIT" >> "$GITHUB_OUTPUT"
+
+      # Restore a previously built Nextflow distribution, keyed by the upstream
+      # commit resolved in the `nf-commit` step. This step only restores; saving
+      # is done by a separate `actions/cache/save` step later in the job.
+      - name: Restore Nextflow build cache
+        id: cache-nextflow
+        uses: actions/cache/restore@v4
+        with:
+          path: nextflow-src/build/releases
+          key: nextflow-build-${{ steps.nf-commit.outputs.sha }}
+
+      # Nextflow needs a JVM to run as well as to build, so install the same JDK
+      # on every run; gating this on a cache miss would leave cache-hit runs on
+      # whatever Java the runner image provides by default.
+      - name: Set up Java
+        uses: actions/setup-java@v4
+        with:
+          distribution: "temurin"
+          java-version: "21"
+
+      # Cache miss only: shallow-clone Nextflow next to the `pipeline` directory
+      # (hence the initial `cd ..`), build it with `make pack`, and symlink the
+      # versioned `nextflow-*-dist` artifact to a plain `nextflow` name inside
+      # build/releases, the directory that is cached and later added to PATH.
+      - name: Clone and build Nextflow
+        id: build-nextflow
+        if: steps.cache-nextflow.outputs.cache-hit != 'true'
+        run: |
+          cd ..
+          git clone --depth 1 https://github.com/nextflow-io/nextflow.git nextflow-src
+          cd nextflow-src
+          make pack
+          cd build/releases/
+          ln -s nextflow-*-dist nextflow
+
+      # Save the fresh build under the same key the restore step looked up.
+      # Runs only when the cache missed and the build step itself succeeded;
+      # `always()` replaces the implicit success() check on earlier steps.
+      - name: Save Nextflow build cache
+        if: steps.cache-nextflow.outputs.cache-hit != 'true' && steps.build-nextflow.outcome == 'success' && always()
+        uses: actions/cache/save@v4
+        with:
+          path: nextflow-src/build/releases
+          key: nextflow-build-${{ steps.nf-commit.outputs.sha }}
+
+      # Expose the restored or freshly built releases directory (which holds the
+      # `nextflow` symlink) to all subsequent steps.
+      - name: Add Nextflow to PATH
+        run: echo "${{ github.workspace }}/nextflow-src/build/releases" >> $GITHUB_PATH
+
+      # Fail early if the `nextflow` launcher cannot be found or started
+      - name: Verify Nextflow
+        run: nextflow -version
+
+      # Install uv with Python 3.12 (quoted so YAML keeps it a string) and
+      # enable the action's cache option.
+      - name: Set up uv
+        uses: astral-sh/setup-uv@v5
+        with:
+          python-version: "3.12"
+          enable-cache: true
+
+      # Download a pinned runitor release binary, smoke-test it, and move it to
+      # /usr/local/bin so the lint step can call it by name.
+      # NOTE(review): the binary is fetched without checksum verification -
+      # consider validating it against the release's published checksums.
+      - name: Install runitor
+        env:
+          RUNITOR_VERSION: "v1.3.0-build.4"
+        run: |
+          curl -fsSL -o runitor \
+            "https://github.com/bdd/runitor/releases/download/${RUNITOR_VERSION}/runitor-${RUNITOR_VERSION}-linux-amd64"
+          chmod +x runitor
+          ./runitor -version
+          sudo mv runitor /usr/local/bin/
+
+      # Runs the strict-syntax lint under runitor, optionally restricted to the
+      # pipelines given through the workflow_dispatch `pipelines` input.
+      - name: Run strict-syntax pipeline
+        env:
+          # Pass the input through the environment instead of interpolating
+          # `${{ }}` into the script, so its contents are never parsed as shell code.
+          PIPELINES: ${{ inputs.pipelines }}
+        run: |
+          # Empty on push/schedule runs and when the input is left blank, in
+          # which case no --pipelines filter is passed at all.
+          PIPELINE_ARGS=()
+          if [[ -n "${PIPELINES}" ]]; then
+            PIPELINE_ARGS=(--pipelines "${PIPELINES}")
+          fi
+          runitor -uuid 4080223c-8257-4e7d-ad2b-c34ef493f42f -- \
+            uv run nf_core_stats strict-syntax "${PIPELINE_ARGS[@]}"
@@ -0,0 +1,122 @@
+---
+name: Add DLT Data Source
+description: Scaffold new DLT pipeline for data ingestion to MotherDuck
+---
+
+# Add DLT Data Source
+
+## When to Use
+- Adding new external data source (API, file, webhook)
+- Creating new tables in MotherDuck database
+
+## File Structure
+```
+pipeline/src/nf_core_stats/
+├── __init__.py          # Add command registration
+├── _<source>.py         # Optional: API helpers
+└── <source>_pipeline.py # Main pipeline
+```
+
+## Pipeline Template
+
+```python
+"""DLT pipeline for <source> data."""
+
+from collections.abc import Iterator
+from typing import Annotated
+
+import dlt
+
+from ._logging import log_pipeline_stats, logger
+
+
+@dlt.source(name="<source>")
+def <source>_source():
+    """Initialize <source> data source."""
+    logger.info("Initialized <source> source")
+    return [
+        dlt.resource(<table_name>_resource(), name="<table_name>"),
+    ]
+
+
+@dlt.resource(write_disposition="merge", primary_key=["id"])
+def <table_name>_resource() -> Iterator[dict]:
+    """Collect <table_name> data."""
+    # Fetch data from API/file/etc
+    yield {"id": 1, "field": "value"}
+
+
+def main(
+    *,
+    destination: str = "motherduck",
+    # Add pipeline-specific params here
+):
+    """Run the <source> data ingestion pipeline.
+
+    Args:
+        destination: dlt backend. Use 'motherduck' for production. Can use 'duckdb' for local testing
+    """
+    logger.info("Starting <source> data pipeline...")
+
+    pipeline = dlt.pipeline(
+        pipeline_name="<source>_pipeline",
+        destination=destination,
+        dataset_name="<source>",
+    )
+
+    load_info = pipeline.run(<source>_source())
+    log_pipeline_stats(pipeline, load_info)
+
+    logger.info("<Source> data pipeline completed!")
+```
+
+## CLI Registration
+
+Add to `pipeline/src/nf_core_stats/__init__.py`:
+```python
+from . import <source>_pipeline
+app.command(<source>_pipeline.main, "<source>")
+```
+
+## Workflow Integration
+
+Add to `.github/workflows/run_pipelines.yml` matrix:
+```yaml
+- pipeline: <source>
+  uuid: <generate-new-uuid>  # for runitor monitoring
+```
+
+Or create a separate workflow if the pipeline needs special dependencies (such as Nextflow).
+
+## Write Dispositions
+
+| Mode | Use When |
+|------|----------|
+| `merge` | Update existing rows by primary_key |
+| `replace` | Full table reload each run |
+| `append` | Insert-only, keep all history |
+
+## Secrets Pattern
+
+Environment variable: `SOURCES__<PIPELINE>__<SERVICE>__<KEY>`
+
+Access in code:
+```python
+api_token: str = dlt.secrets["sources.<pipeline>.<service>.<key>"]
+```
+
+## Testing
+
+```bash
+cd pipeline
+uv run nf_core_stats <source> --destination duckdb
+```
+
+## Checklist
+
+1. [ ] Create `<source>_pipeline.py` with source + resources
+2. [ ] Register CLI command in `__init__.py`
+3. [ ] Add to workflow matrix (or create separate workflow)
+4. [ ] Add secrets to GitHub repo settings
+5. [ ] Create Evidence SQL sources for new tables
+6. [ ] Test locally with `--destination duckdb`
@@ -0,0 +1,135 @@
+---
+name: Add Evidence Page
+description: Create new Evidence.dev dashboard page with charts and tables
+---
+
+# Add Evidence Page
+
+## When to Use
+- Adding new visualization/dashboard
+- Creating charts for existing DLT data
+
+## File Structure
+```
+├── pages/<category>/<page>.md    # Page with charts
+└── sources/nfcore_db/<query>.sql # SQL data source
+```
+
+## Page Template
+
+````markdown
+---
+title: Page Title
+sidebar_position: 1
+---
+
+Brief description of what this page shows.
+
+```sql summary_query
+SELECT COUNT(*) as total FROM nfcore_db.<table_name>
+```
+
+<BigValue
+  data={summary_query}
+  value=total
+  title="Total Items"
+/>
+
+```sql time_series
+SELECT
+  date_trunc('week', created_at) as week,
+  COUNT(*) as count
+FROM nfcore_db.<table_name>
+GROUP BY 1
+ORDER BY 1
+```
+
+<LineChart
+  data={time_series}
+  x=week
+  y=count
+  title="Items Over Time"
+/>
+
+<LastRefreshed prefix="Data last updated"/>
+````
+
+## SQL Source Template
+
+File: `sources/nfcore_db/<query>.sql`
+```sql
+SELECT
+  column1,
+  column2,
+  COUNT(*) as count
+FROM <table_name>
+GROUP BY 1, 2
+ORDER BY count DESC
+```
+
+## Component Reference
+
+| Component | Use Case | Key Props |
+|-----------|----------|-----------|
+| BigValue | KPI cards | `data`, `value`, `title`, `sparkline`, `fmt` |
+| LineChart | Time series | `data`, `x`, `y`, `title`, `yAxisTitle` |
+| AreaChart | Stacked trends | `data`, `x`, `y`, `series`, `seriesColors` |
+| BarChart | Comparisons | `data`, `x`, `y`, `type=grouped`, `yFmt` |
+| DataTable | Tabular data | `data`, `search`, `totalRow`, `<Column>` |
+| CalendarHeatmap | Daily heatmap | `data`, `date`, `value` |
+| Tabs/Tab | Tabbed views | `<Tab label="Name">` |
+| DateRange | Date filter | `name`, `data`, `dates`, `defaultValue` |
+
+## DataTable with Columns
+
+```svelte
+<DataTable data={query} search=true totalRow=true>
+  <Column id="name" title="Name"/>
+  <Column id="count" title="Count" align="right"/>
+  <Column id="url" contentType=link linkLabel="View"/>
+  <Column id="score" contentType=colorscale colorScale=negative/>
+</DataTable>
+```
+
+## Stacked AreaChart with Colors
+
+```svelte
+<AreaChart
+  data={query}
+  x=date
+  y={["category_a", "category_b", "category_c"]}
+  seriesColors={['#2ecc71', '#f39c12', '#e74c3c']}
+  title="Distribution Over Time"
+/>
+```
+
+## Date Filtering Pattern
+
+````markdown
+<DateRange name="date_range" data={base_query} dates=timestamp defaultValue="Last 90 Days"/>
+
+```sql filtered_query
+SELECT * FROM nfcore_db.<table>
+WHERE timestamp >= '${inputs.date_range.start}'
+  AND timestamp <= '${inputs.date_range.end}'
+```
+
+<LineChart data={filtered_query} x=timestamp y=value/>
+````
+
+## Testing
+
+```bash
+npm run dev
+# Open http://localhost:3000/path/to/page
+```
+
+## Checklist
+
+1. [ ] Create SQL source in `sources/nfcore_db/`
+2. [ ] Create page in `pages/<category>/`
+3. [ ] Add frontmatter (title, sidebar_position)
+4. [ ] Add summary metrics (BigValue)
+5. [ ] Add time series chart (LineChart/AreaChart)
+6. [ ] Add detail table (DataTable)
+7. [ ] Test with `npm run dev`