From 6af78dfa897fca7eb53c189bc2ac046c971faed5 Mon Sep 17 00:00:00 2001 From: ahmedshahriar Date: Fri, 7 Nov 2025 00:44:56 -0700 Subject: [PATCH 1/2] feat: add example configuration files and update inference class references --- .env.example | 6 ++- .github/workflows/cd.yml | 50 +++++++++++++++++++ .../digital_data_etl_author_name.yaml.example | 4 +- core/settings.py | 2 +- 4 files changed, 57 insertions(+), 5 deletions(-) create mode 100644 .github/workflows/cd.yml diff --git a/.env.example b/.env.example index c19ba6b..a272ca5 100644 --- a/.env.example +++ b/.env.example @@ -1,11 +1,13 @@ OPENAI_MODEL_ID=gpt-4.1-nano OPENAI_API_KEY= -OLLAMA_MODEL_ID=llama3.2:3b#replace with your model +# replace with your model +OLLAMA_MODEL_ID=llama3.2:3b # OLLAMA_API_URL=http://localhost:11434 # Huggingface API Config HUGGINGFACE_ACCESS_TOKEN= -HUGGINGFACE_INFERENCE_MODEL_ID=ahmedshahriar/GhostWriterLlama-3.2-1B-DPO#replace with your model +# replace with your model +HUGGINGFACE_INFERENCE_MODEL_ID=ahmedshahriar/GhostWriterLlama-3.2-1B-DPO # Comet ML (during training) COMET_API_KEY= diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml new file mode 100644 index 0000000..1592a9d --- /dev/null +++ b/.github/workflows/cd.yml @@ -0,0 +1,50 @@ +name: Publish Docker image to GHCR + +on: + push: + # Uncomment to restrict to main branch only +# branches: ["main"] + tags: ["v*"] # e.g. 
v1.2.3 ---> release builds + workflow_dispatch: {} # manual trigger if needed + +permissions: + contents: read + packages: write + +env: + REGISTRY: ghcr.io + IMAGE_NAME: ${{ github.repository }} # -> ghcr.io/<owner>/<repo> + +jobs: + build-and-push: + runs-on: ubuntu-latest + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Log in to GHCR + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Extract metadata (tags, labels) + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + tags: | + type=ref,event=branch + type=ref,event=tag + type=sha + + - name: Build and push + uses: docker/build-push-action@v6 + with: + context: . + file: ./Dockerfile + push: true + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} diff --git a/configs/digital_data_etl_author_name.yaml.example b/configs/digital_data_etl_author_name.yaml.example index 12702f1..4668287 100644 --- a/configs/digital_data_etl_author_name.yaml.example +++ b/configs/digital_data_etl_author_name.yaml.example @@ -1,10 +1,10 @@ parameters: user_full_name: John Doe # [First Name(s)] [Last Name] links: - # blog Posts + # Blog Posts - https://johndoe.blog/post1 - https://johndoe.blog/post2 - # github Repositories + # GitHub Repositories - https://github.com/johndoe/repo1 - https://github.com/johndoe/repo2 - https://github.com/johndoe/awesome-project diff --git a/core/settings.py b/core/settings.py index 3d1f4f9..0581dfd 100644 --- a/core/settings.py +++ b/core/settings.py @@ -46,7 +46,7 @@ class Settings(BaseSettings): # RAG TEXT_EMBEDDING_MODEL_ID: str = "sentence-transformers/all-MiniLM-L6-v2" - RERANKING_CROSS_ENCODER_MODEL_ID: str = "cross-encoder/ms-marco-MiniLM-L-4-v2" + RERANKING_CROSS_ENCODER_MODEL_ID: str = "cross-encoder/ms-marco-MiniLM-L4-v2" RAG_MODEL_DEVICE: str = ( "cuda" if torch.cuda.is_available() else "mps" if
torch.backends.mps.is_available() else "cpu" ) From 4f1b48ff3c26085e0ba3a2419c0d787a0bd1a645 Mon Sep 17 00:00:00 2001 From: ahmedshahriar Date: Fri, 7 Nov 2025 00:46:41 -0700 Subject: [PATCH 2/2] feat: update config files and add GitHub Actions workflow for Docker img --- .env.example | 6 ++- .github/workflows/cd.yml | 50 +++++++++++++++++++ .../digital_data_etl_author_name.yaml.example | 4 +- core/settings.py | 2 +- 4 files changed, 57 insertions(+), 5 deletions(-) create mode 100644 .github/workflows/cd.yml diff --git a/.env.example b/.env.example index c19ba6b..a272ca5 100644 --- a/.env.example +++ b/.env.example @@ -1,11 +1,13 @@ OPENAI_MODEL_ID=gpt-4.1-nano OPENAI_API_KEY= -OLLAMA_MODEL_ID=llama3.2:3b#replace with your model +# replace with your model +OLLAMA_MODEL_ID=llama3.2:3b # OLLAMA_API_URL=http://localhost:11434 # Huggingface API Config HUGGINGFACE_ACCESS_TOKEN= -HUGGINGFACE_INFERENCE_MODEL_ID=ahmedshahriar/GhostWriterLlama-3.2-1B-DPO#replace with your model +# replace with your model +HUGGINGFACE_INFERENCE_MODEL_ID=ahmedshahriar/GhostWriterLlama-3.2-1B-DPO # Comet ML (during training) COMET_API_KEY= diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml new file mode 100644 index 0000000..1592a9d --- /dev/null +++ b/.github/workflows/cd.yml @@ -0,0 +1,50 @@ +name: Publish Docker image to GHCR + +on: + push: + # Uncomment to restrict to main branch only +# branches: ["main"] + tags: ["v*"] # e.g. 
v1.2.3 ---> release builds + workflow_dispatch: {} # manual trigger if needed + +permissions: + contents: read + packages: write + +env: + REGISTRY: ghcr.io + IMAGE_NAME: ${{ github.repository }} # -> ghcr.io/<owner>/<repo> + +jobs: + build-and-push: + runs-on: ubuntu-latest + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Log in to GHCR + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Extract metadata (tags, labels) + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + tags: | + type=ref,event=branch + type=ref,event=tag + type=sha + + - name: Build and push + uses: docker/build-push-action@v6 + with: + context: . + file: ./Dockerfile + push: true + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} diff --git a/configs/digital_data_etl_author_name.yaml.example b/configs/digital_data_etl_author_name.yaml.example index 12702f1..4668287 100644 --- a/configs/digital_data_etl_author_name.yaml.example +++ b/configs/digital_data_etl_author_name.yaml.example @@ -1,10 +1,10 @@ parameters: user_full_name: John Doe # [First Name(s)] [Last Name] links: - # blog Posts + # Blog Posts - https://johndoe.blog/post1 - https://johndoe.blog/post2 - # github Repositories + # GitHub Repositories - https://github.com/johndoe/repo1 - https://github.com/johndoe/repo2 - https://github.com/johndoe/awesome-project diff --git a/core/settings.py b/core/settings.py index 3d1f4f9..0581dfd 100644 --- a/core/settings.py +++ b/core/settings.py @@ -46,7 +46,7 @@ class Settings(BaseSettings): # RAG TEXT_EMBEDDING_MODEL_ID: str = "sentence-transformers/all-MiniLM-L6-v2" - RERANKING_CROSS_ENCODER_MODEL_ID: str = "cross-encoder/ms-marco-MiniLM-L-4-v2" + RERANKING_CROSS_ENCODER_MODEL_ID: str = "cross-encoder/ms-marco-MiniLM-L4-v2" RAG_MODEL_DEVICE: str = ( "cuda" if torch.cuda.is_available() else "mps" if
torch.backends.mps.is_available() else "cpu" )