Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
OPENAI_MODEL_ID=gpt-4.1-nano
OPENAI_API_KEY=<str>
OLLAMA_MODEL_ID=llama3.2:3b # replace with your model
Copy link

Copilot AI Nov 7, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing space after the comment delimiter. Should be llama3.2:3b # replace with your model for consistency with other comment formatting.

Copilot uses AI. Check for mistakes.
# OLLAMA_API_URL=http://localhost:11434

# Huggingface API Config
HUGGINGFACE_ACCESS_TOKEN=<str>
Expand All @@ -18,6 +19,11 @@ DATABASE_NAME=ghost_db
QDRANT_DATABASE_HOST=localhost
QDRANT_DATABASE_PORT=6333

# Qdrant Cloud Config
USE_QDRANT_CLOUD=true
QDRANT_CLOUD_URL=your_qdrant_cloud_url
QDRANT_APIKEY=your_qdrant_api_key

# uvicorn Config
UVICORN_HOST=0.0.0.0
UVICORN_PORT=8000
Expand Down
8 changes: 7 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -228,4 +228,10 @@ marimo/_lsp/
__marimo__/

# Local
backup
assets/
backup/
configs/feature_engineering.yaml
configs/digital_data_etl_huyen-chip.yaml
configs/digital_data_etl_jay_alammar.yaml
output/
tests/payloads/payload.json
10 changes: 10 additions & 0 deletions configs/digital_data_etl_author_name.yaml.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
parameters:
user_full_name: John Doe # [First Name(s)] [Last Name]
links:
# Blog Posts
- https://johndoe.blog/post1
- https://johndoe.blog/post2
# GitHub Repositories
Comment on lines +4 to +7
Copy link

Copilot AI Nov 7, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Inconsistent comment formatting: "blog Posts" should be "Blog Posts" or "blog posts" to match the capitalization pattern used for "github Repositories" below (which should likely be "GitHub Repositories").

Suggested change
# blog Posts
- https://johndoe.blog/post1
- https://johndoe.blog/post2
# github Repositories
# Blog Posts
- https://johndoe.blog/post1
- https://johndoe.blog/post2
# GitHub Repositories

Copilot uses AI. Check for mistakes.
Copy link

Copilot AI Nov 7, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Inconsistent capitalization: "github Repositories" should be "GitHub Repositories" to properly capitalize the brand name and maintain consistency.

Suggested change
# github Repositories
# GitHub Repositories

Copilot uses AI. Check for mistakes.
- https://github.com/johndoe/repo1
- https://github.com/johndoe/repo2
- https://github.com/johndoe/awesome-project
4 changes: 4 additions & 0 deletions configs/feature_engineering.yaml.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
parameters:
author_full_names:
- John Doe
- Jane Smith
1 change: 1 addition & 0 deletions core/application/rag/query_expansion.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ def generate(self, query: Query, expand_to_n: int) -> list[Query]:
prompt = query_expansion_template.create_template(expand_to_n - 1) # excluding the original prompt
# todo: handle token limit exceeded error
# todo: localize the model initialization
# use either Ollama or OpenAI model
model = ChatOllama(model=settings.OLLAMA_MODEL_ID, temperature=0)
# model = ChatOpenAI(model=settings.OPENAI_MODEL_ID,
# api_key=settings.OPENAI_API_KEY,
Expand Down
3 changes: 2 additions & 1 deletion core/application/rag/self_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ def generate(self, query: Query) -> Query:
return query

prompt = SelfQueryTemplate().create_template()
# use either Ollama or OpenAI model
model = ChatOllama(model=settings.OLLAMA_MODEL_ID, temperature=0)
# model = ChatOpenAI(model=settings.OPENAI_MODEL_ID,
# api_key=settings.OPENAI_API_KEY,
Expand All @@ -40,7 +41,7 @@ def generate(self, query: Query) -> Query:

# python -m core.application.rag.self_query
if __name__ == "__main__":
query = Query.from_str("I am Huyen Chip. Write an article about the best types of advanced RAG methods.")
query = Query.from_str("I am <Author Name>. Write an article about the best types of advanced RAG methods.")
self_query = SelfQuery()
query = self_query.generate(query)
logger.info(f"Extracted author_id: {query.author_id}")
Expand Down
4 changes: 2 additions & 2 deletions core/infrastructure/inference_pipeline_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from core.application.utils import misc
from core.domain.embedded_chunks import EmbeddedChunk
from core.infrastructure.opik_utils import configure_opik
from core.model.inference import InferenceExecutor, LLMInferenceTransformersLocal
from core.model.inference import InferenceExecutor, LLMInferenceTransformers

configure_opik()

Expand All @@ -25,7 +25,7 @@ class QueryResponse(BaseModel):

@opik.track
def call_llm_service(query: str, context: str | None) -> str:
llm = LLMInferenceTransformersLocal(
llm = LLMInferenceTransformers(
model_id=settings.HUGGINGFACE_INFERENCE_MODEL_ID,
)
answer = InferenceExecutor(llm, query, context).execute()
Expand Down
24 changes: 22 additions & 2 deletions core/model/finetuning/llm_ghostwriter_finetune_dpo.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -563,10 +563,19 @@
]
},
"id": "MTQnOypRl4pS",
"jupyter": {
"is_executing": true
},
"outputId": "1683fb7a-37ce-41bc-ec6f-84045308a1c0"
},
"outputs": [],
"source": [
"# use low learning rate for DPO training\n",
"# otherwise it might cause corrupted responses during generation.\n",
"# larger learning rates and longer update steps are more likely to trigger this issue.\n",
"# https://github.com/huggingface/trl/issues/1025#issuecomment-1826268148\n",
"# https://kyunghyuncho.me/a-proper-preference-optimization-loss-and-its-gradient/\n",
"\n",
"trainer = DPOTrainer(\n",
" model=model,\n",
" ref_model=None,\n",
Expand All @@ -577,7 +586,7 @@
" max_length=max_seq_length // 2,\n",
" max_prompt_length=max_seq_length // 2,\n",
" args=DPOConfig(\n",
" learning_rate=2e-6,\n",
" learning_rate=2e-6, # https://github.com/huggingface/trl/issues/1025#issuecomment-1826268148\n",
" lr_scheduler_type=\"linear\",\n",
" per_device_train_batch_size=2,\n",
" per_device_eval_batch_size=2,\n",
Expand Down Expand Up @@ -642,7 +651,18 @@
"inputs = tokenizer([message], return_tensors=\"pt\").to(\"cuda\")\n",
"\n",
"text_streamer = TextStreamer(tokenizer)\n",
"_ = model.generate(**inputs, streamer=text_streamer, max_new_tokens=256, use_cache=True)"
"_ = model.generate(**inputs, streamer=text_streamer, max_new_tokens=256, use_cache=True)\n",
"\n",
"# \"\"\"\n",
"# <|begin_of_text|>Below is an instruction that describes a task. Write a response that appropriately completes the request.\n",
"#\n",
"# ### Instruction:\n",
"# Write a paragraph to introduce supervised fine-tuning.\n",
"#\n",
"# ### Response:\n",
"# Supervised fine-tuning is a technique used in natural language processing and machine learning to improve the performance of a model by training it on a large dataset of labeled examples. This process involves feeding the model examples of correct and incorrect responses, and using this information to adjust the model's parameters to better predict the correct response. Supervised fine-tuning is commonly used in tasks such as text classification, sentiment analysis, and machine translation, where the goal is to train the model to accurately classify or translate between languages. By fine-tuning the model on a labeled dataset, the model learns to better recognize and respond to the specific prompts and expectations of the task at hand.<|end_of_text|>\n",
"#\n",
"# \"\"\""
]
},
{
Expand Down
14 changes: 13 additions & 1 deletion core/model/finetuning/llm_ghostwriter_finetune_sft.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -610,7 +610,19 @@
"inputs = tokenizer([message], return_tensors=\"pt\").to(\"cuda\")\n",
"\n",
"text_streamer = TextStreamer(tokenizer)\n",
"_ = model.generate(**inputs, streamer=text_streamer, max_new_tokens=256, use_cache=True)"
"_ = model.generate(**inputs, streamer=text_streamer, max_new_tokens=256, use_cache=True)\n",
"\n",
"\n",
"# \"\"\"\n",
"# <|begin_of_text|>Below is an instruction that describes a task. Write a response that appropriately completes the request.\n",
"#\n",
"# ### Instruction:\n",
"# Write a paragraph to introduce supervised fine-tuning.\n",
"#\n",
"# ### Response:\n",
"# Supervised fine-tuning is a technique used to improve a language model by teaching it to predict the next word based on examples provided by a labeled dataset. This process involves training the model to understand the relationships between words and how to predict the next one given a set of previous tokens. Supervised fine-tuning is commonly used with language models like GPT-3 and is considered a state-of-the-art approach for text generation.<|end_of_text|>\n",
"#\n",
"# \"\"\""
]
},
{
Expand Down
4 changes: 2 additions & 2 deletions core/model/inference/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from .inference import LLMInferenceTransformersLocal
from .inference import LLMInferenceTransformers
from .run import InferenceExecutor

__all__ = ["InferenceExecutor", "LLMInferenceTransformersLocal"]
__all__ = ["InferenceExecutor", "LLMInferenceTransformers"]
3 changes: 1 addition & 2 deletions core/model/inference/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwa
return False


class LLMInferenceTransformersLocal(Inference):
class LLMInferenceTransformers(Inference):
"""Huggingface Transformers local model inference"""

_shared_model = None
Expand All @@ -53,7 +53,6 @@ def __init__(

self.model_id = model_id or settings.HUGGINGFACE_INFERENCE_MODEL_ID

# --- Device & dtype (safer defaults on MPS) ---
self._device = device or settings.HF_MODEL_DEVICE
if torch_dtype is None:
if self._device == "mps":
Expand Down
4 changes: 2 additions & 2 deletions core/model/inference/test.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from loguru import logger

from core.model.inference.inference import LLMInferenceTransformersLocal
from core.model.inference.inference import LLMInferenceTransformers
from core.model.inference.run import InferenceExecutor
from core.settings import settings

Expand All @@ -19,7 +19,7 @@
### Response:
"""
logger.info(f"Running inference for text: '{text}'")
llm = LLMInferenceTransformersLocal(
llm = LLMInferenceTransformers(
model_id=settings.HUGGINGFACE_INFERENCE_MODEL_ID,
)
answer = InferenceExecutor(llm, text).execute()
Expand Down
Loading
Loading