Skip to content

Commit 56e7508

Browse files
authored
Merge pull request AmadeusITGroup#60 from dpomian/docker-image-fix
fix: update uv overrides and docker file to use python >=3.11
2 parents d835535 + 2b85178 commit 56e7508

4 files changed

Lines changed: 19 additions & 8 deletions

File tree

Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,7 @@ RUN curl -LsSf https://astral.sh/uv/install.sh | sh \
2222
&& uv python install 3.11 \
2323
&& uv venv \
2424
&& cd /tmp/build \
25-
&& uv --native-tls pip install --no-cache . \
25+
&& uv --native-tls pip install --no-cache --override /tmp/build/etc/uv-overrides.txt . \
2626
&& uv --native-tls run --script /tmp/build/etc/fetchDefaultModels.py \
2727
&& rm -rf /tmp/build
2828

etc/uv-overrides.txt

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,2 @@
1+
numba>=0.60.0
2+
llvmlite>=0.43.0

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -32,6 +32,7 @@ dependencies = [
3232
"python-docx>=1.1.2",
3333
"python-dotenv>=1.0.1",
3434
"python-pptx>=1.0.2",
35+
"numba>=0.60.0",
3536
"unstructured>=0.14.8",
3637
"faiss-cpu>=1.11.0",
3738
"langchain_community>=0.3.18",

src/docs2vecs/subcommands/indexer/skills/recursive_character_splitter_skill.py

Lines changed: 15 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,10 @@
11
import hashlib
22
from pathlib import Path
3-
from typing import List
4-
from typing import Optional
3+
from typing import List, Optional
54

6-
from langchain.text_splitter import RecursiveCharacterTextSplitter as LCRecursiveCharacterTextSplitter
5+
from langchain_text_splitters import (
6+
RecursiveCharacterTextSplitter as LCRecursiveCharacterTextSplitter,
7+
)
78

89
from docs2vecs.subcommands.indexer.config.config import Config
910
from docs2vecs.subcommands.indexer.document.chunk import Chunk
@@ -14,22 +15,29 @@
1415
class RecursiveCharacterTextSplitter(IndexerSkill):
1516
DEFAULT_CHUNK_SIZE = 1000
1617
DEFAULT_CHUNK_OVERLAP = 100
17-
18+
1819
def __init__(self, config: dict, global_config: Config):
1920
super().__init__(config, global_config)
2021
self._set_config_defaults()
2122

2223
def _set_config_defaults(self):
2324
if "chunk_size" not in self._config:
24-
self._config["chunk_size"] = RecursiveCharacterTextSplitter.DEFAULT_CHUNK_SIZE
25+
self._config["chunk_size"] = (
26+
RecursiveCharacterTextSplitter.DEFAULT_CHUNK_SIZE
27+
)
2528

2629
if "chunk_overlap" not in self._config:
27-
self._config["chunk_overlap"] = RecursiveCharacterTextSplitter.DEFAULT_CHUNK_OVERLAP
30+
self._config["chunk_overlap"] = (
31+
RecursiveCharacterTextSplitter.DEFAULT_CHUNK_OVERLAP
32+
)
2833

2934
def run(self, input: Optional[List[Document]] = None) -> List[Document]:
3035
self.logger.info("Running RecursiveCharacterTextSplitter...")
3136

32-
splitter = LCRecursiveCharacterTextSplitter(chunk_size=self._config["chunk_size"], chunk_overlap=self._config["chunk_overlap"])
37+
splitter = LCRecursiveCharacterTextSplitter(
38+
chunk_size=self._config["chunk_size"],
39+
chunk_overlap=self._config["chunk_overlap"],
40+
)
3341

3442
for doc in input:
3543
self.logger.debug(f"Splitting {doc.filename}...")

0 commit comments

Comments
 (0)