Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
aab0378
added faiss search functionality
TylerWilsonHC-SC Aug 26, 2024
ed2ed23
added test cases for search step
TylerWilsonHC-SC Aug 26, 2024
8cf6377
added demo notebook for faiss search pipeline
TylerWilsonHC-SC Aug 26, 2024
d93fb18
added variance metric to dataset variability
TylerWilsonHC-SC Aug 26, 2024
6a627b9
added cosine similarity metric to dataset variability
TylerWilsonHC-SC Aug 26, 2024
3b9d16b
added dataset variability to faiss-search init
TylerWilsonHC-SC Aug 26, 2024
7dd458c
added normalize feature to dataset variability
TylerWilsonHC-SC Aug 26, 2024
3f0ee75
created pyproject.toml file
TylerWilsonHC-SC Aug 26, 2024
807f59d
add build and wheel to requirements
TylerWilsonHC-SC Aug 26, 2024
1203389
remove requires wheel
TylerWilsonHC-SC Aug 26, 2024
5a93831
placed wheel and build in the pyproject requirements
TylerWilsonHC-SC Aug 26, 2024
c29be0c
removed wheel and build and reformatted file processing
TylerWilsonHC-SC Aug 26, 2024
bdb764a
update gitignore
TylerWilsonHC-SC Aug 26, 2024
51a28a2
created dist files
TylerWilsonHC-SC Aug 26, 2024
572b237
update name in pyproject
TylerWilsonHC-SC Aug 26, 2024
8c2838b
bug fix: normalize function not called correctly
TylerWilsonHC-SC Aug 26, 2024
56bb004
Merge branch 'dataset-variability' into packaging
TylerWilsonHC-SC Aug 26, 2024
b71c095
added test document for dataset variability
TylerWilsonHC-SC Aug 26, 2024
f435c01
added test cases for dataset variability
TylerWilsonHC-SC Aug 26, 2024
1c57c84
Merge branch 'dataset-variability' into packaging
TylerWilsonHC-SC Aug 26, 2024
2266ea7
remove unused import
TylerWilsonHC-SC Aug 26, 2024
579925e
Merge branch 'dataset-variability' into packaging
TylerWilsonHC-SC Aug 26, 2024
bfa5bde
add gguf functionality to _embed_string
TylerWilsonHC-SC Aug 26, 2024
b43b2f8
add gguf functionality to load_embedding_model
TylerWilsonHC-SC Aug 26, 2024
2a26fff
add is_gguf field
TylerWilsonHC-SC Aug 26, 2024
288f150
remove unused variable
TylerWilsonHC-SC Aug 26, 2024
420b08b
setup in docker
TylerWilsonHC-SC Aug 26, 2024
9ab3b8f
error handling for llama-cpp in docker vs. venv setups
TylerWilsonHC-SC Aug 26, 2024
2581035
added overview and installation info
TylerWilsonHC-SC Aug 28, 2024
74433ff
added overview and installation info
TylerWilsonHC-SC Aug 28, 2024
0f8f695
copied documentation from file-processing
TylerWilsonHC-SC Aug 28, 2024
3f9da65
updated documentation for loading gguf files
TylerWilsonHC-SC Aug 28, 2024
7759ce8
added faiss index documentation
TylerWilsonHC-SC Aug 28, 2024
12dc2b2
added the dataset variability to documentation
TylerWilsonHC-SC Aug 28, 2024
c2cb9f7
updated import statements in documentation
TylerWilsonHC-SC Aug 28, 2024
fd44971
added future work and limitations
TylerWilsonHC-SC Aug 28, 2024
2e715f2
updated future work
TylerWilsonHC-SC Aug 28, 2024
ff01d15
added docstrings to the dataset variability class
TylerWilsonHC-SC Aug 29, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 18 additions & 18 deletions .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
{
"name": "My Container",
"build": {
"context": "..",
"dockerfile": "../Dockerfile"
},
"runArgs": [
"--gpus", "all"
],
"forwardPorts": [8888],
"customizations": {
"vscode": {
"extensions": [
"ms-python.python",
"ms-toolsai.jupyter"
]
}
}
{
"name": "My Container",
"build": {
"context": "..",
"dockerfile": "../Dockerfile"
},
"runArgs": [
"--gpus", "all"
],
"forwardPorts": [8888],
"customizations": {
"vscode": {
"extensions": [
"ms-python.python",
"ms-toolsai.jupyter"
]
}
}
}
74 changes: 48 additions & 26 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,27 +1,49 @@
# notebooks and generated files
test_import.ipynb

# byte-compiled
__pycache__/

# environments
.venv
venv/

# unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
.pylintrc

# vscode
# notebooks and generated files
test_import.ipynb

# byte-compiled
__pycache__/
*.py[cod]
*$py.class

# Distribution / packaging
.Python
build/
develop-eggs/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# environments
.venv
venv/

# unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
.pylintrc

# vscode
.vscode/*.*
86 changes: 43 additions & 43 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,44 +1,44 @@
# CUDA 12.2 development image with Python tooling and Jupyter.
FROM nvidia/cuda:12.2.2-devel-ubuntu22.04

# Set working directory
WORKDIR /workspace

# Keep apt non-interactive during the build only. Declared as ARG instead of
# ENV so the setting is not baked into the runtime environment.
ARG DEBIAN_FRONTEND=noninteractive

# Configure timezone
ENV TZ=UTC

# Environment variables for the Llama library: ask the llama-cpp-python
# install below to do a CMake build with cuBLAS switched on.
ENV LLAMA_CUBLAS=1 \
    CMAKE_ARGS=-DLLAMA_CUBLAS=on \
    FORCE_CMAKE=1

# Install Python, build tools, compilers, and git.
# The apt package lists are removed in the same layer so they never reach the image.
RUN apt-get update && apt-get install -y \
    python3-pip \
    python3-dev \
    build-essential \
    cmake \
    libblas-dev \
    liblapack-dev \
    gfortran \
    git \
    && rm -rf /var/lib/apt/lists/*

# Update pip and install wheel (--no-cache-dir keeps pip's download cache out of the layer)
RUN python3 -m pip install --no-cache-dir --upgrade pip wheel

# Install Requirements. Only the manifest is copied so this layer stays cached
# until requirements.txt itself changes.
COPY requirements.txt .
RUN python3 -m pip install --no-cache-dir -r requirements.txt

# Special installation for llama-cpp-python with GPU support.
# --force-reinstall replaces any copy pulled in by requirements.txt so the
# package is installed under the build flags set above.
RUN python3 -m pip install --no-cache-dir --force-reinstall --verbose llama-cpp-python==0.2.55

# Force specific numpy version. Must stay after the forced reinstall above,
# which can replace numpy as one of its dependencies.
RUN python3 -m pip install --no-cache-dir --force-reinstall numpy==1.26.2

# Expose Jupyter port
EXPOSE 8888

# Start Jupyter Notebook
# CUDA 12.2 development image with Python tooling and Jupyter.
FROM nvidia/cuda:12.2.2-devel-ubuntu22.04

# Set working directory
WORKDIR /workspace

# Keep apt non-interactive during the build only. Declared as ARG instead of
# ENV so the setting is not baked into the runtime environment.
ARG DEBIAN_FRONTEND=noninteractive

# Configure timezone
ENV TZ=UTC

# Environment variables for the Llama library: ask the llama-cpp-python
# install below to do a CMake build with cuBLAS switched on.
ENV LLAMA_CUBLAS=1 \
    CMAKE_ARGS=-DLLAMA_CUBLAS=on \
    FORCE_CMAKE=1

# Install Python, build tools, compilers, and git.
# The apt package lists are removed in the same layer so they never reach the image.
RUN apt-get update && apt-get install -y \
    python3-pip \
    python3-dev \
    build-essential \
    cmake \
    libblas-dev \
    liblapack-dev \
    gfortran \
    git \
    && rm -rf /var/lib/apt/lists/*

# Update pip and install wheel (--no-cache-dir keeps pip's download cache out of the layer)
RUN python3 -m pip install --no-cache-dir --upgrade pip wheel

# Install Requirements. Only the manifest is copied so this layer stays cached
# until requirements.txt itself changes.
COPY requirements.txt .
RUN python3 -m pip install --no-cache-dir -r requirements.txt

# Special installation for llama-cpp-python with GPU support.
# --force-reinstall replaces any copy pulled in by requirements.txt so the
# package is installed under the build flags set above.
RUN python3 -m pip install --no-cache-dir --force-reinstall --verbose llama-cpp-python==0.2.55

# Force specific numpy version. Must stay after the forced reinstall above,
# which can replace numpy as one of its dependencies.
RUN python3 -m pip install --no-cache-dir --force-reinstall numpy==1.26.2

# Expose Jupyter port
EXPOSE 8888

# Start Jupyter Notebook, listening on all interfaces so the forwarded port is reachable.
# NOTE(review): no USER is set, so the server runs as root (hence --allow-root) — confirm this is intended.
CMD ["jupyter", "notebook", "--ip=0.0.0.0", "--no-browser", "--allow-root"]
42 changes: 21 additions & 21 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -1,21 +1,21 @@
MIT License

Copyright (c) 2024 hc-sc-ocdo-bdpd

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
MIT License
Copyright (c) 2024 hc-sc-ocdo-bdpd
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
Loading