diff --git a/RAG/notebooks/langchain/RAG_Langchain_with_Local_NIM.ipynb b/RAG/notebooks/langchain/RAG_Langchain_with_Local_NIM.ipynb index 550102dc..e31007be 100644 --- a/RAG/notebooks/langchain/RAG_Langchain_with_Local_NIM.ipynb +++ b/RAG/notebooks/langchain/RAG_Langchain_with_Local_NIM.ipynb @@ -125,7 +125,8 @@ "!pip install --upgrade pip\n", "!pip install langchain==0.2.5\n", "!pip install langchain-nvidia-ai-endpoints==0.1.2\n", - "!pip install faiss-gpu==1.7.2 # replace with faiss-cpu if you don't have a gpu" + "!pip install faiss-gpu==1.7.2 # replace with faiss-cpu if you don't have a gpu\n", + "!pip install langchain-core==0.2.6 langchain-community" ] }, { @@ -152,6 +153,36 @@ " os.environ[\"NVIDIA_API_KEY\"] = nvapi_key" ] }, + { + "cell_type": "markdown", + "id": "15e9c168", + "metadata": {}, + "source": [ + "Configure the model options" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6a8692b6", + "metadata": {}, + "outputs": [], + "source": [ + "chat_model_options = {\n", + " \"base_url\": \"http://0.0.0.0:8000/v1\",\n", + " \"model\": \"meta/llama3-8b-instruct\",\n", + " \"temperature\": 0.1,\n", + " \"max_tokens\": 1000,\n", + " \"top_p\": 1.0\n", + "}\n", + "\n", + "embeddings_model_options = {\n", + " \"base_url\": \"http://0.0.0.0:8000/v1\",\n", + " \"model\": \"NV-Embed-QA\",\n", + " \"truncate\": \"END\"\n", + "}" + ] + }, { "cell_type": "markdown", "id": "5584e3b1", @@ -169,7 +200,7 @@ "source": [ "from langchain_nvidia_ai_endpoints import ChatNVIDIA\n", "\n", - "llm = ChatNVIDIA(base_url=\"http://0.0.0.0:8000/v1\", model=\"meta/llama3-8b-instruct\", temperature=0.1, max_tokens=1000, top_p=1.0)\n", + "llm = ChatNVIDIA(**chat_model_options)\n", "\n", "result = llm.invoke(\"What is the capital of France?\")\n", "print(result.content)" @@ -342,7 +373,7 @@ " Returns:\n", " None\n", " \"\"\"\n", - " embeddings = NVIDIAEmbeddings(model=\"NV-Embed-QA\", truncate=\"END\")\n", + " embeddings = 
NVIDIAEmbeddings(**embeddings_model_options)\n", "\n", " for document in documents:\n", " texts = splitter.split_text(document.page_content)\n", @@ -378,11 +409,9 @@ "metadata": {}, "outputs": [], "source": [ - "\n", - "\n", "create_embeddings()\n", "\n", - "embedding_model = NVIDIAEmbeddings(model=\"NV-Embed-QA\", truncate=\"END\")\n" + "embedding_model = NVIDIAEmbeddings(**embeddings_model_options)\n" ] }, { @@ -420,7 +449,7 @@ "metadata": {}, "outputs": [], "source": [ - "llm = ChatNVIDIA(base_url=\"http://0.0.0.0:8000/v1\", model=\"meta/llama3-8b-instruct\", temperature=0.1, max_tokens=1000, top_p=1.0)\n", + "llm = ChatNVIDIA(**chat_model_options)\n", "\n", "memory = ConversationBufferMemory(memory_key=\"chat_history\", return_messages=True)\n", "\n",