Skip to content

Commit c1306e6

Browse files
authored
Merge pull request #89 from BillFarber/task/extendExamples
Added an example of a retriever with a contextual query
2 parents 94b2beb + 102a34c commit c1306e6

File tree

4 files changed

+138
-8
lines changed

4 files changed

+138
-8
lines changed

examples/langchain/README.md

+21-7
Original file line numberDiff line numberDiff line change
@@ -57,22 +57,36 @@ AZURE_LLM_DEPLOYMENT_MODEL=gpt-35-turbo
5757

5858
## Testing using a retriever with a basic query
5959

60-
You are now ready to test the example retriever. Run the following to ask a question with the
61-
results augmented via the `marklogic_retriever.py` module in this project; you will be
62-
prompted for an AzureOpenAI API key when you run this, which you can type or paste in:
60+
You are now ready to test the example retriever. Run the following to ask a question
61+
with the results augmented via the `marklogic_similar_query_retriever.py` module in this
62+
project:
6363

6464
python ask_similar_query.py "What is task decomposition?" posts
6565

66-
The retriever uses a [cts.similarQuery](https://docs.marklogic.com/cts.similarQuery) to select from the documents
67-
loaded via `load_data.py`. It defaults to a page length of 10. You can change this by providing a command line
68-
argument - e.g.:
66+
The retriever uses a [cts.similarQuery](https://docs.marklogic.com/cts.similarQuery) to
67+
select from the documents loaded via `load_data.py`. It defaults to a page length of 10.
68+
You can change this by providing a command line argument - e.g.:
6969

7070
python ask_similar_query.py "What is task decomposition?" posts 15
7171

7272
Example of a question for the "sotu" (State of the Union speech) collection:
7373

7474
python ask_similar_query.py "What are economic sanctions?" sotu 20
7575

76-
To use a word query instead of a similar query, along with a set of drop words, specify "word" as the 4th argument:
76+
To use a word query instead of a similar query, along with a set of drop words, specify
77+
"word" as the 4th argument:
7778

7879
python ask_similar_query.py "What are economic sanctions?" sotu 20 word
80+
81+
## Testing using a retriever with a contextual query
82+
83+
There may be times when your LangChain application needs to use both a question and a
84+
structured query during the document retrieval process. To see an example of this, run
85+
the following to ask a question. That question is combined with a hard-coded structured
86+
query using the `marklogic_contextual_query_retriever.py` module in this project.
87+
88+
python ask_contextual_query.py "What is task decomposition?" posts
89+
90+
This retriever builds a term-query using words from the question. Then the term-query is
91+
added to the structured query and the merged query is used to select from the documents
92+
loaded via `load_data.py`.
+72
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
# Based on example at
2+
# https://python.langchain.com/docs/use_cases/question_answering/quickstart .
3+
4+
import os
5+
import sys
6+
from dotenv import load_dotenv
7+
from langchain import hub
8+
from langchain_openai import AzureChatOpenAI
9+
from langchain.schema import StrOutputParser
10+
from langchain.schema.runnable import RunnablePassthrough
11+
from marklogic import Client
12+
from marklogic_contextual_query_retriever import (
13+
MarkLogicContextualQueryRetriever,
14+
)
15+
16+
17+
def format_docs(docs):
    """Concatenate each document's page content, separated by blank lines."""
    contents = [doc.page_content for doc in docs]
    return "\n\n".join(contents)
19+
20+
21+
# Command-line arguments: argv[1] is the user question, argv[2] is the
# MarkLogic collection to search, and optional argv[3] caps the result count.
question = sys.argv[1]

retriever = MarkLogicContextualQueryRetriever.create(
    Client("http://localhost:8003", digest=("langchain-user", "password"))
)
retriever.collections = [sys.argv[2]]
# Default to a page length of 10 documents unless one is supplied.
retriever.max_results = int(sys.argv[3]) if len(sys.argv) > 3 else 10
# NOTE(review): a 4th "query type" argument was previously assigned to
# retriever.query_type, copied from ask_similar_query.py. The contextual
# retriever declares no `query_type` field (only client/max_results/
# collections), so that assignment would fail on the pydantic-based model;
# the argument is intentionally not read here.

load_dotenv()

prompt = hub.pull("rlm/rag-prompt")
# Note that the Azure OpenAI API key, the Azure OpenAI Endpoint, and the OpenAI API
# Version, are all read from the environment automatically.
llm = AzureChatOpenAI(
    model_name=os.getenv("AZURE_LLM_DEPLOYMENT_NAME"),
    azure_deployment=os.getenv("AZURE_LLM_DEPLOYMENT_NAME"),
    temperature=0,
    max_tokens=None,
    timeout=None,
)

# Hard-coded structured query demonstrating the "contextual" part of the
# retrieval: only documents where "role" occurs near "assistant" can match.
# The retriever merges a term-query built from the question into this.
contextual_query = {
    "query": {
        "queries": [
            {
                "near-query": [
                    {"word-query": ["role"]},
                    {"word-query": ["assistant"]},
                ]
            },
        ]
    },
}
chat_context = {"question": question, "contextual_query": contextual_query}
57+
58+
59+
def get_question():
    """Return the user's question from the module-level ``chat_context``."""
    question_text = chat_context["question"]
    return question_text
61+
62+
63+
# Compose the RAG chain: fan the input out so the retriever supplies the
# "context" value while the original question is passed straight through,
# then run prompt -> LLM -> plain-string output parser.
input_map = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough().pick("question"),
}
rag_chain = input_map | prompt | llm | StrOutputParser()
print(rag_chain.invoke(input=chat_context))

examples/langchain/docker-compose.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
version: '3.8'
2-
name: marklogic_langchain
2+
name: marklogic_python_example_langchain
33

44
services:
55

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
from typing import List
2+
from langchain_core.documents import Document
3+
from langchain_core.retrievers import (
4+
BaseRetriever,
5+
)
6+
from marklogic import Client
7+
8+
"""
9+
Modeled after
10+
https://github.com/langchain-ai/langchain/blob/master/libs/langchain/langchain/retrievers/elastic_search_bm25.py ,
11+
which uses a `create` method instead of __init__.
12+
"""
13+
14+
15+
class MarkLogicContextualQueryRetriever(BaseRetriever):
    """Retriever that merges a user question with a caller-supplied
    MarkLogic structured ("contextual") query.

    Words from the question are lower-cased, stripped of question marks,
    and folded into a term-query, which is combined with the contextual
    query before searching MarkLogic.
    """

    # MarkLogic REST client used to run the document search.
    client: Client
    # Page length passed to the search; defaults to 10 documents.
    max_results: int = 10
    # Collections to constrain the search to; empty means no constraint.
    collections: List[str] = []

    @classmethod
    def create(cls, client: Client):
        """Alternate constructor (mirrors the elastic_search_bm25 example)."""
        return cls(client=client)

    def _get_relevant_documents(
        self,
        chat_context: object,
    ) -> List[Document]:
        """Search MarkLogic using the question plus the contextual query.

        ``chat_context`` is expected to be a dict with a "question" string
        and a "contextual_query" structured query.
        """
        # Build a term-query from the question's words; dropping "?" lets
        # e.g. "decomposition?" match "decomposition".
        search_words = [
            word.lower().replace("?", "")
            for word in chat_context["question"].split()
        ]
        term_query = {"term-query": {"text": search_words}}

        contextual_query = chat_context["contextual_query"]
        print(f"contextual_query: {contextual_query}")

        # Merge without mutating the caller's query: the original appended
        # the term-query in place, so invoking the retriever repeatedly
        # accumulated stale term-queries in the shared dict.
        merged_query = {
            **contextual_query,
            "query": {
                **contextual_query["query"],
                "queries": contextual_query["query"]["queries"] + [term_query],
            },
        }

        print(f"Searching with query: {merged_query}")
        results = self.client.documents.search(
            query=merged_query,
            page_length=self.max_results,
            collections=self.collections,
        )
        print(f"Count of matching MarkLogic documents: {len(results)}")
        # Return a real list (the original returned a single-use `map`
        # iterator) so the declared List[Document] return type holds.
        return [Document(page_content=doc.content) for doc in results]

0 commit comments

Comments
 (0)