From efae43f932c4e77537691b1461ee0b0bfd129f68 Mon Sep 17 00:00:00 2001
From: Jeffrey Morgan
Date: Thu, 10 Aug 2023 23:35:19 -0700
Subject: [PATCH] update `langchain` examples

---
 examples/langchain-document/README.md        |  21 ++++
 examples/langchain-document/main.py          |  61 +++++++++++
 examples/langchain-document/requirements.txt | 109 +++++++++++++++++++
 examples/langchain/README.md                 |   6 +
 examples/langchain/main.py                   |   2 +-
 5 files changed, 198 insertions(+), 1 deletion(-)
 create mode 100644 examples/langchain-document/README.md
 create mode 100644 examples/langchain-document/main.py
 create mode 100644 examples/langchain-document/requirements.txt

diff --git a/examples/langchain-document/README.md b/examples/langchain-document/README.md
new file mode 100644
index 00000000..20a73a88
--- /dev/null
+++ b/examples/langchain-document/README.md
@@ -0,0 +1,21 @@
+# LangChain Document QA
+
+This example provides an interface for asking questions about a PDF document.
+
+## Setup
+
+```
+pip install -r requirements.txt
+```
+
+## Run
+
+```
+python main.py
+```
+
+A prompt will appear, where questions may be asked:
+
+```
+Query: How many locations does WeWork have?
+```
diff --git a/examples/langchain-document/main.py b/examples/langchain-document/main.py
new file mode 100644
index 00000000..b9f98c4e
--- /dev/null
+++ b/examples/langchain-document/main.py
@@ -0,0 +1,61 @@
+from langchain.document_loaders import OnlinePDFLoader
+from langchain.vectorstores import Chroma
+from langchain.embeddings import GPT4AllEmbeddings
+from langchain import PromptTemplate
+from langchain.llms import Ollama
+from langchain.callbacks.manager import CallbackManager
+from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
+from langchain.chains import RetrievalQA
+import sys
+import os
+
+class SuppressStdout:
+    """Context manager that points sys.stdout and sys.stderr at os.devnull until exit."""
+    def __enter__(self):
+        self._original_stdout = sys.stdout
+        self._original_stderr = sys.stderr
+        sys.stdout = open(os.devnull, 'w')
+        sys.stderr = open(os.devnull, 'w')
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        sys.stdout.close()
+        sys.stderr.close()  # previously only stdout's devnull handle was closed; this one leaked
+        sys.stdout = self._original_stdout
+        sys.stderr = self._original_stderr
+
+# load the pdf and split it into chunks
+loader = OnlinePDFLoader("https://d18rn0p25nwr6d.cloudfront.net/CIK-0001813756/975b3e9b-268e-4798-a9e4-2a9a7c92dc10.pdf")
+data = loader.load()
+
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
+all_splits = text_splitter.split_documents(data)
+
+with SuppressStdout():  # hide whatever the embedding / vector store setup prints
+    vectorstore = Chroma.from_documents(documents=all_splits, embedding=GPT4AllEmbeddings())
+
+# The prompt, model and chain do not depend on the query: build them once, not per question
+template = """Use the following pieces of context to answer the question at the end.
+    If you don't know the answer, just say that you don't know, don't try to make up an answer.
+    Use three sentences maximum and keep the answer as concise as possible.
+    {context}
+    Question: {question}
+    Helpful Answer:"""
+QA_CHAIN_PROMPT = PromptTemplate(
+    input_variables=["context", "question"],
+    template=template,
+)
+llm = Ollama(model="llama2:13b", callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]))
+qa_chain = RetrievalQA.from_chain_type(
+    llm,
+    retriever=vectorstore.as_retriever(),
+    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
+)
+
+while True:  # question loop: "exit" quits, blank input re-prompts
+    query = input("\nQuery: ")
+    if query == "exit":
+        break
+    if query.strip() == "":
+        continue
+    result = qa_chain({"query": query})
\ No newline at end of file
diff --git a/examples/langchain-document/requirements.txt b/examples/langchain-document/requirements.txt
new file mode 100644
index 00000000..09a54191
--- /dev/null
+++ b/examples/langchain-document/requirements.txt
@@ -0,0 +1,109 @@
+absl-py==1.4.0
+aiohttp==3.8.5
+aiosignal==1.3.1
+anyio==3.7.1
+astunparse==1.6.3
+async-timeout==4.0.3
+attrs==23.1.0
+backoff==2.2.1
+beautifulsoup4==4.12.2
+bs4==0.0.1
+cachetools==5.3.1
+certifi==2023.7.22
+cffi==1.15.1
+chardet==5.2.0
+charset-normalizer==3.2.0
+Chroma==0.2.0
+chroma-hnswlib==0.7.2
+chromadb==0.4.5
+click==8.1.6
+coloredlogs==15.0.1
+cryptography==41.0.3
+dataclasses-json==0.5.14
+fastapi==0.99.1
+filetype==1.2.0
+flatbuffers==23.5.26
+frozenlist==1.4.0
+gast==0.4.0
+google-auth==2.22.0
+google-auth-oauthlib==1.0.0
+google-pasta==0.2.0
+gpt4all==1.0.8
+grpcio==1.57.0
+h11==0.14.0
+h5py==3.9.0
+httptools==0.6.0
+humanfriendly==10.0
+idna==3.4
+importlib-resources==6.0.1
+joblib==1.3.2
+keras==2.13.1
+langchain==0.0.261
+langsmith==0.0.21
+libclang==16.0.6
+lxml==4.9.3
+Markdown==3.4.4
+MarkupSafe==2.1.3
+marshmallow==3.20.1
+monotonic==1.6
+mpmath==1.3.0
+multidict==6.0.4
+mypy-extensions==1.0.0
+nltk==3.8.1
+numexpr==2.8.5
+numpy==1.24.3
+oauthlib==3.2.2
+onnxruntime==1.15.1
+openapi-schema-pydantic==1.2.4
+opt-einsum==3.3.0
+overrides==7.4.0
+packaging==23.1
+pdf2image==1.16.3
+pdfminer==20191125
+pdfminer.six==20221105
+Pillow==10.0.0
+posthog==3.0.1
+protobuf==4.24.0
+pulsar-client==3.2.0
+pyasn1==0.5.0
+pyasn1-modules==0.3.0
+pycparser==2.21
+pycryptodome==3.18.0
+pydantic==1.10.12
+PyPika==0.48.9
+python-dateutil==2.8.2
+python-dotenv==1.0.0
+python-magic==0.4.27
+PyYAML==6.0.1
+regex==2023.8.8
+requests==2.31.0
+requests-oauthlib==1.3.1
+rsa==4.9
+six==1.16.0
+sniffio==1.3.0
+soupsieve==2.4.1
+SQLAlchemy==2.0.19
+starlette==0.27.0
+sympy==1.12
+tabulate==0.9.0
+tenacity==8.2.2
+tensorboard==2.13.0
+tensorboard-data-server==0.7.1
+tensorflow==2.13.0
+tensorflow-estimator==2.13.0
+tensorflow-hub==0.14.0
+tensorflow-macos==2.13.0
+termcolor==2.3.0
+tokenizers==0.13.3
+tqdm==4.66.1
+typing-inspect==0.9.0
+typing_extensions==4.5.0
+unstructured==0.9.2
+urllib3==1.26.16
+uvicorn==0.23.2
+uvloop==0.17.0
+watchfiles==0.19.0
+websockets==11.0.3
+Werkzeug==2.3.6
+wrapt==1.15.0
+yarl==1.9.2
diff --git a/examples/langchain/README.md b/examples/langchain/README.md
index 0319a191..7fa84a3a 100644
--- a/examples/langchain/README.md
+++ b/examples/langchain/README.md
@@ -13,3 +13,9 @@ pip install -r requirements.txt
 ```
 python main.py
 ```
+
+Running this example will print the response for "hello":
+
+```
+Hello! It's nice to meet you. hopefully you are having a great day! Is there something I can help you with or would you like to chat?
+```
diff --git a/examples/langchain/main.py b/examples/langchain/main.py
index 620c13cb..c8cde83b 100644
--- a/examples/langchain/main.py
+++ b/examples/langchain/main.py
@@ -1,4 +1,4 @@
 from langchain.llms import Ollama
 llm = Ollama(model="llama2")
-res = llm.predict("hi!")
+res = llm.predict("hello")
 print (res)