From 11740c30d7ffcd41003a80e983e7affe18ee5d47 Mon Sep 17 00:00:00 2001
From: suoko
Date: Mon, 30 Oct 2023 22:40:14 +0100
Subject: [PATCH] fix gradio and folder name

---
 examples/langchain-docscanner/main.py         | 86 ------------------
 .../askGradio.py                              |  0
 .../langchain-python-langchain-webui/main.py  | 80 +++++++++++++++++
 .../readme.md                                 |  0
 .../requirements.txt                          |  0
 5 files changed, 80 insertions(+), 86 deletions(-)
 delete mode 100644 examples/langchain-docscanner/main.py
 rename examples/{langchain-docscanner => langchain-python-langchain-webui}/askGradio.py (100%)
 create mode 100644 examples/langchain-python-langchain-webui/main.py
 rename examples/{langchain-docscanner => langchain-python-langchain-webui}/readme.md (100%)
 rename examples/{langchain-docscanner => langchain-python-langchain-webui}/requirements.txt (100%)

diff --git a/examples/langchain-docscanner/main.py b/examples/langchain-docscanner/main.py
deleted file mode 100644
index 597464ba..00000000
--- a/examples/langchain-docscanner/main.py
+++ /dev/null
@@ -1,86 +0,0 @@
-import gradio as gr
-
-import sys
-import os
-from collections.abc import Iterable
-
-from langchain.document_loaders import PyPDFLoader, Docx2txtLoader, TextLoader, UnstructuredHTMLLoader
-
-from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter
-
-from langchain.chains import RetrievalQA
-
-from langchain.llms import Ollama
-
-from langchain.vectorstores import FAISS, Chroma
-
-from langchain.embeddings import GPT4AllEmbeddings, CacheBackedEmbeddings
-
-from langchain.storage import LocalFileStore#, RedisStore, UpstashRedisStore, InMemoryStore
-
-ollama = Ollama(base_url='http://localhost:11434',
-#model="codellama")
-#model="starcoder")
-model="llama2")
-
-docsUrl = "/home/user/dev/docs"
-
-documents = []
-for file in os.listdir(docsUrl):
-
-    if file.endswith(".pdf"):
-        pdf_path = docsUrl + "/" + file
-        loader = PyPDFLoader(pdf_path)
-        documents.extend(loader.load())
-        print("Found " + pdf_path)
-    elif file.endswith('.docx') or file.endswith('.doc'):
-        doc_path = docsUrl + "/" + file
-        loader = Docx2txtLoader(doc_path)
-        documents.extend(loader.load())
-        print("Found " + doc_path)
-    elif file.endswith('.txt') or file.endswith('.kt') or file.endswith('.json'):
-        text_path = docsUrl + "/" + file
-        loader = TextLoader(text_path)
-        documents.extend(loader.load())
-        print("Found " + text_path)
-    elif file.endswith('.html') or file.endswith('.htm'):
-        htm_path = docsUrl + "/" + file
-        loader = UnstructuredHTMLLoader(htm_path)
-        documents.extend(loader.load())
-        print("Found " + htm_path)
-
-
-text_splitter = CharacterTextSplitter(chunk_size=32, chunk_overlap=32)
-all_splits = text_splitter.split_documents(documents)
-
-
-
-#fs = LocalFileStore("/home/gabriele/dev/cache/")
-
-#underlying_embeddings = GPT4AllEmbeddings()
-#cached_embedder = CacheBackedEmbeddings.from_bytes_store(
-#    underlying_embeddings, fs, namespace=underlying_embeddings.model
-#)
-
-
-
-vectorstore = Chroma.from_documents(documents=all_splits, embedding=GPT4AllEmbeddings(embeddings_chunk_size=1000))
-#vectorstore = FAISS.from_documents(documents=all_splits, embedding=cached_embedder)
-
-
-def AI_response(question, history):
-    docs = vectorstore.similarity_search(question)
-    len(docs)
-    qachain=RetrievalQA.from_chain_type(ollama, retriever=vectorstore.as_retriever())
-    #reply=qachain()
-    #reply=str(qachain({"query": question}))
-    reply=str(qachain.run(question))
-    return reply
-
-
-
-demo = gr.ChatInterface(AI_response, title="Put your files in folder" + docsUrl)
-
-if __name__ == "__main__":
-    demo.launch(server_name="0.0.0.0", server_port=7860)
-
diff --git a/examples/langchain-docscanner/askGradio.py b/examples/langchain-python-langchain-webui/askGradio.py
similarity index 100%
rename from examples/langchain-docscanner/askGradio.py
rename to examples/langchain-python-langchain-webui/askGradio.py
diff --git a/examples/langchain-python-langchain-webui/main.py b/examples/langchain-python-langchain-webui/main.py
new file mode 100644
index 00000000..ce6dd812
--- /dev/null
+++ b/examples/langchain-python-langchain-webui/main.py
@@ -0,0 +1,80 @@
+#! /usr/bin/python3.10
+
+import gradio as gr
+import sys
+import os
+import subprocess
+from collections.abc import Iterable
+from langchain.document_loaders import PyPDFLoader, Docx2txtLoader, TextLoader, UnstructuredHTMLLoader
+from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter
+from langchain.chains import RetrievalQA
+from langchain.llms import Ollama
+from langchain.vectorstores import FAISS, Chroma
+from langchain.embeddings import GPT4AllEmbeddings, CacheBackedEmbeddings
+from langchain.storage import LocalFileStore#, RedisStore, UpstashRedisStore, InMemoryStore
+
+docsUrl = "/home/user/dev/docs"
+ollamaModel="llama2"
+
+def get_ollama_names():
+    output = subprocess.check_output(["ollama", "list"])
+    lines = output.decode("utf-8").splitlines()
+    names = {}
+    for line in lines[1:]:
+        name = line.split()[0].split(':')[0]
+        names[name] = name
+    return names
+
+names = get_ollama_names()
+
+def greet(name):
+    global ollamaModel
+    ollamaModel=name
+    return f"{name}"
+
+dropdown = gr.Dropdown(label="Models available", choices=names, value="llama2")
+textbox = gr.Textbox(label="You chose")
+
+def AI_response(question, history):
+    ollama = Ollama(base_url='http://localhost:11434', model=ollamaModel)
+    print(ollamaModel)
+    documents = []
+    for file in os.listdir(docsUrl):
+        if file.endswith(".pdf"):
+            pdf_path = docsUrl + "/" + file
+            loader = PyPDFLoader(pdf_path)
+            documents.extend(loader.load())
+            print("Found " + pdf_path)
+        elif file.endswith('.docx') or file.endswith('.doc'):
+            doc_path = docsUrl + "/" + file
+            loader = Docx2txtLoader(doc_path)
+            documents.extend(loader.load())
+            print("Found " + doc_path)
+        elif file.endswith('.txt') or file.endswith('.kt') or file.endswith('.json'):
+            text_path = docsUrl + "/" + file
+            loader = TextLoader(text_path)
+            documents.extend(loader.load())
+            print("Found " + text_path)
+        elif file.endswith('.html') or file.endswith('.htm'):
+            htm_path = docsUrl + "/" + file
+            loader = UnstructuredHTMLLoader(htm_path)
+            documents.extend(loader.load())
+            print("Found " + htm_path)
+
+    text_splitter = CharacterTextSplitter(chunk_size=32, chunk_overlap=32)
+    all_splits = text_splitter.split_documents(documents)
+    vectorstore = Chroma.from_documents(documents=all_splits, embedding=GPT4AllEmbeddings(embeddings_chunk_size=1000))
+    docs = vectorstore.similarity_search(question)
+    len(docs)
+    qachain=RetrievalQA.from_chain_type(ollama, retriever=vectorstore.as_retriever())
+    reply=str(qachain.run(question))
+    return reply
+
+
+
+with gr.Blocks() as demo:
+    interface = gr.Interface(fn=greet, inputs=[dropdown], outputs=[textbox], title="Choose a LLM model")
+    chat = gr.ChatInterface(AI_response, title="Put your files in folder " + docsUrl)
+
+demo.launch()
+
diff --git a/examples/langchain-docscanner/readme.md b/examples/langchain-python-langchain-webui/readme.md
similarity index 100%
rename from examples/langchain-docscanner/readme.md
rename to examples/langchain-python-langchain-webui/readme.md
diff --git a/examples/langchain-docscanner/requirements.txt b/examples/langchain-python-langchain-webui/requirements.txt
similarity index 100%
rename from examples/langchain-docscanner/requirements.txt
rename to examples/langchain-python-langchain-webui/requirements.txt
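Note for reviewers: the new main.py populates the Gradio dropdown by shelling
out to `ollama list` and parsing its table output. A minimal standalone sketch
of that parsing step, runnable outside Gradio, might look like the following
(it assumes the `ollama` CLI is installed and its server is running locally;
`get_models` is a hypothetical helper name, not part of this patch, and it
returns a plain list, which gr.Dropdown also accepts as choices):

    import subprocess

    def get_models():
        # `ollama list` prints a header row, then one "NAME:TAG ID SIZE MODIFIED" row per model
        output = subprocess.check_output(["ollama", "list"]).decode("utf-8")
        # skip the header row and keep the bare model name before the ":tag" suffix
        return [line.split()[0].split(":")[0]
                for line in output.splitlines()[1:] if line.strip()]

    if __name__ == "__main__":
        print(get_models())  # e.g. ['llama2', 'codellama'], depending on pulled models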