diff --git a/examples/langchain-python-cypher/README.md b/examples/langchain-python-cypher/README.md new file mode 100644 index 00000000..7e1e1935 --- /dev/null +++ b/examples/langchain-python-cypher/README.md @@ -0,0 +1,16 @@ +## About + +Here, you'll learn how to generate Cypher queries using the Llama3 model to query a Neo4j graph database. The process begins with loading data into the Neo4j database through the Neo4j Admin console. After this, graph nodes and relationships are established using the Cypher commands provided in the `data_cypher` file. Subsequently, the GraphCypherQAChain and the Llama3 model, combined with custom prompt engineering, are employed to generate a Cypher query for the question and to turn the query results into a human-readable answer. This approach allows you to extract insights from the graph data, which, in this scenario, pertains to a hypothetical gaming company. + +Sample CSV files are provided in the `data` folder. + +For the question in this script, the following Cypher query is generated and the response is formatted into a human-readable answer: +``` +Generated Cypher: +MATCH (g:Gamer)-[:PURCHASED]-(p:Purchase)-[:CONTAINS]->(game:Game) WHERE game.Game_Name = "FIFA 21" RETURN g.Gamer_ID, g.First_Name, g.Last_Name; +Full Context: +[{'g.Gamer_ID': '10', 'g.First_Name': 'Patricia', 'g.Last_Name': 'Martinez'}, {'g.Gamer_ID': '8', 'g.First_Name': 'Laura', 'g.Last_Name': 'Miller'}, {'g.Gamer_ID': '2', 'g.First_Name': 'Jane', 'g.Last_Name': 'Smith'}, {'g.Gamer_ID': '3', 'g.First_Name': 'Robert', 'g.Last_Name': 'Brown'}, {'g.Gamer_ID': '9', 'g.First_Name': 'James', 'g.Last_Name': 'Davis'}, {'g.Gamer_ID': '1', 'g.First_Name': 'John', 'g.Last_Name': 'Doe'}, {'g.Gamer_ID': '8', 'g.First_Name': 'Laura', 'g.Last_Name': 'Miller'}, {'g.Gamer_ID': '10', 'g.First_Name': 'Patricia', 'g.Last_Name': 'Martinez'}, {'g.Gamer_ID': '6', 'g.First_Name': 'Emily', 'g.Last_Name': 'Jones'}] + +> Finished chain. 
+{'query': "which players are interested in the game 'FIFA 21'?", 'result': 'Based on the provided query results, I can tell you that the following gamers are interested in the game "FIFA 21":\n\n* Patricia Martinez (Gamer ID: 10)\n* Laura Miller (Gamer ID: 8)\n* Jane Smith (Gamer ID: 2)\n* Robert Brown (Gamer ID: 3)\n* James Davis (Gamer ID: 9)\n* John Doe (Gamer ID: 1)\n* Emily Jones (Gamer ID: 6)\n\nNote that there is no additional information provided about their level of interest or any specific details about their involvement with the game.'} +``` \ No newline at end of file diff --git a/examples/langchain-python-cypher/data/gamers.csv b/examples/langchain-python-cypher/data/gamers.csv new file mode 100644 index 00000000..28d23dca --- /dev/null +++ b/examples/langchain-python-cypher/data/gamers.csv @@ -0,0 +1,11 @@ +Gamer_ID,First_Name,Last_Name,Email,Phone,City,Country +1,John,Doe,john.doe@example.com,1234567890,New York,USA +2,Jane,Smith,jane.smith@example.com,0987654321,Los Angeles,USA +3,Robert,Brown,robert.brown@example.com,1231231234,Chicago,USA +4,Linda,Johnson,linda.johnson@example.com,4321432143,Houston,USA +5,Michael,Williams,michael.williams@example.com,5678567856,Phoenix,USA +6,Emily,Jones,emily.jones@example.com,6789678967,Philadelphia,USA +7,David,Garcia,david.garcia@example.com,7890789078,San Antonio,USA +8,Laura,Miller,laura.miller@example.com,8901890189,San Diego,USA +9,James,Davis,james.davis@example.com,9012901290,Dallas,USA +10,Patricia,Martinez,patricia.martinez@example.com,1234123412,San Jose,USA diff --git a/examples/langchain-python-cypher/data/games.csv b/examples/langchain-python-cypher/data/games.csv new file mode 100644 index 00000000..36e36bc0 --- /dev/null +++ b/examples/langchain-python-cypher/data/games.csv @@ -0,0 +1,11 @@ +Game_ID,Game_Name,Category,Price +1,Chess,Strategy,10 +2,FIFA 21,Sports,50 +3,FIFA 21,Action,60 +4,Minecraft,Adventure,30 +5,The Sims,Simulation,40 +6,Fortnite,Battle Royale,0 +7,League of Legends,MOBA,0 
+8,Among Us,Party,5
+9,Cyberpunk 2077,RPG,70
+10,FIFA 21,Shooter,0
diff --git a/examples/langchain-python-cypher/data/purchases.csv b/examples/langchain-python-cypher/data/purchases.csv
new file mode 100644
index 00000000..8b246a95
--- /dev/null
+++ b/examples/langchain-python-cypher/data/purchases.csv
@@ -0,0 +1,31 @@
+Purchase_ID,Gamer_ID,Game_ID,Quantity,Purchase_Date
+1,1,1,1,2021-07-10
+2,2,2,2,2021-08-15
+3,3,3,1,2021-09-20
+4,4,4,2,2021-10-05
+5,5,5,1,2021-11-11
+6,6,6,1,2021-12-22
+7,7,7,3,2022-01-13
+8,8,8,2,2022-02-14
+9,9,9,1,2022-03-25
+10,10,10,2,2022-04-30
+11,1,3,1,2022-05-18
+12,2,4,2,2022-06-27
+13,3,5,1,2022-07-16
+14,4,6,3,2022-08-03
+15,5,7,2,2022-09-14
+16,6,8,1,2022-10-22
+17,7,9,2,2022-11-11
+18,8,10,1,2022-12-25
+19,9,1,3,2023-01-10
+20,10,2,1,2023-02-19
+21,1,5,1,2023-03-23
+22,2,6,2,2023-04-17
+23,3,7,3,2023-05-20
+24,4,8,1,2023-06-25
+25,5,9,1,2023-07-15
+26,6,10,1,2023-08-30
+27,7,1,2,2023-09-12
+28,8,2,1,2023-10-18
+29,9,3,2,2023-11-20
+30,10,4,1,2023-12-05
diff --git a/examples/langchain-python-cypher/data_cypher b/examples/langchain-python-cypher/data_cypher
new file mode 100644
index 00000000..df35d94d
--- /dev/null
+++ b/examples/langchain-python-cypher/data_cypher
@@ -0,0 +1,42 @@
+// Create nodes for Gamers.
+// MERGE on Gamer_ID (instead of CREATE) keeps the load idempotent: re-running
+// the script updates the existing nodes rather than duplicating them.
+LOAD CSV WITH HEADERS FROM 'file:///gamers.csv' AS row
+MERGE (g:Gamer {Gamer_ID: row.Gamer_ID})
+SET g.First_Name = row.First_Name,
+    g.Last_Name = row.Last_Name,
+    g.Email = row.Email,
+    g.Phone = row.Phone,
+    g.City = row.City,
+    g.Country = row.Country;
+
+// Create nodes for Games.
+// Game_ID is the merge key: games.csv repeats Game_Name values, so the name
+// alone does not identify a game.
+LOAD CSV WITH HEADERS FROM 'file:///games.csv' AS row
+MERGE (ga:Game {Game_ID: row.Game_ID})
+SET ga.Game_Name = row.Game_Name,
+    ga.Category = row.Category,
+    ga.Price = row.Price;
+
+// Create nodes for Purchases.
+// LOAD CSV yields every field as a string, so the IDs, Quantity and
+// Purchase_Date are stored as strings; convert here if typed values are needed.
+LOAD CSV WITH HEADERS FROM 'file:///purchases.csv' AS row
+MERGE (p:Purchase {Purchase_ID: row.Purchase_ID})
+SET p.Gamer_ID = row.Gamer_ID,
+    p.Game_ID = row.Game_ID,
+    p.Quantity = row.Quantity,
+    p.Purchase_Date = row.Purchase_Date;
+
+// Create 
relationships between Gamers and Purchases
+LOAD CSV WITH HEADERS FROM 'file:///purchases.csv' AS row
+MATCH (g:Gamer {Gamer_ID: row.Gamer_ID})
+MATCH (p:Purchase {Purchase_ID: row.Purchase_ID})
+MERGE (g)-[:PURCHASED]->(p);
+
+// Create relationships between Purchases and Games
+LOAD CSV WITH HEADERS FROM 'file:///purchases.csv' AS row
+MATCH (p:Purchase {Purchase_ID: row.Purchase_ID})
+MATCH (ga:Game {Game_ID: row.Game_ID})
+MERGE (p)-[:CONTAINS]->(ga);
diff --git a/examples/langchain-python-cypher/main.py b/examples/langchain-python-cypher/main.py
new file mode 100644
index 00000000..f89cfafd
--- /dev/null
+++ b/examples/langchain-python-cypher/main.py
@@ -0,0 +1,95 @@
+"""Answer a question about the gaming graph with LangChain, Neo4j and Llama3.
+
+The script connects to Neo4j, builds a GraphCypherQAChain with one prompt for
+generating Cypher and one for phrasing the query results as an answer, and
+prints the chain's output for a sample question.
+
+Connection settings default to a local Neo4j instance and can be overridden
+with the NEO4J_URI, NEO4J_USERNAME and NEO4J_PASSWORD environment variables.
+"""
+
+import os
+
+from langchain_community.graphs import Neo4jGraph
+from langchain.chains import GraphCypherQAChain
+from langchain.prompts import PromptTemplate
+from langchain_community.chat_models import ChatOllama
+
+# Read the connection settings from the environment so real credentials do not
+# have to be committed; the fallbacks are the original local-demo values.
+neo4j_config = {
+    'url': os.environ.get("NEO4J_URI", "bolt://localhost:7687"),
+    'username': os.environ.get("NEO4J_USERNAME", "neo4j"),
+    'password': os.environ.get("NEO4J_PASSWORD", "neo4jneo4j"),
+}
+
+# get graph schema from Neo4j
+graph = Neo4jGraph(**neo4j_config)
+
+# Prompt for the Cypher-generation step; the chain fills in {schema} and
+# {question}.
+question_template = """
+Task:
+Generate a Cypher query for a Neo4j graph database based on the provided question.
+Instructions:
+1. Use only the relationship types and properties defined in the schema.
+2. Ensure the direction of relationships is correct as per the schema.
+3. Alias entities and relationships appropriately in the query.
+4. Do not include any explanations, comments, or additional text, only the Cypher query.
+5. Do not perform any operations that alter the database (no CREATE, DELETE, or MERGE).
+6. Use MATCH statements to find nodes and relationships.
+7. Use RETURN statements to specify the query output.
+
+Schema:
+{schema}
+
+Question:
+{question}
+
+Output:
+Provide only the Cypher query that answers the question.
+"""
+
+question_prompt = PromptTemplate(
+    input_variables=["schema", "question"], template=question_template
+)
+
+# Prompt for the answer step; {context} receives the query results.
+answer_template = """
+Task:
+Formulate a human-readable response based on the results of a Neo4j Cypher query.
+Instructions:
+1. The provided query results are authoritative. Do not doubt or attempt to correct them using your internal knowledge.
+2. The response should directly address the user's question.
+3. If the provided information is empty (e.g., []), respond with "I don't know the answer."
+4. If the information is available, use it to construct a complete and helpful answer.
+5. Assume time durations are in days unless specified otherwise.
+6. Do not indicate uncertainty if there is data in the query results. Always use the available data to form the response.
+
+Query Results:
+{context}
+
+Question:
+{question}
+
+Output:
+Provide a helpful and accurate answer based on the query results.
+"""
+
+answer_prompt = PromptTemplate(
+    input_variables=["context", "question"], template=answer_template
+)
+
+# Both the Cypher-generation and the answer step use the llama3 model.
+gaming_cypher_chain = GraphCypherQAChain.from_llm(
+    cypher_llm=ChatOllama(model='llama3'),
+    qa_llm=ChatOllama(model='llama3'),
+    graph=graph,
+    verbose=True,
+    qa_prompt=answer_prompt,
+    cypher_prompt=question_prompt,
+    validate_cypher=True,
+    top_k=200,
+)
+
+print(gaming_cypher_chain.invoke("which players are interested in the game 'FIFA 21'?"))
diff --git a/examples/langchain-python-cypher/requirements.txt b/examples/langchain-python-cypher/requirements.txt
new file mode 100644
index 00000000..2b597fea
--- /dev/null
+++ b/examples/langchain-python-cypher/requirements.txt
@@ -0,0 +1,4 @@
+neo4j
+langchain
+langchain_openai
+langchain_community
\ No newline at end of file