PostHog · EDsCODE · Apr 7, 2023
diff --git a/ai.py b/ai.py
@@ -31,6 +31,44 @@ def update_oncalls():
 
 pipeline = MaxPipeline(openai_token=OPENAI_TOKEN)
 
+# TODO: don't repeat, consolidate with ai_chat_thread
+def ai_response(query, product_name):
+    """Answer a one-off `query` as Max, the `product_name` support AI.
+
+    The prompt context is the top document from `pipeline.retrieve_context`
+    with newlines removed, or an empty string when nothing was retrieved.
+    Returns the message content of the first chat completion choice.
+    """
+    hits = pipeline.retrieve_context(query)["documents"]
+    # An empty result must not reach the [0] lookup (IndexError): no context.
+    documents = hits[0].content.replace('\n', '') if hits else ""
+    SYSTEM_PROMPT = f"""
+    You are the trusty {product_name} support AI named Max. You are also {product_name}'s Mascot!
+    Please continue the conversation in a way that is helpful to the user and also makes the user feel like they are talking to a human.
+    Only suggest using {product_name} products or services. Do not suggest products or services from other companies.
+    Please answer the question according to the following context from the {product_name} documentation.
+    If you are unsure of the answer, please say "I'm not sure" and encourage the user to ask the current Support Hero or team secondary on-call.
+    Try not to mention <@*> in the response.
+    Current oncalls: {oncalls}
+    """
+    CONTEXT_PROMPT = f""" 
+
+    Context:
+    {documents}
+
+    ---
+
+    Now answer the following question:
+
+    """
+    prompt = [
+        {"role": "system", "content": SYSTEM_PROMPT},
+        {"role": "user", "content": CONTEXT_PROMPT + query},
+    ]
+    print(prompt)
+    completion = openai.ChatCompletion.create(model=OPENAI_MODEL, messages=prompt)
+    return completion.choices[0].message.content
+
 
 def ai_chat_thread(thread):
     result = pipeline.retrieve_context(thread[0]["content"])

diff --git a/pipeline.py b/pipeline.py
@@ -5,7 +5,7 @@
 from dotenv import load_dotenv
 from haystack import Document
 from haystack.document_stores.weaviate import WeaviateDocumentStore
-from haystack.nodes import EmbeddingRetriever, Shaper
+from haystack.nodes import EmbeddingRetriever, Shaper, Crawler, PreProcessor
 from haystack.pipelines import Pipeline
 
 load_dotenv()
@@ -113,6 +113,30 @@ def retrieve_context(self, query: str):
         result = pipeline.run(query=query, params={"Retriever": {"top_k": 10, "index": "ContextDocument"}}, debug=True)
 
         return result
+
+    def embed_from_url(self, urls: List[str]):
+        """Crawl `urls` and index the result under "ContextDocument".
+
+        Runs a crawler -> preprocessor -> document_store pipeline; the
+        preprocessor splits by passage. NOTE: untested at the moment.
+        """
+        indexing = Pipeline()
+        page_crawler = Crawler(urls=urls, crawler_depth=1)
+        splitter = PreProcessor(
+            clean_empty_lines=True,
+            clean_whitespace=True,
+            clean_header_footer=False,
+            split_by="passage",
+            split_length=500,
+            split_respect_sentence_boundary=False,
+        )
+
+        # Node names are referenced by `inputs` and by the run params below.
+        indexing.add_node(component=page_crawler, name="crawler", inputs=["File"])
+        indexing.add_node(component=splitter, name="preprocessor", inputs=["crawler"])
+        indexing.add_node(component=self.document_store, name="document_store", inputs=["preprocessor"])
+        indexing.run(params={"document_store": {"index": "ContextDocument"}})
+
 
 def split_markdown_sections(markdown_content):
     header_pattern = re.compile(r"(^#+\s+.*$)", re.MULTILINE)

diff --git a/scrape.py b/scrape.py
@@ -0,0 +1,19 @@
+from pipeline import MaxPipeline
+from dotenv import load_dotenv
+
+import os
+from ai import ai_response
+
+load_dotenv()
+
+# Only the commented-out scraping step below references this handle.
+pipeline = MaxPipeline(openai_token=os.environ.get("OPENAI_TOKEN"))
+
+# Scrape and embed (one-off step; uncomment both lines to run it before
+# asking questions):
+# pipeline.embed_from_url(urls=["https://keajs.org/docs/intro/what-is-kea"])
+# pipeline.update_embeddings()
+
+# Example usage: send one kea question through ai_response and print
+# whatever it returns.
+print(ai_response("What is a selector in kea?", "kea"))