diff --git a/docs_loader.py b/docs_loader.py
index 41d292a..09f281b 100755
--- a/docs_loader.py
+++ b/docs_loader.py
@@ -35,7 +35,7 @@
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_community.vectorstores import Chroma
-from langchain_community.embeddings import HuggingFaceEmbeddings
+from langchain_huggingface import HuggingFaceEmbeddings
 
 # Load environment variables
 default_num_processes = os.getenv('DEFAULT_NUM_PROCESSES')
@@ -266,9 +266,6 @@ def main():
         else:
             db.add_documents(batch_texts)
 
-    if db is not None:
-        db.persist()
-
     print(f"Documents are ready! You can now run vaultChat.py to query your model with your private documents")
diff --git a/install.sh b/install.sh
index bcbc888..275c048 100755
--- a/install.sh
+++ b/install.sh
@@ -117,7 +117,7 @@ chmod +x docs_loader.py vaultChat.py
 # Final instructions for smart and good looking customers
 printf "\n >>> Installation was successful!\n"
 printf "\n >>> Run './docs_loader.py' to prepare your private data.\n"
-printf "\n >>> Important! Run './docs_loader.py' again every time you change documents in the $SOURCE_DIRECTORY directory.\n"
+printf "\n >>> Important! Run './docs_loader.py' again every time you change documents in your directory.\n"
 printf "\n >>> Run './vaultChat.py' to start the application after your private data store creation or update.\n"
 
 # That's all, folks
diff --git a/requirements.txt b/requirements.txt
index 26a13a7..5ecd1b0 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,15 +1,16 @@
-langchain==0.1.14
-gpt4all==2.4.0
-chromadb==0.4.24
-PyMuPDF==1.24.1
+langchain==0.2.6
+gpt4all==2.7.0
+chromadb==0.5.3
+PyMuPDF==1.24.7
 python-dotenv
 extract-msg==0.48.5
 tabulate==0.9.0
 pandoc
 pypandoc==1.13
-tqdm==4.66.2
-sentence_transformers==2.6.1
-langchain_community==0.0.31
+tqdm==4.66.4
+sentence_transformers==3.0.1
+langchain_community==0.2.6
+langchain-huggingface==0.0.3
 unstructured
 markdown
-
+psutil
diff --git a/vaultChat.py b/vaultChat.py
index 210b9a6..463acf4 100755
--- a/vaultChat.py
+++ b/vaultChat.py
@@ -6,11 +6,12 @@
 from dotenv import load_dotenv
 from chromadb.config import Settings
 from langchain.chains import RetrievalQA
-from langchain_community.embeddings import HuggingFaceEmbeddings
+from langchain_huggingface import HuggingFaceEmbeddings
 from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
 from langchain_community.vectorstores import Chroma
 from langchain_community.llms import Ollama
 import logging
+from datetime import datetime
 
 # Load environment variables from .env file
 load_dotenv()
@@ -23,7 +24,6 @@
 ANONYMIZE_TELEMETRY = os.getenv('ANONYMIZE_TELEMETRY', 'True') == 'True'
 TARGET_SOURCE_CHUNKS = int(os.getenv('TARGET_SOURCE_CHUNKS', 5))
 
-
 # Define anonymize telemetry for Chroma DB
 client = chromadb.Client(Settings(anonymized_telemetry=ANONYMIZE_TELEMETRY))
@@ -54,45 +54,87 @@ def main():
 def interactive_qa(qa, args, history):
     # Usage instructions
-    print(f"\n\033[31;47m>>> Ready for private chat. Exit the session by typing 'exit' or '/bye'\033[0m")
-    """Run interactive question and answer session."""
+    print(f"\n\033[31;47m>>> Ready for private chat. Exit the session by typing 'exit' or '/bye'. Save the chat by typing '/save <summary_name>'.\033[0m")
+    """Run interactive question and answer sessions with your private data."""
     while True:
         query = input("\n\033[31;47m>>> Enter a question: \033[0m").strip().lower()  # Normalize the input to handle case-insensitivity
         if query in ["exit", "/bye"]:
             print("Exiting. Goodbye!")
             break
+        elif query.startswith("/save"):
+            save_chat_history(history, query)
+            continue
         if not query:
             continue
 
         try:
             start = time.time()
-            result = qa.invoke(query)
+            for output in qa_invoke_streaming(qa, query, history):
+                print(output, end='', flush=True)
             end = time.time()
-            print_answer(result, query, args, start, end)
+            print(f"\n >>> Processing time: {end - start:.2f} seconds")
         except Exception as e:
             logging.error(f"Error processing query: {e}")
 
-def print_answer(result, query, args, start, end):
-    """Prints the answer and relevant sources with colored output for the source document indicator."""
-    answer, docs = result['result'], [] if args.hide_source else result['source_documents']
-    print(f"\n\n> Question: {query}\n{answer}")
-    # Color code for the source document line. You can change the color by modifying the ANSI code.
-    color_code = "\033[94m"  # Bright blue color
-    reset_code = "\033[0m"  # Resets the color to default
-    for document in docs:
-        # Apply color only to the line indicating the source document
-        source_line = f"{color_code}> {document.metadata['source']}{reset_code}"
-        print(f"\n{source_line}:\n{document.page_content}")
-    print(f"\n >>> Processing time: {end - start:.2f} seconds")
+def qa_invoke_streaming(qa, query, history):
+    """Invoke the QA system with streaming output."""
+    result = qa.invoke(query)
+    history.append(f"### Question: {query}\n{result['result']}\n")
+
+    yield f"\n\n> Question: {query}\n{result['result']}"
+
+    if 'source_documents' in result:
+        color_code = "\033[94m"  # Bright blue color
+        reset_code = "\033[0m"  # Resets the color to default
+        sources_text = ""
+        for document in result['source_documents']:
+            source_line = f"{color_code}> {document.metadata['source']}{reset_code}"
+            source_content = f"{source_line}:\n{document.page_content}"
+            sources_text += f"\n{source_content}"
+            yield f"\n{source_content}"
+        history[-1] += sources_text
+
+# def save_chat_history(history, query):
+#     """Save the chat history to a markdown file."""
+#     try:
+#         _, summary_name = query.split(maxsplit=1)
+#         timestamp = datetime.now().strftime("%y%m%d%H%M")
+#         file_name = f"{timestamp}_{summary_name.replace(' ', '_').lower()}.md"
+#         with open(file_name, 'w') as f:
+#             f.write("# Chat History\n\n")
+#             f.writelines(history)
+#         print(f"Chat history saved as {file_name}")
+#     except ValueError:
+#         print("Invalid command. Use /save <summary_name>")
+
+
+def save_chat_history(history, query):
+    """Save the chat history to a markdown file in the 'chats_history' folder."""
+    try:
+        # Create 'chats_history' folder if non-existent
+        os.makedirs("chats_history", exist_ok=True)
+
+        _, summary_name = query.split(maxsplit=1)
+        timestamp = datetime.now().strftime("%y%m%d%H%M")
+        file_name = f"{timestamp}_{summary_name.replace(' ', '_').lower()}.md"
+        file_path = os.path.join("chats_history", file_name)
+
+        with open(file_path, 'w') as f:
+            f.write("# Chat History\n\n")
+            f.writelines(history)
+
+        print(f"Chat history saved as {file_path}")
+    except ValueError:
+        print("Invalid command. Use /save <summary_name>")
+
 
 def parse_arguments():
     """Parse command-line arguments."""
     parser = argparse.ArgumentParser(description='VaultChat: Ask questions about your documents via a LLM.')
     parser.add_argument("--hide-source", "-S", action='store_true', help='Disable printing of source documents used for answers.')
-    # Use --streaming flag to enable streaming StdOut callback
-    parser.add_argument("--streaming", action='store_true', help='Enable the streaming StdOut callback for LLMs.')
+    parser.add_argument("--streaming", action='store_true', help='Enable the streaming from LLMs.')
     return parser.parse_args()
 
 if __name__ == "__main__":