diff --git a/applications/holochat_local/Dockerfile b/applications/holochat_local/Dockerfile
index 46c1135a8..23187038a 100644
--- a/applications/holochat_local/Dockerfile
+++ b/applications/holochat_local/Dockerfile
@@ -23,6 +23,7 @@ WORKDIR /workspace
 COPY requirements.txt /tmp/requirements.txt
 RUN python3 -m pip install --no-cache-dir -r /tmp/requirements.txt
 
+# Clone Llama.cpp and check out a stable commit
 RUN git clone https://github.com/ggerganov/llama.cpp.git \
     && cd llama.cpp \
     && git checkout cf9b08485c4c2d4d945c6e74fe20f273a38b6104 \
diff --git a/applications/holochat_local/Makefile b/applications/holochat_local/Makefile
index a28c110c4..1d723e238 100644
--- a/applications/holochat_local/Makefile
+++ b/applications/holochat_local/Makefile
@@ -13,13 +13,16 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+# Target that runs all commands needed to run HoloChat
 .PHONY: run_holochat
 run_holochat: build_llamaCpp build_db download_llama start_holochat
 
+# Creates the container used for HoloChat and compiles Llama.cpp
 .PHONY: build_llamaCpp
 build_llamaCpp:
	docker build --ulimit memlock=-1 --ulimit stack=67108864 -t holochat .
 
+# Creates the vector database used by HoloChat
 .PHONY: build_db
 build_db:
	mkdir -p holochat/embeddings
@@ -36,20 +39,29 @@ build_db:
		holochat \
		python3 build_holoscan_db.py \
 
+# Downloads the Code Llama model used by HoloChat
 .PHONY: download_llama
 download_llama:
	mkdir -p holochat/docs
	wget -nc -P ./holochat/models https://huggingface.co/TheBloke/Phind-CodeLlama-34B-v2-GGUF/resolve/main/phind-codellama-34b-v2.Q5_K_M.gguf
 
+# Runs HoloChat inside the PyTorch container
 .PHONY: start_holochat
 start_holochat:
	docker run --rm -it \
		-p 7860:7860 \
		-p 8080:8080 \
		--gpus all \
-		--ipc=host --ulimit memlock=-1 \
+		--ipc=host \
+		--ulimit memlock=-1 \
		--ulimit stack=67108864 \
		-v ./holochat:/holochat \
		-w /holochat \
		holochat \
-	bash -c "/workspace/llama.cpp/build/bin/server -m /holochat/models/phind-codellama-34b-v2.Q5_K_M.gguf --host 0.0.0.0 -ngl 1000 -c 4096 --alias llama_2 & python3 -u chatbot.py"
+	bash -c "/workspace/llama.cpp/build/bin/server \
+		-m /holochat/models/phind-codellama-34b-v2.Q5_K_M.gguf \
+		--host 0.0.0.0 \
+		-ngl 1000 \
+		-c 4096 \
+		--alias llama_2 \
+		& python3 -u chatbot.py"
\ No newline at end of file
diff --git a/applications/holochat_local/README.md b/applications/holochat_local/README.md
index 15329f228..57a22ed85 100644
--- a/applications/holochat_local/README.md
+++ b/applications/holochat_local/README.md
@@ -1,6 +1,6 @@
 # HoloChat-local
 
-HoloChat-local is an AI-driven chatbot, built on top of a local Code Llama model running on IGX Orin. The chatbot leverages vector databases to generate human-like responses and write code.
+HoloChat-local is an AI-driven chatbot built on top of a locally hosted Code Llama model that acts as a developer's copilot for Holoscan development. The model leverages a vector database composed of the Holoscan SDK repository and user guide, enabling HoloChat to answer general questions about Holoscan as well as act as a Holoscan SDK coding assistant.

 HoloChat Demo
diff --git a/applications/holochat_local/holochat/build_holoscan_db.py b/applications/holochat_local/holochat/build_holoscan_db.py
index 3cd5311a0..1e1353742 100644
--- a/applications/holochat_local/holochat/build_holoscan_db.py
+++ b/applications/holochat_local/holochat/build_holoscan_db.py
@@ -35,6 +35,8 @@ def main():
 
     content_lists = {file_type: [] for file_type in file_types}
     total_files = 0
+
+    # Loop over each repo and create a Document for each file found
     for repo in repos:
         clone_repository(repo, "")
         for file_type in file_types:
@@ -53,6 +55,7 @@ def main():
             )
         )
 
+    # Loop over the user guide and create a Document for each page
     content_lists[".pdf"] = []
     for doc in docs:
         loader = PyPDFLoader(doc)
@@ -77,6 +80,7 @@ def main():
             Document(page_content=page_content, metadata={"userguide": doc})
         )
 
+    # Dictionary used to map file type to language
     ext_to_language = {
         ".py": "python",
         ".cpp": "cpp",
@@ -95,6 +99,7 @@ def main():
     model_kwargs = {"device": "cuda"}
     encode_kwargs = {"normalize_embeddings": True}  # set True to compute cosine similarity
 
+    # Create the local embedding model, cached at ./models
     embedding_model = HuggingFaceBgeEmbeddings(
         model_name=model_name,
         model_kwargs=model_kwargs,
diff --git a/applications/holochat_local/holochat/chatbot.py b/applications/holochat_local/holochat/chatbot.py
index 7f52ef39c..c81b71a9f 100644
--- a/applications/holochat_local/holochat/chatbot.py
+++ b/applications/holochat_local/holochat/chatbot.py
@@ -13,6 +13,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+# Simple Gradio chatbot app; for details visit:
+# https://www.gradio.app/guides/creating-a-custom-chatbot-with-blocks
+
 import gradio as gr
 import sklearn
 from llm import LLM
@@ -21,10 +24,6 @@
 
 initial_prompt = "Welcome to HoloChat! How can I assist you today?"
 
-python_prompt = """Create a Python Holoscan 'hello world' app with video
-    as input, use HoloViz to print 'Hello World' on each frame, and then output
-    it to the user. After the code explain the process step-by-step."""
-
 
 def ask_question(message, chat_history):
     if chat_history is None:
@@ -39,9 +38,7 @@ def ask_question(message, chat_history):
 def stream_response(chat_history, llm):
     if llm is None:
         llm = LLM()
-
     response = llm.answer_question(chat_history)
-
     for chunk in response:
         yield chunk, llm
 
@@ -101,7 +98,12 @@ def main():
                 ["What operating system can I use with the Holoscan SDK?"],
                 ["What hardware does Holoscan support?"],
                 ["How do I create a C++ Holoscan Operator?"],
-                [python_prompt],
+                [
+                    "Create a Python Holoscan 'hello world' app with video "
+                    "as input, use HoloViz to print 'Hello World' on each frame, "
+                    "and then output it to the user. After the code explain the "
+                    "process step-by-step."
+                ],
             ],
             inputs=tbInput,
         )
diff --git a/applications/holochat_local/holochat/llm.py b/applications/holochat_local/holochat/llm.py
index 9da65e930..9602a902e 100644
--- a/applications/holochat_local/holochat/llm.py
+++ b/applications/holochat_local/holochat/llm.py
@@ -22,25 +22,27 @@
 from langchain.embeddings import HuggingFaceBgeEmbeddings
 from langchain.vectorstores import Chroma
 
-# Llama-2 has context length of 4096 token
-# 1 token = ~4 characters, so 3500 * 4 provides plenty of room.
+# Most Llama-2 models are trained with a context length of 4096 tokens
+# 1 token = ~4 characters, so 3300 * 4 provides plenty of room.
 MAX_TOKENS = 3300 * 4
-# Empirically found to be the cutoff of specific questions vs. generic comments about previous answer
+# Empirically found to be the cutoff between specific questions and generic comments about the previous answer
+# This ensures no documents are returned for comments such as "Rewrite that code in one block"
 SEARCH_THRESHOLD = 0.35
 
 NUM_HOLOSCAN_DOCS = 7
 LLAMA_SERVER = "http://127.0.0.1:8080"
-SERVER_TIMEOUT = 60  # seconds
+SERVER_TIMEOUT = 60  # Timeout in seconds to connect to llama.cpp
 
-system_prompt = """You are NVIDIA-GPT, an expert at all things NVIDIA who knows
-    the Holoscan user guide, as well as examples from Holohub and the api from the SDK.
-    You are an assistant who answers questions step-by-step and always provides your
-    reasoning so you have the correct result. Answer the questions based on the provided
-    context and augment with your general knowledge where appropriate. Reformat the provided
-    code examples as necessary since they were retrieved with a web scrape.
-    Under no circumstances will you make up Holoscan API functions or functionality that does not exist!
-    Do not conflate Holoscan Python API with Holoscan C++ API. You ALWAYS end your response with ''.
-    Below is NVIDIA Holoscan SDK documentation to assist you in answering questions:
-"""
+system_prompt = (
+    "You are NVIDIA-GPT, an expert at all things NVIDIA who knows "
+    "the Holoscan user guide, as well as examples from HoloHub and the API from the SDK. "
+    "You are an assistant who answers questions step-by-step and always provides your "
+    "reasoning so you have the correct result. Answer the questions based on the provided "
+    "context and augment with your general knowledge where appropriate. Reformat the provided "
+    "code examples as necessary since they were retrieved with a web scrape. "
+    "Under no circumstances will you make up Holoscan API functions or functionality that does not "
+    "exist! Do not conflate Holoscan Python API with Holoscan C++ API. You ALWAYS end your response "
+    "with ''. Below is NVIDIA Holoscan SDK documentation to assist you in answering questions:"
+)
 
 class LLM:
@@ -54,7 +56,7 @@ def answer_question(self, chat_history):
         docs = self.db.similarity_search_with_score(
             query=question, k=NUM_HOLOSCAN_DOCS, distance_metric="cos"
         )
-        # Filter out poor matches
+        # Filter out poor matches from the vector db
         docs = list(
             map(lambda lc_doc: lc_doc[0], filter(lambda lc_doc: lc_doc[1] < SEARCH_THRESHOLD, docs))
         )
@@ -65,6 +67,7 @@
         ]  # Get first docs (highest similarity score)
         self.prev_docs = docs  # Save document list
 
+        # Create a prompt to send to the llm (remove greeting and question)
         llama_prompt = _to_llama_prompt(chat_history[1:-1], question, docs)
         response = self._stream_ai_response(llama_prompt, chat_history)
 
@@ -72,6 +75,7 @@
         yield chunk
 
     def _stream_ai_response(self, llama_prompt, chat_history):
+        # Llama-specific request data
         request_data = {
             "prompt": llama_prompt,
             "temperature": 0,
@@ -99,33 +103,49 @@ def _get_database(self):
         model_kwargs = {"device": "cuda"}
         encode_kwargs = {"normalize_embeddings": True}  # set True to compute cosine similarity
 
+        # Construct the embedding model and cache it to the local './models' dir
         embedding_model = HuggingFaceBgeEmbeddings(
             model_name=model_name,
             model_kwargs=model_kwargs,
             encode_kwargs=encode_kwargs,
             cache_folder="./models",
         )
-        # Use past two questions to get docs
         chroma_db = Chroma(persist_directory=CHROMA_DB_PATH, embedding_function=embedding_model)
         return chroma_db
 
 
 def _to_llama_prompt(history, question, docs):
+    """
+    Takes the chat history, the current question, and the documents from the
+    vector db, and creates a single string to prompt the Llama model with
+    """
+
+    # Phind v2's prompt prefixes (note these are dependent on the model used)
     user_prefix = "### User Message:"
     bot_prefix = "### Assistant:"
     bot_rule_prefix = "### System Prompt:"
 
-    opening_prompt = f"""Below is a chat between a user '{user_prefix}', and you, the AI
-    assistant '{bot_prefix}'. You follow the given rule '{bot_rule_prefix}' no matter what."""
+    # Explain the context of the information being provided
+    opening_prompt = (
+        f"Below is a chat between a user '{user_prefix}', and you, "
+        f"the AI assistant '{bot_prefix}'. You follow the given rule "
+        f"'{bot_rule_prefix}' no matter what."
+    )
 
+    # Combine all the vector db docs into a single string
     docs = "\n\n".join(list(map(lambda lc_doc: lc_doc.page_content, docs)))
+    # Add the system prompt with the vector db docs
     opening_prompt += f"\n\n{bot_rule_prefix}\n{system_prompt}\n\n{docs}"
 
-    ending_prompt = f"""\n\n{user_prefix}\nUsing the previous conversation history,
-    the provided NVIDIA Holoscan SDK documentation, AND your own expert knowledge, answer
-    the following question (include markdown code snippets for coding questions and do not acknowledge
-    that documentation was provided to you):\n{question}"""
-
+    # Define the final portion of the prompt
+    ending_prompt = (
+        f"\n\n{user_prefix}\nUsing the previous conversation history, "
+        "the provided NVIDIA Holoscan SDK documentation, AND your own expert knowledge, answer "
+        "the following question (include markdown code snippets for coding questions and do not "
+        f"acknowledge that documentation was provided to you):\n{question}"
+    )
+
+    # Loop over the chat history and convert it to a single string
     msg_hist = ""
     for msg_pair in history:
         if msg_pair[0]:
@@ -135,23 +155,30 @@ def _to_llama_prompt(history, question, docs):
 
     len_prompt = len(msg_hist) + len(opening_prompt) + len(ending_prompt)
 
-    # Remove previous conversation history if MAX_TOKENS exceeded
+    # Truncate previous conversation history if MAX_TOKENS is exceeded
     if len_prompt > MAX_TOKENS:
         excess_tokens = len_prompt - MAX_TOKENS
         msg_hist = msg_hist[excess_tokens:]
         last_msg_idx = msg_hist.find("\n\n" + user_prefix)
         bot_idx = msg_hist.find("\n\n" + bot_prefix)
+        # Truncate to the last user or bot message, whichever allows for a
+        # longer chat history
         if bot_idx < last_msg_idx:
             last_msg_idx = bot_idx
         msg_hist = msg_hist[last_msg_idx:]
 
+    # Create the final prompt
     prompt = opening_prompt + msg_hist + ending_prompt + f"\n\n{bot_prefix}\n"
 
     return prompt
 
 
 def _wait_for_server():
+    """
+    Attempts to connect to the llama.cpp server for up to
+    SERVER_TIMEOUT seconds before raising an exception
+    """
     attempts = 0
-    while attempts < SEARCH_THRESHOLD / 5:
+    while attempts < SERVER_TIMEOUT / 5:
         try:
             response = requests.get(LLAMA_SERVER)
             # Check for a successful response status code (e.g., 200 OK)
diff --git a/applications/holochat_local/holochat/utils.py b/applications/holochat_local/holochat/utils.py
index e6e4cd2b6..912113911 100644
--- a/applications/holochat_local/holochat/utils.py
+++ b/applications/holochat_local/holochat/utils.py
@@ -25,6 +25,9 @@
 
 
 def clone_repository(repo, token):
+    """
+    Used to clone nvidia-holoscan repos
+    """
     print(f"Cloning repository: {repo}")
     time.sleep(1)
     try:
@@ -39,6 +42,9 @@
 
 
 def clone_general_repository(repo, token):
+    """
+    Used to clone general repos
+    """
     print(f"Cloning repository: {repo}")
     time.sleep(1)
     try:
@@ -81,8 +87,12 @@ def get_files(files, type):
     return contents
 
 
-# langchain method can't handle 'disallowed_special' - use tiktoken for now
 def get_source_chunks(all_contents, file_type=None, chunk_size=1500, chunk_overlap=150):
+    """
+    Splits Documents into chunks for storage. If the language is supported,
+    the document is split according to the syntax of that language
+    (e.g. not splitting Python functions in the middle)
+    """
     if file_type in ["python", "cpp", "markdown"]:
         splitter = RecursiveCharacterTextSplitter.from_language(
             language=file_type, chunk_size=chunk_size, chunk_overlap=chunk_overlap
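
For context on what this patch wires together: `start_holochat` launches the llama.cpp server on port 8080, and `llm.py` sends its `request_data` to it at `LLAMA_SERVER`. Below is a minimal standalone sketch of such a client request, assuming llama.cpp's standard `/completion` endpoint and its default JSON fields; the `quick_completion` helper is hypothetical and not part of this patch, and the full set of fields HoloChat sends is not shown in this diff.

```python
# Hypothetical sanity check against the llama.cpp server started by
# `make start_holochat`; endpoint and fields follow llama.cpp's server example.
import requests

LLAMA_SERVER = "http://127.0.0.1:8080"  # same address llm.py uses


def quick_completion(prompt: str) -> str:
    """Send one non-streaming completion request and return the generated text."""
    response = requests.post(
        f"{LLAMA_SERVER}/completion",
        json={
            "prompt": prompt,
            "temperature": 0,  # matches the deterministic setting in llm.py
            "n_predict": 128,  # assumption: cap the response length
            "stream": False,
        },
        timeout=60,
    )
    response.raise_for_status()
    # llama.cpp's server returns the generated text under the "content" key
    return response.json()["content"]


if __name__ == "__main__":
    # Uses the same Phind v2 prompt prefixes that _to_llama_prompt builds
    print(quick_completion("### User Message:\nWhat is Holoscan?\n\n### Assistant:\n"))
```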