feat: Make suggest next questions configurable (#275)
---------
Co-authored-by: Marcus Schiesser <[email protected]>
leehuwuj authored Sep 9, 2024
1 parent c16deed commit 8105c5c
Showing 12 changed files with 182 additions and 320 deletions.
5 changes: 5 additions & 0 deletions .changeset/cyan-buttons-clean.md
@@ -0,0 +1,5 @@
---
"create-llama": patch
---

Add env config for next questions feature
49 changes: 23 additions & 26 deletions helpers/env-variables.ts
@@ -487,33 +487,30 @@ It\\'s cute animal.
};

const getTemplateEnvs = (template?: TemplateType): EnvVar[] => {
if (template === "multiagent") {
return [
{
name: "MESSAGE_QUEUE_PORT",
},
{
name: "CONTROL_PLANE_PORT",
},
{
name: "HUMAN_CONSUMER_PORT",
},
{
name: "AGENT_QUERY_ENGINE_PORT",
value: "8003",
},
{
name: "AGENT_QUERY_ENGINE_DESCRIPTION",
value: "Query information from the provided data",
},
{
name: "AGENT_DUMMY_PORT",
value: "8004",
},
];
} else {
return [];
const nextQuestionEnvs: EnvVar[] = [
{
name: "NEXT_QUESTION_PROMPT",
description: `Customize prompt to generate the next question suggestions based on the conversation history.
Disable this prompt to disable the next question suggestions feature.`,
value: `"You're a helpful assistant! Your task is to suggest the next question that user might ask.
Here is the conversation history
---------------------
{conversation}
---------------------
Given the conversation history, please give me 3 questions that you might ask next!
Your answer should be wrapped in three sticks which follows the following format:
\`\`\`
<question 1>
<question 2>
<question 3>
\`\`\`"`,
},
];

if (template === "multiagent" || template === "streaming") {
return nextQuestionEnvs;
}
return [];
};

const getObservabilityEnvs = (
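For orientation, here is a minimal sketch (not part of this commit) of how a generated backend is expected to consume the NEXT_QUESTION_PROMPT variable: the prompt is read from the environment, the single `{conversation}` placeholder is filled with recent chat history, and leaving the variable unset or empty disables the feature. It assumes llama-index is installed; the sample conversation text is invented.

```python
# Illustrative sketch only: how NEXT_QUESTION_PROMPT drives the feature.
import os

from llama_index.core.prompts import PromptTemplate

prompt_text = os.getenv("NEXT_QUESTION_PROMPT")
if not prompt_text:
    # Unset or empty variable -> next-question suggestions are disabled.
    print("Next-question suggestions are disabled.")
else:
    template = PromptTemplate(prompt_text)
    # The default prompt exposes a single {conversation} placeholder.
    prompt = template.format(
        conversation="User: What is create-llama?\nAssistant: A CLI that scaffolds LlamaIndex apps."
    )
    print(prompt)  # final prompt sent to the LLM
```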
7 changes: 7 additions & 0 deletions helpers/python.ts
@@ -395,6 +395,13 @@ export const installPythonTemplate = async ({
cwd: path.join(compPath, "settings", "python"),
});

// Copy services
if (template == "streaming" || template == "multiagent") {
await copy("**", path.join(root, "app", "api", "services"), {
cwd: path.join(compPath, "services", "python"),
});
}

if (template === "streaming") {
// For the streaming template only:
// Select and copy engine code based on data sources and tools
28 changes: 8 additions & 20 deletions templates/components/llamaindex/typescript/streaming/suggestion.ts
@@ -1,32 +1,20 @@
import { ChatMessage, Settings } from "llamaindex";

const NEXT_QUESTION_PROMPT_TEMPLATE = `You're a helpful assistant! Your task is to suggest the next question that user might ask.
Here is the conversation history
---------------------
$conversation
---------------------
Given the conversation history, please give me $number_of_questions questions that you might ask next!
Your answer should be wrapped in three sticks which follows the following format:
\`\`\`
<question 1>
<question 2>\`\`\`
`;
const N_QUESTIONS_TO_GENERATE = 3;

export async function generateNextQuestions(
conversation: ChatMessage[],
numberOfQuestions: number = N_QUESTIONS_TO_GENERATE,
) {
export async function generateNextQuestions(conversation: ChatMessage[]) {
const llm = Settings.llm;
const NEXT_QUESTION_PROMPT = process.env.NEXT_QUESTION_PROMPT;
if (!NEXT_QUESTION_PROMPT) {
return [];
}

// Format conversation
const conversationText = conversation
.map((message) => `${message.role}: ${message.content}`)
.join("\n");
const message = NEXT_QUESTION_PROMPT_TEMPLATE.replace(
"$conversation",
const message = NEXT_QUESTION_PROMPT.replace(
"{conversation}",
conversationText,
).replace("$number_of_questions", numberOfQuestions.toString());
);

try {
const response = await llm.complete({ prompt: message });
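The prompt asks the model to wrap its suggestions in a triple-backtick block, one question per line. A standalone sketch of how such output can be parsed into a question list follows (mirroring the regex used by the Python service added below; the sample model output is invented and the helper name is illustrative).

```python
# Standalone illustration of the expected LLM output contract.
import re
from typing import List

FENCE = "`" * 3  # the prompt asks for questions wrapped in triple backticks


def extract_questions(text: str) -> List[str]:
    # Pull out the content between the first pair of triple-backtick fences.
    match = re.search(FENCE + r"(.*?)" + FENCE, text, re.DOTALL)
    content = match.group(1) if match else ""
    return [line for line in content.strip().split("\n") if line]


sample_output = (
    "Here are some follow-ups:\n"
    + FENCE
    + "\nWhat data sources are indexed?"
    + "\nHow do I customize NEXT_QUESTION_PROMPT?"
    + "\nCan I disable the suggestions?\n"
    + FENCE
)
print(extract_questions(sample_output))
# -> ['What data sources are indexed?', 'How do I customize NEXT_QUESTION_PROMPT?', 'Can I disable the suggestions?']
```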
78 changes: 78 additions & 0 deletions templates/components/services/python/suggestion.py
@@ -0,0 +1,78 @@
import logging
import os
import re
from typing import List, Optional

from app.api.routers.models import Message
from llama_index.core.prompts import PromptTemplate
from llama_index.core.settings import Settings

logger = logging.getLogger("uvicorn")


class NextQuestionSuggestion:
"""
Suggest the next questions that user might ask based on the conversation history
Disable this feature by removing the NEXT_QUESTION_PROMPT environment variable
"""

@classmethod
def get_configured_prompt(cls) -> Optional[str]:
prompt = os.getenv("NEXT_QUESTION_PROMPT", None)
if not prompt:
return None
return PromptTemplate(prompt)

@classmethod
async def suggest_next_questions_all_messages(
cls,
messages: List[Message],
) -> Optional[List[str]]:
"""
Suggest the next questions that user might ask based on the conversation history
Return None if suggestion is disabled or there is an error
"""
prompt_template = cls.get_configured_prompt()
if not prompt_template:
return None

try:
# Reduce the cost by only using the last two messages
last_user_message = None
last_assistant_message = None
for message in reversed(messages):
if message.role == "user":
last_user_message = f"User: {message.content}"
elif message.role == "assistant":
last_assistant_message = f"Assistant: {message.content}"
if last_user_message and last_assistant_message:
break
conversation: str = f"{last_user_message}\n{last_assistant_message}"

# Call the LLM and parse questions from the output
prompt = prompt_template.format(conversation=conversation)
output = await Settings.llm.acomplete(prompt)
questions = cls._extract_questions(output.text)

return questions
except Exception as e:
logger.error(f"Error when generating next question: {e}")
return None

@classmethod
def _extract_questions(cls, text: str) -> List[str]:
content_match = re.search(r"```(.*?)```", text, re.DOTALL)
content = content_match.group(1) if content_match else ""
return content.strip().split("\n")

@classmethod
async def suggest_next_questions(
cls,
chat_history: List[Message],
response: str,
) -> List[str]:
"""
Suggest the next questions that user might ask based on the chat history and the last response
"""
messages = chat_history + [Message(role="assistant", content=response)]
return await cls.suggest_next_questions_all_messages(messages)
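A hedged usage sketch for the new service: it assumes the code runs inside a generated FastAPI backend where app.api.routers.models and app.api.services.suggestion are importable, Settings.llm is configured, and NEXT_QUESTION_PROMPT is set; the chat content is invented.

```python
# Illustrative usage only (not part of this commit).
import asyncio

from app.api.routers.models import Message
from app.api.services.suggestion import NextQuestionSuggestion


async def main() -> None:
    history = [
        Message(role="user", content="How do I index my PDFs?"),
    ]
    # Returns a list of follow-up questions, or None when NEXT_QUESTION_PROMPT
    # is not configured or the LLM call fails.
    questions = await NextQuestionSuggestion.suggest_next_questions(
        chat_history=history,
        response="Drop the files into ./data and run the generate script.",
    )
    print(questions)


asyncio.run(main())
```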
@@ -1,15 +1,15 @@
from asyncio import Task
import json
import logging
from typing import AsyncGenerator
from asyncio import Task
from typing import AsyncGenerator, List

from aiostream import stream
from app.agents.single import AgentRunEvent, AgentRunResult
from app.api.routers.models import ChatData, Message
from app.api.services.suggestion import NextQuestionSuggestion
from fastapi import Request
from fastapi.responses import StreamingResponse

from app.api.routers.models import ChatData
from app.agents.single import AgentRunEvent, AgentRunResult

logger = logging.getLogger("uvicorn")


@@ -57,26 +57,35 @@ async def content_generator(
# Yield the text response
async def _chat_response_generator():
result = await task
final_response = ""

if isinstance(result, AgentRunResult):
for token in result.response.message.content:
yield VercelStreamResponse.convert_text(token)
final_response += token
yield cls.convert_text(token)

if isinstance(result, AsyncGenerator):
async for token in result:
yield VercelStreamResponse.convert_text(token.delta)
final_response += token.delta
yield cls.convert_text(token.delta)

# Generate next questions if next question prompt is configured
question_data = await cls._generate_next_questions(
chat_data.messages, final_response
)
if question_data:
yield cls.convert_data(question_data)

# TODO: stream NextQuestionSuggestion
# TODO: stream sources

# Yield the events from the event handler
async def _event_generator():
async for event in events():
event_response = _event_to_response(event)
event_response = cls._event_to_response(event)
if verbose:
logger.debug(event_response)
if event_response is not None:
yield VercelStreamResponse.convert_data(event_response)
yield cls.convert_data(event_response)

combine = stream.merge(_chat_response_generator(), _event_generator())

@@ -85,16 +94,28 @@ async def _event_generator():
if not is_stream_started:
is_stream_started = True
# Stream a blank message to start the stream
yield VercelStreamResponse.convert_text("")
yield cls.convert_text("")

async for output in streamer:
yield output
if await request.is_disconnected():
break


def _event_to_response(event: AgentRunEvent) -> dict:
return {
"type": "agent",
"data": {"agent": event.name, "text": event.msg},
}
@staticmethod
def _event_to_response(event: AgentRunEvent) -> dict:
return {
"type": "agent",
"data": {"agent": event.name, "text": event.msg},
}

@staticmethod
async def _generate_next_questions(chat_history: List[Message], response: str):
questions = await NextQuestionSuggestion.suggest_next_questions(
chat_history, response
)
if questions:
return {
"type": "suggested_questions",
"data": questions,
}
return None
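For context, a sketch of the extra data event a client sees on the stream when suggestions are enabled; the shape follows _generate_next_questions above, while the question strings are invented.

```python
# Illustrative payload only: appended after the final answer has streamed,
# provided NEXT_QUESTION_PROMPT is configured and the LLM returned questions.
question_data = {
    "type": "suggested_questions",
    "data": [
        "Which data sources does the agent query?",
        "How can I customize the next-question prompt?",
        "How do I turn this feature off?",
    ],
}
# cls.convert_data(question_data) serializes it onto the Vercel stream; when the
# prompt is not configured, _generate_next_questions returns None and nothing
# extra is emitted.
```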
60 changes: 0 additions & 60 deletions templates/types/multiagent/fastapi/app/api/services/suggestion.py

This file was deleted.
