Skip to content

Commit

Permalink
Improving example text: summarization and streaming
Browse files Browse the repository at this point in the history
  • Loading branch information
debymf committed Apr 13, 2024
1 parent cb4a442 commit 92700a0
Show file tree
Hide file tree
Showing 4 changed files with 266 additions and 0 deletions.
142 changes: 142 additions & 0 deletions examples/ExampleStreaming.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
%% Process Generated Text in Real Time by Using ChatGPT in Streaming Mode
% This example shows how to process generated text in real time by using ChatGPT
% in streaming mode.
%
% By default, when you pass a prompt to ChatGPT, it generates a response internally
% and then outputs it in full at the end. To print out and format generated text
% as the model is generating it, use the |StreamFun| name-value argument of the
% |openAIChat| class. The streaming function is a custom function handle that
% tells the model what to do with the output.
%
% The example includes two parts:
%%
% * First, define and use a custom streaming function to print out generated
% text directly as the model generates it.
% * Then, create an HTML UI Component and define and use a custom streaming
% function to update the UI Component in real time as the model generates text.
%%
% To run this example, you need a valid API key from a paid OpenAI API account.

% Load environment variables from the local .env file (presumably including the
% OpenAI API key mentioned above - confirm) and add the parent folder to the
% MATLAB path.
loadenv(".env")
addpath('..')
%% Print Stream Directly to Screen
% In this example, the streamed output is printed directly to the screen.
%
% Define the function to print the returned tokens.

function printToken(token)
%PRINTTOKEN Write one streamed token to the Command Window.
%   printToken(token) prints TOKEN exactly as received. No newline or
%   other separator is added, so consecutive tokens form continuous text.
    fprintf(1,"%s",token);
end
%%
% Create the chat object with the defined function as a handle.

% Pass printToken as the StreamFun handle; as described in the introduction,
% the streaming function tells the model what to do with the output.
chat = openAIChat(StreamFun=@printToken);
%%
% Generate response to a prompt in streaming mode.

% The return values of generate are not captured here; the text is shown by
% printToken. MaxNumTokens=500 presumably caps the response length - confirm
% against the openAIChat documentation.
prompt = "What is Model-Based Design?";
generate(chat, prompt, MaxNumTokens=500);
%% Print Stream to HTML UI Component
% In this example, the streamed output is printed to the HTML component.
%
% Create the HTML UI component.

% Create a UI figure and place an HTML component in it at the given position.
fig = uifigure;
h = uihtml(fig,Position=[50,10,450,400]);
%%
% Initialize the content of the HTML UI component.

% resetTable sets the HTML source (table skeleton plus script) and clears h.Data.
resetTable(h);
%%
% Create the chat object with the function handle, which requires the |uihtml|
% object created earlier.

% The anonymous function captures h, so each streamed value x is forwarded to
% printStream together with the HTML component.
chat = openAIChat(StreamFun=@(x)printStream(h,x));
%%
% Add the user prompt to the table in the HTML UI component.

userPrompt = "Tell me 5 jokes.";
% "new" asks the page script to append a new table row for this entry.
addChat(h,"user",userPrompt,"new")
%%
% Generate response to a prompt in streaming mode.

% Only txt is used below; message and response are kept in the workspace.
[txt, message, response] = generate(chat,userPrompt);
%%
% Update the last row with the final output. This is necessary if a further
% update is needed to support additional HTML formatting.

% "current" overwrites the last cell of the last row instead of adding a row.
addChat(h,"assistant",txt,"current")
%% Helper functions
% |resetTable|:
%%
% # Adds the basic HTML structure and the JavaScript that processes the data change
% in MATLAB.
% # The JavaScript gets a reference to the table and the changed data and, if the
% 3rd element in the data is "new", adds a new row.
% # It populates the new row with two cells and updates the cells from the first
% two elements of the data.
% # The new row is then appended to the table.
% # Otherwise, the JavaScript gets a reference to the last cell of the last row
% of the table and updates it with the 2nd element of the data.

function resetTable(obj)
%RESETTABLE Initialize the HTML UI component passed as input.
%   resetTable(obj) assigns the HTML source of OBJ (an empty two-column
%   table plus a script that reacts to "DataChanged" events) and then
%   clears OBJ.Data. OBJ must be a matlab.ui.control.HTML object.
    mustBeA(obj,'matlab.ui.control.HTML')

    % Page skeleton: header row of the table, then the opening script tag.
    pageHead = ['<html><body><table>', ...
        '<tr><th>Role</th><th>Content</th></tr></table><script>'];

    % Register the DataChanged listener and read the table and the new data.
    listenerHead = ['function setup(htmlComponent) {', ...
        'htmlComponent.addEventListener("DataChanged", function(event) {', ...
        'var table = document.querySelector("table");', ...
        'var changedData = htmlComponent.Data;'];

    % Third data element equal to "new": build a two-cell row and append it.
    appendRowBranch = ['if (changedData[2] == "new") {', ...
        'var newRow = document.createElement("tr");', ...
        'var cell1 = document.createElement("td");', ...
        'var cell2 = document.createElement("td");', ...
        'cell1.innerHTML = changedData[0];', ...
        'cell2.innerHTML = changedData[1];', ...
        'newRow.appendChild(cell1);', ...
        'newRow.appendChild(cell2);', ...
        'table.appendChild(newRow);'];

    % Otherwise: overwrite the last cell of the last row.
    updateCellBranch = ['} else { ', ...
        'var lastRow = table.rows[table.rows.length - 1];', ...
        'var lastCell = lastRow.cells[lastRow.cells.length - 1];', ...
        'lastCell.innerHTML = changedData[1];'];

    % Close the branch, the listener, setup(), and the page.
    pageTail = '}});}</script></body></html>';

    obj.HTMLSource = [pageHead, listenerHead, appendRowBranch, ...
        updateCellBranch, pageTail];
    obj.Data = [];
    drawnow
end
%%
% |addChat| adds a new row to the table in the HTML UI component or updates its last row

function addChat(obj,role,content,row)
%ADDCHAT Add a new row to the table or update its last row.
%   addChat(obj,role,content,row) stores {role,content,row} in the Data
%   property of the HTML component OBJ after replacing line breaks in
%   CONTENT with <br>. ROW is forwarded unchanged; the example passes
%   "new" to add a row and "current" to update the last one.
    mustBeA(obj,'matlab.ui.control.HTML')

    % HTML ignores raw newlines, so convert them to explicit line breaks.
    htmlContent = replace(content,newline,"<br>");
    rowData = {role,htmlContent,row};

    obj.Data = rowData;
    drawnow
end
%%
% |printStream| is the streaming function and prints the stream in the table
% in the HTML UI component

function printStream(h,x)
%PRINTSTREAM Streaming function that shows streamed text in the table.
%   printStream(h,x) updates the Data property of the HTML component H
%   with the streamed text X. A zero-length X starts a new "assistant"
%   row; any other X is appended to the text currently stored in H.Data
%   and the existing row is updated.

    % A zero-length token marks the start of a response: open a new row.
    if strlength(x) == 0
        h.Data = {"assistant",string(x),"new"};
        drawnow
        return
    end

    % Line breaks in the token are converted to <br> for HTML display.
    piece = x;
    if contains(piece,newline)
        piece = replace(piece,newline,"<br>");
    end

    % Append to the text accumulated so far and refresh the existing row.
    accumulated = h.Data{2} + string(piece);
    h.Data = {"assistant",accumulated,"current"};
    drawnow
end
%%
% _Copyright 2024 The MathWorks, Inc._
Binary file modified examples/ExampleStreaming.mlx
Binary file not shown.
124 changes: 124 additions & 0 deletions examples/ExampleSummarization.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
%% Summarize Large Documents Using ChatGPT and MATLAB
% This example shows how to use ChatGPT to summarize documents that are too
% large to be summarized at once.
%
% To summarize short documents using ChatGPT, you can pass the documents directly
% as a prompt together with an instruction to summarize them. However, ChatGPT
% can only process prompts of limited size.
%
% To summarize documents that are larger than this limit, split the documents
% up into smaller documents. Summarize the smaller document chunks, then pass
% all of the summaries to ChatGPT to generate one overall summary.
%
% This example includes four steps:
%%
% * Download the complete text of "Alice in Wonderland" by Lewis Carroll from
% Project Gutenberg.
% * Split the documents up into chunks of less than 3000 words. (Section title:
% "Split Document Into Chunks")
% * Use ChatGPT to create summaries of each chunk. ("Summarize Chunks")
% * Then use ChatGPT to create a summary of all of the summaries. ("Summarize
% Document")
%%
% To run this example, you need Text Analytics Toolbox™.
%
% To run this example, you need a valid API key from a paid OpenAI™ API account.

% Load environment variables from the local .env file (presumably including the
% OpenAI API key mentioned above - confirm) and add the parent folder to the
% MATLAB path.
loadenv(".env")
addpath('..')
%% Download Text Data
% Download and read the content from Alice's Adventures in Wonderland by Lewis
% Carroll from Project Gutenberg.
%
% First read the contents of the webpage.

% Use a 30-second timeout for the web request.
options = weboptions(Timeout=30);
code = webread("https://www.gutenberg.org/files/11/11-h/11-h.htm", options);
% Extract the text from the downloaded HTML; longText is the input to chunking.
longText = extractHTMLText(string(code));
%% Split Document Into Chunks
% Large language models have a limit in terms of how much text they can accept
% as input, so if you try to summarize the complete book, you will likely get
% an error. A workaround is to split the book into chunks and summarize each
% chunk individually. The chunk size is defined in |limitChunkWords|, which restricts
% the number of words in a chunk.

% incrementalSummary starts out as the full text; the summarization loop below
% overwrites it with the joined chunk summaries on every pass.
incrementalSummary = longText;
limitChunkWords = 3000;
chunks = createChunks(incrementalSummary, limitChunkWords);
%% Summarize Chunks
% Initialize a ChatGPT session with the role of summarizing text

% NOTE(review): the string argument is presumably the system prompt - confirm
% against the openAIChat documentation.
summarizer = openAIChat("You are a professional summarizer.");
%%
% Looping process to gradually summarize the text chunk by chunk, reducing the
% chunk size with each iteration.

% Running total of summarization requests across all passes.
numCalls = 0;
while numel(chunks) > 1
    numCalls = numCalls + numel(chunks);
    summarizedChunks = strings(size(chunks));
    %%
    % Add a limit to the number of calls, to ensure you are not making more calls
    % than what is expected. You can change this value to match what is needed for
    % your application.

    if numCalls > 20
        error("Document is too long to be summarized.")
    end

    % Summarize every chunk of the current pass.
    for chunkIdx = 1:numel(chunks)
        chunkPrompt = "Summarize this content:" + newline + chunks(chunkIdx);
        summarizedChunks(chunkIdx) = generate(summarizer, chunkPrompt);
    end
    %%
    % Merge the summarized chunks to serve as the base for the next iteration.

    incrementalSummary = join(summarizedChunks);
    %%
    % Form new chunks with a reduced size for the subsequent iteration.

    chunks = createChunks(incrementalSummary, limitChunkWords);
end
%% Summarize Document
% Compile the final summary by combining the summaries from all the chunks.

% Ask for one overall summary of the condensed text. The prompt wording is sent
% to the model, so the non-word "cohese" is corrected to "cohesive".
fullSummary = generate(summarizer, "The following text is a combination of summaries. " + ...
    "Provide a cohesive and coherent summary combining these smaller summaries, preserving as much information as possible:" + newline + incrementalSummary);
% Called without a trailing semicolon so that the wrapped summary is displayed.
wrapText(fullSummary)
%% |createChunks| function
% This function segments a long text into smaller parts of a predefined size
% to facilitate easier summarization. It preserves the structure of sentences.
% The |chunkSize| should be large enough to fit at least one sentence.

function chunks = createChunks(text, chunkSize)
%CREATECHUNKS Split text into chunks of whole sentences.
%   chunks = createChunks(text, chunkSize) tokenizes TEXT, splits it into
%   sentences, and groups consecutive sentences into chunks whose token
%   count stays below CHUNKSIZE. A single sentence with CHUNKSIZE or more
%   tokens becomes a chunk of its own.
%
%   CHUNKS is a string column vector with one element per chunk. It is
%   empty when TEXT contains no tokens.

    % Tokenize the input and split it into individual sentences.
    sentences = splitSentences(tokenizedDocument(text));

    chunks = strings(0,1);
    currentChunk = "";
    currentChunkSize = 0;

    % Aggregate sentences until adding the next one would reach the limit,
    % then store the chunk and start a new one with that sentence.
    for i = 1:length(sentences)
        sentenceSize = doclength(sentences(i));
        sentenceText = joinWords(sentences(i));

        if currentChunkSize + sentenceSize < chunkSize
            currentChunkSize = currentChunkSize + sentenceSize;
            currentChunk = currentChunk + " " + sentenceText;
        else
            % Do not store an empty chunk when the very first sentence
            % already reaches the limit on its own.
            if currentChunkSize > 0
                chunks = [chunks; currentChunk]; %#ok<AGROW>
            end
            currentChunkSize = sentenceSize;
            currentChunk = sentenceText;
        end
    end

    % Store the chunk still being accumulated when the loop ends; without
    % this step the tail of the document would be dropped.
    if currentChunkSize > 0
        chunks = [chunks; currentChunk];
    end
end
%% |wrapText| function
% This function splits text into sentences and then concatenates them again
% using |newline| to make it easier to visualize text in this example

function wrappedText = wrapText(text)
%WRAPTEXT Put each sentence of the text on its own line.
%   wrappedText = wrapText(text) splits TEXT into sentences and joins the
%   sentences again with newline characters between them.
    sentences = splitSentences(text);
    wrappedText = join(sentences,newline);
end
%%
% _Copyright 2023 The MathWorks, Inc._
Binary file modified examples/ExampleSummarization.mlx
Binary file not shown.

0 comments on commit 92700a0

Please sign in to comment.