Improving example text: summarization and streaming
Showing 4 changed files with 266 additions and 0 deletions.
@@ -0,0 +1,142 @@
%% Process Generated Text in Real Time by Using ChatGPT in Streaming Mode
% This example shows how to process generated text in real time by using ChatGPT
% in streaming mode.
%
% By default, when you pass a prompt to ChatGPT, it generates the response
% internally and then outputs it in full at the end. To print out and format
% generated text as the model is generating it, use the |StreamFun| name-value
% argument of the |openAIChat| class. The streaming function is a custom function
% handle that specifies what to do with each piece of generated text as the model
% returns it.
%
% The example includes two parts:
%%
% * First, define and use a custom streaming function to print out generated
% text directly as the model generates it.
% * Then, create an HTML UI component and define and use a custom streaming
% function to update the UI component in real time as the model generates text.
%%
% To run this example, you need a valid API key from a paid OpenAI API account.
loadenv(".env")
addpath('..')
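%%
% The |loadenv| call above assumes that the current folder contains a file named
% ".env" that stores your API key in the |OPENAI_API_KEY| environment variable,
% for example a single line of the form OPENAI_API_KEY=<your-key>. As an
% alternative sketch, you can set the variable for the current MATLAB session
% only (commented out; replace the placeholder with your own key):

% setenv("OPENAI_API_KEY","<your-key>")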
%% Print Stream Directly to Screen
% In this example, the streamed output is printed directly to the screen.
%
% Define the function to print the returned tokens.

function printToken(token)
    fprintf("%s",token);
end
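%%
% The streaming function can do anything with the incoming tokens. For example,
% here is a minimal sketch of a hypothetical handler (not used in the rest of
% this example) that prints each token and also accumulates the full response
% in a persistent buffer:

function collectToken(token)
    % COLLECTTOKEN Hypothetical streaming handler: print and accumulate tokens.
    persistent buffer
    if isempty(buffer)
        buffer = "";
    end
    buffer = buffer + string(token);   % keep the accumulated text for later use
    fprintf("%s",token);
end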
%%
% Create the chat object with |printToken| as the streaming function.

chat = openAIChat(StreamFun=@printToken);
%%
% Generate a response to a prompt in streaming mode.

prompt = "What is Model-Based Design?";
generate(chat, prompt, MaxNumTokens=500);
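%%
% Even with a streaming function set, |generate| still returns the complete
% response text, so you can stream the output and keep the final result at the
% same time (commented out here to avoid making a second API call):

% txt = generate(chat, prompt, MaxNumTokens=500);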
%% Print Stream to HTML UI Component
% In this example, the streamed output is printed to an HTML UI component.
%
% Create the HTML UI component.

fig = uifigure;
h = uihtml(fig,Position=[50,10,450,400]);
%%
% Initialize the content of the HTML UI component.

resetTable(h);
%%
% Create the chat object with a streaming function handle that updates the
% |uihtml| object created earlier.

chat = openAIChat(StreamFun=@(x)printStream(h,x));
%%
% Add the user prompt to the table in the HTML UI component.

userPrompt = "Tell me 5 jokes.";
addChat(h,"user",userPrompt,"new")
%%
% Generate a response to the prompt in streaming mode.

[txt, message, response] = generate(chat,userPrompt);
%%
% Update the last row with the final output. This is useful if the final text
% needs additional HTML formatting, such as replacing line breaks with |<br>|
% tags.

addChat(h,"assistant",txt,"current")
%% Helper functions
% |resetTable|:
%%
% # Adds the basic HTML structure and the JavaScript that processes data changes
% from MATLAB.
% # The JavaScript gets a reference to the table and to the changed data. If the
% third element of the data is "new", it creates a new row.
% # It populates the new row with two cells and fills them from the first two
% elements of the data.
% # The new row is then appended to the table.
% # Otherwise, the JavaScript gets a reference to the last cell of the last row
% of the table and updates it with the second element of the data.
function resetTable(obj)
    %RESETTABLE Initialize the HTML UI component in the input argument.
    mustBeA(obj,'matlab.ui.control.HTML')
    obj.HTMLSource = ['<html><body><table>' ...
        '<tr><th>Role</th><th>Content</th></tr></table><script>', ...
        'function setup(htmlComponent) {', ...
        'htmlComponent.addEventListener("DataChanged", function(event) {', ...
        'var table = document.querySelector("table");' ...
        'var changedData = htmlComponent.Data;', ...
        'if (changedData[2] == "new") {', ...
        'var newRow = document.createElement("tr");', ...
        'var cell1 = document.createElement("td");', ...
        'var cell2 = document.createElement("td");', ...
        'cell1.innerHTML = changedData[0];', ...
        'cell2.innerHTML = changedData[1];', ...
        'newRow.appendChild(cell1);', ...
        'newRow.appendChild(cell2);', ...
        'table.appendChild(newRow);', ...
        '} else { ', ...
        'var lastRow = table.rows[table.rows.length - 1];', ...
        'var lastCell = lastRow.cells[lastRow.cells.length - 1];', ...
        'lastCell.innerHTML = changedData[1];', ...
        '}});}</script></body></html>'];
    obj.Data = [];
    drawnow
end
%%
% |addChat| adds a new row to the table in the HTML UI component or updates the
% last row.

function addChat(obj,role,content,row)
    %ADDCHAT Add a new row or update the last row of the table.
    mustBeA(obj,'matlab.ui.control.HTML')
    content = replace(content,newline,"<br>");
    obj.Data = {role,content,row};
    drawnow
end
%%
% |printStream| is the streaming function. It prints the stream in the table in
% the HTML UI component.

function printStream(h,x)
    %PRINTSTREAM Print the stream in the table in the HTML UI component.
    if strlength(x) == 0
        % If the first token has zero length, add a new row.
        tokens = string(x);
        h.Data = {"assistant",tokens,"new"};
    else
        % Otherwise, append the new token to the previous tokens.
        % If the new token contains a line break, replace it with <br>.
        if contains(x,newline)
            x = replace(x,newline,"<br>");
        end
        tokens = h.Data{2} + string(x);
        % Update the existing row.
        h.Data = {"assistant",tokens,"current"};
    end
    drawnow
end
%%
% _Copyright 2024 The MathWorks, Inc._
Binary file not shown.
@@ -0,0 +1,124 @@
%% Summarize Large Documents Using ChatGPT and MATLAB
% This example shows how to use ChatGPT to summarize documents that are too
% large to be summarized at once.
%
% To summarize short documents using ChatGPT, you can pass the documents directly
% as a prompt together with an instruction to summarize them. However, ChatGPT
% can only process prompts of limited size.
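%%
% For a short document that fits within this limit, a single call is enough. As
% a minimal sketch, assuming the document text is stored in a string |shortText|
% (not defined in this example, so the code is commented out):

% chat = openAIChat("You are a professional summarizer.");
% summary = generate(chat, "Summarize the following text:" + newline + shortText);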
%%
% To summarize documents that are larger than this limit, split the documents
% up into smaller documents. Summarize the smaller document chunks, then pass
% all of the summaries to ChatGPT to generate one overall summary.
%%
% This example includes four steps:
%%
% * Download the complete text of "Alice's Adventures in Wonderland" by Lewis
% Carroll from Project Gutenberg. (Section title: "Download Text Data")
% * Split the document up into chunks of less than 3000 words. ("Split Document
% Into Chunks")
% * Use ChatGPT to create summaries of each chunk. ("Summarize Chunks")
% * Then, use ChatGPT to create a summary of all of the summaries. ("Summarize
% Document")
%%
% To run this example, you need Text Analytics Toolbox™.
%
% To run this example, you need a valid API key from a paid OpenAI™ API account.
loadenv(".env")
addpath('..')
%% Download Text Data
% Download and read the content from Alice's Adventures in Wonderland by Lewis
% Carroll from Project Gutenberg.
%
% First read the contents of the webpage.

options = weboptions(Timeout=30);
code = webread("https://www.gutenberg.org/files/11/11-h/11-h.htm", options);
longText = extractHTMLText(string(code));
%% Split Document Into Chunks
% Large language models have a limit on how much text they can accept as input,
% so if you try to summarize the complete book, you will likely get an error.
% A workaround is to split the book into chunks and summarize each chunk
% individually. The chunk size is defined in |limitChunkWords|, which restricts
% the number of words in a chunk.

incrementalSummary = longText;
limitChunkWords = 3000;
chunks = createChunks(incrementalSummary, limitChunkWords);
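%%
% The number of chunks depends on the length of the downloaded text. You can
% check how many chunks were created before summarizing them:

numel(chunks)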
%% Summarize Chunks
% Initialize a ChatGPT session with the role of summarizing text.

summarizer = openAIChat("You are a professional summarizer.");
%%
% Summarize the text chunk by chunk in a loop. Each iteration summarizes every
% chunk, joins the summaries, and splits the result into new chunks, so the
% number of chunks shrinks until the combined summaries fit into a single chunk.

numCalls = 0;
while numel(chunks)>1
    summarizedChunks = strings(size(chunks));
    numCalls = numCalls + numel(chunks);
%%
% Add a limit to the number of calls to ensure you are not making more calls
% than expected. You can change this value to match the needs of your
% application.

    if numCalls > 20
        error("Document is too long to be summarized.")
    end

    for i = 1:length(chunks)
        summarizedChunks(i) = generate(summarizer, "Summarize this content:" + newline + chunks(i));
    end
%%
% Merge the summarized chunks to serve as the base for the next iteration.

    incrementalSummary = join(summarizedChunks);
%%
% Form new chunks from the merged summaries for the next iteration.

    chunks = createChunks(incrementalSummary, limitChunkWords);
end
%% Summarize Document
% Compile the final summary by combining the summaries from all the chunks.

fullSummary = generate(summarizer, "The following text is a combination of summaries. " + ...
    "Provide a cohesive and coherent summary combining these smaller summaries, preserving as much information as possible:" + newline + incrementalSummary);
wrapText(fullSummary)
%% |createChunks| function
% This function segments a long text into smaller parts of a predefined size
% to facilitate easier summarization. It preserves the structure of sentences.
% The |chunkSize| should be large enough to fit at least one sentence.
function chunks = createChunks(text, chunkSize)
    % Tokenize the input text for processing
    text = tokenizedDocument(text);

    % Split the tokenized text into individual sentences
    text = splitSentences(text);
    chunks = [];
    currentChunk = "";
    currentChunkSize = 0;

    % Iterate through the sentences and aggregate them into chunks until a chunk
    % reaches the predefined size, after which a new chunk is started
    for i=1:length(text)
        newChunkSize = currentChunkSize + doclength(text(i));
        if newChunkSize < chunkSize
            currentChunkSize = currentChunkSize + doclength(text(i));
            currentChunk = currentChunk + " " + joinWords(text(i));
        else
            chunks = [chunks; currentChunk]; %#ok
            currentChunkSize = doclength(text(i));
            currentChunk = joinWords(text(i));
        end
    end
    % Append the final chunk that remains after the loop so that the end of the
    % text is not dropped
    chunks = [chunks; currentChunk];
end
%% |wrapText| function
% This function splits text into sentences and then concatenates them again
% using |newline| to make it easier to visualize text in this example.

function wrappedText = wrapText(text)
    wrappedText = splitSentences(text);
    wrappedText = join(wrappedText,newline);
end
%%
% _Copyright 2023 The MathWorks, Inc._
Binary file not shown.