Commit e229935

add TailFreeSampling_Z, add comment about currently unsupported ollama settings
ccreutzi committed Jun 5, 2024
1 parent 8d351a2 commit e229935
Showing 3 changed files with 24 additions and 25 deletions.
2 changes: 2 additions & 0 deletions +llms/+internal/callOllamaChatAPI.m
@@ -44,6 +44,7 @@
nvp.Temperature = 1
nvp.TopProbabilityMass = 1
nvp.TopProbabilityNum = Inf
nvp.TailFreeSamplingZ = 1
nvp.NumCompletions = 1
nvp.StopSequences = []
nvp.MaxNumTokens = inf
@@ -116,6 +117,7 @@
dict("Temperature") = "temperature";
dict("TopProbabilityMass") = "top_p";
dict("TopProbabilityNum") = "top_k";
dict("TailFreeSamplingZ") = "tfs_z";
dict("NumCompletions") = "n";
dict("StopSequences") = "stop";
dict("MaxNumTokens") = "num_predict";
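For context: the dictionary above is the translation layer between the MATLAB-side name-value arguments and the field names Ollama's REST API expects; "tfs_z" is Ollama's name for the tail-free sampling parameter. A minimal sketch of that pattern, with illustrative variable names and example values that are not from the repository (dictionary requires MATLAB R2022b or later):

    dict = dictionary("Temperature","temperature", ...
        "TopProbabilityMass","top_p", ...
        "TopProbabilityNum","top_k", ...
        "TailFreeSamplingZ","tfs_z");      % mapping mirrored from the diff above

    nvp = struct("Temperature",0.7,"TailFreeSamplingZ",0.95);   % example inputs
    options = struct;
    for name = string(fieldnames(nvp)).'
        options.(dict(name)) = nvp.(name); % rename to the API-side field
    end
    % options now has fields "temperature" and "tfs_z", ready for the
    % "options" object of an Ollama chat request body.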
37 changes: 12 additions & 25 deletions ollamaChat.m
@@ -31,13 +31,8 @@
% ResponseFormat - The format of response the model returns.
% "text" (default) | "json"
%
% Mirostat - 0/1/2, eta, tau
%
% RepeatLastN - find a better name! “Sets how far back for the model to look back to prevent repetition. (Default: 64, 0 = disabled, -1 = num_ctx)”
%
% RepeatPenalty
%
% TailFreeSamplingZ
% TailFreeSamplingZ - Reduce the use of less probable tokens, based on
% the second-order differences of ordered probabilities.
%
% StreamFun - Function to callback when streaming the
% result
@@ -50,29 +45,22 @@
% ollamaChat - Chat completion API from Ollama.
% generate - Generate a response using the ollamaChat instance.
%
% ollamaChat Properties:
% ollamaChat Properties, in addition to the name-value pairs above:
% Model - Model name (as expected by ollama server)
%
% Temperature - Temperature of generation.
%
% TopProbabilityMass - Top probability mass to consider for generation (top-p sampling).
%
% TopProbabilityNum - Only consider the k most likely tokens for generation (top-k sampling).
%
% StopSequences - Sequences to stop the generation of tokens.
%
% SystemPrompt - System prompt.
%
% ResponseFormat - Specifies the response format, text or json
%
% TimeOut - Connection Timeout in seconds (default: 120 secs)
%

% Ollama model properties not exposed:
% repeat_last_n, repeat_penalty - could not find an example where they made a difference
% mirostat, mirostat_eta, mirostat_tau - looking for the best API design


% Copyright 2024 The MathWorks, Inc.

properties
Model (1,1) string
TopProbabilityNum (1,1) {mustBeReal,mustBePositive} = Inf
TailFreeSamplingZ (1,1) {mustBeReal} = 1
end

methods
@@ -86,6 +74,7 @@
nvp.StopSequences {llms.utils.mustBeValidStop} = {}
nvp.ResponseFormat (1,1) string {mustBeMember(nvp.ResponseFormat,["text","json"])} = "text"
nvp.TimeOut (1,1) {mustBeReal,mustBePositive} = 120
nvp.TailFreeSamplingZ (1,1) {mustBeReal} = 1
nvp.StreamFun (1,1) {mustBeA(nvp.StreamFun,'function_handle')}
end

@@ -107,6 +96,7 @@
this.Temperature = nvp.Temperature;
this.TopProbabilityMass = nvp.TopProbabilityMass;
this.TopProbabilityNum = nvp.TopProbabilityNum;
this.TailFreeSamplingZ = nvp.TailFreeSamplingZ;
this.StopSequences = nvp.StopSequences;
this.TimeOut = nvp.TimeOut;
end
@@ -131,10 +121,6 @@
%
% Seed - An integer value to use to obtain
% reproducible responses
%
% Currently, GPT-4 Turbo with vision does not support the message.name
% parameter, functions/tools, response_format parameter, stop
% sequences, and max_tokens

arguments
this (1,1) ollamaChat
@@ -158,6 +144,7 @@
this.Model, messagesStruct, ...
Temperature=this.Temperature, ...
TopProbabilityMass=this.TopProbabilityMass, TopProbabilityNum=this.TopProbabilityNum,...
TailFreeSamplingZ=this.TailFreeSamplingZ,...
NumCompletions=nvp.NumCompletions,...
StopSequences=this.StopSequences, MaxNumTokens=nvp.MaxNumTokens, ...
ResponseFormat=this.ResponseFormat,Seed=nvp.Seed, ...
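For context, a usage sketch of the new name-value pair (an illustration, not code from this commit; it assumes a local Ollama server with the "mistral" model pulled):

    % TailFreeSamplingZ = 1 disables tail-free sampling (the default);
    % values below 1 trim low-probability tokens more aggressively.
    chat = ollamaChat("mistral", TailFreeSamplingZ=0.95);
    response = generate(chat, "Why is the sky blue?");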
10 changes: 10 additions & 0 deletions tests/tollamaChat.m
@@ -48,6 +48,16 @@ function extremeTopK(testCase)
testCase.verifyEqual(response1,response2);
end

function extremeTfsZ(testCase)
% setting tfs_z to z=0 leaves no random choice,
% so we expect to get a fixed response.
chat = ollamaChat("mistral",TailFreeSamplingZ=0);
prompt = "Sampling with tfs_z=0 returns a definite answer.";
response1 = generate(chat,prompt);
response2 = generate(chat,prompt);
testCase.verifyEqual(response1,response2);
end

function stopSequences(testCase)
chat = ollamaChat("mistral",TopProbabilityNum=1);
prompt = "Top-k sampling with k=1 returns a definite answer.";
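The new test leans on tail-free sampling becoming effectively greedy at z = 0. Here is a hedged sketch of the cutoff it implies, following the published description of tail-free sampling (second-order differences of the sorted probabilities) rather than any code in this repository; Ollama applies tfs_z server-side, and probs and z below are placeholder names:

    p  = sort(probs, "descend");        % token probabilities for one step
    d2 = abs(diff(p, 2));               % |second-order differences| of sorted probs
    w  = d2 ./ sum(d2);                 % normalize into a distribution
    k  = find(cumsum(w) > z, 1);        % first index whose cumulative mass passes z
    if isempty(k), k = numel(p); end    % z >= 1 keeps every token (unrestricted)
    head = p(1:k);                      % renormalize and sample from this head only
    % At z = 0 the cutoff fires at the first token, so generation is
    % effectively deterministic and the two responses should match.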
