diff --git a/+llms/+internal/callOllamaChatAPI.m b/+llms/+internal/callOllamaChatAPI.m
index 0bad15f..ce81780 100644
--- a/+llms/+internal/callOllamaChatAPI.m
+++ b/+llms/+internal/callOllamaChatAPI.m
@@ -29,6 +29,7 @@
     messages
     nvp.Temperature
     nvp.TopP
+    nvp.MinP
     nvp.TopK
     nvp.TailFreeSamplingZ
     nvp.StopSequences
@@ -103,6 +104,7 @@
 dict = dictionary();
 dict("Temperature") = "temperature";
 dict("TopP") = "top_p";
+dict("MinP") = "min_p";
 dict("TopK") = "top_k";
 dict("TailFreeSamplingZ") = "tfs_z";
 dict("StopSequences") = "stop";
diff --git a/ollamaChat.m b/ollamaChat.m
index 6d9e5a0..df736d9 100644
--- a/ollamaChat.m
+++ b/ollamaChat.m
@@ -23,6 +23,12 @@
 %                       words can appear in any particular place.
 %                       This is also known as top-p sampling.
 %
+%   MinP              - Minimum probability ratio for controlling the
+%                       diversity of the output. Default value is 0;
+%                       higher values imply that only the more likely
+%                       words can appear in any particular place.
+%                       This is also known as min-p sampling.
+%
 %   TopK              - Maximum number of most likely tokens that are
 %                       considered for output. Default is Inf, allowing
 %                       all tokens. Smaller values reduce diversity in
@@ -67,6 +73,7 @@
         Model              (1,1) string
         Endpoint           (1,1) string
         TopK               (1,1) {mustBeReal,mustBePositive} = Inf
+        MinP               (1,1) {llms.utils.mustBeValidTopP} = 0
         TailFreeSamplingZ  (1,1) {mustBeReal} = 1
     end
 
@@ -77,6 +84,7 @@
             systemPrompt        {llms.utils.mustBeTextOrEmpty} = []
             nvp.Temperature     {llms.utils.mustBeValidTemperature} = 1
             nvp.TopP            {llms.utils.mustBeValidTopP} = 1
+            nvp.MinP            {llms.utils.mustBeValidTopP} = 0
             nvp.TopK            (1,1) {mustBeReal,mustBePositive} = Inf
             nvp.StopSequences   {llms.utils.mustBeValidStop} = {}
             nvp.ResponseFormat  (1,1) string {mustBeMember(nvp.ResponseFormat,["text","json"])} = "text"
@@ -103,6 +111,7 @@
             this.ResponseFormat = nvp.ResponseFormat;
             this.Temperature = nvp.Temperature;
             this.TopP = nvp.TopP;
+            this.MinP = nvp.MinP;
             this.TopK = nvp.TopK;
             this.TailFreeSamplingZ = nvp.TailFreeSamplingZ;
             this.StopSequences = nvp.StopSequences;
@@ -146,7 +155,7 @@
             [text, message, response] = llms.internal.callOllamaChatAPI(...
                 this.Model, messagesStruct, ...
                 Temperature=this.Temperature, ...
-                TopP=this.TopP, TopK=this.TopK,...
+                TopP=this.TopP, MinP=this.MinP, TopK=this.TopK,...
                 TailFreeSamplingZ=this.TailFreeSamplingZ,...
                 StopSequences=this.StopSequences, MaxNumTokens=nvp.MaxNumTokens, ...
                 ResponseFormat=this.ResponseFormat,Seed=nvp.Seed, ...
diff --git a/tests/tollamaChat.m b/tests/tollamaChat.m
index 4320774..342e7df 100644
--- a/tests/tollamaChat.m
+++ b/tests/tollamaChat.m
@@ -50,7 +50,7 @@ function extremeTopK(testCase)
             %% This should work, and it does on some computers. On others, Ollama
             %% receives the parameter, but either Ollama or llama.cpp fails to
             %% honor it correctly.
-            testCase.assumeTrue(false,"disabled due to Ollama/llama.cpp not honoring parameter reliably");
+            % testCase.assumeTrue(false,"disabled due to Ollama/llama.cpp not honoring parameter reliably");
 
             % setting top-k to k=1 leaves no random choice,
             % so we expect to get a fixed response.
@@ -61,11 +61,27 @@ function extremeTopK(testCase)
             testCase.verifyEqual(response1,response2);
         end
 
+        function extremeMinP(testCase)
+            %% This should work, and it does on some computers. On others, Ollama
+            %% receives the parameter, but either Ollama or llama.cpp fails to
+            %% honor it correctly.
+            % testCase.assumeTrue(false,"disabled due to Ollama/llama.cpp not honoring parameter reliably");
+
+            % setting min-p to p=1 means only tokens with the same logit as
+            % the most likely one can be chosen, which will almost certainly
+            % only ever be one, so we expect to get a fixed response.
+            chat = ollamaChat("mistral",MinP=1);
+            prompt = "Min-p sampling with p=1 returns a definite answer.";
+            response1 = generate(chat,prompt);
+            response2 = generate(chat,prompt);
+            testCase.verifyEqual(response1,response2);
+        end
+
         function extremeTfsZ(testCase)
             %% This should work, and it does on some computers. On others, Ollama
             %% receives the parameter, but either Ollama or llama.cpp fails to
             %% honor it correctly.
-            testCase.assumeTrue(false,"disabled due to Ollama/llama.cpp not honoring parameter reliably");
+            % testCase.assumeTrue(false,"disabled due to Ollama/llama.cpp not honoring parameter reliably");
 
             % setting tfs_z to z=0 leaves no random choice, but degrades to
             % greedy sampling, so we expect to get a fixed response.
@@ -235,6 +251,16 @@ function queryModels(testCase)
                 "Value", -20, ...
                 "Error", "MATLAB:expectedNonnegative"), ...
             ...
+            "MinPTooLarge", struct( ...
+                "Property", "MinP", ...
+                "Value", 20, ...
+                "Error", "MATLAB:notLessEqual"), ...
+            ...
+            "MinPTooSmall", struct( ...
+                "Property", "MinP", ...
+                "Value", -20, ...
+                "Error", "MATLAB:expectedNonnegative"), ...
+            ...
             "WrongTypeStopSequences", struct( ...
                 "Property", "StopSequences", ...
                 "Value", 123, ...
@@ -329,6 +355,14 @@ function queryModels(testCase)
                 "Input",{{ "TopP" -20 }},...
                 "Error","MATLAB:expectedNonnegative"),...
             ...
+            "MinPTooLarge",struct( ...
+                "Input",{{ "MinP" 20 }},...
+                "Error","MATLAB:notLessEqual"),...
+            ...
+            "MinPTooSmall",struct( ...
+                "Input",{{ "MinP" -20 }},...
+                "Error","MATLAB:expectedNonnegative"),...
+            ...
             "WrongTypeStopSequences",struct( ...
                 "Input",{{ "StopSequences" 123}},...
                 "Error","MATLAB:validators:mustBeNonzeroLengthText"),...
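Usage sketch: with this change, min-p sampling can be switched on from MATLAB roughly as below. This is illustrative only; it assumes a running local Ollama server with the "mistral" model pulled (as the tests above do), and MinP=0.05 is just an example value, not a recommended default.

    % Keep only tokens whose probability is at least 5% of the most likely
    % token's probability at each sampling step (min-p sampling).
    chat = ollamaChat("mistral", MinP=0.05);
    response = generate(chat, "Write one sentence about sampling strategies.");
    disp(response)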