From 83e72f51c9d223db4c11df62268085084e920275 Mon Sep 17 00:00:00 2001 From: Abirdcfly Date: Mon, 29 Jan 2024 18:37:46 +0800 Subject: [PATCH] feat: bingsearch add more config Signed-off-by: Abirdcfly --- apiserver/pkg/application/application.go | 7 + pkg/appruntime/chain/common.go | 8 +- pkg/appruntime/tools/bingsearch/bing.go | 21 ++- pkg/appruntime/tools/bingsearch/bing_test.go | 27 +++- pkg/appruntime/tools/bingsearch/client.go | 147 ++++++++++++++++--- pkg/appruntime/tools/bingsearch/doc.go | 26 ++++ pkg/appruntime/tools/bingsearch/resp.go | 51 ++++++- 7 files changed, 248 insertions(+), 39 deletions(-) create mode 100644 pkg/appruntime/tools/bingsearch/doc.go diff --git a/apiserver/pkg/application/application.go b/apiserver/pkg/application/application.go index 43e839a30..f66c11d81 100644 --- a/apiserver/pkg/application/application.go +++ b/apiserver/pkg/application/application.go @@ -42,6 +42,7 @@ import ( func addCategory(app *v1alpha1.Application, category []*string) *v1alpha1.Application { if len(category) == 0 { + delete(app.Annotations, v1alpha1.AppCategoryAnnotationKey) return app } if app.Annotations == nil { @@ -413,6 +414,9 @@ func UpdateApplicationConfig(ctx context.Context, c dynamic.Interface, input gen Params: utils.MapAny2Str(v.Params), }) } + if len(input.Tools) == 0 { + qachain.Spec.Tools = make([]agent.Tool, 0) + } }, qachain); err != nil { return nil, err } @@ -455,6 +459,9 @@ func UpdateApplicationConfig(ctx context.Context, c dynamic.Interface, input gen Params: utils.MapAny2Str(v.Params), }) } + if len(input.Tools) == 0 { + llmchain.Spec.Tools = make([]agent.Tool, 0) + } }, llmchain); err != nil { return nil, err } diff --git a/pkg/appruntime/chain/common.go b/pkg/appruntime/chain/common.go index c8b6a30b2..ed9bcd5c6 100644 --- a/pkg/appruntime/chain/common.go +++ b/pkg/appruntime/chain/common.go @@ -126,8 +126,12 @@ func runTools(ctx context.Context, args map[string]any, tools []agent.Tool) map[ switch tool.Name { // nolint:gocritic case 
"bing": klog.V(3).Infof("tools call bing search: %s", input) - client := bingsearch.NewBingClient(tool.Params[bingsearch.ParamAPIKey]) - data, _, err := client.GetWebPages(ctx, input) + client, err := bingsearch.NewFromToolSpec(&tool) + if err != nil { + klog.Errorf("failed to create bing client: %w", err) + return + } + data, _, err := client.SearchGetDetailData(ctx, input) if err != nil { klog.Errorf("failed to call bing search tool: %w", err) return diff --git a/pkg/appruntime/tools/bingsearch/bing.go b/pkg/appruntime/tools/bingsearch/bing.go index bcc4d2f62..58a3a1ad8 100644 --- a/pkg/appruntime/tools/bingsearch/bing.go +++ b/pkg/appruntime/tools/bingsearch/bing.go @@ -18,6 +18,7 @@ package bingsearch import ( "context" + "strconv" "github.com/tmc/langchaingo/callbacks" "github.com/tmc/langchaingo/tools" @@ -28,6 +29,7 @@ import ( const ( ToolName = "Bing Search API" ParamAPIKey = "apiKey" + ParamCount = "count" ) type Tool struct { @@ -39,9 +41,22 @@ var _ tools.Tool = Tool{} // New creates a new bing search tool to search on internet func New(tool *v1alpha1.Tool) (*Tool, error) { - return &Tool{ - client: NewBingClient(tool.Params[ParamAPIKey]), - }, nil + client, err := NewFromToolSpec(tool) + return &Tool{client: client}, err +} + +func NewFromToolSpec(tool *v1alpha1.Tool) (*BingClient, error) { + var countVal int + apikey := tool.Params[ParamAPIKey] + count, ok := tool.Params[ParamCount] + if ok { + atoi, err := strconv.Atoi(count) + if err != nil { + return nil, err + } + countVal = atoi + } + return NewBingClient(WithAPIKey(apikey), WithCount(countVal)), nil } func (t Tool) Name() string { diff --git a/pkg/appruntime/tools/bingsearch/bing_test.go b/pkg/appruntime/tools/bingsearch/bing_test.go index 4741e164c..643ea6bfd 100644 --- a/pkg/appruntime/tools/bingsearch/bing_test.go +++ b/pkg/appruntime/tools/bingsearch/bing_test.go @@ -26,11 +26,11 @@ import ( "github.com/kubeagi/arcadia/api/app-node/agent/v1alpha1" ) -func TestBingSearch(t *testing.T) { +func 
TestBingSearchTool(t *testing.T) { t.Parallel() apikey := os.Getenv("BING_KEY") if apikey == "" { - t.Skip("Must set BING_SEARCH_V7_SUBSCRIPTION_KEY to run TestBingSearch") + t.Skip("Must set BING_KEY to run TestBingSearchTool") } rightTool := &v1alpha1.Tool{ Params: map[string]string{ @@ -49,3 +49,26 @@ func TestBingSearch(t *testing.T) { t.Logf("should get err:\n%s", err) require.Error(t, err) } + +func TestBingSearchClient(t *testing.T) { + t.Parallel() + apikey := os.Getenv("BING_KEY") + if apikey == "" { + t.Skip("Must set BING_KEY to run TestBingSearchClient") + } + client := NewBingClient(WithAPIKey(apikey)) + p, _, err := client.SearchGetDetailData(context.Background(), "langchain") + require.NoError(t, err) + require.Equal(t, defaultOptions().count, len(p)) + for i, _p := range p { + t.Logf("get format resp[%d]:\n%#v", i, _p) + } + // more count + client = NewBingClient(WithAPIKey(apikey), WithCount(100)) + p, _, err = client.SearchGetDetailData(context.Background(), "langchain") + require.NoError(t, err) + require.Equal(t, 100, len(p)) + for i, _p := range p { + t.Logf("get format resp[%d]:\n%#v", i, _p) + } +} diff --git a/pkg/appruntime/tools/bingsearch/client.go b/pkg/appruntime/tools/bingsearch/client.go index 5aef50296..fd400ae9a 100644 --- a/pkg/appruntime/tools/bingsearch/client.go +++ b/pkg/appruntime/tools/bingsearch/client.go @@ -23,60 +23,154 @@ import ( "net/http" "net/url" "os" + "strconv" "k8s.io/klog/v2" ) const ( - Endpoint = "https://api.bing.microsoft.com/v7.0/search?mkt=zh-CN&q=" + Endpoint = "https://api.bing.microsoft.com/v7.0/search" AuthHeaderKey = "Ocp-Apim-Subscription-Key" ) type BingClient struct { - apiKey string + options options } -func NewBingClient(apiKey string) *BingClient { - if apiKey == "" { - apiKey = os.Getenv("BING_KEY") +type options struct { + apiKey string + count int + responseFilter string + promote string + mkt string + answerCount int +} + +func defaultOptions() options { + return options{ + apiKey: 
os.Getenv("BING_KEY"), + count: 5, + responseFilter: "News,Webpages", + promote: "News,Webpages", + mkt: "zh-CN", + answerCount: 2, + } +} + +type Option func(*options) + +func WithAPIKey(apiKey string) Option { + return func(opts *options) { + if len(apiKey) != 0 { + opts.apiKey = apiKey + } } - return &BingClient{ - apiKey: apiKey, +} + +func WithCount(count int) Option { + return func(opts *options) { + if count > 0 { + opts.count = count + } } } +func NewBingClient(opts ...Option) *BingClient { + clientOptions := defaultOptions() + for _, opt := range opts { + opt(&clientOptions) + } + return &BingClient{clientOptions} +} + func (client *BingClient) Search(ctx context.Context, query string) (string, error) { - p, data, err := client.GetWebPages(ctx, query) + p, data, err := client.SearchGetDetailData(ctx, query) if len(p) > 0 { return FormatResults(p), nil } return data, err } -func (client *BingClient) GetWebPages(ctx context.Context, query string) (p []WebPage, data string, err error) { - queryURL := Endpoint + url.QueryEscape(query) - request, err := http.NewRequestWithContext(ctx, http.MethodGet, queryURL, nil) +// SearchGetDetailData will try to parse bing search list type webpages and news. +// Unlike the Search method, it returns a more detailed list of structures, not just a string. +// Note: only parse search list, not single source page. 
+func (client *BingClient) SearchGetDetailData(ctx context.Context, query string) (resp []WebPage, data string, err error) { + want := client.options.count + remains := want + // count max value is 50, ref: https://learn.microsoft.com/en-us/rest/api/cognitiveservices-bingsearch/bing-web-api-v7-reference#query-parameters + // offset default value is 0, same ref with above + count, offset := 50, 0 + resp = make([]WebPage, 0) + for remains > 0 { + if want < count { + count = want + } + data, err := client.getOnePage(ctx, query, count, offset) + if err != nil { + return nil, "", err + } + if len(data) == 0 { + break + } + resp = append(resp, data...) + offset += len(data) + remains = want - len(resp) + } + if len(resp) > want { + resp = resp[:want] + } + bytes, err := json.Marshal(resp) if err != nil { - return nil, "", fmt.Errorf("creating bingSearch request failed: %w", err) + return nil, "", fmt.Errorf("bingSearch json marshal resp, get err:%w", err) + } + klog.V(3).Infof("bingSearch finally get webpages: %#v", resp) + klog.V(5).Infof("bingSearch get resp: %s", string(bytes)) + return resp, string(bytes), nil +} + +func (client *BingClient) getOnePage(ctx context.Context, query string, count, offset int) (p []WebPage, err error) { + queryURL, err := url.Parse(Endpoint) + if err != nil { + return nil, err + } + q := queryURL.Query() + q.Set("q", query) + q.Set("count", strconv.Itoa(count)) + q.Set("mkt", client.options.mkt) + q.Set("promote", client.options.promote) + q.Set("answerCount", strconv.Itoa(client.options.answerCount)) + q.Set("offset", strconv.Itoa(offset)) + queryURL.RawQuery = q.Encode() + queryfullURL := queryURL.String() + // https://api.bing.microsoft.com/v7.0/search?answerCount=2&count=5&mkt=zh-CN&promote=News%2CWebpages&q=langchain&responseFilter=News%2C%20Webpages + // https://api.bing.microsoft.com/v7.0/search?answerCount=2&count=5&mkt=zh-CN&promote=News%2CWebpages&q=langchain&responseFilter=News,Webpages + // Note: The URL above will return a 
http 400 error, while the one below will not + queryfullURL += fmt.Sprintf("&responseFilter=%s", client.options.responseFilter) + + request, err := http.NewRequestWithContext(ctx, http.MethodGet, queryfullURL, nil) + if err != nil { + return nil, fmt.Errorf("creating bingSearch request failed: %w", err) } - request.Header.Add(AuthHeaderKey, client.apiKey) + request.Header.Add(AuthHeaderKey, client.options.apiKey) response, err := http.DefaultClient.Do(request) if err != nil { - return nil, "", fmt.Errorf("bingSearch[%s] get error: %w", queryURL, err) + return nil, fmt.Errorf("bingSearch[%s] get error: %w", queryURL, err) } defer response.Body.Close() code := response.StatusCode resp := &RespData{} if err := json.NewDecoder(response.Body).Decode(&resp); err != nil { - return nil, "", fmt.Errorf("bingSearch parse json resp get err:%w, http status code:%d", err, code) + return nil, fmt.Errorf("bingSearch parse json resp get err:%w, http status code:%d", err, code) } if resp.ErrorResp != nil { - return nil, "", fmt.Errorf("bingSearch get error resp from bing server: http status code:%d message:%s, code:%s", code, resp.ErrorResp.Message, resp.ErrorResp.Code) + return nil, fmt.Errorf("bingSearch get error resp from bing server: http status code:%d message:%s, code:%s", code, resp.ErrorResp.Message, resp.ErrorResp.Code) } - if len(resp.WebPages.Value) > 0 { - p = make([]WebPage, len(resp.WebPages.Value)) + webpagesLen := len(resp.WebPages.Value) + newsLen := len(resp.News.NewsValues) + p = make([]WebPage, webpagesLen+newsLen) + if webpagesLen > 0 { for i, v := range resp.WebPages.Value { v := v p[i] = WebPage{ @@ -86,13 +180,18 @@ func (client *BingClient) GetWebPages(ctx context.Context, query string) (p []We } } } - bytes, err := json.Marshal(resp) - if err != nil { - return nil, "", fmt.Errorf("bingSearch json marshal resp, get err:%w", err) + if newsLen > 0 { + for i, v := range resp.News.NewsValues { + v := v + p[i+webpagesLen] = WebPage{ + Title: v.Name, + 
Description: v.Description, + URL: v.URL, + } + } } - klog.V(3).Infof("bingSearch get webpages: %#v", p) - klog.V(5).Infof("bingSearch get resp: %s", string(bytes)) - return p, string(bytes), nil + klog.V(3).Infof("bingSearch query:%s TotalEstimatedMatches:%d count:%d offset:%d webpages: %#v", query, resp.WebPages.TotalEstimatedMatches, count, offset, p) + return p, nil } func FormatResults(vals []WebPage) (res string) { diff --git a/pkg/appruntime/tools/bingsearch/doc.go b/pkg/appruntime/tools/bingsearch/doc.go new file mode 100644 index 000000000..c4c8b5f64 --- /dev/null +++ b/pkg/appruntime/tools/bingsearch/doc.go @@ -0,0 +1,26 @@ +/* +Copyright 2024 KubeAGI. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +/* + +Package bingsearch is based on the [bing official bing-web-api-v7 search API](https://learn.microsoft.com/zh-cn/rest/api/cognitiveservices-bingsearch/bing-web-api-v7-reference), +implements bing search using a standard apikey, and also adapts the `github.com/tmc/langchaingo/tools.Tool` interface, +so it is convenient to use in a langchaingo agent directly. 
+ +you can create an apikey by https://portal.azure.com/#create/Microsoft.BingSearch +*/ + +package bingsearch diff --git a/pkg/appruntime/tools/bingsearch/resp.go b/pkg/appruntime/tools/bingsearch/resp.go index 850a46ed9..48a8a954a 100644 --- a/pkg/appruntime/tools/bingsearch/resp.go +++ b/pkg/appruntime/tools/bingsearch/resp.go @@ -19,12 +19,13 @@ package bingsearch import "time" type RespData struct { - Type string `json:"_type"` - QueryContext QueryContext `json:"queryContext"` - WebPages WebPages `json:"webPages"` - Entities Entities `json:"entities"` - Videos Videos `json:"videos"` - RankingResponse RankingResponse `json:"rankingResponse"` + Type string `json:"_type,omitempty"` + QueryContext QueryContext `json:"queryContext,omitempty"` + WebPages WebPages `json:"webPages,omitempty"` + Entities Entities `json:"entities,omitempty"` + Videos Videos `json:"videos,omitempty"` + News News `json:"news,omitempty"` + RankingResponse RankingResponse `json:"rankingResponse,omitempty"` ErrorResp *ErrorResp `json:"error,omitempty"` } @@ -152,8 +153,9 @@ type VideoValue struct { } type Thumbnail struct { - Width int `json:"width"` - Height int `json:"height"` + Width int `json:"width"` + Height int `json:"height"` + ContentURL string `json:"contentUrl,omitempty"` } type Publisher struct { @@ -163,3 +165,36 @@ type Publisher struct { type Creator struct { Name string `json:"name"` } + +type News struct { + ID string `json:"id"` + ReadLink string `json:"readLink"` + NewsValues []NewsValue `json:"value"` +} + +type NewsValue struct { + ContractualRules []ContractualRules `json:"contractualRules"` + Name string `json:"name"` + URL string `json:"url"` + Description string `json:"description"` + Provider []Provider `json:"provider"` + DatePublished time.Time `json:"datePublished"` + Category string `json:"category"` + Image Image `json:"image,omitempty"` +} + +type ContractualRules struct { + Type string `json:"_type"` + Text string `json:"text"` +} + +type Provider struct { + 
Type string `json:"_type"` + Name string `json:"name"` + Image Image `json:"image"` +} + +type Image struct { + ContentURL string `json:"contentUrl"` + Thumbnail Thumbnail `json:"thumbnail"` +}