Skip to content

Commit

Permalink
Merge pull request #755 from Lanture1064/dev
Browse files (browse the repository at this point in the history)
chore: add limit to context len in eval data gen
  • Loading branch information
bjwswang authored Feb 26, 2024
2 parents b16742d + aebd054 commit 26c6390
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 1 deletion.
3 changes: 3 additions & 0 deletions pkg/arctl/eval.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ func EvalGenTestDataset(home *string, namespace *string, appName *string) *cobra
outputDir string
mergeFileName string
merge bool
maxContentLength int
)

cmd := &cobra.Command{
Expand Down Expand Up @@ -179,6 +180,7 @@ func EvalGenTestDataset(home *string, namespace *string, appName *string) *cobra
evaluation.WithGroundTruthsColumn(groundTruthsColumn),
evaluation.WithOutput(output),
evaluation.WithWriteHeader(!merge || writeHeader),
evaluation.WithMaxContextLength(maxContentLength),
)
if err != nil {
return err
Expand All @@ -200,6 +202,7 @@ func EvalGenTestDataset(home *string, namespace *string, appName *string) *cobra
cmd.Flags().StringVar(&outputMethod, "output", "", "The way to output the generated dataset rows.We support two ways: \n - stdout: print row \n - csv: save row to csv file")
cmd.Flags().BoolVar(&merge, "merge", false, "Whether to merge all generated test data into a single file")
cmd.Flags().StringVar(&mergeFileName, "merge-file", "ragas.csv", "name of the merged document")
cmd.Flags().IntVar(&maxContentLength, "max-context-length", 512, "The maximum length of the context")

return cmd
}
Expand Down
25 changes: 24 additions & 1 deletion pkg/evaluation/evaluation.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,8 @@ type genOptions struct {
output Output

writeHeader bool
// maxContextLength is the limit of total lengths of all contexts
maxContextLength int
}

func defaultGenOptions() *genOptions {
Expand Down Expand Up @@ -121,6 +123,12 @@ func WithOutput(output Output) GenOptions {
}
}

// WithMaxContextLength returns a GenOptions that records maxContextLength
// in the maxContextLength field of the generator options it is applied to.
func WithMaxContextLength(maxContextLength int) GenOptions {
	setLimit := func(opts *genOptions) {
		opts.maxContextLength = maxContextLength
	}
	return setLimit
}

// GenOptions is a functional option: a function that receives a *genOptions
// and modifies it in place (for example, WithMaxContextLength sets its
// maxContextLength field).
type GenOptions func(*genOptions)

// Generate a test dataset from a file(csv)
Expand Down Expand Up @@ -160,8 +168,23 @@ func (eval *RagasDatasetGenerator) Generate(ctx context.Context, csvData io.Read

// handle context
contexts := make([]string, len(out.References))
contextLength := 0
for refIndex, reference := range out.References {
contexts[refIndex] = reference.SimpleString()
contextLength += len(reference.SimpleString())
refString := reference.SimpleString()
// If adding this context brings the total context length to or beyond maxContextLength, stop adding contexts.
if contextLength >= eval.options.maxContextLength {
if refIndex == 0 {
// If the first context alone reaches or exceeds maxContextLength, truncate it to its first maxContextLength bytes.
refString = reference.SimpleString()[:eval.options.maxContextLength]
klog.V(5).Infof("1st context length exceeds maxContentLength %d, using only the first %d words", eval.options.maxContextLength, eval.options.maxContextLength)
contexts[refIndex] = refString
} else {
klog.V(5).Infof("Context length exceeds maxContentLength %d, using only the first %d context(s)", eval.options.maxContextLength, refIndex)
}
break
}
contexts[refIndex] = refString
}
ragasRow.Contexts = contexts

Expand Down

0 comments on commit 26c6390

Please sign in to comment.