From 4a1e38ee6c2bb87a53dce1318377842fe7e7a442 Mon Sep 17 00:00:00 2001 From: bjwswang Date: Thu, 14 Mar 2024 08:19:24 +0000 Subject: [PATCH 1/2] chore: use alpine as base image and install packages to support pdf-text conversion Signed-off-by: bjwswang --- Dockerfile | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/Dockerfile b/Dockerfile index f097053c7..fd9f6405a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -21,12 +21,20 @@ COPY apiserver/ apiserver/ # Build RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -a -o manager main.go RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -o apiserver-bin apiserver/main.go -# Use distroless as minimal base image to package the manager binary -# Refer to https://github.com/GoogleContainerTools/distroless for more details -FROM gcr.io/distroless/static:nonroot + +# Use alpine as minimal base image to package the manager binary +FROM alpine:3.19.1 + +RUN apk update \ + # Install packages to support pdf to text conversion + && apk add --no-cache poppler-utils wv unrtf tidyhtml + WORKDIR / COPY --from=builder /workspace/manager . COPY --from=builder /workspace/apiserver-bin ./apiserver -USER 65532:65532 + +RUN adduser -D -u 1000 1000 + +USER 1000 ENTRYPOINT ["/manager"] From 4c04c87ea1ddd79d6e17c1303beed8063b22597d Mon Sep 17 00:00:00 2001 From: bjwswang Date: Thu, 14 Mar 2024 08:29:06 +0000 Subject: [PATCH 2/2] fix: remove tag filter in knowledgebase Signed-off-by: bjwswang --- controllers/base/knowledgebase_controller.go | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/controllers/base/knowledgebase_controller.go b/controllers/base/knowledgebase_controller.go index e42d891a3..0eaaa3544 100644 --- a/controllers/base/knowledgebase_controller.go +++ b/controllers/base/knowledgebase_controller.go @@ -499,15 +499,6 @@ func (r *KnowledgeBaseReconciler) reconcileFileGroup(ctx context.Context, log lo func (r *KnowledgeBaseReconciler) handleFile(ctx context.Context, log logr.Logger, file io.ReadCloser, fileName string, tags map[string]string, kb *arcadiav1alpha1.KnowledgeBase, store *arcadiav1alpha1.VectorStore, embedder *arcadiav1alpha1.Embedder) (err error) { log = log.WithValues("fileName", fileName, "tags", tags) - if tags == nil { - log.Info("file tags is nil, ignore") - return fmt.Errorf("file tags is nil, %w", errFileSkipped) - } - v, ok := tags[arcadiav1alpha1.ObjectTypeTag] - if !ok { - log.Info("file tags object type not found, ignore") - return fmt.Errorf("file tags object type not found, %w", errFileSkipped) - } if !embedder.Status.IsReady() { return errEmbedderNotReady } @@ -530,7 +521,8 @@ func (r *KnowledgeBaseReconciler) handleFile(ctx context.Context, log logr.Logge case ".txt": loader = documentloaders.NewText(dataReader) case ".csv": - if v == arcadiav1alpha1.ObjectTypeQA { + v, ok := tags[arcadiav1alpha1.ObjectTypeTag] + if ok && v == arcadiav1alpha1.ObjectTypeQA { // for qa csv,we skip the text splitter loader = pkgdocumentloaders.NewQACSV(dataReader, fileName) } else {