Skip to content

Commit

Permalink
feat: add progress functionality to the details page.
Browse files Browse the repository at this point in the history
  • Loading branch information
wangxinbiao committed Dec 21, 2023
1 parent 83e22da commit dd3cc71
Show file tree
Hide file tree
Showing 23 changed files with 1,210 additions and 88 deletions.
535 changes: 535 additions & 0 deletions apiserver/graph/generated/generated.go

Large diffs are not rendered by default.

12 changes: 12 additions & 0 deletions apiserver/graph/generated/models_gen.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 10 additions & 0 deletions apiserver/graph/schema/dataprocessing.gql
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,16 @@ query dataProcessDetails($input: DataProcessDetailsInput){
enable
zh_name
description
llm_config {
name
namespace
model
temperature
top_p
max_tokens
prompt_template
provider
}
preview {
file_name
content {
Expand Down
12 changes: 12 additions & 0 deletions apiserver/graph/schema/dataprocessing.graphqls
Original file line number Diff line number Diff line change
Expand Up @@ -180,10 +180,22 @@ type DataProcessConfigChildren {
enable: String
zh_name: String
description: String
llm_config: LLMConfig
preview: [DataProcessConfigpreView]
file_progress: [DataProcessConfigpreFileProgress]
}

# LLM configuration attached to a data-process config item.
# All fields are exposed as plain strings as returned by the
# data-processing service (including numeric settings such as
# temperature / top_p / max_tokens).
type LLMConfig {
  name: String
  # Kubernetes namespace the LLM resource lives in
  namespace: String
  model: String
  temperature: String
  top_p: String
  max_tokens: String
  prompt_template: String
  # model provider identifier
  provider: String
}

# 数据处理配置项预览
type DataProcessConfigpreView {
file_name: String
Expand Down
3 changes: 3 additions & 0 deletions data-processing/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ RUN wget https://github.com/explosion/spacy-models/releases/download/zh_core_web
&& pip3 install /tmp/zh_core_web_sm-3.5.0-py3-none-any.whl -i https://pypi.org/simple \
&& rm /tmp/zh_core_web_sm-3.5.0-py3-none-any.whl

ENV DEFAULT_CONFIG=arcadia-config
ENV POD_NAMESPACE=arcadia

EXPOSE 28888

ADD . /arcadia_app/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,13 @@
import io
import logging
import os
import ulid

import pandas as pd
from common import log_tag_const
from common.config import config
from data_store_clients import minio_store_client
from database_operate import data_process_db_operate
from database_operate import data_process_db_operate, data_process_document_db_operate
from file_handle import csv_handle, pdf_handle, word_handle
from kube import dataset_cr
from utils import file_utils
Expand Down Expand Up @@ -64,13 +65,31 @@ def text_manipulate(
file_name=file_name['name']
)

# 将文件信息存入data_process_task_document表中
for file_name in file_names:
# 新增文档处理进度信息
document_id = ulid.ulid()
document_insert_item = {
'id': document_id,
'task_id': id,
'file_name': file_name['name'],
'status': 'not_start',
'progress': '0',
'creator': req_json['creator']
}
data_process_document_db_operate.add(
document_insert_item,
pool=pool
)
file_name['document_id']=document_id

# 文件处理
task_status = 'process_complete'
# 存放每个文件对应的数据量
data_volumes_file = []

for item in file_names:
result = []
result = None

file_name = item['name']
file_extension = file_name.split('.')[-1].lower()
Expand All @@ -87,6 +106,7 @@ def text_manipulate(
chunk_size=req_json.get('chunk_size'),
chunk_overlap=req_json.get('chunk_overlap'),
file_name=file_name,
document_id=document_id,
support_type=support_type,
conn_pool=pool,
task_id=id,
Expand All @@ -99,13 +119,14 @@ def text_manipulate(
chunk_size=req_json.get('chunk_size'),
chunk_overlap=req_json.get('chunk_overlap'),
file_name=file_name,
document_id=document_id,
support_type=support_type,
conn_pool=pool,
task_id=id,
create_user=req_json['creator']
)

if result.get('status') != 200:
if result is None or result.get('status') != 200:
# 任务失败
task_status = 'process_fail'
break
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
# limitations under the License.


import ulid
from database_clients import postgresql_pool_client
from utils import date_time_utils

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
# Copyright 2023 KubeAGI.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import ulid
from database_clients import postgresql_pool_client
from utils import date_time_utils

def add(
    req_json,
    pool
):
    """Insert a new document-progress row into data_process_task_document.

    Args:
        req_json: dict carrying the business fields — id, file_name,
            status, progress, task_id — plus 'creator' for the audit trail.
        pool: connection pool handed to postgresql_pool_client.

    Returns:
        The result of postgresql_pool_client.execute_update.
    """
    timestamp = date_time_utils.now_str()
    operator = req_json['creator']
    program = '数据处理文件进度-新增'

    # Business fields come straight from the request payload.
    params = {
        field: req_json[field]
        for field in ('id', 'file_name', 'status', 'progress', 'task_id')
    }
    # A freshly inserted row gets identical create/update audit columns.
    params.update({
        'create_datetime': timestamp,
        'create_user': operator,
        'create_program': program,
        'update_datetime': timestamp,
        'update_user': operator,
        'update_program': program,
    })

    sql = """
        insert into public.data_process_task_document (
          id,
          file_name,
          status,
          progress,
          task_id,
          create_datetime,
          create_user,
          create_program,
          update_datetime,
          update_user,
          update_program
        )
        values (
          %(id)s,
          %(file_name)s,
          %(status)s,
          %(progress)s,
          %(task_id)s,
          %(create_datetime)s,
          %(create_user)s,
          %(create_program)s,
          %(update_datetime)s,
          %(update_user)s,
          %(update_program)s
        )
    """.strip()

    return postgresql_pool_client.execute_update(pool, sql, params)

def update_document_status_and_start_time(
    req_json,
    pool
):
    """Mark a document as started: set status, start_time and chunk_size by id.

    The caller-supplied start_time doubles as the row's update_datetime so
    that the audit column matches the moment processing began.
    """
    started_at = req_json['start_time']

    params = {
        'id': req_json['id'],
        'status': req_json['status'],
        'start_time': started_at,
        'chunk_size': req_json['chunk_size'],
        'update_datetime': started_at,
        'update_program': '文件开始处理-修改',
    }

    sql = """
        update public.data_process_task_document set
          status = %(status)s,
          start_time = %(start_time)s,
          chunk_size = %(chunk_size)s,
          update_datetime = %(update_datetime)s,
          update_program = %(update_program)s
        where
          id = %(id)s
    """.strip()

    return postgresql_pool_client.execute_update(pool, sql, params)

def update_document_status_and_end_time(
    req_json,
    pool
):
    """Update the status and end time with id.

    Fixes the copy-pasted docstring from the start-time variant, which
    wrongly said "start time". The caller-supplied end_time is also used
    as update_datetime so the audit column reflects completion time.

    Args:
        req_json: dict with 'id', 'status' and 'end_time'.
        pool: connection pool handed to postgresql_pool_client.

    Returns:
        The result of postgresql_pool_client.execute_update.
    """
    now = req_json['end_time']
    program = '文件处理完成-修改'

    params = {
        'id': req_json['id'],
        'status': req_json['status'],
        'end_time': now,
        'update_datetime': now,
        'update_program': program
    }

    sql = """
        update public.data_process_task_document set
          status = %(status)s,
          end_time = %(end_time)s,
          update_datetime = %(update_datetime)s,
          update_program = %(update_program)s
        where
          id = %(id)s
    """.strip()

    res = postgresql_pool_client.execute_update(pool, sql, params)
    return res

def update_document_progress(
    req_json,
    pool
):
    """Update a document's processing progress, located by id.

    Unlike the start/end-time updates, the audit timestamp here is the
    current time, since progress ticks carry no timestamp of their own.
    """
    params = {
        'id': req_json['id'],
        'progress': req_json['progress'],
        'update_datetime': date_time_utils.now_str(),
        'update_program': '文件处理进度-修改',
    }

    sql = """
        update public.data_process_task_document set
          progress = %(progress)s,
          update_datetime = %(update_datetime)s,
          update_program = %(update_program)s
        where
          id = %(id)s
    """.strip()

    return postgresql_pool_client.execute_update(pool, sql, params)

def list_file_by_task_id(
    req_json,
    pool
):
    """Fetch the per-file progress rows belonging to one processing task.

    Args:
        req_json: dict with 'task_id' of the parent task.
        pool: connection pool handed to postgresql_pool_client.

    Returns:
        The result of postgresql_pool_client.execute_query — one row per
        document with id, file_name, status, start/end time and progress.
    """
    sql = """
        select
          id,
          file_name,
          status,
          start_time,
          end_time,
          progress
        from
          public.data_process_task_document
        where
          task_id = %(task_id)s
    """.strip()

    return postgresql_pool_client.execute_query(
        pool,
        sql,
        {'task_id': req_json['task_id']}
    )
Loading

0 comments on commit dd3cc71

Please sign in to comment.