diff --git a/.pylintrc b/.pylintrc new file mode 100644 index 000000000..c6dcdc0e8 --- /dev/null +++ b/.pylintrc @@ -0,0 +1,634 @@ +[MAIN] + +# Analyse import fallback blocks. This can be used to support both Python 2 and +# 3 compatible code, which means that the block might have code that exists +# only in one or another interpreter, leading to false positives when analysed. +analyse-fallback-blocks=no + +# Clear in-memory caches upon conclusion of linting. Useful if running pylint +# in a server-like mode. +clear-cache-post-run=no + +# Load and enable all available extensions. Use --list-extensions to see a list +# all available extensions. +#enable-all-extensions= + +# In error mode, messages with a category besides ERROR or FATAL are +# suppressed, and no reports are done by default. Error mode is compatible with +# disabling specific errors. +#errors-only= + +# Always return a 0 (non-error) status code, even if lint errors are found. +# This is primarily useful in continuous integration scripts. +#exit-zero= + +# A comma-separated list of package or module names from where C extensions may +# be loaded. Extensions are loading into the active Python interpreter and may +# run arbitrary code. +extension-pkg-allow-list= + +# A comma-separated list of package or module names from where C extensions may +# be loaded. Extensions are loading into the active Python interpreter and may +# run arbitrary code. (This is an alternative name to extension-pkg-allow-list +# for backward compatibility.) +extension-pkg-whitelist= + +# Return non-zero exit code if any of these messages/categories are detected, +# even if score is above --fail-under value. Syntax same as enable. Messages +# specified are enabled, while categories only check already-enabled messages. +fail-on= + +# Specify a score threshold under which the program will exit with error. +fail-under=10 + +# Interpret the stdin as a python script, whose filename needs to be passed as +# the module_or_package argument. +#from-stdin= + +# Files or directories to be skipped. They should be base names, not paths. +ignore=CVS + +# Add files or directories matching the regular expressions patterns to the +# ignore-list. The regex matches against paths and can be in Posix or Windows +# format. Because '\\' represents the directory delimiter on Windows systems, +# it can't be used as an escape character. +ignore-paths= + +# Files or directories matching the regular expression patterns are skipped. +# The regex matches against base names, not paths. The default value ignores +# Emacs file locks +ignore-patterns=^\.# + +# List of module names for which member attributes should not be checked +# (useful for modules/projects where namespaces are manipulated during runtime +# and thus existing member attributes cannot be deduced by static analysis). It +# supports qualified module names, as well as Unix pattern matching. +ignored-modules= + +# Python code to execute, usually for sys.path manipulation such as +# pygtk.require(). +#init-hook= + +# Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the +# number of processors available to use, and will cap the count on Windows to +# avoid hangs. +jobs=1 + +# Control the amount of potential inferred values when inferring a single +# object. This can help the performance when dealing with large functions or +# complex, nested conditions. 
+limit-inference-results=100 + +# List of plugins (as comma separated values of python module names) to load, +# usually to register additional checkers. +load-plugins= + +# Pickle collected data for later comparisons. +persistent=yes + +# Minimum Python version to use for version dependent checks. Will default to +# the version used to run pylint. +py-version=3.11 + +# Discover python modules and packages in the file system subtree. +recursive=no + +# Add paths to the list of the source roots. Supports globbing patterns. The +# source root is an absolute path or a path relative to the current working +# directory used to determine a package namespace for modules located under the +# source root. +source-roots= + +# When enabled, pylint would attempt to guess common misconfiguration and emit +# user-friendly hints instead of false-positive error messages. +suggestion-mode=yes + +# Allow loading of arbitrary C extensions. Extensions are imported into the +# active Python interpreter and may run arbitrary code. +unsafe-load-any-extension=no + +# In verbose mode, extra non-checker-related info will be displayed. +#verbose= + + +[BASIC] + +# Naming style matching correct argument names. +argument-naming-style=snake_case + +# Regular expression matching correct argument names. Overrides argument- +# naming-style. If left empty, argument names will be checked with the set +# naming style. +#argument-rgx= + +# Naming style matching correct attribute names. +attr-naming-style=snake_case + +# Regular expression matching correct attribute names. Overrides attr-naming- +# style. If left empty, attribute names will be checked with the set naming +# style. +#attr-rgx= + +# Bad variable names which should always be refused, separated by a comma. +bad-names=foo, + bar, + baz, + toto, + tutu, + tata + +# Bad variable names regexes, separated by a comma. If names match any regex, +# they will always be refused +bad-names-rgxs= + +# Naming style matching correct class attribute names. +class-attribute-naming-style=any + +# Regular expression matching correct class attribute names. Overrides class- +# attribute-naming-style. If left empty, class attribute names will be checked +# with the set naming style. +#class-attribute-rgx= + +# Naming style matching correct class constant names. +class-const-naming-style=UPPER_CASE + +# Regular expression matching correct class constant names. Overrides class- +# const-naming-style. If left empty, class constant names will be checked with +# the set naming style. +#class-const-rgx= + +# Naming style matching correct class names. +class-naming-style=PascalCase + +# Regular expression matching correct class names. Overrides class-naming- +# style. If left empty, class names will be checked with the set naming style. +#class-rgx= + +# Naming style matching correct constant names. +const-naming-style=UPPER_CASE + +# Regular expression matching correct constant names. Overrides const-naming- +# style. If left empty, constant names will be checked with the set naming +# style. +#const-rgx= + +# Minimum line length for functions/classes that require docstrings, shorter +# ones are exempt. +docstring-min-length=-1 + +# Naming style matching correct function names. +function-naming-style=snake_case + +# Regular expression matching correct function names. Overrides function- +# naming-style. If left empty, function names will be checked with the set +# naming style. +#function-rgx= + +# Good variable names which should always be accepted, separated by a comma. 
+good-names=i, + j, + k, + ex, + Run, + _ + +# Good variable names regexes, separated by a comma. If names match any regex, +# they will always be accepted +good-names-rgxs= + +# Include a hint for the correct naming format with invalid-name. +include-naming-hint=no + +# Naming style matching correct inline iteration names. +inlinevar-naming-style=any + +# Regular expression matching correct inline iteration names. Overrides +# inlinevar-naming-style. If left empty, inline iteration names will be checked +# with the set naming style. +#inlinevar-rgx= + +# Naming style matching correct method names. +method-naming-style=snake_case + +# Regular expression matching correct method names. Overrides method-naming- +# style. If left empty, method names will be checked with the set naming style. +#method-rgx= + +# Naming style matching correct module names. +module-naming-style=snake_case + +# Regular expression matching correct module names. Overrides module-naming- +# style. If left empty, module names will be checked with the set naming style. +#module-rgx= + +# Colon-delimited sets of names that determine each other's naming style when +# the name regexes allow several styles. +name-group= + +# Regular expression which should only match function or class names that do +# not require a docstring. +no-docstring-rgx=^_ + +# List of decorators that produce properties, such as abc.abstractproperty. Add +# to this list to register other decorators that produce valid properties. +# These decorators are taken in consideration only for invalid-name. +property-classes=abc.abstractproperty + +# Regular expression matching correct type alias names. If left empty, type +# alias names will be checked with the set naming style. +#typealias-rgx= + +# Regular expression matching correct type variable names. If left empty, type +# variable names will be checked with the set naming style. +#typevar-rgx= + +# Naming style matching correct variable names. +variable-naming-style=snake_case + +# Regular expression matching correct variable names. Overrides variable- +# naming-style. If left empty, variable names will be checked with the set +# naming style. +#variable-rgx= + + +[CLASSES] + +# Warn about protected attribute access inside special methods +check-protected-access-in-special-methods=no + +# List of method names used to declare (i.e. assign) instance attributes. +defining-attr-methods=__init__, + __new__, + setUp, + asyncSetUp, + __post_init__ + +# List of member names, which should be excluded from the protected access +# warning. +exclude-protected=_asdict,_fields,_replace,_source,_make,os._exit + +# List of valid names for the first argument in a class method. +valid-classmethod-first-arg=cls + +# List of valid names for the first argument in a metaclass class method. +valid-metaclass-classmethod-first-arg=mcs + + +[DESIGN] + +# List of regular expressions of class ancestor names to ignore when counting +# public methods (see R0903) +exclude-too-few-public-methods= + +# List of qualified class names to ignore when counting class parents (see +# R0901) +ignored-parents= + +# Maximum number of arguments for function / method. +max-args=5 + +# Maximum number of attributes for a class (see R0902). +max-attributes=7 + +# Maximum number of boolean expressions in an if statement (see R0916). +max-bool-expr=5 + +# Maximum number of branch for function / method body. +max-branches=12 + +# Maximum number of locals for function / method body. +max-locals=15 + +# Maximum number of parents for a class (see R0901). 
+max-parents=7
+
+# Maximum number of public methods for a class (see R0904).
+max-public-methods=20
+
+# Maximum number of return / yield for function / method body.
+max-returns=6
+
+# Maximum number of statements in function / method body.
+max-statements=50
+
+# Minimum number of public methods for a class (see R0903).
+min-public-methods=2
+
+
+[EXCEPTIONS]
+
+# Exceptions that will emit a warning when caught.
+overgeneral-exceptions=builtins.BaseException,builtins.Exception
+
+
+[FORMAT]
+
+# Expected format of line ending, e.g. empty (any line ending), LF or CRLF.
+expected-line-ending-format=
+
+# Regexp for a line that is allowed to be longer than the limit.
+ignore-long-lines=^\s*(# )?<?https?://\S+>?$
+
+# Number of spaces of indent required inside a hanging or continued line.
+indent-after-paren=4
+
+# String used as indentation unit. This is usually "    " (4 spaces) or "\t" (1
+# tab).
+indent-string='    '
+
+# Maximum number of characters on a single line.
+max-line-length=100
+
+# Maximum number of lines in a module.
+max-module-lines=1000
+
+# Allow the body of a class to be on the same line as the declaration if body
+# contains single statement.
+single-line-class-stmt=no
+
+# Allow the body of an if to be on the same line as the test if there is no
+# else.
+single-line-if-stmt=no
+
+
+[IMPORTS]
+
+# List of modules that can be imported at any level, not just the top level
+# one.
+allow-any-import-level=
+
+# Allow explicit reexports by alias from a package __init__.
+allow-reexport-from-package=no
+
+# Allow wildcard imports from modules that define __all__.
+allow-wildcard-with-all=no
+
+# Deprecated modules which should not be used, separated by a comma.
+deprecated-modules=
+
+# Output a graph (.gv or any supported image format) of external dependencies
+# to the given file (report RP0402 must not be disabled).
+ext-import-graph=
+
+# Output a graph (.gv or any supported image format) of all (i.e. internal and
+# external) dependencies to the given file (report RP0402 must not be
+# disabled).
+import-graph=
+
+# Output a graph (.gv or any supported image format) of internal dependencies
+# to the given file (report RP0402 must not be disabled).
+int-import-graph=
+
+# Force import order to recognize a module as part of the standard
+# compatibility libraries.
+known-standard-library=
+
+# Force import order to recognize a module as part of a third party library.
+known-third-party=enchant
+
+# Couples of modules and preferred modules, separated by a comma.
+preferred-modules=
+
+
+[LOGGING]
+
+# The type of string formatting that logging methods do. `old` means using %
+# formatting, `new` is for `{}` formatting.
+logging-format-style=old
+
+# Logging modules to check that the string format arguments are in logging
+# function parameter format.
+logging-modules=logging
+
+
+[MESSAGES CONTROL]
+
+# Only show warnings with the listed confidence levels. Leave empty to show
+# all. Valid levels: HIGH, CONTROL_FLOW, INFERENCE, INFERENCE_FAILURE,
+# UNDEFINED.
+confidence=HIGH,
+           CONTROL_FLOW,
+           INFERENCE,
+           INFERENCE_FAILURE,
+           UNDEFINED
+
+# Disable the message, report, category or checker with the given id(s). You
+# can either give multiple identifiers separated by comma (,) or put this
+# option multiple times (only on the command line, not in the configuration
+# file where it should appear only once). You can also use "--disable=all" to
+# disable everything first and then re-enable specific checks.
For example, if +# you want to run only the similarities checker, you can use "--disable=all +# --enable=similarities". If you want to run only the classes checker, but have +# no Warning level messages displayed, use "--disable=all --enable=classes +# --disable=W". +disable=raw-checker-failed, + bad-inline-option, + locally-disabled, + file-ignored, + suppressed-message, + useless-suppression, + deprecated-pragma, + use-symbolic-message-instead, + use-implicit-booleaness-not-comparison-to-string, + use-implicit-booleaness-not-comparison-to-zero + +# Enable the message, report, category or checker with the given id(s). You can +# either give multiple identifier separated by comma (,) or put this option +# multiple time (only on the command line, not in the configuration file where +# it should appear only once). See also the "--disable" option for examples. +enable= + + +[METHOD_ARGS] + +# List of qualified names (i.e., library.method) which require a timeout +# parameter e.g. 'requests.api.get,requests.api.post' +timeout-methods=requests.api.delete,requests.api.get,requests.api.head,requests.api.options,requests.api.patch,requests.api.post,requests.api.put,requests.api.request + + +[MISCELLANEOUS] + +# List of note tags to take in consideration, separated by a comma. +notes=FIXME, + XXX, + TODO + +# Regular expression of note tags to take in consideration. +notes-rgx= + + +[REFACTORING] + +# Maximum number of nested blocks for function / method body +max-nested-blocks=5 + +# Complete name of functions that never returns. When checking for +# inconsistent-return-statements if a never returning function is called then +# it will be considered as an explicit return statement and no message will be +# printed. +never-returning-functions=sys.exit,argparse.parse_error + + +[REPORTS] + +# Python expression which should return a score less than or equal to 10. You +# have access to the variables 'fatal', 'error', 'warning', 'refactor', +# 'convention', and 'info' which contain the number of messages in each +# category, as well as 'statement' which is the total number of statements +# analyzed. This score is used by the global evaluation report (RP0004). +evaluation=max(0, 0 if fatal else 10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)) + +# Template used to display messages. This is a python new-style format string +# used to format the message information. See doc for all details. +msg-template= + +# Set the output format. Available formats are: text, parseable, colorized, +# json2 (improved json format), json (old json format) and msvs (visual +# studio). You can also give a reporter class, e.g. +# mypackage.mymodule.MyReporterClass. +#output-format= + +# Tells whether to display a full report or only the messages. +reports=no + +# Activate the evaluation score. +score=yes + + +[SIMILARITIES] + +# Comments are removed from the similarity computation +ignore-comments=yes + +# Docstrings are removed from the similarity computation +ignore-docstrings=yes + +# Imports are removed from the similarity computation +ignore-imports=yes + +# Signatures are removed from the similarity computation +ignore-signatures=yes + +# Minimum lines number of a similarity. +min-similarity-lines=4 + + +[SPELLING] + +# Limits count of emitted suggestions for spelling mistakes. +max-spelling-suggestions=4 + +# Spelling dictionary name. No available dictionaries : You need to install +# both the python package and the system dependency for enchant to work. 
+spelling-dict= + +# List of comma separated words that should be considered directives if they +# appear at the beginning of a comment and should not be checked. +spelling-ignore-comment-directives=fmt: on,fmt: off,noqa:,noqa,nosec,isort:skip,mypy: + +# List of comma separated words that should not be checked. +spelling-ignore-words= + +# A path to a file that contains the private dictionary; one word per line. +spelling-private-dict-file= + +# Tells whether to store unknown words to the private dictionary (see the +# --spelling-private-dict-file option) instead of raising a message. +spelling-store-unknown-words=no + + +[STRING] + +# This flag controls whether inconsistent-quotes generates a warning when the +# character used as a quote delimiter is used inconsistently within a module. +check-quote-consistency=no + +# This flag controls whether the implicit-str-concat should generate a warning +# on implicit string concatenation in sequences defined over several lines. +check-str-concat-over-line-jumps=no + + +[TYPECHECK] + +# List of decorators that produce context managers, such as +# contextlib.contextmanager. Add to this list to register other decorators that +# produce valid context managers. +contextmanager-decorators=contextlib.contextmanager + +# List of members which are set dynamically and missed by pylint inference +# system, and so shouldn't trigger E1101 when accessed. Python regular +# expressions are accepted. +generated-members= + +# Tells whether to warn about missing members when the owner of the attribute +# is inferred to be None. +ignore-none=yes + +# This flag controls whether pylint should warn about no-member and similar +# checks whenever an opaque object is returned when inferring. The inference +# can return multiple potential results while evaluating a Python object, but +# some branches might not be evaluated, which results in partial inference. In +# that case, it might be useful to still emit no-member and other checks for +# the rest of the inferred objects. +ignore-on-opaque-inference=yes + +# List of symbolic message names to ignore for Mixin members. +ignored-checks-for-mixins=no-member, + not-async-context-manager, + not-context-manager, + attribute-defined-outside-init + +# List of class names for which member attributes should not be checked (useful +# for classes with dynamically set attributes). This supports the use of +# qualified names. +ignored-classes=optparse.Values,thread._local,_thread._local,argparse.Namespace + +# Show a hint with possible names when a member name was not found. The aspect +# of finding the hint is based on edit distance. +missing-member-hint=yes + +# The minimum edit distance a name should have in order to be considered a +# similar match for a missing member name. +missing-member-hint-distance=1 + +# The total number of similar names that should be taken in consideration when +# showing a hint for a missing member. +missing-member-max-choices=1 + +# Regex pattern to define which classes are considered mixins. +mixin-class-rgx=.*[Mm]ixin + +# List of decorators that change the signature of a decorated function. +signature-mutators= + + +[VARIABLES] + +# List of additional names supposed to be defined in builtins. Remember that +# you should avoid defining new builtins when possible. +additional-builtins= + +# Tells whether unused global variables should be treated as a violation. 
+allow-global-unused-variables=yes
+
+# List of names allowed to shadow builtins
+allowed-redefined-builtins=
+
+# List of strings which can identify a callback function by name. A callback
+# name must start or end with one of those strings.
+callbacks=cb_,
+          _cb
+
+# A regular expression matching the name of dummy variables (i.e. expected to
+# not be used).
+dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_
+
+# Argument names that match this expression will be ignored.
+ignored-argument-names=_.*|^ignored_|^unused_
+
+# Tells whether we should check for unused import in __init__ files.
+init-import=no
+
+# List of qualified module names which can have objects that can redefine
+# builtins.
+redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io
diff --git a/Makefile b/Makefile
index c6bb577bf..841ccbcc2 100644
--- a/Makefile
+++ b/Makefile
@@ -252,4 +252,12 @@ arctl: fmt vet ## Build manager binary.
 gql-gen:
 	@go run github.com/99designs/gqlgen@v0.17.40 generate
 build-graphql-server: gql-gen
-	@CGO_ENABLED=0 GOOS=linux go build -o bin/graphql-server graphql-server/go-server/main.go
\ No newline at end of file
+	@CGO_ENABLED=0 GOOS=linux go build -o bin/graphql-server graphql-server/go-server/main.go
+
+
+# Commands for Data-Processing
+DATA_PROCESSING_IMAGE ?= kubebb/dp-base
+
+.PHONY: docker-build-dp-base
+docker-build-dp-base:
+	docker build -f ./data-process/Dockerfile.base -t $(DATA_PROCESSING_IMAGE):$(VERSION) ./data-process/
diff --git a/data-process/Dockerfile.base b/data-process/Dockerfile.base
new file mode 100644
index 000000000..4ab63694d
--- /dev/null
+++ b/data-process/Dockerfile.base
@@ -0,0 +1,14 @@
+FROM python:3.10.13-slim
+
+ENV TZ=Asia/Shanghai
+
+RUN sed -i 's/deb.debian.org/mirrors.tuna.tsinghua.edu.cn/g' /etc/apt/sources.list.d/debian.sources
+
+RUN export DEBIAN_FRONTEND=noninteractive \
+    && apt-get update \
+    && apt-get install -y tzdata \
+    && ln -fs /usr/share/zoneinfo/Asia/Shanghai /etc/localtime \
+    && dpkg-reconfigure --frontend noninteractive tzdata \
+    && apt-get install -y python3-distutils curl python3-pip
+
+WORKDIR /happy_work_space
\ No newline at end of file
diff --git a/data-process/README.md b/data-process/README.md
index ab0485585..e248bafb9 100644
--- a/data-process/README.md
+++ b/data-process/README.md
@@ -1,2 +1,49 @@
-# Data Process
-The current documentation is only available in Chinese. Please refer to the content in .zh.md for specific details.
\ No newline at end of file
+# Data Processing
+
+## Main Features of the Current Version
+
+Data Processing handles data drawn from MinIO, databases, Web APIs, and similar sources. The supported data types include:
+- txt
+- json
+- doc
+- html
+- excel
+- csv
+- pdf
+- markdown
+- ppt
+
+### Current Text Data Processing
+
+Processing text data currently includes cleaning abnormal data, filtering, de-duplication, and anonymization.
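+
+Once the service is running (see Local Development below), you can exercise it over its HTTP API. A minimal sketch: the port matches the default in `server.py`, while the bucket name, folder prefix, and `support_type` payload are placeholders; the accepted processing types come from `transform.text.support_type.support_types`:
+
+```shell
+# List the text processing types the service supports
+curl -X POST http://localhost:28888/text-process-type
+
+# Clean the files stored under a MinIO prefix (placeholder values)
+curl -X POST http://localhost:28888/text-manipulate \
+  -H 'Content-Type: application/json' \
+  -d '{"bucket_name": "my-bucket", "folder_prefix": "dataset", "support_type": []}'
+```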
+ +## Design + +![Design](../assets/data_process.drawio.png) + +## Local Development +### Software Requirements + +Before setting up the local data-process environment, please make sure the following software is installed: + +- Python 3.10.x + +### Environment Setup + +Install the Python dependencies in the requirements.txt file + +### Running + +Run the server.py file in the data_manipulation directory \ No newline at end of file diff --git a/data-process/data_manipulation/common/config.py b/data-process/data_manipulation/common/config.py index c086fba28..6a21b72a8 100644 --- a/data-process/data_manipulation/common/config.py +++ b/data-process/data_manipulation/common/config.py @@ -21,4 +21,4 @@ minio_secure = os.getenv('MINIO_SECURE', False) # zhipuai api_key -zhipuai_api_key = os.getenv('ZHIPUAI_API_KEY', 'xxxxx') \ No newline at end of file +zhipuai_api_key = os.getenv('ZHIPUAI_API_KEY', 'xxxxx') diff --git a/data-process/data_manipulation/file_handle/csv_handle.py b/data-process/data_manipulation/file_handle/csv_handle.py index e689bdc99..d1a8de73f 100644 --- a/data-process/data_manipulation/file_handle/csv_handle.py +++ b/data-process/data_manipulation/file_handle/csv_handle.py @@ -24,20 +24,13 @@ ### import csv -import ulid -import pandas as pd -import os import logging +import os -from transform.text import ( - clean_transform, - privacy_transform -) - -from utils import ( - date_time_utils, - file_utils -) +import pandas as pd +import ulid +from transform.text import clean_transform, privacy_transform +from utils import date_time_utils, file_utils logger = logging.getLogger('csv_handle') @@ -51,6 +44,8 @@ # content: # 1) 基本功能实现 ### + + async def text_manipulate(opt={}): logger.info("csv text manipulate!") @@ -59,7 +54,7 @@ async def text_manipulate(opt={}): 处理某条数据时,如果某个方式(比如:去除不可见字符)处理失败了,则直接结束,不在处理,整个文件都视作处理失败 """ - + try: file_name = opt['file_name'] support_type = opt['support_type'] @@ -87,7 +82,6 @@ async def text_manipulate(opt={}): text_data = clean_result['data'] - # 将清洗后的文件保存为final new_file_name = await file_utils.get_file_name({ 'file_name': file_name, @@ -159,7 +153,7 @@ async def data_clean(opt={}): }) logger.info("csv text data clean stop!") - + return { 'status': 200, 'message': '', @@ -179,8 +173,8 @@ async def data_clean(opt={}): ### async def remove_invisible_characters(opt={}): return await clean_transform.remove_invisible_characters({ - 'text': opt['text'] - }) + 'text': opt['text'] + }) ### # 去除邮箱地址 @@ -192,10 +186,12 @@ async def remove_invisible_characters(opt={}): # content: # 1) 基本功能实现 ### + + async def remove_email(opt={}): return await privacy_transform.remove_email({ - 'text': opt['text'] - }) + 'text': opt['text'] + }) ### # 将数据存到CSV中 @@ -207,6 +203,8 @@ async def remove_email(opt={}): # content: # 1) 基本功能实现 ### + + async def save_csv(opt={}): file_name = opt['file_name'] phase_value = opt['phase_value'] diff --git a/data-process/data_manipulation/server.py b/data-process/data_manipulation/server.py index 0059e6340..bd652a425 100644 --- a/data-process/data_manipulation/server.py +++ b/data-process/data_manipulation/server.py @@ -24,36 +24,22 @@ # 1) 基本功能实现 ### -from sanic import Sanic -from sanic.response import json, text -from sanic_cors import CORS, cross_origin -from sanic.exceptions import NotFound - import asyncio -import aiohttp - -import sys - import logging - -from service import ( - minio_store_process_service -) - -from transform.text import ( - support_type -) - -from utils import ( - log_utils -) +from sanic import Sanic +from 
sanic.response import json +from sanic_cors import CORS +from service import minio_store_process_service +from transform.text import support_type +from utils import log_utils ### # 初始化日志配置 ### log_utils.init_config({ - 'source_type': 'manipulate_server' + 'source_type': 'manipulate_server', + 'log_dir': "log" }) @@ -62,7 +48,7 @@ app = Sanic(name='data_manipulate') CORS(app) -app.config['REQUEST_MAX_SIZE'] = 1024 * 1024 * 1024 # 1G +app.config['REQUEST_MAX_SIZE'] = 1024 * 1024 * 1024 # 1G app.config['REQUEST_TIMEOUT'] = 60 * 60 * 60 app.config['RESPONSE_TIMEOUT'] = 60 * 60 * 60 app.config['KEEP_ALIVE_TIMEOUT'] = 60 * 60 * 60 @@ -77,6 +63,8 @@ # content: # 1) 基本功能实现 ### + + @app.route('text-manipulate', methods=['POST']) async def text_manipulate(request): """ @@ -87,7 +75,7 @@ async def text_manipulate(request): file_path: 文本路径 Returns: - + """ await asyncio.create_task( @@ -110,13 +98,14 @@ async def text_manipulate(request): # content: # 1) 基本功能实现 ### + + @app.route('text-process-type', methods=['POST']) async def text_process_type(request): """ 获取数据处理支持的类型 Args: - Returns: json: 支持的类型 @@ -127,11 +116,10 @@ async def text_process_type(request): 'message': '', 'data': support_type.support_types }) - if __name__ == '__main__': app.run(host='0.0.0.0', port=28888, access_log=True, debug=True, - workers=2) \ No newline at end of file + workers=2) diff --git a/data-process/data_manipulation/service/minio_store_process_service.py b/data-process/data_manipulation/service/minio_store_process_service.py index beba735ba..c506e0885 100644 --- a/data-process/data_manipulation/service/minio_store_process_service.py +++ b/data-process/data_manipulation/service/minio_store_process_service.py @@ -23,24 +23,17 @@ # 1) 基本功能实现 ### -from sanic.response import json, raw -from minio import Minio -from minio.commonconfig import Tags -from minio.error import S3Error -import pandas as pd import io -import os - import logging +import os -from file_handle import ( - csv_handle -) - -from utils import ( - minio_utils, - file_utils -) +import pandas as pd +from file_handle import csv_handle +from minio import Minio +from minio.commonconfig import Tags +from minio.error import S3Error +from sanic.response import json, raw +from utils import file_utils, minio_utils logger = logging.getLogger('minio_store_process_service') @@ -54,6 +47,8 @@ # content: # 1) 基本功能实现 ### + + async def text_manipulate(request): request_json = request.json @@ -63,7 +58,7 @@ async def text_manipulate(request): # create minio client minio_client = await minio_utils.create_client() - + # 查询存储桶下的所有对象 objects = minio_client.list_objects(bucket_name, prefix=folder_prefix) @@ -81,9 +76,9 @@ async def text_manipulate(request): if file_extension in ['csv']: # 处理CSV文件 result = await csv_handle.text_manipulate({ - 'file_name': item, - 'support_type': support_type - }) + 'file_name': item, + 'support_type': support_type + }) # 将清洗后的文件上传到MinIO中 # 上传middle文件夹下的文件,并添加tag @@ -112,7 +107,7 @@ async def text_manipulate(request): for item in file_names: remove_file_path = await file_utils.get_temp_file_path() await file_utils.delete_file(remove_file_path + 'original/' + item) - + return json({ 'status': 200, 'message': '', @@ -129,6 +124,8 @@ async def text_manipulate(request): # content: # 1) 基本功能实现 ### + + async def download(opt={}): objects = opt['objects'] minio_client = opt['minio_client'] @@ -160,17 +157,21 @@ async def download(opt={}): # content: # 1) 基本功能实现 ### + + async def upload_files_to_minio_with_tags(minio_client, local_folder, minio_bucket, 
minio_prefix="", tags=None): for root, dirs, files in os.walk(local_folder): for file in files: local_file_path = os.path.join(root, file) - minio_object_name = os.path.join(minio_prefix, os.path.relpath(local_file_path, local_folder)) - + minio_object_name = os.path.join( + minio_prefix, os.path.relpath(local_file_path, local_folder)) + try: - minio_client.fput_object(minio_bucket, minio_object_name, local_file_path, tags=tags) - + minio_client.fput_object( + minio_bucket, minio_object_name, local_file_path, tags=tags) + # 删除本地文件 await file_utils.delete_file(local_file_path) except S3Error as e: - logger.error(f"Error uploading {minio_object_name} to {minio_bucket}: {e}") - + logger.error( + f"Error uploading {minio_object_name} to {minio_bucket}: {e}") diff --git a/data-process/data_manipulation/transform/text/QA_transform.py b/data-process/data_manipulation/transform/text/QA_transform.py index 81391a5a4..85a607bc1 100644 --- a/data-process/data_manipulation/transform/text/QA_transform.py +++ b/data-process/data_manipulation/transform/text/QA_transform.py @@ -12,12 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -import zhipuai import re -from common import ( - config -) +import zhipuai +from common import config ### # QA生成 @@ -30,6 +28,7 @@ # 1) 基本功能实现 ### + async def generate_QA(opt={}): zhipuai.api_key = config.zhipuai_api_key @@ -76,7 +75,7 @@ async def generate_QA(opt={}): # 1) 基本功能实现 ### async def formatSplitText(text): - + pattern = re.compile(r'Q\d+:(\s*)(.*?)(\s*)A\d+:(\s*)([\s\S]*?)(?=Q|$)') # 移除换行符 @@ -93,4 +92,4 @@ async def formatSplitText(text): 'a': a }) - return result \ No newline at end of file + return result diff --git a/data-process/data_manipulation/transform/text/clean_transform.py b/data-process/data_manipulation/transform/text/clean_transform.py index 5e50dc3c0..1fa729891 100644 --- a/data-process/data_manipulation/transform/text/clean_transform.py +++ b/data-process/data_manipulation/transform/text/clean_transform.py @@ -26,6 +26,7 @@ import re + ### # 去除不可见字符 # @author: wangxinbiao @@ -38,9 +39,10 @@ ### async def remove_invisible_characters(opt={}): text = opt['text'] - + try: - clean_text = re.sub(r'[\x00-\x1F\x7F-\x9F\xAD\r\n\t\b\x0B\x1C\x1D\x1E]', '', text) + clean_text = re.sub( + r'[\x00-\x1F\x7F-\x9F\xAD\r\n\t\b\x0B\x1C\x1D\x1E]', '', text) return { 'status': 200, 'message': '', diff --git a/data-process/data_manipulation/transform/text/duplicates_transform.py b/data-process/data_manipulation/transform/text/duplicates_transform.py index 69962ef96..91739e76e 100644 --- a/data-process/data_manipulation/transform/text/duplicates_transform.py +++ b/data-process/data_manipulation/transform/text/duplicates_transform.py @@ -25,4 +25,3 @@ ### async def remove_duplicates(opt={}): return opt['text'] - diff --git a/data-process/data_manipulation/transform/text/filtration_transform.py b/data-process/data_manipulation/transform/text/filtration_transform.py index ba692eff0..b67f27f16 100644 --- a/data-process/data_manipulation/transform/text/filtration_transform.py +++ b/data-process/data_manipulation/transform/text/filtration_transform.py @@ -26,6 +26,7 @@ import re + ### # 检查文档的词数目 # @author: wangxinbiao @@ -38,5 +39,3 @@ ### async def word_count(opt={}): return 49 - - diff --git a/data-process/data_manipulation/transform/text/privacy_transform.py b/data-process/data_manipulation/transform/text/privacy_transform.py index 6b3e6df1d..f5277c285 100644 --- 
a/data-process/data_manipulation/transform/text/privacy_transform.py +++ b/data-process/data_manipulation/transform/text/privacy_transform.py @@ -26,6 +26,7 @@ import re + ### # 去除邮箱地址 # @author: wangxinbiao @@ -38,7 +39,7 @@ ### async def remove_email(opt={}): text = opt['text'] - + try: email_pattern = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}' @@ -50,4 +51,3 @@ async def remove_email(opt={}): except Exception as ex: return '' - \ No newline at end of file diff --git a/data-process/data_manipulation/utils/date_time_utils.py b/data-process/data_manipulation/utils/date_time_utils.py index 3bd3121bb..364b87584 100644 --- a/data-process/data_manipulation/utils/date_time_utils.py +++ b/data-process/data_manipulation/utils/date_time_utils.py @@ -19,6 +19,7 @@ def now_str(): return f"{datetime.datetime.now():%Y-%m-%d %H:%M:%S.%f}" + def now_str_for_day(): return f"{datetime.datetime.now():%Y-%m-%d}" @@ -37,7 +38,7 @@ def timestamp_to_str_second(timestamp): def chage_datetime_fromat(opt={}): my_date_time = datetime.datetime.strptime( - opt['date_time'], - opt['from_format']) + opt['date_time'], + opt['from_format']) - return my_date_time.strftime(opt.get('to_format', '%Y-%m-%d %H:%M:%S')) \ No newline at end of file + return my_date_time.strftime(opt.get('to_format', '%Y-%m-%d %H:%M:%S')) diff --git a/data-process/data_manipulation/utils/file_utils.py b/data-process/data_manipulation/utils/file_utils.py index d4c50afce..16bcee21e 100644 --- a/data-process/data_manipulation/utils/file_utils.py +++ b/data-process/data_manipulation/utils/file_utils.py @@ -19,6 +19,7 @@ import os + ### # 生成文件名称 # @author: wangxinbiao @@ -48,8 +49,10 @@ async def get_file_name(opt={}): # content: # 1) 基本功能实现 ### + + async def get_temp_file_path(): - current_directory = os.getcwd() + current_directory = os.getcwd() csv_file_path = os.path.join(current_directory, 'file_handle/temp_file/') @@ -65,5 +68,7 @@ async def get_temp_file_path(): # content: # 1) 基本功能实现 ### + + async def delete_file(file_path): os.remove(file_path) diff --git a/data-process/data_manipulation/utils/json_utils.py b/data-process/data_manipulation/utils/json_utils.py index ed97a60ce..18c40487d 100644 --- a/data-process/data_manipulation/utils/json_utils.py +++ b/data-process/data_manipulation/utils/json_utils.py @@ -13,17 +13,18 @@ # limitations under the License. 
-import ujson
 from pathlib import Path
 
+import ujson
+
 
 def pretty_print(opt={}):
     data = opt.get('data', {})
 
     print(ujson.dumps(data,
-          ensure_ascii=False,
-          escape_forward_slashes=False,
-          indent=4))
+                      ensure_ascii=False,
+                      escape_forward_slashes=False,
+                      indent=4))
 
 
 def get_str_empty(opt={}):
@@ -38,21 +39,19 @@ def write_json_file(opt={}):
     file_name = Path(opt['file_name'])
 
-    with open(file_name, 'w', encoding = 'utf-8') as outfile:
-        dump(opt['data'], outfile, opt)
+    with open(file_name, 'w', encoding='utf-8') as outfile:
+        dump(opt['data'], outfile, opt)
 
 
 def read_json_file(opt={}):
     file_name = Path(opt['file_name'])
 
     json_result = None
-    with open(file_name, 'r', encoding = 'utf-8') as f:
+    with open(file_name, 'r', encoding='utf-8') as f:
         json_result = ujson.load(f)
 
     return json_result
 
-
-
 def dumps(json_data, opt={}):
     indent = opt.get('indent', 2)
     ensure_ascii = opt.get('ensure_ascii', False)
@@ -73,4 +72,4 @@ def dump(json_data, file, opt={}):
          file,
          indent=indent,
          ensure_ascii=ensure_ascii,
-         escape_forward_slashes=escape_forward_slashes)
\ No newline at end of file
+         escape_forward_slashes=escape_forward_slashes)
diff --git a/data-process/data_manipulation/utils/log_utils.py b/data-process/data_manipulation/utils/log_utils.py
index 4d97b19b2..8648ec625 100644
--- a/data-process/data_manipulation/utils/log_utils.py
+++ b/data-process/data_manipulation/utils/log_utils.py
@@ -12,32 +12,35 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-
-import logging
-from logging.handlers import TimedRotatingFileHandler, RotatingFileHandler
+import os
 import datetime
+import logging
+from logging.handlers import RotatingFileHandler, TimedRotatingFileHandler
+
 
-def init_config (opt={}):
+def init_config(opt={}):
     source_type = opt['source_type']
+    log_dir = opt['log_dir']
+    os.makedirs(log_dir, exist_ok=True)
 
     ###
     # 配置全局日志配置
     ###
     file_handler = TimedRotatingFileHandler(
-            f'log/{source_type}/{source_type}_{datetime.datetime.now().strftime("%Y-%m-%d")}.log',
-            when="midnight",
-            interval=1,
+        f'{log_dir}/{source_type}_{datetime.datetime.now().strftime("%Y-%m-%d")}.log',
+        when="midnight",
+        interval=1,
         backupCount=30
-    ) # 按天生成日志文件,最多保存30天的日志文件
+    )  # 按天生成日志文件,最多保存30天的日志文件
     file_handler.setLevel(logging.DEBUG)
 
     # 将error和critical级别的日志单独存放
     error_file_handler = TimedRotatingFileHandler(
-            f'log/{source_type}/error/{source_type}_{datetime.datetime.now().strftime("%Y-%m-%d")}.err.log',
-            when="midnight",
-            interval=1,
+        f'{log_dir}/{source_type}_{datetime.datetime.now().strftime("%Y-%m-%d")}.err.log',
+        when="midnight",
+        interval=1,
         backupCount=30
-    ) # 按天生成日志文件,最多保存30天的日志文件
+    )  # 按天生成日志文件,最多保存30天的日志文件
     error_file_handler.suffix = "%Y-%m-%d"  # 文件名的时间格式
     error_file_handler.setLevel(logging.ERROR)
@@ -50,4 +53,4 @@
         error_file_handler,
         logging.StreamHandler()
     ]
-    )
\ No newline at end of file
+    )
diff --git a/data-process/data_manipulation/utils/minio_utils.py b/data-process/data_manipulation/utils/minio_utils.py
index 8dedcc5f8..2d4350216 100644
--- a/data-process/data_manipulation/utils/minio_utils.py
+++ b/data-process/data_manipulation/utils/minio_utils.py
@@ -16,11 +16,9 @@
 # MinIO
 ###
 
+from common import config
 from minio import Minio
 
-from common import (
-    config
-)
 
 async def create_client():
     return Minio(
diff --git a/data-process/docker/base/Dockerfile.base b/data-process/docker/base/Dockerfile.base
deleted file mode 100644
index ec52b9bef..000000000
--- a/data-process/docker/base/Dockerfile.base
+++
/dev/null @@ -1,43 +0,0 @@ -FROM ubuntu:20.04 - -ENV TIME_ZONE Asia/Shanghai - -RUN cp /etc/apt/sources.list /etc/apt/sources.list.bak && \ -sed -i s@/archive.ubuntu.com/@/mirrors.aliyun.com/@g /etc/apt/sources.list && \ -sed -i s@/security.ubuntu.com/@/mirrors.aliyun.com/@g /etc/apt/sources.list && \ -cat /etc/apt/sources.list.bak >> /etc/apt/sources.list && \ -apt-get clean && \ -apt-get update && \ -DEBIAN_FRONTEND="noninteractive" apt-get install -y vim build-essential zlib1g-dev libncurses5-dev python3.10 python3-tk libgdbm-dev libgdbm-compat-dev libnss3-dev libssl-dev libreadline-dev libbz2-dev libgdbm-dev liblzma-dev openssl uuid-dev libffi-dev libsqlite3-dev wget tzdata \ -libatk1.0-0 libatk-bridge2.0-0 libcups2 libdbus-1-3 libxkbcommon0 libatspi2.0-0 libxdamage1 libgbm1 libpango-1.0-0 libcairo2 libasound2 tesseract-ocr libtesseract-dev tesseract-ocr-chi-sim && \ -ln -snf /user/share/zoneinfo/$TIME_ZONE /etc/localtime && echo $TIME_ZONE > /etc/timezone && \ -dpkg-reconfigure -f noninteractive tzdata - -RUN cd /opt/local -wget https://www.python.org/ftp/python/3.10.13/Python-3.10.13.tar.xz - - -RUN cd /opt/local/Python-3.10.13 && \ -./configure --enable-optimizations --enable-loadable-sqlite-extensions && \ -make -j 8 && \ -make altinstall - -ENV LD_LIBRARY_PATH /usr/local/lib:$LD_LIBRARY_PATH -ENV PYTHON_VERSION 3.10 -RUN cd /usr/local/bin && \ -ln -sf python$PYTHON_VERSION python3 && \ -ln -sf pip$PYTHON_VERSION pip3 && \ -echo "/usr/local/lib" > /etc/ld.so.conf.d/local.conf - - -RUN mkdir -p /happy_work_space -WORKDIR /happy_work_space - -VOLUME ["/happy_work_space"] - -RUN python3.10 -m pip install --upgrade pip setuptools - -ADD requirements.txt / -RUN python3.10 -m pip install -U -r /requirements.txt - -ADD lzma.py /usr/local/lib/python3.10/lzma.py \ No newline at end of file diff --git a/data-process/docker/base/build_image.sh b/data-process/docker/base/build_image.sh deleted file mode 100644 index 917a245f7..000000000 --- a/data-process/docker/base/build_image.sh +++ /dev/null @@ -1,6 +0,0 @@ -set e - -release_image="python:3.10.13" - - -docker build -f ./Dockerfile.base -t ${release_image} --build-arg GIT_VERSION="$gitVersion" . \ No newline at end of file diff --git a/data-process/docker/base/lzma.py b/data-process/docker/base/lzma.py deleted file mode 100644 index acb07805b..000000000 --- a/data-process/docker/base/lzma.py +++ /dev/null @@ -1,352 +0,0 @@ -"""Interface to the liblzma compression library. - -This module provides a class for reading and writing compressed files, -classes for incremental (de)compression, and convenience functions for -one-shot (de)compression. - -These classes and functions support both the XZ and legacy LZMA -container formats, as well as raw compressed data streams. 
-""" - -__all__ = [ - "CHECK_NONE", "CHECK_CRC32", "CHECK_CRC64", "CHECK_SHA256", - "CHECK_ID_MAX", "CHECK_UNKNOWN", - "FILTER_LZMA1", "FILTER_LZMA2", "FILTER_DELTA", "FILTER_X86", "FILTER_IA64", - "FILTER_ARM", "FILTER_ARMTHUMB", "FILTER_POWERPC", "FILTER_SPARC", - "FORMAT_AUTO", "FORMAT_XZ", "FORMAT_ALONE", "FORMAT_RAW", - "MF_HC3", "MF_HC4", "MF_BT2", "MF_BT3", "MF_BT4", - "MODE_FAST", "MODE_NORMAL", "PRESET_DEFAULT", "PRESET_EXTREME", - - "LZMACompressor", "LZMADecompressor", "LZMAFile", "LZMAError", - "open", "compress", "decompress", "is_check_supported", -] - -import builtins -import io -import os -try: - from _lzma import * - from _lzma import _encode_filter_properties, _decode_filter_properties -except ImportError: - from backports.lzma import * - from backports.lzma import _encode_filter_properties, _decode_filter_properties - -import _compression - - -_MODE_CLOSED = 0 -_MODE_READ = 1 -# Value 2 no longer used -_MODE_WRITE = 3 - - -class LZMAFile(_compression.BaseStream): - - """A file object providing transparent LZMA (de)compression. - - An LZMAFile can act as a wrapper for an existing file object, or - refer directly to a named file on disk. - - Note that LZMAFile provides a *binary* file interface - data read - is returned as bytes, and data to be written must be given as bytes. - """ - - def __init__(self, filename=None, mode="r", *, - format=None, check=-1, preset=None, filters=None): - """Open an LZMA-compressed file in binary mode. - - filename can be either an actual file name (given as a str, - bytes, or PathLike object), in which case the named file is - opened, or it can be an existing file object to read from or - write to. - - mode can be "r" for reading (default), "w" for (over)writing, - "x" for creating exclusively, or "a" for appending. These can - equivalently be given as "rb", "wb", "xb" and "ab" respectively. - - format specifies the container format to use for the file. - If mode is "r", this defaults to FORMAT_AUTO. Otherwise, the - default is FORMAT_XZ. - - check specifies the integrity check to use. This argument can - only be used when opening a file for writing. For FORMAT_XZ, - the default is CHECK_CRC64. FORMAT_ALONE and FORMAT_RAW do not - support integrity checks - for these formats, check must be - omitted, or be CHECK_NONE. - - When opening a file for reading, the *preset* argument is not - meaningful, and should be omitted. The *filters* argument should - also be omitted, except when format is FORMAT_RAW (in which case - it is required). - - When opening a file for writing, the settings used by the - compressor can be specified either as a preset compression - level (with the *preset* argument), or in detail as a custom - filter chain (with the *filters* argument). For FORMAT_XZ and - FORMAT_ALONE, the default is to use the PRESET_DEFAULT preset - level. For FORMAT_RAW, the caller must always specify a filter - chain; the raw compressor does not support preset compression - levels. - - preset (if provided) should be an integer in the range 0-9, - optionally OR-ed with the constant PRESET_EXTREME. - - filters (if provided) should be a sequence of dicts. Each dict - should have an entry for "id" indicating ID of the filter, plus - additional entries for options to the filter. 
- """ - self._fp = None - self._closefp = False - self._mode = _MODE_CLOSED - - if mode in ("r", "rb"): - if check != -1: - raise ValueError("Cannot specify an integrity check " - "when opening a file for reading") - if preset is not None: - raise ValueError("Cannot specify a preset compression " - "level when opening a file for reading") - if format is None: - format = FORMAT_AUTO - mode_code = _MODE_READ - elif mode in ("w", "wb", "a", "ab", "x", "xb"): - if format is None: - format = FORMAT_XZ - mode_code = _MODE_WRITE - self._compressor = LZMACompressor(format=format, check=check, - preset=preset, filters=filters) - self._pos = 0 - else: - raise ValueError("Invalid mode: {!r}".format(mode)) - - if isinstance(filename, (str, bytes, os.PathLike)): - if "b" not in mode: - mode += "b" - self._fp = builtins.open(filename, mode) - self._closefp = True - self._mode = mode_code - elif hasattr(filename, "read") or hasattr(filename, "write"): - self._fp = filename - self._mode = mode_code - else: - raise TypeError("filename must be a str, bytes, file or PathLike object") - - if self._mode == _MODE_READ: - raw = _compression.DecompressReader(self._fp, LZMADecompressor, - trailing_error=LZMAError, format=format, filters=filters) - self._buffer = io.BufferedReader(raw) - - def close(self): - """Flush and close the file. - - May be called more than once without error. Once the file is - closed, any other operation on it will raise a ValueError. - """ - if self._mode == _MODE_CLOSED: - return - try: - if self._mode == _MODE_READ: - self._buffer.close() - self._buffer = None - elif self._mode == _MODE_WRITE: - self._fp.write(self._compressor.flush()) - self._compressor = None - finally: - try: - if self._closefp: - self._fp.close() - finally: - self._fp = None - self._closefp = False - self._mode = _MODE_CLOSED - - @property - def closed(self): - """True if this file is closed.""" - return self._mode == _MODE_CLOSED - - def fileno(self): - """Return the file descriptor for the underlying file.""" - self._check_not_closed() - return self._fp.fileno() - - def seekable(self): - """Return whether the file supports seeking.""" - return self.readable() and self._buffer.seekable() - - def readable(self): - """Return whether the file was opened for reading.""" - self._check_not_closed() - return self._mode == _MODE_READ - - def writable(self): - """Return whether the file was opened for writing.""" - self._check_not_closed() - return self._mode == _MODE_WRITE - - def peek(self, size=-1): - """Return buffered data without advancing the file position. - - Always returns at least one byte of data, unless at EOF. - The exact number of bytes returned is unspecified. - """ - self._check_can_read() - # Relies on the undocumented fact that BufferedReader.peek() always - # returns at least one byte (except at EOF) - return self._buffer.peek(size) - - def read(self, size=-1): - """Read up to size uncompressed bytes from the file. - - If size is negative or omitted, read until EOF is reached. - Returns b"" if the file is already at EOF. - """ - self._check_can_read() - return self._buffer.read(size) - - def read1(self, size=-1): - """Read up to size uncompressed bytes, while trying to avoid - making multiple reads from the underlying stream. Reads up to a - buffer's worth of data if size is negative. - - Returns b"" if the file is at EOF. 
- """ - self._check_can_read() - if size < 0: - size = io.DEFAULT_BUFFER_SIZE - return self._buffer.read1(size) - - def readline(self, size=-1): - """Read a line of uncompressed bytes from the file. - - The terminating newline (if present) is retained. If size is - non-negative, no more than size bytes will be read (in which - case the line may be incomplete). Returns b'' if already at EOF. - """ - self._check_can_read() - return self._buffer.readline(size) - - def write(self, data): - """Write a bytes object to the file. - - Returns the number of uncompressed bytes written, which is - always len(data). Note that due to buffering, the file on disk - may not reflect the data written until close() is called. - """ - self._check_can_write() - compressed = self._compressor.compress(data) - self._fp.write(compressed) - self._pos += len(data) - return len(data) - - def seek(self, offset, whence=io.SEEK_SET): - """Change the file position. - - The new position is specified by offset, relative to the - position indicated by whence. Possible values for whence are: - - 0: start of stream (default): offset must not be negative - 1: current stream position - 2: end of stream; offset must not be positive - - Returns the new file position. - - Note that seeking is emulated, so depending on the parameters, - this operation may be extremely slow. - """ - self._check_can_seek() - return self._buffer.seek(offset, whence) - - def tell(self): - """Return the current file position.""" - self._check_not_closed() - if self._mode == _MODE_READ: - return self._buffer.tell() - return self._pos - - -def open(filename, mode="rb", *, - format=None, check=-1, preset=None, filters=None, - encoding=None, errors=None, newline=None): - """Open an LZMA-compressed file in binary or text mode. - - filename can be either an actual file name (given as a str, bytes, - or PathLike object), in which case the named file is opened, or it - can be an existing file object to read from or write to. - - The mode argument can be "r", "rb" (default), "w", "wb", "x", "xb", - "a", or "ab" for binary mode, or "rt", "wt", "xt", or "at" for text - mode. - - The format, check, preset and filters arguments specify the - compression settings, as for LZMACompressor, LZMADecompressor and - LZMAFile. - - For binary mode, this function is equivalent to the LZMAFile - constructor: LZMAFile(filename, mode, ...). In this case, the - encoding, errors and newline arguments must not be provided. - - For text mode, an LZMAFile object is created, and wrapped in an - io.TextIOWrapper instance with the specified encoding, error - handling behavior, and line ending(s). - - """ - if "t" in mode: - if "b" in mode: - raise ValueError("Invalid mode: %r" % (mode,)) - else: - if encoding is not None: - raise ValueError("Argument 'encoding' not supported in binary mode") - if errors is not None: - raise ValueError("Argument 'errors' not supported in binary mode") - if newline is not None: - raise ValueError("Argument 'newline' not supported in binary mode") - - lz_mode = mode.replace("t", "") - binary_file = LZMAFile(filename, lz_mode, format=format, check=check, - preset=preset, filters=filters) - - if "t" in mode: - return io.TextIOWrapper(binary_file, encoding, errors, newline) - else: - return binary_file - - -def compress(data, format=FORMAT_XZ, check=-1, preset=None, filters=None): - """Compress a block of data. - - Refer to LZMACompressor's docstring for a description of the - optional arguments *format*, *check*, *preset* and *filters*. 
- - For incremental compression, use an LZMACompressor instead. - """ - comp = LZMACompressor(format, check, preset, filters) - return comp.compress(data) + comp.flush() - - -def decompress(data, format=FORMAT_AUTO, memlimit=None, filters=None): - """Decompress a block of data. - - Refer to LZMADecompressor's docstring for a description of the - optional arguments *format*, *check* and *filters*. - - For incremental decompression, use an LZMADecompressor instead. - """ - results = [] - while True: - decomp = LZMADecompressor(format, memlimit, filters) - try: - res = decomp.decompress(data) - except LZMAError: - if results: - break # Leftover data is not a valid LZMA/XZ stream; ignore it. - else: - raise # Error on the first iteration; bail out. - results.append(res) - if not decomp.eof: - raise LZMAError("Compressed data ended before the " - "end-of-stream marker was reached") - data = decomp.unused_data - if not data: - break - return b"".join(results) \ No newline at end of file diff --git a/data-process/docker/base/requirements.txt b/data-process/docker/base/requirements.txt deleted file mode 100644 index 023663fd2..000000000 --- a/data-process/docker/base/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -backports.lzma==0.0.14 \ No newline at end of file diff --git a/llms/Dockerfile.server b/llms/Dockerfile.server index 7f29f6168..1e504629e 100644 --- a/llms/Dockerfile.server +++ b/llms/Dockerfile.server @@ -2,8 +2,7 @@ FROM python:3.9-slim ENV TZ=Asia/Shanghai -RUN sed -i 's/archive.ubuntu.com/mirrors.tuna.tsinghua.edu.cn/g' /etc/apt/sources.list -RUN sed -i 's/security.ubuntu.com/mirrors.tuna.tsinghua.edu.cn/g' /etc/apt/sources.list +RUN sed -i 's/deb.debian.org/mirrors.tuna.tsinghua.edu.cn/g' /etc/apt/sources.list.d/debian.sources RUN export DEBIAN_FRONTEND=noninteractive \ && apt-get update \