Commit 2769e14

Merge pull request #536 from ymcui/gradio_notebook

Add Gradio notebook

ymcui authored Jun 8, 2023
2 parents 5e7a50e + 2beb733 commit 2769e14

Showing 5 changed files with 171 additions and 19 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -201,12 +201,12 @@ chinese_llama_lora_7b/
| [**LlamaChat**](https://github.com/alexrozanski/LlamaChat) | GUI for macOS (requires llama.cpp-format models) | MacOS ||||| [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca/wiki/使用LlamaChat图形界面(macOS)) |
| [**LangChain**](https://github.com/hwchase17/langchain) | LLM application development framework, suitable for secondary development | General | ✅<sup>†</sup> || ✅<sup>†</sup> || [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca/wiki/与LangChain进行集成) |
| [**privateGPT**](https://github.com/imartinez/privateGPT) | LangChain-based local multi-document QA framework | General ||||| [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca/wiki/使用privateGPT进行多文档问答) |
| [**Colab Gradio Demo**](https://github.com/ymcui/Chinese-LLaMA-Alpaca/blob/main/notebooks/gradio_web_demo.ipynb) | Launch an interactive Gradio web demo in Colab to try out the models | General ||||| [link](https://colab.research.google.com/github/ymcui/Chinese-LLaMA-Alpaca/blob/main/notebooks/gradio_web_demo.ipynb) |

<sup>†</sup>: Supported by the LangChain framework, but not implemented in the tutorial; see the official LangChain documentation for details.

For details, see this project's >>> [📚 GitHub Wiki](https://github.com/ymcui/Chinese-LLaMA-Alpaca/wiki/模型推理与部署)


## System Performance

### Generation Performance Evaluation
1 change: 1 addition & 0 deletions README_EN.md
@@ -205,6 +205,7 @@ We mainly provide the following three ways for inference and local deployment.
| [**LlamaChat**](https://github.com/alexrozanski/LlamaChat) | a macOS app that allows you to chat with LLaMA, Alpaca, etc. | MacOS ||||| [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca/wiki/Using-LlamaChat-Interface) |
| [**LangChain**](https://github.com/hwchase17/langchain) | LLM application development framework, suitable for secondary development | General | ✅<sup>†</sup> || ✅<sup>†</sup> || [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca/wiki/Integrated-with-LangChain) |
| [**privateGPT**](https://github.com/imartinez/privateGPT) | LangChain-based multi-document QA framework | General ||||| [link](https://github.com/ymcui/Chinese-LLaMA-Alpaca/wiki/Use-privateGPT-for-multi-document-QA) |
| [**Colab Gradio Demo**](https://github.com/ymcui/Chinese-LLaMA-Alpaca/blob/main/notebooks/gradio_web_demo.ipynb) | Running a Gradio web demo in Colab | General ||||| [link](https://colab.research.google.com/github/ymcui/Chinese-LLaMA-Alpaca/blob/main/notebooks/gradio_web_demo.ipynb) |

<sup>†</sup>: Supported by LangChain, but not implemented in the tutorial. Please refer to the official LangChain Documentation for details.

10 changes: 9 additions & 1 deletion notebooks/README.md
@@ -30,4 +30,12 @@ Example of running the instruction fine-tuning script on Colab (for workflow reference only).

Example of running the instruction fine-tuning script on Colab.

Check the latest wiki tutorial: [Chinese Tutorial](https://github.com/ymcui/Chinese-LLaMA-Alpaca/wiki/指令精调脚本) / [English Tutorial](https://github.com/ymcui/Chinese-LLaMA-Alpaca/wiki/SFT-Script)

### gradio_web_demo.ipynb

Example of running the Gradio demo on Colab.

Open the notebook in Colab: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/ymcui/Chinese-LLaMA-Alpaca/blob/main/notebooks/gradio_web_demo.ipynb)
153 changes: 153 additions & 0 deletions notebooks/gradio_web_demo.ipynb


24 changes: 7 additions & 17 deletions scripts/inference/gradio_demo.py
@@ -2,15 +2,18 @@
import gradio as gr
import argparse
import os
import mdtex2html

parser = argparse.ArgumentParser()
parser.add_argument('--base_model', default=None, type=str, required=True)
parser.add_argument('--lora_model', default=None, type=str,help="If None, perform inference on the base model")
parser.add_argument('--tokenizer_path',default=None,type=str)
parser.add_argument('--gpus', default="0", type=str)
parser.add_argument('--share', default=True, help='share gradio domain name')
parser.add_argument('--load_in_8bit',action='store_true', help='use 8 bit model')
parser.add_argument('--only_cpu',action='store_true',help='only use CPU for inference')
args = parser.parse_args()
share = args.share
load_in_8bit = args.load_in_8bit
if args.only_cpu is True:
args.gpus = ""
os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus
@@ -19,19 +22,6 @@
from transformers import LlamaForCausalLM, LlamaTokenizer, GenerationConfig
from peft import PeftModel

def postprocess(self, y):
if y is None:
return []
for i, (message, response) in enumerate(y):
y[i] = (
None if message is None else mdtex2html.convert((message)),
None if response is None else mdtex2html.convert(response),
)
return y


gr.Chatbot.postprocess = postprocess

generation_config = dict(
temperature=0.2,
top_k=40,
@@ -54,7 +44,7 @@ def postprocess(self, y):

base_model = LlamaForCausalLM.from_pretrained(
args.base_model,
load_in_8bit=False,
load_in_8bit=load_in_8bit,
torch_dtype=load_type,
low_cpu_mem_usage=True,
device_map='auto',
@@ -147,7 +137,7 @@ def predict(
with gr.Blocks() as demo:
gr.HTML("""<h1 align="center">Chinese LLaMA & Alpaca LLM</h1>""")
current_file_path = os.path.abspath(os.path.dirname(__file__))
gr.Image(f'{current_file_path}/../pics/banner.png', label = 'Chinese LLaMA & Alpaca LLM')
gr.Image(f'{current_file_path}/../../pics/banner.png', label = 'Chinese LLaMA & Alpaca LLM')
gr.Markdown("> 为了促进大模型在中文NLP社区的开放研究,本项目开源了中文LLaMA模型和指令精调的Alpaca大模型。这些模型在原版LLaMA的基础上扩充了中文词表并使用了中文数据进行二次预训练,进一步提升了中文基础语义理解能力。同时,中文Alpaca模型进一步使用了中文指令数据进行精调,显著提升了模型对指令的理解和执行能力")
chatbot = gr.Chatbot()
with gr.Row():
@@ -173,4 +163,4 @@ def predict(
submitBtn.click(reset_user_input, [], [user_input])

emptyBtn.click(reset_state, outputs=[chatbot, history], show_progress=True)
demo.queue().launch(share=False, inbrowser=True, server_name = '0.0.0.0', server_port=19324)
demo.queue().launch(share=share, inbrowser=True, server_name = '0.0.0.0', server_port=19324)
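The core of this change to `gradio_demo.py` is threading two new command-line flags through to existing calls. A minimal sketch of that wiring, assuming only the argument names visible in the diff above (the model path below is a placeholder, not a real checkpoint):

```python
# Minimal sketch of the flag wiring added in this diff: argparse options
# replace the previously hard-coded load_in_8bit=False and launch(share=False).
import argparse

def build_parser():
    parser = argparse.ArgumentParser()
    parser.add_argument('--base_model', default=None, type=str, required=True)
    parser.add_argument('--load_in_8bit', action='store_true',
                        help='use 8 bit model')
    parser.add_argument('--share', default=True,
                        help='share gradio domain name')
    return parser

# 'path/to/base_model' is a placeholder for illustration only.
args = build_parser().parse_args(['--base_model', 'path/to/base_model'])

# Downstream, the diff substitutes these values for the old literals:
#   LlamaForCausalLM.from_pretrained(..., load_in_8bit=args.load_in_8bit, ...)
#   demo.queue().launch(share=args.share, ...)
print(args.share, args.load_in_8bit)  # -> True False
```

One quirk worth noting: `--share` is declared with `default=True` but no `action` or `type`, so any value supplied on the command line arrives as a non-empty string and is therefore truthy. Disabling sharing from the CLI would require something like `action='store_false'`; that behavior is inherited from the script as committed, not introduced here.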
