From f09a2ff418c502d17a4591b613861d045687c326 Mon Sep 17 00:00:00 2001
From: Yang Yong
Date: Thu, 18 Jan 2024 22:23:38 +0800
Subject: [PATCH] Add LightllmApi KeyError log & Update doc (#816)

* Add LightllmApi KeyError log

* Update LightllmApi doc
---
 docs/en/advanced_guides/evaluation_lightllm.md    | 1 +
 docs/zh_cn/advanced_guides/evaluation_lightllm.md | 1 +
 opencompass/models/lightllm_api.py                | 6 ++++--
 3 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/docs/en/advanced_guides/evaluation_lightllm.md b/docs/en/advanced_guides/evaluation_lightllm.md
index 8d3116041..8584bf75f 100644
--- a/docs/en/advanced_guides/evaluation_lightllm.md
+++ b/docs/en/advanced_guides/evaluation_lightllm.md
@@ -27,6 +27,7 @@ python -m lightllm.server.api_server --model_dir /path/llama2-7B \
 ```
 
 **Note:** tp can be configured to enable TensorParallel inference across several GPUs, which is suitable for very large models.
+**Note:** The max_total_token_num in the above command affects throughput during testing. It can be configured according to the documentation on the [Lightllm homepage](https://github.com/ModelTC/lightllm); as long as it does not run out of GPU memory, higher values are generally better.
 
 You can use the following Python script to quickly test whether the current service has started successfully.
 
diff --git a/docs/zh_cn/advanced_guides/evaluation_lightllm.md b/docs/zh_cn/advanced_guides/evaluation_lightllm.md
index b2efb121b..b5a2489eb 100644
--- a/docs/zh_cn/advanced_guides/evaluation_lightllm.md
+++ b/docs/zh_cn/advanced_guides/evaluation_lightllm.md
@@ -27,6 +27,7 @@ python -m lightllm.server.api_server --model_dir /path/llama2-7B \
 ```
 
 **Note:** The above command sets the number of tp so that TensorParallel inference runs across tp GPUs, which is suitable for inference with larger models.
+**Note:** The max_total_token_num in the above command affects throughput during testing; it can be set according to the documentation on the [Lightllm homepage](https://github.com/ModelTC/lightllm). As long as it does not run out of GPU memory, larger values are usually better.
 
 You can use the following Python script to quickly test whether the service has started successfully.
 
diff --git a/opencompass/models/lightllm_api.py b/opencompass/models/lightllm_api.py
index eb752ae95..b0686e646 100644
--- a/opencompass/models/lightllm_api.py
+++ b/opencompass/models/lightllm_api.py
@@ -78,6 +78,8 @@ def _generate(self, input: str, max_out_len: int) -> str:
             except requests.JSONDecodeError:
                 self.logger.error('JsonDecode error, got %s',
                                   str(raw_response.content))
+            except KeyError:
+                self.logger.error(f'KeyError. Response: {str(response)}')
             max_num_retries += 1
 
         raise RuntimeError('Calling LightllmAPI failed after retrying for '
@@ -123,11 +125,11 @@ def _get_ppl(self, input: str, max_out_len: int) -> float:
             response = raw_response.json()
             assert ('prompt_token_ids' in response and 'prompt_logprobs'
-                    in response), 'prompt_token_ids and prompt_logprobs \
+                    in response), f'prompt_token_ids and prompt_logprobs \
                     must be in the output. \
                     Please consider adding \
                     --return_all_prompt_logprobs argument \
-                    when starting your lightllm service.'
+                    when starting your lightllm service. Response: {str(response)}'
             prompt_token_ids = response['prompt_token_ids'][1:]
             prompt_logprobs = [
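
Both documentation pages touched above refer to a Python script for checking that the Lightllm service is up. A minimal sketch of such a check, assuming the server was launched on localhost with `--port 8080`; the host, port, and prompt are placeholders, not part of this patch:

```python
import json

import requests

# Assumption: the api_server is reachable at this address; adjust host/port
# to match the --host/--port flags used when launching lightllm.
url = 'http://localhost:8080/generate'
headers = {'Content-Type': 'application/json'}
data = {
    'inputs': 'What is AI?',
    'parameters': {
        'do_sample': False,
        'ignore_eos': False,
        'max_new_tokens': 32,
    },
}

# A 200 response with generated text means the service started successfully.
response = requests.post(url, headers=headers, data=json.dumps(data))
if response.status_code == 200:
    print('Service is up:', response.json())
else:
    print('Error:', response.status_code, response.text)
```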
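For context on the first `lightllm_api.py` hunk: `_generate` parses the JSON body and then indexes into it, so a syntactically valid JSON error payload that lacks the expected key raises `KeyError` rather than `JSONDecodeError`, and before this patch that exception escaped the retry loop uncaught and unlogged. A simplified, self-contained sketch of the patched control flow; the URL is a placeholder and the `generated_text` schema is an assumption about the service's response shape:

```python
import requests


def call_with_retry(url: str, data: dict, retry: int = 2) -> str:
    """Simplified sketch of the retry loop in LightllmAPI._generate."""
    num_retries = 0
    while num_retries < retry:
        raw_response = requests.post(url, json=data)
        try:
            response = raw_response.json()
            # A well-formed error body (e.g. {'error': ...}) parses fine but
            # lacks 'generated_text'; the new except clause logs that case
            # and lets the loop retry instead of crashing the run.
            return response['generated_text'][0]
        except requests.JSONDecodeError:
            print('JsonDecode error, got %s' % raw_response.content)
        except KeyError:
            print('KeyError. Response: %s' % response)
        num_retries += 1
    raise RuntimeError(f'Calling LightllmAPI failed after {num_retries} retries.')
```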
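For the second hunk: `_get_ppl` requires `prompt_token_ids` and `prompt_logprobs` in the response, which per the assertion message are only returned when the service is started with `--return_all_prompt_logprobs`. A hedged sketch of a standalone preflight check; the endpoint and payload shape are assumptions carried over from the test script above:

```python
import requests

# Assumptions: same placeholder endpoint as above; the service must have
# been launched with --return_all_prompt_logprobs for these keys to appear.
url = 'http://localhost:8080/generate'
data = {'inputs': 'What is AI?', 'parameters': {'max_new_tokens': 1}}

response = requests.post(url, json=data).json()
missing = [key for key in ('prompt_token_ids', 'prompt_logprobs')
           if key not in response]
if missing:
    print('Missing keys:', missing,
          '- restart lightllm with --return_all_prompt_logprobs')
else:
    # _get_ppl drops the first prompt token (response['prompt_token_ids'][1:]),
    # typically the BOS token, before scoring the rest.
    print('Flag is active; prompt has',
          len(response['prompt_token_ids']), 'tokens')
```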