diff --git a/index.html b/index.html
index 04bf194..1b64e09 100644
--- a/index.html
+++ b/index.html
@@ -741,7 +741,7 @@
Metric |
- Final Sum Score ↓ |
+ Final Avg Score ↓ |
VideoFeedback-test |
EvalCrafter |
GenAI-Bench |
@@ -750,60 +750,60 @@
- MantisScore (reg) | 278.3 | 75.7 | 51.1 | 78.5 | 73.0 |
+ MantisScore (reg) | 69.6 | 75.7 | 51.1 | 78.5 | 73.0 |
- MantisScore-(gen) | 222.4 | 77.1 | 27.6 | 59.0 | 58.7 |
+ MantisScore (gen) | 55.6 | 77.1 | 27.6 | 59.0 | 58.7 |
- Gemini-1.5-Pro | 158.8 | 22.1 | 22.9 | 60.9 | 52.9 |
+ Gemini-1.5-Pro | 39.7 | 22.1 | 22.9 | 60.9 | 52.9 |
- Gemini-1.5-Flash | 157.5 | 20.8 | 17.3 | 67.1 | 52.3 |
+ Gemini-1.5-Flash | 39.4 | 20.8 | 17.3 | 67.1 | 52.3 |
- GPT-4o | 155.4 | 23.1 | 28.7 | 52.0 | 51.7 |
+ GPT-4o | 38.9 | 23.1 | 28.7 | 52.0 | 51.7 |
- CLIP-sim | 126.8 | 8.9 | 36.2 | 34.2 | 47.4 |
+ CLIP-sim | 31.7 | 8.9 | 36.2 | 34.2 | 47.4 |
- DINO-sim | 121.3 | 7.5 | 32.1 | 38.5 | 43.3 |
+ DINO-sim | 30.3 | 7.5 | 32.1 | 38.5 | 43.3 |
- SSIM-sim | 118.0 | 13.4 | 26.9 | 34.1 | 43.5 |
+ SSIM-sim | 29.5 | 13.4 | 26.9 | 34.1 | 43.5 |
- CLIP-Score | 114.4 | -7.2 | 21.7 | 45.0 | 54.9 |
+ CLIP-Score | 28.6 | -7.2 | 21.7 | 45.0 | 54.9 |
- LLaVA-1.5-7B | 108.3 | 8.5 | 10.5 | 49.9 | 39.4 |
+ LLaVA-1.5-7B | 27.1 | 8.5 | 10.5 | 49.9 | 39.4 |
- LLaVA-1.6-7B | 93.3 | -3.1 | 13.2 | 44.5 | 38.7 |
+ LLaVA-1.6-7B | 23.3 | -3.1 | 13.2 | 44.5 | 38.7 |
- X-CLIP-Score | 92.9 | -1.9 | 13.3 | 41.4 | 40.1 |
+ X-CLIP-Score | 23.2 | -1.9 | 13.3 | 41.4 | 40.1 |
- PIQE | 78.3 | -10.1 | -1.2 | 34.5 | 55.1 |
+ PIQE | 19.6 | -10.1 | -1.2 | 34.5 | 55.1 |
- BRISQUE | 75.9 | -20.3 | 3.9 | 38.5 | 53.7 |
+ BRISQUE | 19.0 | -20.3 | 3.9 | 38.5 | 53.7 |
- Idefics1 | 73.0 | 6.5 | 0.3 | 34.6 | 31.7 |
+ Idefics1 | 18.3 | 6.5 | 0.3 | 34.6 | 31.7 |
- MSE-dyn | 42.5 | -5.5 | -17.0 | 28.4 | 36.5 |
+ MSE-dyn | 10.6 | -5.5 | -17.0 | 28.4 | 36.5 |
- SSIM-dyn | 36.7 | -12.9 | -26.4 | 31.4 | 44.5 |
+ SSIM-dyn | 9.2 | -12.9 | -26.4 | 31.4 | 44.5 |
-
+
The best MantisScore is in bold and the best in baselines is underlined.
- "-" means the answer of MLLM is meaningless or in wrong format.
+