diff --git a/gallery/index.yaml b/gallery/index.yaml
index d66b32366c8..eb486a31275 100644
--- a/gallery/index.yaml
+++ b/gallery/index.yaml
@@ -4636,6 +4636,24 @@
     - filename: calme-2.1-phi3.5-4b.i1-Q4_K_M.gguf
       sha256: 989eccacd52b6d9ebf2c06c35c363da19aadb125659a10df299b7130bc293e77
       uri: huggingface://mradermacher/calme-2.1-phi3.5-4b-i1-GGUF/calme-2.1-phi3.5-4b.i1-Q4_K_M.gguf
+- !!merge <<: *phi-3
+  name: 'phi-3.5-vision-instruct'  # explicit keys below take precedence over merged-in ones
+  description: |
+    Phi-3.5-vision is a lightweight, state-of-the-art open multimodal model built upon datasets which include - synthetic data and filtered publicly available websites - with a focus on very high-quality, reasoning dense data both on text and vision. The model belongs to the Phi-3 model family, and the multimodal version comes with 128K context length (in tokens) it can support. The model underwent a rigorous enhancement process, incorporating both supervised fine-tuning and direct preference optimization to ensure precise instruction adherence and robust safety measures.
+  urls:
+    - https://huggingface.co/microsoft/Phi-3.5-vision-instruct
+    - https://huggingface.co/abetlen/Phi-3.5-vision-instruct-gguf
+  files:  # two artifacts: the Q8_0 model weights and the F16 mmproj file
+    - uri: huggingface://abetlen/Phi-3.5-vision-instruct-gguf/Phi-3.5-3.8B-vision-instruct-Q8_0.gguf
+      filename: Phi-3.5-3.8B-vision-instruct-Q8_0.gguf
+      sha256: ad0a1ee23ea9d88e932b493a4c077dea95c0f52a0f57a604509504c6ebc3df12
+    - uri: huggingface://abetlen/Phi-3.5-vision-instruct-gguf/Phi-3.5-3.8B-vision-instruct-mmproj-F16.gguf
+      filename: Phi-3.5-3.8B-vision-instruct-mmproj-F16.gguf
+      sha256: ab8449cc7527c21d7082a6ca8266f67a71b459019f67e814ae1683700e61f3f9
+  overrides:  # both values must equal a `filename` listed under `files`
+    parameters:
+      model: Phi-3.5-3.8B-vision-instruct-Q8_0.gguf
+    mmproj: Phi-3.5-3.8B-vision-instruct-mmproj-F16.gguf
 - &hermes-2-pro-mistral
   ### START Hermes
   url: "github:mudler/LocalAI/gallery/hermes-2-pro-mistral.yaml@master"