Skip to content

Commit

Permalink
feat: OpenVINO acceleration for embeddings in transformer backend (#2190)
Browse files Browse the repository at this point in the history

OpenVINO acceleration for embeddings

New argument type: OVModelForFeatureExtraction
  • Loading branch information
fakezeta authored Apr 30, 2024
1 parent 3754f15 commit e38610e
Showing 1 changed file with 15 additions and 0 deletions.
15 changes: 15 additions & 0 deletions backend/python/transformers/transformers_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,21 @@ def LoadModel(self, request, context):
ov_config={"PERFORMANCE_HINT": "CUMULATIVE_THROUGHPUT"},
device=device_map)
self.OV = True
elif request.Type == "OVModelForFeatureExtraction":
from optimum.intel.openvino import OVModelForFeatureExtraction
from openvino.runtime import Core

if "GPU" in Core().available_devices:
device_map="GPU"
else:
device_map="CPU"
self.model = OVModelForFeatureExtraction.from_pretrained(model_name,
compile=True,
trust_remote_code=request.TrustRemoteCode,
ov_config={"PERFORMANCE_HINT": "CUMULATIVE_THROUGHPUT"},
export=True,
device=device_map)
self.OV = True
else:
self.model = AutoModel.from_pretrained(model_name,
trust_remote_code=request.TrustRemoteCode,
Expand Down

0 comments on commit e38610e

Please sign in to comment.