Commit 305b41a
remove attention_mask creation as ORTModelForxxx's corresponding processors will create it
IlyasMoutawwakil committed Jun 6, 2024
1 parent d584a88 commit 305b41a
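
Why this change is safe: the preprocessors paired with the ORTModelForxxx classes (tokenizers, feature extractors) already emit an attention_mask alongside input_ids, so the all-ones fallback inside each forward() was redundant. A minimal usage sketch of that contract — the checkpoint name and the export=True flag are illustrative assumptions, not taken from this commit:

```python
# Sketch: the tokenizer, not the model, is responsible for the mask.
from transformers import AutoTokenizer
from optimum.onnxruntime import ORTModelForFeatureExtraction

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
# export=True converts the PyTorch checkpoint to ONNX on the fly (illustrative).
model = ORTModelForFeatureExtraction.from_pretrained("bert-base-uncased", export=True)

inputs = tokenizer(
    ["a short text", "a somewhat longer input text"],
    padding=True,
    return_tensors="pt",
)
print(sorted(inputs.keys()))  # ['attention_mask', 'input_ids', 'token_type_ids']

# The mask marks real tokens (1) vs padding (0); forward() no longer
# fabricates an all-ones mask when it is missing.
outputs = model(**inputs)
print(outputs.last_hidden_state.shape)
```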
Showing 1 changed file with 10 additions and 47 deletions.
57 changes: 10 additions & 47 deletions optimum/onnxruntime/modeling_ort.py
@@ -267,7 +267,6 @@ def __init__(
             **kwargs,
         )
 
-        # why are these not lists ?
         self.input_names = {input_key.name: idx for idx, input_key in enumerate(model.get_inputs())}
         self.input_dtypes = {input_key.name: input_key.type for input_key in model.get_inputs()}
 
@@ -740,7 +739,7 @@ def _output_shape_inference(self, axis_name: Union[str, int], dimensions: Dict[s
         # exception.
         return int(eval(" ".join(tokens)))
 
-    # this method is bloated with state arguments (that are accesible using self) why ?
+    # TODO: this method is bloated with state arguments (that are accesible using self) why ?
     def _prepare_io_binding(
         self,
         model: ort.InferenceSession,
@@ -1013,15 +1012,12 @@ def forward(
         use_torch = isinstance(input_ids, torch.Tensor)
         self.raise_on_numpy_input_io_binding(use_torch)
 
-        if attention_mask is None:
-            if use_torch:
-                attention_mask = torch.ones_like(input_ids)
-            else:
-                attention_mask = np.ones_like(input_ids)
-
         if self.device.type == "cuda" and self.use_io_binding:
             io_binding, output_shapes, output_buffers = self.prepare_io_binding(
-                input_ids, attention_mask, token_type_ids, ordered_input_names=self._ordered_input_names
+                input_ids,
+                attention_mask,
+                token_type_ids,
+                ordered_input_names=self._ordered_input_names,
             )
 
             # run inference with binding & synchronize in case of multiple CUDA streams
@@ -1037,7 +1033,7 @@ def forward(
             onnx_outputs = self.model.run(None, onnx_inputs)
             model_outputs = self._prepare_onnx_outputs(use_torch, *onnx_outputs)
 
-        # why do we only return last_hidden_state? why not all outputs?
+        # TODO: why do we only return last_hidden_state? why not all outputs?
         # that way, there will be less need for ORTModelForCustomTask in cases where
         # we just want to extend model outputs with attentions, hidden_states, etc.
         last_hidden_state = model_outputs["last_hidden_state"]
@@ -1161,15 +1157,12 @@ def forward(
         use_torch = isinstance(input_ids, torch.Tensor)
         self.raise_on_numpy_input_io_binding(use_torch)
 
-        if attention_mask is None:
-            if use_torch:
-                attention_mask = torch.ones_like(input_ids)
-            else:
-                attention_mask = np.ones_like(input_ids)
-
         if self.device.type == "cuda" and self.use_io_binding:
             io_binding, output_shapes, output_buffers = self.prepare_io_binding(
-                input_ids, attention_mask, token_type_ids, ordered_input_names=self._ordered_input_names
+                input_ids,
+                attention_mask,
+                token_type_ids,
+                ordered_input_names=self._ordered_input_names,
             )
 
             # run inference with binding & synchronize in case of multiple CUDA streams
@@ -1253,12 +1246,6 @@ def forward(
         use_torch = isinstance(input_ids, torch.Tensor)
         self.raise_on_numpy_input_io_binding(use_torch)
 
-        if attention_mask is None:
-            if use_torch:
-                attention_mask = torch.ones_like(input_ids)
-            else:
-                attention_mask = np.ones_like(input_ids)
-
         if self.device.type == "cuda" and self.use_io_binding:
             io_binding, output_shapes, output_buffers = self.prepare_io_binding(
                 input_ids,
@@ -1366,12 +1353,6 @@ def forward(
         use_torch = isinstance(input_ids, torch.Tensor)
         self.raise_on_numpy_input_io_binding(use_torch)
 
-        if attention_mask is None:
-            if use_torch:
-                attention_mask = torch.ones_like(input_ids)
-            else:
-                attention_mask = np.ones_like(input_ids)
-
         if self.device.type == "cuda" and self.use_io_binding:
             io_binding, output_shapes, output_buffers = self.prepare_io_binding(
                 input_ids,
@@ -1462,12 +1443,6 @@ def forward(
         use_torch = isinstance(input_ids, torch.Tensor)
         self.raise_on_numpy_input_io_binding(use_torch)
 
-        if attention_mask is None:
-            if use_torch:
-                attention_mask = torch.ones_like(input_ids)
-            else:
-                attention_mask = np.ones_like(input_ids)
-
         if self.device.type == "cuda" and self.use_io_binding:
             io_binding, output_shapes, output_buffers = self.prepare_io_binding(
                 input_ids,
@@ -1551,12 +1526,6 @@ def forward(
         use_torch = isinstance(input_ids, torch.Tensor)
         self.raise_on_numpy_input_io_binding(use_torch)
 
-        if attention_mask is None:
-            if use_torch:
-                attention_mask = torch.ones_like(input_ids)
-            else:
-                attention_mask = np.ones_like(input_ids)
-
         if self.device.type == "cuda" and self.use_io_binding:
             io_binding, output_shapes, output_buffers = self.prepare_io_binding(
                 input_ids,
@@ -1862,12 +1831,6 @@ def forward(
         use_torch = isinstance(model_input, torch.Tensor)
         self.raise_on_numpy_input_io_binding(use_torch)
 
-        if attention_mask is None:
-            if use_torch:
-                attention_mask = torch.ones_like(model_input)
-            else:
-                attention_mask = np.ones_like(model_input)
-
         if self.device.type == "cuda" and self.use_io_binding:
             io_binding, output_shapes, output_buffers = self.prepare_io_binding(
                 model_input,
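
Note the caller-visible consequence: code that passed bare input_ids used to get an implicit all-ones mask from the removed fallback; after this commit the mask should come from the preprocessor, or be built by the caller. A hedged sketch of the caller-side equivalent, reusing the model from the sketch above:

```python
# Caller-side replacement for the removed fallback (illustrative, not from the diff).
import torch

input_ids = torch.tensor([[101, 7592, 2088, 102]])  # illustrative BERT token ids
attention_mask = torch.ones_like(input_ids)  # exactly what the removed code synthesized
outputs = model(input_ids=input_ids, attention_mask=attention_mask)
```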
