From 03bff531e09e19a91c8d7a713bc7d6d96a35b85e Mon Sep 17 00:00:00 2001
From: Jingya HUANG <44135271+JingyaHuang@users.noreply.github.com>
Date: Tue, 22 Oct 2024 15:16:09 +0000
Subject: [PATCH] improve doc

---
 optimum/neuron/modeling_seq2seq.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/optimum/neuron/modeling_seq2seq.py b/optimum/neuron/modeling_seq2seq.py
index 690a9a126..e936e8f5f 100644
--- a/optimum/neuron/modeling_seq2seq.py
+++ b/optimum/neuron/modeling_seq2seq.py
@@ -424,12 +424,12 @@ def _combine_encoder_decoder_config(self, encoder_config: "PretrainedConfig", de
     results = [tokenizer.decode(t, skip_special_tokens=True) for t in output]
     ```
     Example of text-to-text generation with tensor parallelism:
-    (For large models, in order to fit into Neuron cores, we need to applly tensor parallelism. Hers below is an example ran on `inf2.24xlarge`.)
+    *(For large models, we need to apply tensor parallelism so that they fit into Neuron cores. Below is an example run on `inf2.24xlarge`.)*
     ```python
     from transformers import {processor_class}
     from optimum.neuron import {model_class}
     # 1. compile
-    if __name__ == "__main__":  # `if __name__ == "__main__"` is compulsory for parallel tracing since the API will spawn multiple processes
+    if __name__ == "__main__":  # compulsory for parallel tracing since the API will spawn multiple processes
         neuron_model = {model_class}.from_pretrained(
             {checkpoint_tp}, export=True, tensor_parallel_size=8, dynamic_batch_size=False, batch_size=1, sequence_length=128, num_beams=4,
         )
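
For context, here is a minimal runnable sketch of the full workflow the patched docstring describes, with the template placeholders filled in. Assumptions not taken from the patch: `AutoTokenizer` stands in for `{processor_class}`, `NeuronModelForSeq2SeqLM` for `{model_class}`, and `google/flan-t5-xl` is a hypothetical stand-in for `{checkpoint_tp}`, as is the output directory name.

```python
# Sketch only: placeholder substitutions ({processor_class}, {model_class},
# {checkpoint_tp}) are assumptions; the checkpoint and save path are hypothetical.
from transformers import AutoTokenizer
from optimum.neuron import NeuronModelForSeq2SeqLM

if __name__ == "__main__":  # required: parallel tracing spawns multiple processes
    # 1. compile: shard the model across 8 Neuron cores with tensor parallelism
    neuron_model = NeuronModelForSeq2SeqLM.from_pretrained(
        "google/flan-t5-xl",  # hypothetical checkpoint, not named in the patch
        export=True,
        tensor_parallel_size=8,
        dynamic_batch_size=False,
        batch_size=1,
        sequence_length=128,
        num_beams=4,
    )
    neuron_model.save_pretrained("flan_t5_xl_neuronx_tp8/")

    # 2. inference: mirrors the generation snippet shown as context in the hunk
    tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-xl")
    inputs = tokenizer("translate English to German: Lets eat good food.", return_tensors="pt")
    output = neuron_model.generate(**inputs, num_beams=4)
    results = [tokenizer.decode(t, skip_special_tokens=True) for t in output]
    print(results)
```

The `if __name__ == "__main__":` guard matters because, as the patched comment notes, parallel tracing spawns worker processes that re-import the module; without the guard each worker would re-trigger compilation.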