diff --git a/benchmarks/dynamo/common.py b/benchmarks/dynamo/common.py
index 81ace9e9c1d19..104e59bc193a4 100644
--- a/benchmarks/dynamo/common.py
+++ b/benchmarks/dynamo/common.py
@@ -39,6 +39,7 @@
 from unittest.mock import MagicMock
 
 import numpy as np
+import numpy.typing as npt
 import pandas as pd
 import psutil
 import yaml
@@ -1565,14 +1566,14 @@ def format_pt_inputs(self, pt_inputs: Any) -> Sequence[torch.Tensor]:
     def format_pt_outputs(self, pt_outputs: Any) -> Sequence[torch.Tensor]:
         ...
 
-    def adapt_pt_inputs_to_onnx(self, pt_inputs) -> Mapping[str, np.ndarray]:
+    def adapt_pt_inputs_to_onnx(self, pt_inputs) -> Mapping[str, npt.NDArray]:
         pt_inputs = self.format_pt_inputs(pt_inputs)
         return {
             ort_input.name: pt_input.cpu().numpy()
             for ort_input, pt_input in zip(self.onnx_session.get_inputs(), pt_inputs)
         }
 
-    def adapt_onnx_outputs_to_pt(self, onnx_outputs: List[np.ndarray]) -> Any:
+    def adapt_onnx_outputs_to_pt(self, onnx_outputs: List[npt.NDArray]) -> Any:
         pt_outputs = [
             torch.from_numpy(onnx_output).to(current_device)
             for onnx_output in onnx_outputs
diff --git a/torch/ao/quantization/experimental/linear.py b/torch/ao/quantization/experimental/linear.py
index 34b0ca8e3921d..0093550472e0c 100644
--- a/torch/ao/quantization/experimental/linear.py
+++ b/torch/ao/quantization/experimental/linear.py
@@ -1,5 +1,6 @@
 # mypy: allow-untyped-defs
 import numpy as np
+import numpy.typing as npt
 
 import torch
 from torch.ao.nn.quantized.modules.utils import WeightedQuantizedModule
@@ -148,7 +149,7 @@ def forward(self, activation: torch.Tensor) -> torch.FloatTensor:
         weight_rows = self.weight_transposed.size()[0]
         weight_cols = self.weight_transposed.size()[1]
 
-        decomposed_weight: np.ndarray = np.empty(
+        decomposed_weight: npt.NDArray = np.empty(
             shape=(weight_rows, weight_cols), dtype=object
         )
         for row in range(weight_rows):
diff --git a/torch/onnx/_internal/exporter/_core.py b/torch/onnx/_internal/exporter/_core.py
index 7d49a654a9c00..09fae0ad2b88e 100644
--- a/torch/onnx/_internal/exporter/_core.py
+++ b/torch/onnx/_internal/exporter/_core.py
@@ -42,7 +42,7 @@
 if typing.TYPE_CHECKING:
     import os
 
-    import numpy as np
+    import numpy.typing as npt
 
 
 # Define utilities to convert PyTorch data types so users do not need to specify manually
@@ -100,7 +100,7 @@ def __init__(self, tensor: torch.Tensor, name: str | None = None):
             tensor, dtype=_torch_dtype_to_onnx_dtype(tensor.dtype), name=name
         )
 
-    def numpy(self) -> np.ndarray:
+    def numpy(self) -> npt.NDArray:
         self.raw: torch.Tensor
         if self.dtype == ir.DataType.BFLOAT16:
             return self.raw.view(torch.uint16).numpy(force=True)
@@ -114,7 +114,7 @@ def numpy(self) -> np.ndarray:
             return self.raw.view(torch.uint8).numpy(force=True)
         return self.raw.numpy(force=True)
 
-    def __array__(self, dtype: Any = None, copy: bool | None = None) -> np.ndarray:
+    def __array__(self, dtype: Any = None, copy: bool | None = None) -> npt.NDArray:
         del copy  # Unused, but needed for the signature
         if dtype is None:
             return self.numpy()
diff --git a/torch/onnx/verification.py b/torch/onnx/verification.py
index a21f1ffbba778..f489252f5a7b2 100644
--- a/torch/onnx/verification.py
+++ b/torch/onnx/verification.py
@@ -21,6 +21,7 @@
 from typing import Any, Callable, Collection, Mapping, Sequence, Tuple, Union
 
 import numpy as np
+import numpy.typing as npt
 
 import torch
 import torch._C._onnx as _C_onnx
@@ -98,7 +99,7 @@ def _flatten_tuples(elem):
 
 
 # TODO(justinchuby): Add type checking by narrowing down the return type when input is None
-def _to_numpy(elem) -> list | np.ndarray:
+def _to_numpy(elem) -> list | npt.NDArray:
     if isinstance(elem, torch.Tensor):
         if elem.requires_grad:
             return elem.detach().cpu().numpy()
diff --git a/torch/testing/_internal/common_methods_invocations.py b/torch/testing/_internal/common_methods_invocations.py
index 34ab66cd8077a..5f23ec4475544 100644
--- a/torch/testing/_internal/common_methods_invocations.py
+++ b/torch/testing/_internal/common_methods_invocations.py
@@ -13,6 +13,7 @@
 
 import torch
 import numpy as np
+import numpy.typing as npt
 from torch import inf, nan
 
 from typing import Any, Dict, List, Tuple, Union, Sequence
@@ -11342,7 +11343,7 @@ def _tanh_gelu_ref(X):
         return _gelu_ref(X)
 
 
-def reference_one_hot(a: np.ndarray, num_classes: int = -1) -> np.ndarray:
+def reference_one_hot(a: npt.NDArray, num_classes: int = -1) -> npt.NDArray:
     if num_classes == -1:
         num_classes = int(np.amax(a) + 1)
 
@@ -11362,11 +11363,11 @@ def reference_mse_loss(input, target, reduction="mean"):
         return se
 
 
-def reference_layer_norm(inp: np.ndarray, normalized_shape: Tuple[int], weight=None, bias=None, eps=1e-5):
+def reference_layer_norm(inp: npt.NDArray, normalized_shape: Tuple[int], weight=None, bias=None, eps=1e-5):
     return reference_native_layer_norm(inp, normalized_shape, weight, bias, eps)[0]
 
 
-def reference_native_layer_norm(inp: np.ndarray, normalized_shape: Tuple[int], weight, bias, eps):
+def reference_native_layer_norm(inp: npt.NDArray, normalized_shape: Tuple[int], weight, bias, eps):
     feature_size = np.prod(normalized_shape)
     inp_view = inp.reshape(-1, feature_size)  # type: ignore[call-overload]
     mean = inp_view.mean(axis=-1, keepdims=True)
@@ -11383,7 +11384,7 @@ def reference_native_layer_norm(inp: np.ndarray, normalized_shape: Tuple[int], w
     return Y.reshape(*inp.shape), mean.reshape(stat_shape), (1.0 / np.sqrt(var + eps)).reshape(stat_shape)
 
 
-def reference_rms_norm(inp: np.ndarray, normalized_shape: Tuple[int], weight=None, eps=None):
+def reference_rms_norm(inp: npt.NDArray, normalized_shape: Tuple[int], weight=None, eps=None):
     if eps is None:
         eps = torch.finfo(numpy_to_torch_dtype(inp.dtype)).eps
     feature_size = np.prod(normalized_shape)
@@ -11395,7 +11396,7 @@ def reference_rms_norm(inp: np.ndarray, normalized_shape: Tuple[int], weight=Non
     return Y.reshape(*inp.shape)
 
 
-def reference_group_norm(inp: np.ndarray, num_groups: int, weight=None, bias=None, eps=1e-5):
+def reference_group_norm(inp: npt.NDArray, num_groups: int, weight=None, bias=None, eps=1e-5):
     inp_view = inp
     if np.prod(inp.shape) != 0:
         inp_view = inp.reshape((inp.shape[0], num_groups, -1))
@@ -11481,7 +11482,7 @@ def reference_std_var(f):
     g = reference_reduction_numpy(f)
 
     @wraps(g)
-    def wrapper(x: np.ndarray, *args, **kwargs):
+    def wrapper(x: npt.NDArray, *args, **kwargs):
         assert not ('unbiased' in kwargs and 'correction' in kwargs)
 
         if 'unbiased' in kwargs:
diff --git a/torch/testing/_internal/opinfo/utils.py b/torch/testing/_internal/opinfo/utils.py
index 41973dc2c0518..05468e10da2c9 100644
--- a/torch/testing/_internal/opinfo/utils.py
+++ b/torch/testing/_internal/opinfo/utils.py
@@ -6,6 +6,7 @@
 from typing import Sequence
 
 import numpy as np
+import numpy.typing as npt
 
 import torch
 from torch.testing._internal.common_cuda import TEST_CUDA
@@ -206,7 +207,7 @@ def reference_reduction_numpy(f, supports_keepdims=True):
     """
 
     @wraps(f)
-    def wrapper(x: np.ndarray, *args, **kwargs):
+    def wrapper(x: npt.NDArray, *args, **kwargs):
         # Copy keys into a set
         keys = set(kwargs.keys())
 
diff --git a/torch/utils/tensorboard/_utils.py b/torch/utils/tensorboard/_utils.py
index 30984cfadf17f..8acaf1696cb1f 100644
--- a/torch/utils/tensorboard/_utils.py
+++ b/torch/utils/tensorboard/_utils.py
@@ -1,5 +1,6 @@
 # mypy: allow-untyped-defs
 import numpy as np
+import numpy.typing as npt
 
 
 # Functions for converting
@@ -21,7 +22,7 @@ def figure_to_image(figures, close=True):
     def render_to_rgb(figure):
         canvas = plt_backend_agg.FigureCanvasAgg(figure)
         canvas.draw()
-        data: np.ndarray = np.frombuffer(canvas.buffer_rgba(), dtype=np.uint8)
+        data: npt.NDArray = np.frombuffer(canvas.buffer_rgba(), dtype=np.uint8)
         w, h = figure.canvas.get_width_height()
         image_hwc = data.reshape([h, w, 4])[:, :, 0:3]
         image_chw = np.moveaxis(image_hwc, source=2, destination=0)