From 1c58aacbc8a0d40408368ce7551aa1601300eea7 Mon Sep 17 00:00:00 2001
From: Wanchao Liang
Date: Tue, 23 Jul 2024 12:34:17 -0700
Subject: [PATCH] [dtensor] move ops to private (#131211)

as titled

Differential Revision: [D60132519](https://our.internmc.facebook.com/intern/diff/D60132519)
Pull Request resolved: https://github.com/pytorch/pytorch/pull/131211
Approved by: https://github.com/XilunWu, https://github.com/wz337
ghstack dependencies: #131212
---
 test/distributed/_tensor/test_common_rules.py | 2 +-
 test/distributed/_tensor/test_embedding_ops.py | 2 +-
 test/distributed/_tensor/test_op_strategy.py | 8 ++++----
 test/distributed/_tensor/test_tensor_ops.py | 2 +-
 test/distributed/_tensor/test_view_ops.py | 2 +-
 torch/distributed/_spmd/batch_dim_utils.py | 2 +-
 torch/distributed/_spmd/experimental_ops.py | 2 +-
 torch/distributed/_tensor/ops/__init__.py | 18 +++++++++---------
 .../ops/{common_rules.py => _common_rules.py} | 0
 .../_tensor/ops/{conv_ops.py => _conv_ops.py} | 0
 .../{basic_strategy.py => _einsum_strategy.py} | 0
 .../{embedding_ops.py => _embedding_ops.py} | 0
 ...xperimental_ops.py => _experimental_ops.py} | 0
 .../_tensor/ops/{math_ops.py => _math_ops.py} | 0
 .../ops/{matrix_ops.py => _matrix_ops.py} | 2 +-
 .../{pointwise_ops.py => _pointwise_ops.py} | 0
 .../ops/{random_ops.py => _random_ops.py} | 0
 .../ops/{tensor_ops.py => _tensor_ops.py} | 4 ++--
 .../_tensor/ops/{view_ops.py => _view_ops.py} | 0
 torch/distributed/tensor/parallel/loss.py | 4 ++--
 20 files changed, 24 insertions(+), 24 deletions(-)
 rename torch/distributed/_tensor/ops/{common_rules.py => _common_rules.py} (100%)
 rename torch/distributed/_tensor/ops/{conv_ops.py => _conv_ops.py} (100%)
 rename torch/distributed/_tensor/ops/{basic_strategy.py => _einsum_strategy.py} (100%)
 rename torch/distributed/_tensor/ops/{embedding_ops.py => _embedding_ops.py} (100%)
 rename torch/distributed/_tensor/ops/{experimental_ops.py => _experimental_ops.py} (100%)
 rename torch/distributed/_tensor/ops/{math_ops.py => _math_ops.py} (100%)
 rename torch/distributed/_tensor/ops/{matrix_ops.py => _matrix_ops.py} (99%)
 rename torch/distributed/_tensor/ops/{pointwise_ops.py => _pointwise_ops.py} (100%)
 rename torch/distributed/_tensor/ops/{random_ops.py => _random_ops.py} (100%)
 rename torch/distributed/_tensor/ops/{tensor_ops.py => _tensor_ops.py} (99%)
 rename torch/distributed/_tensor/ops/{view_ops.py => _view_ops.py} (100%)

diff --git a/test/distributed/_tensor/test_common_rules.py b/test/distributed/_tensor/test_common_rules.py
index 895fb4186020b..77b5d91405a73 100644
--- a/test/distributed/_tensor/test_common_rules.py
+++ b/test/distributed/_tensor/test_common_rules.py
@@ -4,7 +4,7 @@
 import torch
 from torch.distributed._tensor import DeviceMesh
 from torch.distributed._tensor._op_schema import OpSchema
-from torch.distributed._tensor.ops.common_rules import einop_rule, pointwise_rule
+from torch.distributed._tensor.ops._common_rules import einop_rule, pointwise_rule
 from torch.distributed._tensor.placement_types import DTensorSpec, TensorMeta
 from torch.testing._internal.common_utils import run_tests
 from torch.testing._internal.distributed._tensor.common_dtensor import (
diff --git a/test/distributed/_tensor/test_embedding_ops.py b/test/distributed/_tensor/test_embedding_ops.py
index 4eb78136aabf5..7822962864cdf 100644
--- a/test/distributed/_tensor/test_embedding_ops.py
+++ b/test/distributed/_tensor/test_embedding_ops.py
@@ -167,7 +167,7 @@ def test_sharded_embedding_rowwise(self):
         self._run_embedding_op_test(mesh, 0, [6, 7, 6], 13, 22)
         self._run_embedding_op_test(mesh, 0, [34], 15, 14, padding_idx=10)
 
-        from torch.distributed._tensor.ops.embedding_ops import _MaskPartial
+        from torch.distributed._tensor.ops._embedding_ops import _MaskPartial
 
         # test collectives
         embedding_mod = torch.nn.Embedding(10, 20, device=self.device_type)
diff --git a/test/distributed/_tensor/test_op_strategy.py b/test/distributed/_tensor/test_op_strategy.py
index d6513f5c750fd..302e2675cc899 100644
--- a/test/distributed/_tensor/test_op_strategy.py
+++ b/test/distributed/_tensor/test_op_strategy.py
@@ -6,7 +6,7 @@
 from torch.distributed._tensor import DeviceMesh, DTensor
 from torch.distributed._tensor._collective_utils import redistribute_cost
 from torch.distributed._tensor._op_schema import OpSchema, OpStrategy, PlacementStrategy
-from torch.distributed._tensor.ops.basic_strategy import (
+from torch.distributed._tensor.ops._einsum_strategy import (
     EinsumDims,
     gen_einsum_strategies,
 )
@@ -169,7 +169,7 @@ def test_redistribute_cost_mesh_1d(self):
 
     def test_redistribute_cost_latency(self):
         # test cost model on addmm op
-        from torch.distributed._tensor.ops.matrix_ops import addmm_strategy
+        from torch.distributed._tensor.ops._matrix_ops import addmm_strategy
 
         mesh = self.build_device_mesh()
         shard0_placement = (Shard(0),)
@@ -246,7 +246,7 @@ def test_redistribute_cost_mesh_2d(self):
         self.assertTrue(allreduce_cost > reduce_scatter_cost)
 
     def test_mm_strategies(self):
-        from torch.distributed._tensor.ops.matrix_ops import mm_strategy
+        from torch.distributed._tensor.ops._matrix_ops import mm_strategy
 
         mesh = self.build_device_mesh()
         lhs_tensor = torch.randn(6, 8)
@@ -292,7 +292,7 @@ def test_mm_strategies(self):
         self.assertFalse(output_sharding.needs_redistribute)
 
     def test_bmm_strategies(self):
-        from torch.distributed._tensor.ops.matrix_ops import bmm_strategy
+        from torch.distributed._tensor.ops._matrix_ops import bmm_strategy
 
         mesh = self.build_device_mesh()
         lhs_tensor = torch.randn(8, 6, 8)
diff --git a/test/distributed/_tensor/test_tensor_ops.py b/test/distributed/_tensor/test_tensor_ops.py
index 539a038372e26..1cb8ae51104d6 100644
--- a/test/distributed/_tensor/test_tensor_ops.py
+++ b/test/distributed/_tensor/test_tensor_ops.py
@@ -445,7 +445,7 @@ def test_gather(self):
         # case 2 input sharding: input sharded, index replicated, output mask partial
         # only works when index has size 1 on the gather dimension and
         # input is sharded on the gather dimension
-        from torch.distributed._tensor.ops.embedding_ops import _MaskPartial
+        from torch.distributed._tensor.ops._embedding_ops import _MaskPartial
 
         gather_dim = 1
         global_input = torch.randn(12, 8, 16)
diff --git a/test/distributed/_tensor/test_view_ops.py b/test/distributed/_tensor/test_view_ops.py
index 2ea89e34789bf..8ace53d97131b 100644
--- a/test/distributed/_tensor/test_view_ops.py
+++ b/test/distributed/_tensor/test_view_ops.py
@@ -9,7 +9,7 @@
 from torch import rand, randn, Tensor
 from torch.distributed._tensor import DeviceMesh, distribute_tensor, Replicate, Shard
 from torch.distributed._tensor.debug import CommDebugMode
-from torch.distributed._tensor.ops.view_ops import (
+from torch.distributed._tensor.ops._view_ops import (
     Broadcast,
     dim_maps,
     Flatten,
diff --git a/torch/distributed/_spmd/batch_dim_utils.py b/torch/distributed/_spmd/batch_dim_utils.py
index 244cc26c55ed4..012b2414eb6c3 100644
--- a/torch/distributed/_spmd/batch_dim_utils.py
+++ b/torch/distributed/_spmd/batch_dim_utils.py
@@ -6,7 +6,7 @@
 import torch.utils._pytree as pytree
 from torch import Tensor
 from torch.distributed._tensor import DeviceMesh, Replicate, Shard
-from torch.distributed._tensor.ops.view_ops import dim_maps, DimSpec, InputDim
+from torch.distributed._tensor.ops._view_ops import dim_maps, DimSpec, InputDim
 from torch.distributed._tensor.placement_types import _Partial, DTensorSpec
 
 
diff --git a/torch/distributed/_spmd/experimental_ops.py b/torch/distributed/_spmd/experimental_ops.py
index f8c8f8804c579..1624c43afa6dc 100644
--- a/torch/distributed/_spmd/experimental_ops.py
+++ b/torch/distributed/_spmd/experimental_ops.py
@@ -5,7 +5,7 @@
 import torch
 
 from torch.distributed._tensor._op_schema import OpSchema, OutputSharding
-from torch.distributed._tensor.ops.common_rules import pointwise_rule
+from torch.distributed._tensor.ops._common_rules import pointwise_rule
 from torch.distributed._tensor.ops.utils import register_prop_rule
 from torch.distributed._tensor.placement_types import (
     _Partial,
diff --git a/torch/distributed/_tensor/ops/__init__.py b/torch/distributed/_tensor/ops/__init__.py
index eaccc8aa8d3f6..dec4665b1c8b9 100644
--- a/torch/distributed/_tensor/ops/__init__.py
+++ b/torch/distributed/_tensor/ops/__init__.py
@@ -1,10 +1,10 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates
-from .conv_ops import * # noqa: F403
-from .embedding_ops import * # noqa: F403
-from .experimental_ops import * # noqa: F403
-from .math_ops import * # noqa: F403
-from .matrix_ops import * # noqa: F403
-from .pointwise_ops import * # noqa: F403
-from .random_ops import * # noqa: F403
-from .tensor_ops import * # noqa: F403
-from .view_ops import * # noqa: F403
+from ._conv_ops import * # noqa: F403
+from ._embedding_ops import * # noqa: F403
+from ._experimental_ops import * # noqa: F403
+from ._math_ops import * # noqa: F403
+from ._matrix_ops import * # noqa: F403
+from ._pointwise_ops import * # noqa: F403
+from ._random_ops import * # noqa: F403
+from ._tensor_ops import * # noqa: F403
+from ._view_ops import * # noqa: F403
diff --git a/torch/distributed/_tensor/ops/common_rules.py b/torch/distributed/_tensor/ops/_common_rules.py
similarity index 100%
rename from torch/distributed/_tensor/ops/common_rules.py
rename to torch/distributed/_tensor/ops/_common_rules.py
diff --git a/torch/distributed/_tensor/ops/conv_ops.py b/torch/distributed/_tensor/ops/_conv_ops.py
similarity index 100%
rename from torch/distributed/_tensor/ops/conv_ops.py
rename to torch/distributed/_tensor/ops/_conv_ops.py
diff --git a/torch/distributed/_tensor/ops/basic_strategy.py b/torch/distributed/_tensor/ops/_einsum_strategy.py
similarity index 100%
rename from torch/distributed/_tensor/ops/basic_strategy.py
rename to torch/distributed/_tensor/ops/_einsum_strategy.py
diff --git a/torch/distributed/_tensor/ops/embedding_ops.py b/torch/distributed/_tensor/ops/_embedding_ops.py
similarity index 100%
rename from torch/distributed/_tensor/ops/embedding_ops.py
rename to torch/distributed/_tensor/ops/_embedding_ops.py
diff --git a/torch/distributed/_tensor/ops/experimental_ops.py b/torch/distributed/_tensor/ops/_experimental_ops.py
similarity index 100%
rename from torch/distributed/_tensor/ops/experimental_ops.py
rename to torch/distributed/_tensor/ops/_experimental_ops.py
diff --git a/torch/distributed/_tensor/ops/math_ops.py b/torch/distributed/_tensor/ops/_math_ops.py
similarity index 100%
rename from torch/distributed/_tensor/ops/math_ops.py
rename to torch/distributed/_tensor/ops/_math_ops.py
diff --git a/torch/distributed/_tensor/ops/matrix_ops.py b/torch/distributed/_tensor/ops/_matrix_ops.py
similarity index 99%
rename from torch/distributed/_tensor/ops/matrix_ops.py
rename to torch/distributed/_tensor/ops/_matrix_ops.py
index 2815d14d9490a..8b919254f116c 100644
--- a/torch/distributed/_tensor/ops/matrix_ops.py
+++ b/torch/distributed/_tensor/ops/_matrix_ops.py
@@ -9,7 +9,7 @@
     PlacementList,
     PlacementStrategy,
 )
-from torch.distributed._tensor.ops.basic_strategy import gen_einsum_strategies
+from torch.distributed._tensor.ops._einsum_strategy import gen_einsum_strategies
 from torch.distributed._tensor.ops.utils import (
     expand_to_full_mesh_op_strategy,
     generate_redistribute_costs,
diff --git a/torch/distributed/_tensor/ops/pointwise_ops.py b/torch/distributed/_tensor/ops/_pointwise_ops.py
similarity index 100%
rename from torch/distributed/_tensor/ops/pointwise_ops.py
rename to torch/distributed/_tensor/ops/_pointwise_ops.py
diff --git a/torch/distributed/_tensor/ops/random_ops.py b/torch/distributed/_tensor/ops/_random_ops.py
similarity index 100%
rename from torch/distributed/_tensor/ops/random_ops.py
rename to torch/distributed/_tensor/ops/_random_ops.py
diff --git a/torch/distributed/_tensor/ops/tensor_ops.py b/torch/distributed/_tensor/ops/_tensor_ops.py
similarity index 99%
rename from torch/distributed/_tensor/ops/tensor_ops.py
rename to torch/distributed/_tensor/ops/_tensor_ops.py
index e87fbb53c7c6b..223ff0674ec6e 100644
--- a/torch/distributed/_tensor/ops/tensor_ops.py
+++ b/torch/distributed/_tensor/ops/_tensor_ops.py
@@ -15,8 +15,8 @@
     StrategyType,
     TupleStrategy,
 )
-from torch.distributed._tensor.ops.common_rules import pointwise_rule
-from torch.distributed._tensor.ops.embedding_ops import _MaskPartial
+from torch.distributed._tensor.ops._common_rules import pointwise_rule
+from torch.distributed._tensor.ops._embedding_ops import _MaskPartial
 from torch.distributed._tensor.ops.utils import (
     expand_to_full_mesh_op_strategy,
     is_tensor_dim_sharded,
diff --git a/torch/distributed/_tensor/ops/view_ops.py b/torch/distributed/_tensor/ops/_view_ops.py
similarity index 100%
rename from torch/distributed/_tensor/ops/view_ops.py
rename to torch/distributed/_tensor/ops/_view_ops.py
diff --git a/torch/distributed/tensor/parallel/loss.py b/torch/distributed/tensor/parallel/loss.py
index 82295c4f4e4f2..ead6ccaea889f 100644
--- a/torch/distributed/tensor/parallel/loss.py
+++ b/torch/distributed/tensor/parallel/loss.py
@@ -9,8 +9,8 @@
 import torch.distributed.distributed_c10d as c10d
 from torch import Tensor
 from torch.distributed._tensor import DTensor, Replicate, Shard
-from torch.distributed._tensor.ops.embedding_ops import _MaskPartial
-from torch.distributed._tensor.ops.math_ops import (
+from torch.distributed._tensor.ops._embedding_ops import _MaskPartial
+from torch.distributed._tensor.ops._math_ops import (
    _skip_dim,
     Reduction,
     replicate_reduction_dims,
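
For reference, downstream code that imported these modules by their old public paths needs to switch to the underscore-prefixed names. Below is a minimal sketch, not part of the patch itself, showing the updated import for one symbol touched in the hunks above (_MaskPartial), with a fallback for PyTorch builds that predate this rename:

# Sketch (not part of the patch): prefer the new private module name from
# this change, falling back to the old public path on older PyTorch builds.
try:
    from torch.distributed._tensor.ops._embedding_ops import _MaskPartial
except ImportError:
    from torch.distributed._tensor.ops.embedding_ops import _MaskPartial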