From e71c3af1821d3ca74bf3e9048134d0a4e3482f2d Mon Sep 17 00:00:00 2001
From: zero323
Date: Thu, 23 Jan 2020 18:22:04 +0100
Subject: [PATCH] Add dynamic annotations for internal modules

---
 doc/api-coverage.rst                      | 18 ++---
 third_party/3/pyspark/cloudpickle.pyi     | 67 +++++++++++++++++
 third_party/3/pyspark/daemon.pyi          |  6 ++
 third_party/3/pyspark/find_spark_home.pyi |  0
 third_party/3/pyspark/heapq3.pyi          | 10 +++
 third_party/3/pyspark/java_gateway.pyi    |  8 ++
 .../ml/param/_shared_params_code_gen.pyi  |  1 +
 third_party/3/pyspark/shell.pyi           | 11 +++
 third_party/3/pyspark/shuffle.pyi         | 74 +++++++++++++++++++
 third_party/3/pyspark/worker.pyi          | 35 +++++++++
 10 files changed, 221 insertions(+), 9 deletions(-)
 create mode 100644 third_party/3/pyspark/cloudpickle.pyi
 create mode 100644 third_party/3/pyspark/daemon.pyi
 create mode 100644 third_party/3/pyspark/find_spark_home.pyi
 create mode 100644 third_party/3/pyspark/heapq3.pyi
 create mode 100644 third_party/3/pyspark/java_gateway.pyi
 create mode 100644 third_party/3/pyspark/ml/param/_shared_params_code_gen.pyi
 create mode 100644 third_party/3/pyspark/shell.pyi
 create mode 100644 third_party/3/pyspark/shuffle.pyi
 create mode 100644 third_party/3/pyspark/worker.pyi

diff --git a/doc/api-coverage.rst b/doc/api-coverage.rst
index a7a0ce64..3e06924a 100644
--- a/doc/api-coverage.rst
+++ b/doc/api-coverage.rst
@@ -10,21 +10,21 @@ API Coverage
 +------------------------------------------------+---------------------+--------------------+------------+
 | `pyspark.broadcast`_                            | ✔                   | ✔                  | Mixed      |
 +------------------------------------------------+---------------------+--------------------+------------+
-| pyspark.cloudpickle                             | ✘                   | ✘                  | Internal   |
+| pyspark.cloudpickle                             | ✔                   | ✘                  | Internal   |
 +------------------------------------------------+---------------------+--------------------+------------+
 | `pyspark.conf`_                                 | ✘                   | ✔                  |            |
 +------------------------------------------------+---------------------+--------------------+------------+
 | `pyspark.context`_                              | ✘                   | ✔                  |            |
 +------------------------------------------------+---------------------+--------------------+------------+
-| pyspark.daemon                                  | ✘                   | ✘                  | Internal   |
+| pyspark.daemon                                  | ✔                   | ✘                  | Internal   |
 +------------------------------------------------+---------------------+--------------------+------------+
 | `pyspark.files`_                                | ✘                   | ✔                  |            |
 +------------------------------------------------+---------------------+--------------------+------------+
-| pyspark.find\_spark\_home                       | ✘                   | ✘                  | Internal   |
+| pyspark.find\_spark\_home                       | ✔                   | ✘                  | Internal   |
 +------------------------------------------------+---------------------+--------------------+------------+
-| pyspark.heapq3                                  | ✘                   | ✘                  | Internal   |
+| pyspark.heapq3                                  | ✔                   | ✘                  | Internal   |
 +------------------------------------------------+---------------------+--------------------+------------+
-| pyspark.java\_gateway                           | ✘                   | ✘                  | Internal   |
+| pyspark.java\_gateway                           | ✔                   | ✘                  | Internal   |
 +------------------------------------------------+---------------------+--------------------+------------+
 | `pyspark.join`_                                 | ✘                   | ✔                  |            |
 +------------------------------------------------+---------------------+--------------------+------------+
@@ -50,7 +50,7 @@ API Coverage
 +------------------------------------------------+---------------------+--------------------+------------+
 | `pyspark.ml.param`_                             | ✘                   | ✔                  |            |
 +------------------------------------------------+---------------------+--------------------+------------+
-| pyspark.ml.param.\_shared\_params\_code\_gen    | ✘                   | ✘                  | Internal   |
+| pyspark.ml.param.\_shared\_params\_code\_gen    | ✔                   | ✘                  | Internal   |
 +------------------------------------------------+---------------------+--------------------+------------+
 | `pyspark.ml.param.shared`_                      | ✘                   | ✔                  |            |
 +------------------------------------------------+---------------------+--------------------+------------+
@@ -124,9 +124,9 @@ API Coverage
 +------------------------------------------------+---------------------+--------------------+------------+
 | `pyspark.serializers`_                          | ✔                   | ✘                  |            |
 +------------------------------------------------+---------------------+--------------------+------------+
-| pyspark.shell                                   | ✘                   | ✘                  | Internal   |
+| pyspark.shell                                   | ✔                   | ✘                  | Internal   |
 +------------------------------------------------+---------------------+--------------------+------------+
-| pyspark.shuffle                                 | ✘                   | ✘                  | Internal   |
+| pyspark.shuffle                                 | ✔                   | ✘                  | Internal   |
 +------------------------------------------------+---------------------+--------------------+------------+
 | `pyspark.sql`_                                  | ✔                   | ✘                  |            |
 +------------------------------------------------+---------------------+--------------------+------------+
@@ -212,7 +212,7 @@ API Coverage
 +------------------------------------------------+---------------------+--------------------+------------+
 | `pyspark.version`_                              | ✘                   | ✔                  |            |
 +------------------------------------------------+---------------------+--------------------+------------+
-| pyspark.worker                                  | ✘                   | ✘                  | Internal   |
+| pyspark.worker                                  | ✔                   | ✘                  | Internal   |
 +------------------------------------------------+---------------------+--------------------+------------+
diff --git a/third_party/3/pyspark/cloudpickle.pyi b/third_party/3/pyspark/cloudpickle.pyi
new file mode 100644
index 00000000..aa72489c
--- /dev/null
+++ b/third_party/3/pyspark/cloudpickle.pyi
@@ -0,0 +1,67 @@
+import pickle
+from functools import partial as partial
+from pickle import _Pickler as Pickler  # type: ignore[attr-defined]
+from typing import Any, Optional
+
+DEFAULT_PROTOCOL: Any
+string_types: Any
+PY3: bool
+PY2: bool
+
+def cell_set(cell: Any, value: Any): ...
+
+STORE_GLOBAL: Any
+DELETE_GLOBAL: Any
+LOAD_GLOBAL: Any
+GLOBAL_OPS: Any
+HAVE_ARGUMENT: Any
+EXTENDED_ARG: Any
+
+def islambda(func: Any): ...
+
+class CloudPickler(Pickler):
+    dispatch: Any = ...
+    globals_ref: Any = ...
+    def __init__(self, file: Any, protocol: Optional[Any] = ...) -> None: ...
+    def dump(self, obj: Any): ...
+    def save_memoryview(self, obj: Any) -> None: ...
+    def save_module(self, obj: Any) -> None: ...
+    def save_codeobject(self, obj: Any) -> None: ...
+    def save_function(self, obj: Any, name: Optional[Any] = ...): ...
+    def save_dynamic_class(self, obj: Any) -> None: ...
+    def save_function_tuple(self, func: Any) -> None: ...
+    @classmethod
+    def extract_code_globals(cls, co: Any): ...
+    def extract_func_data(self, func: Any): ...
+    def save_builtin_function(self, obj: Any): ...
+    def save_global(self, obj: Any, name: Optional[Any] = ..., pack: Any = ...): ...
+    def save_instancemethod(self, obj: Any) -> None: ...
+    def save_inst(self, obj: Any) -> None: ...
+    def save_property(self, obj: Any) -> None: ...
+    def save_classmethod(self, obj: Any) -> None: ...
+    def save_itemgetter(self, obj: Any): ...
+    attrs: Any = ...
+    index: Any = ...
+    def save_attrgetter(self, obj: Any): ...
+    def save_file(self, obj: Any): ...
+    def save_ellipsis(self, obj: Any) -> None: ...
+    def save_not_implemented(self, obj: Any) -> None: ...
+    def save_weakset(self, obj: Any) -> None: ...
+    def save_logger(self, obj: Any) -> None: ...
+    def save_root_logger(self, obj: Any) -> None: ...
+    def save_mappingproxy(self, obj: Any) -> None: ...
+    def inject_addons(self) -> None: ...
+
+def is_tornado_coroutine(func: Any): ...
+def dump(obj: Any, file: Any, protocol: Optional[Any] = ...) -> None: ...
+def dumps(obj: Any, protocol: Optional[Any] = ...): ...
+load = pickle.load
+loads = pickle.loads
+
+def subimport(name: Any): ...
+def dynamic_subimport(name: Any, vars: Any): ...
+def instance(cls): ...
+
+class _empty_cell_value:
+    @classmethod
+    def __reduce__(cls): ...
diff --git a/third_party/3/pyspark/daemon.pyi b/third_party/3/pyspark/daemon.pyi
new file mode 100644
index 00000000..15accf8c
--- /dev/null
+++ b/third_party/3/pyspark/daemon.pyi
@@ -0,0 +1,6 @@
+from pyspark.serializers import UTF8Deserializer as UTF8Deserializer, read_int as read_int, write_int as write_int, write_with_length as write_with_length  # type: ignore[attr-defined]
+from typing import Any
+
+def compute_real_exit_code(exit_code: Any): ...
+def worker(sock: Any, authenticated: Any): ...
+def manager() -> None: ...
diff --git a/third_party/3/pyspark/find_spark_home.pyi b/third_party/3/pyspark/find_spark_home.pyi
new file mode 100644
index 00000000..e69de29b
diff --git a/third_party/3/pyspark/heapq3.pyi b/third_party/3/pyspark/heapq3.pyi
new file mode 100644
index 00000000..c05140c8
--- /dev/null
+++ b/third_party/3/pyspark/heapq3.pyi
@@ -0,0 +1,10 @@
+from typing import Any, Optional
+
+def heappush(heap: Any, item: Any) -> None: ...
+def heappop(heap: Any): ...
+def heapreplace(heap: Any, item: Any): ...
+def heappushpop(heap: Any, item: Any): ...
+def heapify(x: Any) -> None: ...
+def merge(iterables: Any, key: Optional[Any] = ..., reverse: bool = ...) -> None: ...
+def nsmallest(n: Any, iterable: Any, key: Optional[Any] = ...): ...
+def nlargest(n: Any, iterable: Any, key: Optional[Any] = ...): ...
diff --git a/third_party/3/pyspark/java_gateway.pyi b/third_party/3/pyspark/java_gateway.pyi
new file mode 100644
index 00000000..de63c7d0
--- /dev/null
+++ b/third_party/3/pyspark/java_gateway.pyi
@@ -0,0 +1,8 @@
+from pyspark.serializers import UTF8Deserializer as UTF8Deserializer, read_int as read_int, write_with_length as write_with_length  # type: ignore[attr-defined]
+from typing import Any, Optional
+
+xrange = range
+
+def launch_gateway(conf: Optional[Any] = ..., popen_kwargs: Optional[Any] = ...): ...
+def local_connect_and_auth(port: Any, auth_secret: Any): ...
+def ensure_callback_server_started(gw: Any) -> None: ...
diff --git a/third_party/3/pyspark/ml/param/_shared_params_code_gen.pyi b/third_party/3/pyspark/ml/param/_shared_params_code_gen.pyi
new file mode 100644
index 00000000..4af5857b
--- /dev/null
+++ b/third_party/3/pyspark/ml/param/_shared_params_code_gen.pyi
@@ -0,0 +1 @@
+header: str
diff --git a/third_party/3/pyspark/shell.pyi b/third_party/3/pyspark/shell.pyi
new file mode 100644
index 00000000..acfce776
--- /dev/null
+++ b/third_party/3/pyspark/shell.pyi
@@ -0,0 +1,11 @@
+from pyspark import SparkConf as SparkConf
+from pyspark.context import SparkContext as SparkContext
+from pyspark.sql import SQLContext as SQLContext, SparkSession as SparkSession
+from typing import Any
+
+spark: Any
+sc: Any
+sql: Any
+sqlContext: Any
+sqlCtx = sqlContext
+code: Any
diff --git a/third_party/3/pyspark/shuffle.pyi b/third_party/3/pyspark/shuffle.pyi
new file mode 100644
index 00000000..d00fa0ad
--- /dev/null
+++ b/third_party/3/pyspark/shuffle.pyi
@@ -0,0 +1,74 @@
+from pyspark.serializers import AutoBatchedSerializer as AutoBatchedSerializer, BatchedSerializer as BatchedSerializer, CompressedSerializer as CompressedSerializer, FlattenedValuesSerializer as FlattenedValuesSerializer, PickleSerializer as PickleSerializer
+from pyspark.util import fail_on_stopiteration as fail_on_stopiteration
+from typing import Any, Optional
+
+process: Any
+
+def get_used_memory(): ...
+
+MemoryBytesSpilled: int
+DiskBytesSpilled: int
+
+class Aggregator:
+    createCombiner: Any = ...
+    mergeValue: Any = ...
+    mergeCombiners: Any = ...
+    def __init__(self, createCombiner: Any, mergeValue: Any, mergeCombiners: Any) -> None: ...
+
+class SimpleAggregator(Aggregator):
+    def __init__(self, combiner: Any): ...
+
+class Merger:
+    agg: Any = ...
+    def __init__(self, aggregator: Any) -> None: ...
+    def mergeValues(self, iterator: Any) -> None: ...
+    def mergeCombiners(self, iterator: Any) -> None: ...
+    def items(self) -> None: ...
+
+class ExternalMerger(Merger):
+    MAX_TOTAL_PARTITIONS: int = ...
+    memory_limit: Any = ...
+    serializer: Any = ...
+    localdirs: Any = ...
+    partitions: Any = ...
+    batch: Any = ...
+    scale: Any = ...
+    data: Any = ...
+    pdata: Any = ...
+    spills: int = ...
+    def __init__(self, aggregator: Any, memory_limit: int = ..., serializer: Optional[Any] = ..., localdirs: Optional[Any] = ..., scale: int = ..., partitions: int = ..., batch: int = ...) -> None: ...
+    def mergeValues(self, iterator: Any) -> None: ...
+    def mergeCombiners(self, iterator: Any, limit: Optional[Any] = ...) -> None: ...
+    def items(self): ...
+
+class ExternalSorter:
+    memory_limit: Any = ...
+    local_dirs: Any = ...
+    serializer: Any = ...
+    def __init__(self, memory_limit: Any, serializer: Optional[Any] = ...) -> None: ...
+    def sorted(self, iterator: Any, key: Optional[Any] = ..., reverse: bool = ...): ...
+
+class ExternalList:
+    LIMIT: int = ...
+    values: Any = ...
+    count: Any = ...
+    def __init__(self, values: Any) -> None: ...
+    def __iter__(self) -> Any: ...
+    def __len__(self): ...
+    def append(self, value: Any) -> None: ...
+    def __del__(self) -> None: ...
+
+class ExternalListOfList(ExternalList):
+    count: Any = ...
+    def __init__(self, values: Any) -> None: ...
+    def append(self, value: Any) -> None: ...
+    def __iter__(self) -> Any: ...
+
+class GroupByKey:
+    iterator: Any = ...
+    def __init__(self, iterator: Any) -> None: ...
+    def __iter__(self) -> Any: ...
+
+class ExternalGroupBy(ExternalMerger):
+    SORT_KEY_LIMIT: int = ...
+    def flattened_serializer(self): ...
diff --git a/third_party/3/pyspark/worker.pyi b/third_party/3/pyspark/worker.pyi new file mode 100644 index 00000000..e8292540 --- /dev/null +++ b/third_party/3/pyspark/worker.pyi @@ -0,0 +1,35 @@ +from pyspark import shuffle as shuffle +from pyspark.broadcast import Broadcast as Broadcast +from pyspark.files import SparkFiles as SparkFiles +from pyspark.java_gateway import local_connect_and_auth as local_connect_and_auth +from pyspark.rdd import PythonEvalType as PythonEvalType +from pyspark.resourceinformation import ResourceInformation as ResourceInformation +from pyspark.serializers import BatchedSerializer as BatchedSerializer, PickleSerializer as PickleSerializer, SpecialLengths as SpecialLengths, UTF8Deserializer as UTF8Deserializer, read_bool as read_bool, read_int as read_int, read_long as read_long, write_int as write_int, write_long as write_long, write_with_length as write_with_length # type: ignore[attr-defined] +from pyspark.sql.pandas.serializers import ArrowStreamPandasUDFSerializer as ArrowStreamPandasUDFSerializer, CogroupUDFSerializer as CogroupUDFSerializer +from pyspark.sql.pandas.types import to_arrow_type as to_arrow_type +from pyspark.sql.types import StructType as StructType +from pyspark.taskcontext import BarrierTaskContext as BarrierTaskContext, TaskContext as TaskContext +from pyspark.util import fail_on_stopiteration as fail_on_stopiteration +from typing import Any + +has_resource_module: bool +basestring = str +pickleSer: Any +utf8_deserializer: Any + +def report_times(outfile: Any, boot: Any, init: Any, finish: Any) -> None: ... +def add_path(path: Any) -> None: ... +def read_command(serializer: Any, file: Any): ... +def chain(f: Any, g: Any): ... +def wrap_udf(f: Any, return_type: Any): ... +def wrap_scalar_pandas_udf(f: Any, return_type: Any): ... +def wrap_pandas_iter_udf(f: Any, return_type: Any): ... +def wrap_cogrouped_map_pandas_udf(f: Any, return_type: Any, argspec: Any): ... +def wrap_grouped_map_pandas_udf(f: Any, return_type: Any, argspec: Any): ... +def wrap_grouped_agg_pandas_udf(f: Any, return_type: Any): ... +def wrap_window_agg_pandas_udf(f: Any, return_type: Any, runner_conf: Any, udf_index: Any): ... +def wrap_unbounded_window_agg_pandas_udf(f: Any, return_type: Any): ... +def wrap_bounded_window_agg_pandas_udf(f: Any, return_type: Any): ... +def read_single_udf(pickleSer: Any, infile: Any, eval_type: Any, runner_conf: Any, udf_index: Any): ... +def read_udfs(pickleSer: Any, infile: Any, eval_type: Any): ... +def main(infile: Any, outfile: Any) -> None: ...
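
Note (illustrative, not part of the patch): these .pyi files appear to follow the
shape of mypy's stubgen output, i.e. "dynamic" annotations where most signatures
fall back to Any. Below is a minimal sketch of how stubs of this kind could be
regenerated and then hand-adjusted, assuming stubgen (shipped with mypy) and
pyspark are installed; the helper script name is hypothetical, while the module
names and the output directory mirror the ones used in this patch.

    # regen_internal_stubs.py -- illustrative helper, not part of the patch
    import subprocess

    # Internal modules covered by the stubs added above.
    MODULES = [
        "pyspark.daemon",
        "pyspark.heapq3",
        "pyspark.java_gateway",
        "pyspark.shuffle",
        "pyspark.worker",
    ]

    for module in MODULES:
        # stubgen -m selects a module and -o the output directory, so the
        # result lands at e.g. third_party/3/pyspark/daemon.pyi.
        subprocess.run(["stubgen", "-m", module, "-o", "third_party/3"], check=True)

    # The generated stubs still need manual touch-ups, for example the
    # "# type: ignore[attr-defined]" comments on the re-exported
    # pyspark.serializers names seen in the diff above.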