
Add dynamic annotations for internal modules
zero323 committed Jan 23, 2020
1 parent a66989d commit e71c3af
Showing 10 changed files with 221 additions and 9 deletions.
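
All of the new stubs are "dynamic" in the sense used by the coverage table below: machine-generated skeletons (e.g. via a tool such as mypy's stubgen) in which module-level names and signatures are present but almost every type is Any. A hypothetical snippet of user code, not part of this commit, shows what that buys a type checker:

    # With these stubs on mypy's search path, imports of internal modules resolve
    # instead of erroring out, but arguments and returns are not really checked.
    from pyspark.daemon import compute_real_exit_code

    code = compute_real_exit_code(0)   # resolves and type-checks
    compute_real_exit_code("oops")     # also type-checks, because the parameter is Any
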
18 changes: 9 additions & 9 deletions doc/api-coverage.rst
@@ -10,21 +10,21 @@ API Coverage
+------------------------------------------------+---------------------+--------------------+------------+
| `pyspark.broadcast`_ ||| Mixed |
+------------------------------------------------+---------------------+--------------------+------------+
| pyspark.cloudpickle | || Internal |
+------------------------------------------------+---------------------+--------------------+------------+
| `pyspark.conf`_ ||| |
+------------------------------------------------+---------------------+--------------------+------------+
| `pyspark.context`_ ||| |
+------------------------------------------------+---------------------+--------------------+------------+
| pyspark.daemon | || Internal |
+------------------------------------------------+---------------------+--------------------+------------+
| `pyspark.files`_ ||| |
+------------------------------------------------+---------------------+--------------------+------------+
| pyspark.find\_spark\_home | || Internal |
+------------------------------------------------+---------------------+--------------------+------------+
| pyspark.heapq3 | || Internal |
+------------------------------------------------+---------------------+--------------------+------------+
| pyspark.java\_gateway | || Internal |
+------------------------------------------------+---------------------+--------------------+------------+
| `pyspark.join`_ ||| |
+------------------------------------------------+---------------------+--------------------+------------+
@@ -50,7 +50,7 @@ API Coverage
+------------------------------------------------+---------------------+--------------------+------------+
| `pyspark.ml.param`_ ||| |
+------------------------------------------------+---------------------+--------------------+------------+
| pyspark.ml.param.\_shared\_params\_code\_gen | || Internal |
+------------------------------------------------+---------------------+--------------------+------------+
| `pyspark.ml.param.shared`_ ||| |
+------------------------------------------------+---------------------+--------------------+------------+
@@ -124,9 +124,9 @@ API Coverage
+------------------------------------------------+---------------------+--------------------+------------+
| `pyspark.serializers`_ ||| |
+------------------------------------------------+---------------------+--------------------+------------+
| pyspark.shell | || Internal |
+------------------------------------------------+---------------------+--------------------+------------+
| pyspark.shuffle | || Internal |
+------------------------------------------------+---------------------+--------------------+------------+
| `pyspark.sql`_ ||| |
+------------------------------------------------+---------------------+--------------------+------------+
@@ -212,7 +212,7 @@ API Coverage
+------------------------------------------------+---------------------+--------------------+------------+
| `pyspark.version`_ ||| |
+------------------------------------------------+---------------------+--------------------+------------+
| pyspark.worker | || Internal |
+------------------------------------------------+---------------------+--------------------+------------+


67 changes: 67 additions & 0 deletions third_party/3/pyspark/cloudpickle.pyi
@@ -0,0 +1,67 @@
import pickle
from functools import partial as partial
from pickle import _Pickler as Pickler # type: ignore[attr-defined]
from typing import Any, Optional

DEFAULT_PROTOCOL: Any
string_types: Any
PY3: bool
PY2: bool

def cell_set(cell: Any, value: Any): ...

STORE_GLOBAL: Any
DELETE_GLOBAL: Any
LOAD_GLOBAL: Any
GLOBAL_OPS: Any
HAVE_ARGUMENT: Any
EXTENDED_ARG: Any

def islambda(func: Any): ...

class CloudPickler(Pickler):
dispatch: Any = ...
globals_ref: Any = ...
def __init__(self, file: Any, protocol: Optional[Any] = ...) -> None: ...
def dump(self, obj: Any): ...
def save_memoryview(self, obj: Any) -> None: ...
def save_module(self, obj: Any) -> None: ...
def save_codeobject(self, obj: Any) -> None: ...
def save_function(self, obj: Any, name: Optional[Any] = ...): ...
def save_dynamic_class(self, obj: Any) -> None: ...
def save_function_tuple(self, func: Any) -> None: ...
@classmethod
def extract_code_globals(cls, co: Any): ...
def extract_func_data(self, func: Any): ...
def save_builtin_function(self, obj: Any): ...
def save_global(self, obj: Any, name: Optional[Any] = ..., pack: Any = ...): ...
def save_instancemethod(self, obj: Any) -> None: ...
def save_inst(self, obj: Any) -> None: ...
def save_property(self, obj: Any) -> None: ...
def save_classmethod(self, obj: Any) -> None: ...
def save_itemgetter(self, obj: Any): ...
attrs: Any = ...
index: Any = ...
def save_attrgetter(self, obj: Any): ...
def save_file(self, obj: Any): ...
def save_ellipsis(self, obj: Any) -> None: ...
def save_not_implemented(self, obj: Any) -> None: ...
def save_weakset(self, obj: Any) -> None: ...
def save_logger(self, obj: Any) -> None: ...
def save_root_logger(self, obj: Any) -> None: ...
def save_mappingproxy(self, obj: Any) -> None: ...
def inject_addons(self) -> None: ...

def is_tornado_coroutine(func: Any): ...
def dump(obj: Any, file: Any, protocol: Optional[Any] = ...) -> None: ...
def dumps(obj: Any, protocol: Optional[Any] = ...): ...
load = pickle.load
loads = pickle.loads

def subimport(name: Any): ...
def dynamic_subimport(name: Any, vars: Any): ...
def instance(cls): ...

class _empty_cell_value:
@classmethod
def __reduce__(cls): ...
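
The stub mirrors the copy of cloudpickle vendored inside pyspark at the time. A minimal runtime sketch of the entry points it lists (hypothetical usage, assuming the vendored module behaves like upstream cloudpickle of the same era):

    import io
    from pyspark.cloudpickle import CloudPickler, dump, dumps, loads

    # dumps/loads round-trip objects that plain pickle rejects, such as lambdas;
    # loads is re-exported pickle.loads, as the stub shows.
    restored = loads(dumps(lambda x: x + 1))
    assert restored(2) == 3

    # CloudPickler is the Pickler subclass doing the work; dump() writes to a file object.
    dump({"f": lambda x: x * 2}, io.BytesIO())
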
6 changes: 6 additions & 0 deletions third_party/3/pyspark/daemon.pyi
@@ -0,0 +1,6 @@
from pyspark.serializers import UTF8Deserializer as UTF8Deserializer, read_int as read_int, write_int as write_int, write_with_length as write_with_length # type: ignore[attr-defined]
from typing import Any

def compute_real_exit_code(exit_code: Any): ...
def worker(sock: Any, authenticated: Any): ...
def manager() -> None: ...
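
For contrast with the Any-typed form above, a hand-written stub for the same helper could pin concrete types; the dynamic version trades that precision for being mechanically producible. A hypothetical statically typed counterpart (not part of this commit):

    # Hypothetical hand-written signature -- NOT what this commit adds.
    def compute_real_exit_code(exit_code: object) -> int: ...
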
0 changes: 0 additions & 0 deletions third_party/3/pyspark/find_spark_home.pyi
Empty file.
10 changes: 10 additions & 0 deletions third_party/3/pyspark/heapq3.pyi
@@ -0,0 +1,10 @@
from typing import Any, Optional

def heappush(heap: Any, item: Any) -> None: ...
def heappop(heap: Any): ...
def heapreplace(heap: Any, item: Any): ...
def heappushpop(heap: Any, item: Any): ...
def heapify(x: Any) -> None: ...
def merge(iterables: Any, key: Optional[Any] = ..., reverse: bool = ...) -> None: ...
def nsmallest(n: Any, iterable: Any, key: Optional[Any] = ...): ...
def nlargest(n: Any, iterable: Any, key: Optional[Any] = ...): ...
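
pyspark.heapq3 is a vendored heapq variant whose merge() also accepts key/reverse. A small runtime sketch (hypothetical usage); note that merge() is annotated above as returning None, an artifact of generating stubs for generator functions, whereas at runtime it yields the merged items:

    from pyspark import heapq3

    heap = []
    for v in (5, 1, 4):
        heapq3.heappush(heap, v)
    assert heapq3.heappop(heap) == 1

    assert heapq3.nsmallest(2, [9, 3, 7]) == [3, 7]
    assert list(heapq3.merge([[1, 4], [2, 3]])) == [1, 2, 3, 4]
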
8 changes: 8 additions & 0 deletions third_party/3/pyspark/java_gateway.pyi
@@ -0,0 +1,8 @@
from pyspark.serializers import UTF8Deserializer as UTF8Deserializer, read_int as read_int, write_with_length as write_with_length # type: ignore[attr-defined]
from typing import Any, Optional

xrange = range

def launch_gateway(conf: Optional[Any] = ..., popen_kwargs: Optional[Any] = ...): ...
def local_connect_and_auth(port: Any, auth_secret: Any): ...
def ensure_callback_server_started(gw: Any) -> None: ...
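
launch_gateway() needs a local Spark distribution (SPARK_HOME) at runtime, so the following is a sketch of intent rather than something to run as-is; under the stub every value involved is Any:

    from pyspark.java_gateway import launch_gateway

    # Starts the JVM-side gateway process and returns the Py4J gateway object.
    gateway = launch_gateway()   # a SparkConf can be passed via the conf parameter
    jvm = gateway.jvm            # Py4J's JVM view at runtime; opaque (Any) to mypy
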
1 change: 1 addition & 0 deletions third_party/3/pyspark/ml/param/_shared_params_code_gen.pyi
@@ -0,0 +1 @@
header: str
11 changes: 11 additions & 0 deletions third_party/3/pyspark/shell.pyi
@@ -0,0 +1,11 @@
from pyspark import SparkConf as SparkConf
from pyspark.context import SparkContext as SparkContext
from pyspark.sql import SQLContext as SQLContext, SparkSession as SparkSession
from typing import Any

spark: Any
sc: Any
sql: Any
sqlContext: Any
sqlCtx = sqlContext
code: Any
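
pyspark.shell exists to populate the interactive pyspark REPL; importing it builds the SparkSession and SparkContext as a side effect, so the sketch below is meant for interactive use only:

    # Import side effect: creates the shell globals (all typed as Any in the stub).
    from pyspark.shell import spark, sc, sqlContext

    print(spark.version)   # a SparkSession at runtime
    print(sc.appName)      # a SparkContext at runtime
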
74 changes: 74 additions & 0 deletions third_party/3/pyspark/shuffle.pyi
@@ -0,0 +1,74 @@
from pyspark.serializers import AutoBatchedSerializer as AutoBatchedSerializer, BatchedSerializer as BatchedSerializer, CompressedSerializer as CompressedSerializer, FlattenedValuesSerializer as FlattenedValuesSerializer, PickleSerializer as PickleSerializer
from pyspark.util import fail_on_stopiteration as fail_on_stopiteration
from typing import Any, Optional

process: Any

def get_used_memory(): ...

MemoryBytesSpilled: int
DiskBytesSpilled: int

class Aggregator:
createCombiner: Any = ...
mergeValue: Any = ...
mergeCombiners: Any = ...
def __init__(self, createCombiner: Any, mergeValue: Any, mergeCombiners: Any) -> None: ...

class SimpleAggregator(Aggregator):
def __init__(self, combiner: Any): ...

class Merger:
agg: Any = ...
def __init__(self, aggregator: Any) -> None: ...
def mergeValues(self, iterator: Any) -> None: ...
def mergeCombiners(self, iterator: Any) -> None: ...
def items(self) -> None: ...

class ExternalMerger(Merger):
MAX_TOTAL_PARTITIONS: int = ...
memory_limit: Any = ...
serializer: Any = ...
localdirs: Any = ...
partitions: Any = ...
batch: Any = ...
scale: Any = ...
data: Any = ...
pdata: Any = ...
spills: int = ...
def __init__(self, aggregator: Any, memory_limit: int = ..., serializer: Optional[Any] = ..., localdirs: Optional[Any] = ..., scale: int = ..., partitions: int = ..., batch: int = ...) -> None: ...
def mergeValues(self, iterator: Any) -> None: ...
def mergeCombiners(self, iterator: Any, limit: Optional[Any] = ...) -> None: ...
def items(self): ...

class ExternalSorter:
memory_limit: Any = ...
local_dirs: Any = ...
serializer: Any = ...
def __init__(self, memory_limit: Any, serializer: Optional[Any] = ...) -> None: ...
def sorted(self, iterator: Any, key: Optional[Any] = ..., reverse: bool = ...): ...

class ExternalList:
LIMIT: int = ...
values: Any = ...
count: Any = ...
def __init__(self, values: Any) -> None: ...
def __iter__(self) -> Any: ...
def __len__(self): ...
def append(self, value: Any) -> None: ...
def __del__(self) -> None: ...

class ExternalListOfList(ExternalList):
count: Any = ...
def __init__(self, values: Any) -> None: ...
def append(self, value: Any) -> None: ...
def __iter__(self) -> Any: ...

class GroupByKey:
iterator: Any = ...
def __init__(self, iterator: Any) -> None: ...
def __iter__(self) -> Any: ...

class ExternalGroupBy(ExternalMerger):
SORT_KEY_LIMIT: int = ...
def flattened_serializer(self): ...
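
A minimal sketch of how the classes above fit together, assuming they behave as in pyspark's runtime shuffle module (combiner-style aggregation, spilling to disk once the memory limit is exceeded):

    from pyspark.shuffle import Aggregator, ExternalMerger

    # Word-count style combiner functions; every parameter is Any in the stub.
    agg = Aggregator(
        createCombiner=lambda v: v,
        mergeValue=lambda acc, v: acc + v,
        mergeCombiners=lambda a, b: a + b,
    )
    merger = ExternalMerger(agg, memory_limit=512)
    merger.mergeValues([("a", 1), ("b", 1), ("a", 1)])
    print(dict(merger.items()))   # expected: {'a': 2, 'b': 1} (ordering not guaranteed)
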
35 changes: 35 additions & 0 deletions third_party/3/pyspark/worker.pyi
@@ -0,0 +1,35 @@
from pyspark import shuffle as shuffle
from pyspark.broadcast import Broadcast as Broadcast
from pyspark.files import SparkFiles as SparkFiles
from pyspark.java_gateway import local_connect_and_auth as local_connect_and_auth
from pyspark.rdd import PythonEvalType as PythonEvalType
from pyspark.resourceinformation import ResourceInformation as ResourceInformation
from pyspark.serializers import BatchedSerializer as BatchedSerializer, PickleSerializer as PickleSerializer, SpecialLengths as SpecialLengths, UTF8Deserializer as UTF8Deserializer, read_bool as read_bool, read_int as read_int, read_long as read_long, write_int as write_int, write_long as write_long, write_with_length as write_with_length # type: ignore[attr-defined]
from pyspark.sql.pandas.serializers import ArrowStreamPandasUDFSerializer as ArrowStreamPandasUDFSerializer, CogroupUDFSerializer as CogroupUDFSerializer
from pyspark.sql.pandas.types import to_arrow_type as to_arrow_type
from pyspark.sql.types import StructType as StructType
from pyspark.taskcontext import BarrierTaskContext as BarrierTaskContext, TaskContext as TaskContext
from pyspark.util import fail_on_stopiteration as fail_on_stopiteration
from typing import Any

has_resource_module: bool
basestring = str
pickleSer: Any
utf8_deserializer: Any

def report_times(outfile: Any, boot: Any, init: Any, finish: Any) -> None: ...
def add_path(path: Any) -> None: ...
def read_command(serializer: Any, file: Any): ...
def chain(f: Any, g: Any): ...
def wrap_udf(f: Any, return_type: Any): ...
def wrap_scalar_pandas_udf(f: Any, return_type: Any): ...
def wrap_pandas_iter_udf(f: Any, return_type: Any): ...
def wrap_cogrouped_map_pandas_udf(f: Any, return_type: Any, argspec: Any): ...
def wrap_grouped_map_pandas_udf(f: Any, return_type: Any, argspec: Any): ...
def wrap_grouped_agg_pandas_udf(f: Any, return_type: Any): ...
def wrap_window_agg_pandas_udf(f: Any, return_type: Any, runner_conf: Any, udf_index: Any): ...
def wrap_unbounded_window_agg_pandas_udf(f: Any, return_type: Any): ...
def wrap_bounded_window_agg_pandas_udf(f: Any, return_type: Any): ...
def read_single_udf(pickleSer: Any, infile: Any, eval_type: Any, runner_conf: Any, udf_index: Any): ...
def read_udfs(pickleSer: Any, infile: Any, eval_type: Any): ...
def main(infile: Any, outfile: Any) -> None: ...
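
pyspark.worker is the per-task entry point launched from the JVM; user code does not normally import it, and main() expects the daemon's socket protocol on its infile/outfile handles. The small helpers are still importable; a sketch, assuming chain() composes its arguments as g(f(*args)), consistent with how pipelined RDD functions are combined:

    from pyspark.worker import chain

    # Compose two per-record functions (both typed Any in the stub).
    h = chain(lambda x: x + 1, lambda y: y * 2)
    assert h(3) == 8   # holds under the stated assumption about chain()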
