From 5f0d2f6c349f12675c5531ac8e227c0f540a38e9 Mon Sep 17 00:00:00 2001 From: zero323 Date: Sat, 31 Aug 2019 19:34:11 +0200 Subject: [PATCH] Add Scalar to Struct UDF --- third_party/3/pyspark/sql/_typing.pyi | 10 ++++++++-- third_party/3/pyspark/sql/functions.pyi | 19 ++++++++++++++----- 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/third_party/3/pyspark/sql/_typing.pyi b/third_party/3/pyspark/sql/_typing.pyi index b692db6d..d76ceca1 100644 --- a/third_party/3/pyspark/sql/_typing.pyi +++ b/third_party/3/pyspark/sql/_typing.pyi @@ -37,11 +37,17 @@ PandasGroupedMapUDFType = Literal[201] PandasGroupedAggUDFType = Literal[202] PandasMapIterUDFType = Literal[205] -class PandasVariadicScalarFunction(Protocol): +class PandasVariadicScalarToScalarFunction(Protocol): def __call__(self, *_: pandas.core.series.Series) -> pandas.core.series.Series: ... -PandasScalarFunction = Union[Callable[[pandas.core.series.Series], pandas.core.series.Series], Callable[[pandas.core.series.Series, pandas.core.series.Series], pandas.core.series.Series], Callable[[pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series], pandas.core.series.Series], Callable[[pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series], pandas.core.series.Series], Callable[[pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series], pandas.core.series.Series], Callable[[pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series], pandas.core.series.Series], Callable[[pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series], pandas.core.series.Series], Callable[[pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series], pandas.core.series.Series], Callable[[pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series], pandas.core.series.Series], Callable[[pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series], pandas.core.series.Series], PandasVariadicScalarFunction] +PandasScalarToScalarFunction = Union[PandasVariadicScalarToScalarFunction, Callable[[pandas.core.series.Series], pandas.core.series.Series], Callable[[pandas.core.series.Series, pandas.core.series.Series], pandas.core.series.Series], Callable[[pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series], pandas.core.series.Series], Callable[[pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series], pandas.core.series.Series], Callable[[pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series], pandas.core.series.Series], Callable[[pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series], pandas.core.series.Series], Callable[[pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series], pandas.core.series.Series], Callable[[pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series], pandas.core.series.Series], Callable[[pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series], pandas.core.series.Series], Callable[[pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series], pandas.core.series.Series]] + +class PandasVariadicScalarToStructFunction(Protocol): + def __call__(self, *_: pandas.core.series.Series) -> pandas.core.frame.DataFrame: + ... + +PandasScalarToStructFunction = Union[PandasVariadicScalarToStructFunction, Callable[[pandas.core.series.Series], pandas.core.frame.DataFrame], Callable[[pandas.core.series.Series, pandas.core.series.Series], pandas.core.frame.DataFrame], Callable[[pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series], pandas.core.frame.DataFrame], Callable[[pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series], pandas.core.frame.DataFrame], Callable[[pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series], pandas.core.frame.DataFrame], Callable[[pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series], pandas.core.frame.DataFrame], Callable[[pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series], pandas.core.frame.DataFrame], Callable[[pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series], pandas.core.frame.DataFrame], Callable[[pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series], pandas.core.frame.DataFrame], Callable[[pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series, pandas.core.series.Series], pandas.core.frame.DataFrame]] PandasScalarIterFunction = Callable[[Iterable[Union[pandas.core.series.Series, Tuple[pandas.core.series.Series, ...], pandas.core.frame.DataFrame]]], Iterable[pandas.core.series.Series]] diff --git a/third_party/3/pyspark/sql/functions.pyi b/third_party/3/pyspark/sql/functions.pyi index 0db08d3c..963486c0 100644 --- a/third_party/3/pyspark/sql/functions.pyi +++ b/third_party/3/pyspark/sql/functions.pyi @@ -6,7 +6,7 @@ from typing import Any, Optional, Union, Dict, Callable import pandas.core.frame # type: ignore import pandas.core.series # type: ignore -from pyspark.sql._typing import ColumnOrName, DataTypeOrString, AtomicDataTypeOrString, PandasScalarUDFType, PandasScalarIterUDFType, PandasGroupedMapUDFType, PandasGroupedAggUDFType, PandasScalarFunction, PandasScalarIterFunction, PandasGroupedMapFunction, PandasGroupedAggFunction, PandasMapIterFunction, PandasMapIterUDFType, UserDefinedFunctionLike, GroupedMapPandasUserDefinedFunction, MapIterPandasUserDefinedFunction +from pyspark.sql._typing import ColumnOrName, DataTypeOrString, AtomicDataTypeOrString, PandasScalarUDFType, PandasScalarIterUDFType, PandasGroupedMapUDFType, PandasGroupedAggUDFType, PandasScalarToScalarFunction, PandasScalarToStructFunction, PandasScalarIterFunction, PandasGroupedMapFunction, PandasGroupedAggFunction, PandasMapIterFunction, PandasMapIterUDFType, UserDefinedFunctionLike, GroupedMapPandasUserDefinedFunction, MapIterPandasUserDefinedFunction from pyspark.sql.column import Column from pyspark.sql.dataframe import DataFrame from pyspark.sql.types import ArrayType, DataType, StructType @@ -255,13 +255,22 @@ class PandasUDFType: MAP_ITER: PandasMapIterUDFType @overload -def pandas_udf(f: PandasScalarFunction, returnType: Union[AtomicDataTypeOrString, ArrayType], functionType: PandasScalarUDFType) -> UserDefinedFunctionLike: ... +def pandas_udf(f: PandasScalarToScalarFunction, returnType: Union[AtomicDataTypeOrString, ArrayType], functionType: PandasScalarUDFType) -> UserDefinedFunctionLike: ... @overload -def pandas_udf(f: Union[AtomicDataTypeOrString, ArrayType], returnType: PandasScalarUDFType) -> Callable[[PandasScalarFunction], UserDefinedFunctionLike]: ... +def pandas_udf(f: Union[AtomicDataTypeOrString, ArrayType], returnType: PandasScalarUDFType) -> Callable[[PandasScalarToScalarFunction], UserDefinedFunctionLike]: ... @overload -def pandas_udf(f: Union[AtomicDataTypeOrString, ArrayType], *, functionType: PandasScalarUDFType) -> Callable[[PandasScalarFunction], UserDefinedFunctionLike]: ... +def pandas_udf(f: Union[AtomicDataTypeOrString, ArrayType], *, functionType: PandasScalarUDFType) -> Callable[[PandasScalarToScalarFunction], UserDefinedFunctionLike]: ... @overload -def pandas_udf(*, returnType: Union[AtomicDataTypeOrString, ArrayType], functionType: PandasScalarUDFType) -> Callable[[PandasScalarFunction], UserDefinedFunctionLike]: ... +def pandas_udf(*, returnType: Union[AtomicDataTypeOrString, ArrayType], functionType: PandasScalarUDFType) -> Callable[[PandasScalarToScalarFunction], UserDefinedFunctionLike]: ... + +@overload +def pandas_udf(f: PandasScalarToStructFunction, returnType: Union[StructType, str], functionType: PandasScalarUDFType) -> UserDefinedFunctionLike: ... +@overload +def pandas_udf(f: Union[StructType, str], returnType: PandasScalarUDFType) -> Callable[[PandasScalarToStructFunction], UserDefinedFunctionLike]: ... +@overload +def pandas_udf(f: Union[StructType, str], *, functionType: PandasScalarUDFType) -> Callable[[PandasScalarToStructFunction], UserDefinedFunctionLike]: ... +@overload +def pandas_udf(*, returnType: Union[StructType, str], functionType: PandasScalarUDFType) -> Callable[[PandasScalarToStructFunction], UserDefinedFunctionLike]: ... @overload def pandas_udf(f: PandasScalarIterFunction, returnType: Union[AtomicDataTypeOrString, ArrayType], functionType: PandasScalarIterUDFType) -> UserDefinedFunctionLike: ...