diff --git a/README.rst b/README.rst index fecf40ed..a82f9c7d 100644 --- a/README.rst +++ b/README.rst @@ -158,6 +158,8 @@ API Coverage +------------------------------------------------+---------------------+--------------------+------------+ | pyspark.ml.tests | ✘ | ✘ | Tests | +------------------------------------------------+---------------------+--------------------+------------+ +| pyspark.ml.tree | ✘ | ✔ | | ++------------------------------------------------+---------------------+--------------------+------------+ | pyspark.ml.tuning | ✘ | ✔ | | +------------------------------------------------+---------------------+--------------------+------------+ | pyspark.ml.util | ✘ | ✔ | | diff --git a/third_party/3/pyspark/ml/classification.pyi b/third_party/3/pyspark/ml/classification.pyi index 09acc8ec..27c8bd12 100644 --- a/third_party/3/pyspark/ml/classification.pyi +++ b/third_party/3/pyspark/ml/classification.pyi @@ -7,7 +7,8 @@ from pyspark.ml._typing import JM, M, P, T, ParamMap from pyspark.ml.base import Estimator, Model, Transformer from pyspark.ml.linalg import Matrix, Vector from pyspark.ml.param.shared import * -from pyspark.ml.regression import DecisionTreeModel, DecisionTreeParams, DecisionTreeRegressionModel, GBTParams, HasVarianceImpurity, RandomForestParams, TreeEnsembleModel +from pyspark.ml.tree import _DecisionTreeModel, _DecisionTreeParams, _TreeEnsembleModel, _RandomForestParams, _GBTParams, _HasVarianceImpurity, _TreeClassifierParams, _TreeEnsembleParams +from pyspark.ml.regression import DecisionTreeRegressionModel from pyspark.ml.util import * from pyspark.ml.wrapper import JavaPredictionModel, JavaPredictor, JavaPredictorParams, JavaWrapper, JavaTransformer from pyspark.sql.dataframe import DataFrame @@ -137,30 +138,29 @@ class BinaryLogisticRegressionSummary(LogisticRegressionSummary): class BinaryLogisticRegressionTrainingSummary(BinaryLogisticRegressionSummary, LogisticRegressionTrainingSummary): ... 
-class TreeClassifierParams: - supportedImpurities: List[str] - impurity: Param[str] - def __init__(self) -> None: ... - def getImpurity(self) -> str: ... +class _DecisionTreeClassifierParams(_DecisionTreeParams, _TreeClassifierParams): ... -class DecisionTreeClassifier(JavaProbabilisticClassifier[DecisionTreeClassificationModel], HasWeightCol, DecisionTreeParams, TreeClassifierParams, HasCheckpointInterval, HasSeed, JavaMLWritable, JavaMLReadable[DecisionTreeClassifier]): - def __init__(self, *, featuresCol: str = ..., labelCol: str = ..., predictionCol: str = ..., probabilityCol: str = ..., rawPredictionCol: str = ..., maxDepth: int = ..., maxBins: int = ..., minInstancesPerNode: int = ..., minInfoGain: float = ..., maxMemoryInMB: int = ..., cacheNodeIds: bool = ..., checkpointInterval: int = ..., impurity: str = ..., seed: Optional[int] = ..., weightCol: Optional[str] = ..., leafCol: str = ...) -> None: ... - def setParams(self, *, featuresCol: str = ..., labelCol: str = ..., predictionCol: str = ..., probabilityCol: str = ..., rawPredictionCol: str = ..., maxDepth: int = ..., maxBins: int = ..., minInstancesPerNode: int = ..., minInfoGain: float = ..., maxMemoryInMB: int = ..., cacheNodeIds: bool = ..., checkpointInterval: int = ..., impurity: str = ..., seed: Optional[int] = ..., weightCol: Optional[str] = ..., leafCol: str = ...) -> DecisionTreeClassifier: ... 
+class DecisionTreeClassifier(JavaProbabilisticClassifier[DecisionTreeClassificationModel], _DecisionTreeClassifierParams, JavaMLWritable, JavaMLReadable[DecisionTreeClassifier]): + def __init__(self, *, featuresCol: str = ..., labelCol: str = ..., predictionCol: str = ..., probabilityCol: str = ..., rawPredictionCol: str = ..., maxDepth: int = ..., maxBins: int = ..., minInstancesPerNode: int = ..., minInfoGain: float = ..., maxMemoryInMB: int = ..., cacheNodeIds: bool = ..., checkpointInterval: int = ..., impurity: str = ..., seed: Optional[int] = ..., weightCol: Optional[str] = ..., leafCol: str = ..., minWeightFractionPerNode: float = ...) -> None: ... + def setParams(self, *, featuresCol: str = ..., labelCol: str = ..., predictionCol: str = ..., probabilityCol: str = ..., rawPredictionCol: str = ..., maxDepth: int = ..., maxBins: int = ..., minInstancesPerNode: int = ..., minInfoGain: float = ..., maxMemoryInMB: int = ..., cacheNodeIds: bool = ..., checkpointInterval: int = ..., impurity: str = ..., seed: Optional[int] = ..., weightCol: Optional[str] = ..., leafCol: str = ..., minWeightFractionPerNode: float = ...) -> DecisionTreeClassifier: ... def setMaxDepth(self, value: int) -> DecisionTreeClassifier: ... def setMaxBins(self, value: int) -> DecisionTreeClassifier: ... def setMinInstancesPerNode(self, value: int) -> DecisionTreeClassifier: ... + def setMinWeightFractionPerNode(self, value: float) -> DecisionTreeClassifier: ... def setMinInfoGain(self, value: float) -> DecisionTreeClassifier: ... def setMaxMemoryInMB(self, value: int) -> DecisionTreeClassifier: ... def setCacheNodeIds(self, value: bool) -> DecisionTreeClassifier: ... def setImpurity(self, value: str) -> DecisionTreeClassifier: ... 
-class DecisionTreeClassificationModel(DecisionTreeModel, JavaProbabilisticClassificationModel[Vector], JavaMLWritable, JavaMLReadable[DecisionTreeClassificationModel]): +class DecisionTreeClassificationModel(_DecisionTreeModel, JavaProbabilisticClassificationModel[Vector], _DecisionTreeClassifierParams, JavaMLWritable, JavaMLReadable[DecisionTreeClassificationModel]): @property def featureImportances(self) -> Vector: ... -class RandomForestClassifier(JavaProbabilisticClassifier[RandomForestClassificationModel], HasSeed, RandomForestParams, TreeClassifierParams, HasCheckpointInterval, JavaMLWritable, JavaMLReadable[RandomForestClassifier]): - def __init__(self, *, featuresCol: str = ..., labelCol: str = ..., predictionCol: str = ..., probabilityCol: str = ..., rawPredictionCol: str = ..., maxDepth: int = ..., maxBins: int = ..., minInstancesPerNode: int = ..., minInfoGain: float = ..., maxMemoryInMB: int = ..., cacheNodeIds: bool = ..., checkpointInterval: int = ..., impurity: str = ..., numTrees: int = ..., featureSubsetStrategy: str = ..., seed: Optional[int] = ..., subsamplingRate: float = ..., leafCol: str = ...) -> None: ... - def setParams(self, *, featuresCol: str = ..., labelCol: str = ..., predictionCol: str = ..., probabilityCol: str = ..., rawPredictionCol: str = ..., maxDepth: int = ..., maxBins: int = ..., minInstancesPerNode: int = ..., minInfoGain: float = ..., maxMemoryInMB: int = ..., cacheNodeIds: bool = ..., checkpointInterval: int = ..., seed: Optional[int] = ..., impurity: str = ..., numTrees: int = ..., featureSubsetStrategy: str = ..., subsamplingRate: float = ..., leafCol: str = ...) -> RandomForestClassifier: ... +class _RandomForestClassifierParams(_RandomForestParams, _TreeClassifierParams): ... 
+ +class RandomForestClassifier(JavaProbabilisticClassifier[RandomForestClassificationModel], _RandomForestClassifierParams, JavaMLWritable, JavaMLReadable[RandomForestClassifier]): + def __init__(self, *, featuresCol: str = ..., labelCol: str = ..., predictionCol: str = ..., probabilityCol: str = ..., rawPredictionCol: str = ..., maxDepth: int = ..., maxBins: int = ..., minInstancesPerNode: int = ..., minInfoGain: float = ..., maxMemoryInMB: int = ..., cacheNodeIds: bool = ..., checkpointInterval: int = ..., impurity: str = ..., numTrees: int = ..., featureSubsetStrategy: str = ..., seed: Optional[int] = ..., subsamplingRate: float = ..., leafCol: str = ..., minWeightFractionPerNode: float = ...) -> None: ... + def setParams(self, *, featuresCol: str = ..., labelCol: str = ..., predictionCol: str = ..., probabilityCol: str = ..., rawPredictionCol: str = ..., maxDepth: int = ..., maxBins: int = ..., minInstancesPerNode: int = ..., minInfoGain: float = ..., maxMemoryInMB: int = ..., cacheNodeIds: bool = ..., checkpointInterval: int = ..., seed: Optional[int] = ..., impurity: str = ..., numTrees: int = ..., featureSubsetStrategy: str = ..., subsamplingRate: float = ..., leafCol: str = ..., minWeightFractionPerNode: float = ...) -> RandomForestClassifier: ... def setMaxDepth(self, value: int) -> RandomForestClassifier: ... def setMaxBins(self, value: int) -> RandomForestClassifier: ... def setMinInstancesPerNode(self, value: int) -> RandomForestClassifier: ... @@ -172,20 +172,20 @@ class RandomForestClassifier(JavaProbabilisticClassifier[RandomForestClassificat def setSubsamplingRate(self, value: float) -> RandomForestClassifier: ... def setFeatureSubsetStrategy(self, value: str) -> RandomForestClassifier: ... 
-class RandomForestClassificationModel(TreeEnsembleModel, JavaProbabilisticClassificationModel[Vector], JavaMLWritable, JavaMLReadable[RandomForestClassificationModel]): +class RandomForestClassificationModel(_TreeEnsembleModel, JavaProbabilisticClassificationModel[Vector], _RandomForestClassifierParams, JavaMLWritable, JavaMLReadable[RandomForestClassificationModel]): @property def featureImportances(self) -> Vector: ... @property def trees(self) -> List[DecisionTreeClassificationModel]: ... -class GBTClassifierParams(GBTParams, HasVarianceImpurity): +class GBTClassifierParams(_GBTParams, _HasVarianceImpurity): supportedLossTypes: List[str] lossType: Param[str] def getLossType(self) -> str: ... -class GBTClassifier(JavaProbabilisticClassifier[GBTClassificationModel], GBTClassifierParams, HasCheckpointInterval, HasSeed, JavaMLWritable, JavaMLReadable[GBTClassifier]): - def __init__(self, *, featuresCol: str = ..., labelCol: str = ..., predictionCol: str = ..., maxDepth: int = ..., maxBins: int = ..., minInstancesPerNode: int = ..., minInfoGain: float = ..., maxMemoryInMB: int = ..., cacheNodeIds: bool = ..., checkpointInterval: int = ..., lossType: str = ..., maxIter: int = ..., stepSize: float = ..., seed: Optional[int] = ..., subsamplingRate: float = ..., featureSubsetStrategy: str = ..., validationTol: float = ..., validationIndicatorCol: Optional[str] = ..., leafCol: str = ...) -> None: ... - def setParams(self, *, featuresCol: str = ..., labelCol: str = ..., predictionCol: str = ..., maxDepth: int = ..., maxBins: int = ..., minInstancesPerNode: int = ..., minInfoGain: float = ..., maxMemoryInMB: int = ..., cacheNodeIds: bool = ..., checkpointInterval: int = ..., lossType: str = ..., maxIter: int = ..., stepSize: float = ..., seed: Optional[int] = ..., subsamplingRate: float = ..., featureSubsetStrategy: str = ..., validationTol: float = ..., validationIndicatorCol: Optional[str] = ..., leafCol: str = ...) -> GBTClassifier: ... 
+class GBTClassifier(JavaProbabilisticClassifier[GBTClassificationModel], GBTClassifierParams, JavaMLWritable, JavaMLReadable[GBTClassifier]): + def __init__(self, *, featuresCol: str = ..., labelCol: str = ..., predictionCol: str = ..., maxDepth: int = ..., maxBins: int = ..., minInstancesPerNode: int = ..., minInfoGain: float = ..., maxMemoryInMB: int = ..., cacheNodeIds: bool = ..., checkpointInterval: int = ..., lossType: str = ..., maxIter: int = ..., stepSize: float = ..., seed: Optional[int] = ..., subsamplingRate: float = ..., featureSubsetStrategy: str = ..., validationTol: float = ..., validationIndicatorCol: Optional[str] = ..., leafCol: str = ..., minWeightFractionPerNode: float = ...) -> None: ... + def setParams(self, *, featuresCol: str = ..., labelCol: str = ..., predictionCol: str = ..., maxDepth: int = ..., maxBins: int = ..., minInstancesPerNode: int = ..., minInfoGain: float = ..., maxMemoryInMB: int = ..., cacheNodeIds: bool = ..., checkpointInterval: int = ..., lossType: str = ..., maxIter: int = ..., stepSize: float = ..., seed: Optional[int] = ..., subsamplingRate: float = ..., featureSubsetStrategy: str = ..., validationTol: float = ..., validationIndicatorCol: Optional[str] = ..., leafCol: str = ..., minWeightFractionPerNode: float = ...) -> GBTClassifier: ... def setMaxDepth(self, value: int) -> GBTClassifier: ... def setMaxBins(self, value: int) -> GBTClassifier: ... def setMinInstancesPerNode(self, value: int) -> GBTClassifier: ... @@ -198,7 +198,7 @@ class GBTClassifier(JavaProbabilisticClassifier[GBTClassificationModel], GBTClas def setFeatureSubsetStrategy(self, value: str) -> GBTClassifier: ... def setValidationIndicatorCol(self, value: str) -> GBTClassifier: ... 
-class GBTClassificationModel(TreeEnsembleModel, JavaProbabilisticClassificationModel[Vector], JavaMLWritable, JavaMLReadable[GBTClassificationModel]): +class GBTClassificationModel(_TreeEnsembleModel, JavaProbabilisticClassificationModel[Vector], GBTClassifierParams, JavaMLWritable, JavaMLReadable[GBTClassificationModel]): @property def featureImportances(self) -> Vector: ... @property diff --git a/third_party/3/pyspark/ml/regression.pyi b/third_party/3/pyspark/ml/regression.pyi index b68f6135..603478a5 100644 --- a/third_party/3/pyspark/ml/regression.pyi +++ b/third_party/3/pyspark/ml/regression.pyi @@ -6,6 +6,7 @@ from pyspark.ml._typing import P, T from pyspark.ml.param.shared import * from pyspark.ml.linalg import Vector from pyspark.ml.util import * +from pyspark.ml.tree import _DecisionTreeModel, _DecisionTreeParams, _TreeEnsembleModel, _TreeEnsembleParams, _RandomForestParams, _GBTParams, _HasVarianceImpurity, _TreeRegressorParams from pyspark.ml.wrapper import JavaEstimator, JavaModel, JavaPredictionModel, JavaPredictor, JavaWrapper from pyspark.sql.dataframe import DataFrame @@ -82,110 +83,45 @@ class IsotonicRegressionModel(JavaModel, _IsotonicRegressionBase, JavaMLWritable @property def predictions(self) -> Vector: ... -class DecisionTreeParams(Params): - leafCol: Param[str] - maxDepth: Param[int] - maxBins: Param[int] - minInstancesPerNode: Param[int] - minInfoGain: Param[float] - maxMemoryInMB: Param[int] - cacheNodeIds: Param[bool] - def __init__(self) -> None: ... - def setLeafCol(self: P, value: str) -> P: ... - def getLeafCol(self) -> str: ... - def setMaxDepth(self: P, value: int) -> P: ... - def getMaxDepth(self) -> int: ... - def setMaxBins(self: P, value: int) -> P: ... - def getMaxBins(self) -> int: ... - def setMinInstancesPerNode(self: P, value: int) -> P: ... - def getMinInstancesPerNode(self) -> int: ... - def setMinInfoGain(self: P, value: float) -> P: ... - def getMinInfoGain(self) -> float: ... 
- def setMaxMemoryInMB(self: P, value: int) -> P: ... - def getMaxMemoryInMB(self) -> int: ... - def setCacheNodeIds(self: P, value: bool) -> P: ... - def getCacheNodeIds(self) -> bool: ... +class _DecisionTreeRegressorParams(_DecisionTreeParams, _TreeRegressorParams, HasVarianceCol): ... -class TreeEnsembleParams(DecisionTreeParams): - supportedFeatureSubsetStrategies: List[str] - subsamplingRate: Param[float] - featureSubsetStrategy: Param[str] - def __init__(self) -> None: ... - def getSubsamplingRate(self) -> float: ... - def getFeatureSubsetStrategy(self) -> str: ... - -class HasVarianceImpurity(Params): - supportedImpurities: List[str] - impurity: Param[str] - def __init__(self) -> None: ... - def getImpurity(self) -> str: ... - -class TreeRegressorParams(HasVarianceImpurity): ... - -class RandomForestParams(TreeEnsembleParams): - numTrees: Param[int] - def __init__(self) -> None: ... - def getNumTrees(self) -> int: ... - -class GBTParams(TreeEnsembleParams, HasMaxIter, HasStepSize, HasValidationIndicatorCol): - stepSize: Param[float] - validationTol: Param[float] - def getValidationTol(self) -> float: ... - -class GBTRegressorParams(GBTParams, TreeRegressorParams): - supportedLossTypes: List[str] - lossType: Param[str] - def getLossType(self) -> str: ... - -class DecisionTreeRegressor(JavaPredictor[DecisionTreeRegressionModel], HasWeightCol, DecisionTreeParams, TreeRegressorParams, HasCheckpointInterval, HasSeed, JavaMLWritable, JavaMLReadable[DecisionTreeRegressor], HasVarianceCol): - def __init__(self, *, featuresCol: str = ..., labelCol: str = ..., predictionCol: str = ..., maxDepth: int = ..., maxBins: int = ..., minInstancesPerNode: int = ..., minInfoGain: float = ..., maxMemoryInMB: int = ..., cacheNodeIds: bool = ..., checkpointInterval: int = ..., impurity: str = ..., seed: Optional[int] = ..., varianceCol: Optional[str] = ..., weightCol: Optional[str] = ..., leafCol: str = ...) -> None: ... 
- def setParams(self, *, featuresCol: str = ..., labelCol: str = ..., predictionCol: str = ..., maxDepth: int = ..., maxBins: int = ..., minInstancesPerNode: int = ..., minInfoGain: float = ..., maxMemoryInMB: int = ..., cacheNodeIds: bool = ..., checkpointInterval: int = ..., impurity: str = ..., seed: Optional[int] = ..., varianceCol: Optional[str] = ..., weightCol: Optional[str] = ..., leafCol: str = ...) -> DecisionTreeRegressor: ... +class DecisionTreeRegressor(JavaPredictor[DecisionTreeRegressionModel], _DecisionTreeRegressorParams, JavaMLWritable, JavaMLReadable[DecisionTreeRegressor], HasVarianceCol): + def __init__(self, *, featuresCol: str = ..., labelCol: str = ..., predictionCol: str = ..., maxDepth: int = ..., maxBins: int = ..., minInstancesPerNode: int = ..., minInfoGain: float = ..., maxMemoryInMB: int = ..., cacheNodeIds: bool = ..., checkpointInterval: int = ..., impurity: str = ..., seed: Optional[int] = ..., varianceCol: Optional[str] = ..., weightCol: Optional[str] = ..., leafCol: str = ..., minWeightFractionPerNode: float = ...) -> None: ... + def setParams(self, *, featuresCol: str = ..., labelCol: str = ..., predictionCol: str = ..., maxDepth: int = ..., maxBins: int = ..., minInstancesPerNode: int = ..., minInfoGain: float = ..., maxMemoryInMB: int = ..., cacheNodeIds: bool = ..., checkpointInterval: int = ..., impurity: str = ..., seed: Optional[int] = ..., varianceCol: Optional[str] = ..., weightCol: Optional[str] = ..., leafCol: str = ..., minWeightFractionPerNode: float = ...) -> DecisionTreeRegressor: ... def setMaxDepth(self, value: int) -> DecisionTreeRegressor: ... def setMaxBins(self, value: int) -> DecisionTreeRegressor: ... def setMinInstancesPerNode(self, value: int) -> DecisionTreeRegressor: ... + def setMinWeightFractionPerNode(self, value: float) -> DecisionTreeRegressor: ... def setMinInfoGain(self, value: float) -> DecisionTreeRegressor: ... def setMaxMemoryInMB(self, value: int) -> DecisionTreeRegressor: ... 
def setCacheNodeIds(self, value: bool) -> DecisionTreeRegressor: ... def setImpurity(self, value: str) -> DecisionTreeRegressor: ... -class DecisionTreeModel(JavaPredictionModel[T]): - @property - def numNodes(self) -> int: ... - @property - def depth(self) -> int: ... - @property - def toDebugString(self) -> str: ... - -class TreeEnsembleModel(JavaModel): - @property - def trees(self) -> Sequence[DecisionTreeModel]: ... - @property - def getNumTrees(self) -> int: ... - @property - def treeWeights(self) -> List[float]: ... - @property - def totalNumNodes(self) -> int: ... - @property - def toDebugString(self) -> str: ... - -class DecisionTreeRegressionModel(DecisionTreeModel[T], JavaMLWritable, JavaMLReadable[DecisionTreeRegressionModel]): +class DecisionTreeRegressionModel(_DecisionTreeModel[T], JavaMLWritable, JavaMLReadable[DecisionTreeRegressionModel]): @property def featureImportances(self) -> Vector: ... -class RandomForestRegressor(JavaPredictor[RandomForestRegressionModel], HasSeed, RandomForestParams, TreeRegressorParams, HasCheckpointInterval, JavaMLWritable, JavaMLReadable[RandomForestRegressor]): - def __init__(self, *, featuresCol: str = ..., labelCol: str = ..., predictionCol: str = ..., maxDepth: int = ..., maxBins: int = ..., minInstancesPerNode: int = ..., minInfoGain: float = ..., maxMemoryInMB: int = ..., cacheNodeIds: bool = ..., checkpointInterval: int = ..., impurity: str = ..., subsamplingRate: float = ..., seed: Optional[int] = ..., numTrees: int = ..., featureSubsetStrategy: str = ...) -> None: ... - def setParams(self, *, featuresCol: str = ..., labelCol: str = ..., predictionCol: str = ..., maxDepth: int = ..., maxBins: int = ..., minInstancesPerNode: int = ..., minInfoGain: float = ..., maxMemoryInMB: int = ..., cacheNodeIds: bool = ..., checkpointInterval: int = ..., impurity: str = ..., subsamplingRate: float = ..., seed: Optional[int] = ..., numTrees: int = ..., featureSubsetStrategy: str = ...) -> RandomForestRegressor: ... 
+class _RandomForestRegressorParams(_RandomForestParams, _TreeRegressorParams): ... + +class RandomForestRegressor(JavaPredictor[RandomForestRegressionModel], _RandomForestRegressorParams, JavaMLWritable, JavaMLReadable[RandomForestRegressor]): + def __init__(self, *, featuresCol: str = ..., labelCol: str = ..., predictionCol: str = ..., maxDepth: int = ..., maxBins: int = ..., minInstancesPerNode: int = ..., minInfoGain: float = ..., maxMemoryInMB: int = ..., cacheNodeIds: bool = ..., checkpointInterval: int = ..., impurity: str = ..., subsamplingRate: float = ..., seed: Optional[int] = ..., numTrees: int = ..., featureSubsetStrategy: str = ..., minWeightFractionPerNode: float = ...) -> None: ... + def setParams(self, *, featuresCol: str = ..., labelCol: str = ..., predictionCol: str = ..., maxDepth: int = ..., maxBins: int = ..., minInstancesPerNode: int = ..., minInfoGain: float = ..., maxMemoryInMB: int = ..., cacheNodeIds: bool = ..., checkpointInterval: int = ..., impurity: str = ..., subsamplingRate: float = ..., seed: Optional[int] = ..., numTrees: int = ..., featureSubsetStrategy: str = ..., minWeightFractionPerNode: float = ...) -> RandomForestRegressor: ... def setFeatureSubsetStrategy(self, value: str) -> RandomForestRegressor: ... -class RandomForestRegressionModel(TreeEnsembleModel, JavaPredictionModel[Vector], JavaMLWritable, JavaMLReadable[RandomForestRegressionModel]): +class RandomForestRegressionModel(_TreeEnsembleModel[Vector], _RandomForestRegressorParams, JavaMLWritable, JavaMLReadable[RandomForestRegressionModel]): @property def trees(self) -> Sequence[DecisionTreeRegressionModel]: ... @property def featureImportances(self) -> Vector: ... 
-class GBTRegressor(JavaPredictor[GBTRegressionModel], GBTRegressorParams, HasCheckpointInterval, HasSeed, JavaMLWritable, JavaMLReadable[GBTRegressor]): - def __init__(self, *, featuresCol: str = ..., labelCol: str = ..., predictionCol: str = ..., maxDepth: int = ..., maxBins: int = ..., minInstancesPerNode: int = ..., minInfoGain: float = ..., maxMemoryInMB: int = ..., cacheNodeIds: bool = ..., subsamplingRate: float = ..., checkpointInterval: int = ..., lossType: str = ..., maxIter: int = ..., stepSize: float = ..., seed: Optional[int] = ..., impurity: str = ..., featureSubsetStrategy: str = ..., validationTol: float = ..., validationIndicatorCol: Optional[str] = ..., leafCol: str = ...) -> None: ... - def setParams(self, *, featuresCol: str = ..., labelCol: str = ..., predictionCol: str = ..., maxDepth: int = ..., maxBins: int = ..., minInstancesPerNode: int = ..., minInfoGain: float = ..., maxMemoryInMB: int = ..., cacheNodeIds: bool = ..., subsamplingRate: float = ..., checkpointInterval: int = ..., lossType: str = ..., maxIter: int = ..., stepSize: float = ..., seed: Optional[int] = ..., impuriy: str = ..., featureSubsetStrategy: str = ..., validationTol: float = ..., validationIndicatorCol: Optional[str] = ..., leafCol: str = ...) -> GBTRegressor: ... +class _GBTRegressorParams(_GBTParams, _TreeRegressorParams): + supportedLossTypes: List[str] + lossType: Param[str] + def getLossType(self) -> str: ... 
+ +class GBTRegressor(JavaPredictor[GBTRegressionModel], _GBTRegressorParams, JavaMLWritable, JavaMLReadable[GBTRegressor]): + def __init__(self, *, featuresCol: str = ..., labelCol: str = ..., predictionCol: str = ..., maxDepth: int = ..., maxBins: int = ..., minInstancesPerNode: int = ..., minInfoGain: float = ..., maxMemoryInMB: int = ..., cacheNodeIds: bool = ..., subsamplingRate: float = ..., checkpointInterval: int = ..., lossType: str = ..., maxIter: int = ..., stepSize: float = ..., seed: Optional[int] = ..., impurity: str = ..., featureSubsetStrategy: str = ..., validationTol: float = ..., validationIndicatorCol: Optional[str] = ..., leafCol: str = ..., minWeightFractionPerNode: float = ...) -> None: ... + def setParams(self, *, featuresCol: str = ..., labelCol: str = ..., predictionCol: str = ..., maxDepth: int = ..., maxBins: int = ..., minInstancesPerNode: int = ..., minInfoGain: float = ..., maxMemoryInMB: int = ..., cacheNodeIds: bool = ..., subsamplingRate: float = ..., checkpointInterval: int = ..., lossType: str = ..., maxIter: int = ..., stepSize: float = ..., seed: Optional[int] = ..., impuriy: str = ..., featureSubsetStrategy: str = ..., validationTol: float = ..., validationIndicatorCol: Optional[str] = ..., leafCol: str = ..., minWeightFractionPerNode: float = ...) -> GBTRegressor: ... def setMaxDepth(self, value: int) -> GBTRegressor: ... def setMaxBins(self, value: int) -> GBTRegressor: ... def setMinInstancesPerNode(self, value: int) -> GBTRegressor: ... @@ -198,7 +134,7 @@ class GBTRegressor(JavaPredictor[GBTRegressionModel], GBTRegressorParams, HasChe def setFeatureSubsetStrategy(self, value: str) -> GBTRegressor: ... def setValidationIndicatorCol(self, value: str) -> GBTRegressor: ... 
-class GBTRegressionModel(TreeEnsembleModel, JavaPredictionModel[Vector], JavaMLWritable, JavaMLReadable[GBTRegressionModel]): +class GBTRegressionModel(_TreeEnsembleModel[Vector], _GBTRegressorParams, JavaMLWritable, JavaMLReadable[GBTRegressionModel]): @property def featureImportances(self) -> Vector: ... @property diff --git a/third_party/3/pyspark/ml/tree.pyi b/third_party/3/pyspark/ml/tree.pyi new file mode 100644 index 00000000..48a86e66 --- /dev/null +++ b/third_party/3/pyspark/ml/tree.pyi @@ -0,0 +1,79 @@ +from typing import Any, List, Optional, Sequence +from pyspark.ml._typing import P, T + +from pyspark.ml.linalg import Vector +from pyspark.ml.param.shared import * +from pyspark.ml.util import * +from pyspark.ml.wrapper import JavaEstimator as JavaEstimator, JavaModel as JavaModel, JavaParams as JavaParams, JavaPredictionModel as JavaPredictionModel, JavaPredictor as JavaPredictor + +class _DecisionTreeModel(JavaPredictionModel[T]): + @property + def numNodes(self) -> int: ... + @property + def depth(self) -> int: ... + @property + def toDebugString(self) -> str: ... + def predictLeaf(self, value: Vector) -> float: ... + +class _DecisionTreeParams(HasCheckpointInterval, HasSeed, HasWeightCol): + leafCol: Param[str] + maxDepth: Param[int] + maxBins: Param[int] + minInstancesPerNode: Param[int] + minWeightFractionPerNode: Param[float] + minInfoGain: Param[float] + maxMemoryInMB: Param[int] + cacheNodeIds: Param[bool] + def __init__(self) -> None: ... + def setLeafCol(self: P, value: str) -> P: ... + def getLeafCol(self) -> str: ... + def getMaxDepth(self) -> int: ... + def getMaxBins(self) -> int: ... + def getMinInstancesPerNode(self) -> int: ... + def getMinInfoGain(self) -> float: ... + def getMaxMemoryInMB(self) -> int: ... + def getCacheNodeIds(self) -> bool: ... + +class _TreeEnsembleModel(JavaPredictionModel[T]): + @property + def trees(self) -> Sequence[_DecisionTreeModel]: ... + @property + def getNumTrees(self) -> int: ... 
+ @property + def treeWeights(self) -> List[float]: ... + @property + def totalNumNodes(self) -> int: ... + @property + def toDebugString(self) -> str: ... + +class _TreeEnsembleParams(_DecisionTreeParams): + subsamplingRate: Param[float] + supportedFeatureSubsetStrategies: List[str] + featureSubsetStrategy: Param[str] + def __init__(self) -> None: ... + def getSubsamplingRate(self) -> float: ... + def getFeatureSubsetStrategy(self) -> str: ... + +class _RandomForestParams(_TreeEnsembleParams): + numTrees: Param[int] + def __init__(self) -> None: ... + def getNumTrees(self) -> int: ... + +class _GBTParams(_TreeEnsembleParams, HasMaxIter, HasStepSize, HasValidationIndicatorCol): + stepSize: Param[float] + validationTol: Param[float] + def getValidationTol(self) -> float: ... + +class _HasVarianceImpurity(Params): + supportedImpurities: List[str] + impurity: Param[str] + def __init__(self) -> None: ... + def getImpurity(self) -> str: ... + +class _TreeClassifierParams: + supportedImpurities: List[str] + impurity: Param[str] + def __init__(self) -> None: ... + def getImpurity(self) -> str: ... + +class _TreeRegressorParams(_HasVarianceImpurity): ...