diff --git a/py-polars/polars/_utils/construction/series.py b/py-polars/polars/_utils/construction/series.py index 9c107bb695b7..07c6117ebdfc 100644 --- a/py-polars/polars/_utils/construction/series.py +++ b/py-polars/polars/_utils/construction/series.py @@ -32,6 +32,7 @@ from polars.datatypes import ( INTEGER_DTYPES, TEMPORAL_DTYPES, + Array, Boolean, Categorical, Date, @@ -96,7 +97,7 @@ def sequence_to_pyseries( dtype = Null # lists defer to subsequent handling; identify nested type - elif dtype == List: + elif dtype in (List, Array): python_dtype = list # infer temporal type handling @@ -130,8 +131,9 @@ def sequence_to_pyseries( # flat data if ( dtype is not None - and dtype not in (List, Struct, Unknown) and is_polars_dtype(dtype) + and not dtype.is_nested() + and dtype != Unknown and (python_dtype is None) ): constructor = polars_type_to_constructor(dtype) @@ -160,159 +162,153 @@ def sequence_to_pyseries( schema=struct_schema, orient="row", ).to_struct(name) - else: - if python_dtype is None: - if value is None: - constructor = polars_type_to_constructor(Null) - return constructor(name, values, strict) - - # generic default dtype - python_dtype = type(value) - - # temporal branch - if python_dtype in py_temporal_types: - if dtype is None: - dtype = py_type_to_dtype(python_dtype) # construct from integer - elif dtype in py_temporal_types: - dtype = py_type_to_dtype(dtype) - - values_dtype = ( - None - if value is None - else py_type_to_dtype(type(value), raise_unmatched=False) + + if python_dtype is None: + if value is None: + constructor = polars_type_to_constructor(Null) + return constructor(name, values, strict) + + # generic default dtype + python_dtype = type(value) + + # temporal branch + if python_dtype in py_temporal_types: + if dtype is None: + dtype = py_type_to_dtype(python_dtype) # construct from integer + elif dtype in py_temporal_types: + dtype = py_type_to_dtype(dtype) + + values_dtype = ( + None + if value is None + else py_type_to_dtype(type(value), raise_unmatched=False) + ) + if values_dtype is not None and values_dtype.is_float(): + msg = f"'float' object cannot be interpreted as a {python_dtype.__name__!r}" + raise TypeError( + # we do not accept float values as temporal; if this is + # required, the caller should explicitly cast to int first. + msg ) - if values_dtype is not None and values_dtype.is_float(): - msg = f"'float' object cannot be interpreted as a {python_dtype.__name__!r}" - raise TypeError( - # we do not accept float values as temporal; if this is - # required, the caller should explicitly cast to int first. - msg - ) - # We use the AnyValue builder to create the datetime array - # We store the values internally as UTC and set the timezone - py_series = PySeries.new_from_any_values(name, values, strict) + # We use the AnyValue builder to create the datetime array + # We store the values internally as UTC and set the timezone + py_series = PySeries.new_from_any_values(name, values, strict) - time_unit = getattr(dtype, "time_unit", None) - time_zone = getattr(dtype, "time_zone", None) + time_unit = getattr(dtype, "time_unit", None) + time_zone = getattr(dtype, "time_zone", None) - if time_unit is None or values_dtype == Date: - s = wrap_s(py_series) - else: - s = wrap_s(py_series).dt.cast_time_unit(time_unit) + if time_unit is None or values_dtype == Date: + s = wrap_s(py_series) + else: + s = wrap_s(py_series).dt.cast_time_unit(time_unit) - if (values_dtype == Date) & (dtype == Datetime): - return ( - s.cast(Datetime(time_unit or "us")) - .dt.replace_time_zone(time_zone) - ._s + if (values_dtype == Date) & (dtype == Datetime): + return ( + s.cast(Datetime(time_unit or "us")).dt.replace_time_zone(time_zone)._s + ) + + if (dtype == Datetime) and (value.tzinfo is not None or time_zone is not None): + values_tz = str(value.tzinfo) if value.tzinfo is not None else None + dtype_tz = dtype.time_zone # type: ignore[union-attr] + if values_tz is not None and (dtype_tz is not None and dtype_tz != "UTC"): + msg = ( + "time-zone-aware datetimes are converted to UTC" + "\n\nPlease either drop the time zone from the dtype, or set it to 'UTC'." + " To convert to a different time zone, please use `.dt.convert_time_zone`." + ) + raise ValueError(msg) + if values_tz != "UTC" and dtype_tz is None: + warnings.warn( + "Constructing a Series with time-zone-aware " + "datetimes results in a Series with UTC time zone. " + "To silence this warning, you can filter " + "warnings of class TimeZoneAwareConstructorWarning, or " + "set 'UTC' as the time zone of your datatype.", + TimeZoneAwareConstructorWarning, + stacklevel=find_stacklevel(), ) + return s.dt.replace_time_zone(dtype_tz or "UTC")._s + return s._s - if (dtype == Datetime) and ( - value.tzinfo is not None or time_zone is not None - ): - values_tz = str(value.tzinfo) if value.tzinfo is not None else None - dtype_tz = dtype.time_zone # type: ignore[union-attr] - if values_tz is not None and ( - dtype_tz is not None and dtype_tz != "UTC" - ): - msg = ( - "time-zone-aware datetimes are converted to UTC" - "\n\nPlease either drop the time zone from the dtype, or set it to 'UTC'." - " To convert to a different time zone, please use `.dt.convert_time_zone`." - ) - raise ValueError(msg) - if values_tz != "UTC" and dtype_tz is None: - warnings.warn( - "Constructing a Series with time-zone-aware " - "datetimes results in a Series with UTC time zone. " - "To silence this warning, you can filter " - "warnings of class TimeZoneAwareConstructorWarning, or " - "set 'UTC' as the time zone of your datatype.", - TimeZoneAwareConstructorWarning, - stacklevel=find_stacklevel(), - ) - return s.dt.replace_time_zone(dtype_tz or "UTC")._s - return s._s + elif ( + _check_for_numpy(value) + and isinstance(value, np.ndarray) + and len(value.shape) == 1 + ): + n_elems = len(value) + if all(len(v) == n_elems for v in values): + # can take (much) faster path if all lists are the same length + return numpy_to_pyseries( + name, + np.vstack(values), + strict=strict, + nan_to_null=nan_to_null, + ) + else: + return PySeries.new_series_list( + name, + [ + numpy_to_pyseries("", v, strict=strict, nan_to_null=nan_to_null) + for v in values + ], + strict, + ) - elif ( - _check_for_numpy(value) - and isinstance(value, np.ndarray) - and len(value.shape) == 1 - ): - n_elems = len(value) - if all(len(v) == n_elems for v in values): - # can take (much) faster path if all lists are the same length - return numpy_to_pyseries( - name, - np.vstack(values), - strict=strict, - nan_to_null=nan_to_null, - ) + elif python_dtype in (list, tuple): + if dtype is None: + return PySeries.new_from_any_values(name, values, strict=strict) + elif dtype == Object: + return PySeries.new_object(name, values, strict) + else: + if (inner_dtype := getattr(dtype, "inner", None)) is not None: + pyseries_list = [ + None + if value is None + else sequence_to_pyseries( + "", + value, + inner_dtype, + strict=strict, + nan_to_null=nan_to_null, + ) + for value in values + ] + pyseries = PySeries.new_series_list(name, pyseries_list, strict) else: - return PySeries.new_series_list( - name, - [ - numpy_to_pyseries("", v, strict=strict, nan_to_null=nan_to_null) - for v in values - ], - strict, + pyseries = PySeries.new_from_any_values_and_dtype( + name, values, dtype, strict=strict ) + if dtype != pyseries.dtype(): + pyseries = pyseries.cast(dtype, strict=False) + return pyseries - elif python_dtype in (list, tuple): - if dtype is None: - return PySeries.new_from_any_values(name, values, strict=strict) - elif dtype == Object: - return PySeries.new_object(name, values, strict) - else: - if (inner_dtype := getattr(dtype, "inner", None)) is not None: - pyseries_list = [ - None - if value is None - else sequence_to_pyseries( - "", - value, - inner_dtype, - strict=strict, - nan_to_null=nan_to_null, - ) - for value in values - ] - pyseries = PySeries.new_series_list(name, pyseries_list, strict) + elif python_dtype == pl.Series: + return PySeries.new_series_list( + name, [v._s if v is not None else None for v in values], strict + ) + + elif python_dtype == PySeries: + return PySeries.new_series_list(name, values, strict) + else: + constructor = py_type_to_constructor(python_dtype) + if constructor == PySeries.new_object: + try: + srs = PySeries.new_from_any_values(name, values, strict) + if _check_for_numpy(python_dtype, check_type=False) and isinstance( + np.bool_(True), np.generic + ): + dtype = numpy_char_code_to_dtype(np.dtype(python_dtype).char) + return srs.cast(dtype, strict=strict) else: - pyseries = PySeries.new_from_any_values_and_dtype( - name, values, dtype, strict=strict - ) - if dtype != pyseries.dtype(): - pyseries = pyseries.cast(dtype, strict=False) - return pyseries + return srs - elif python_dtype == pl.Series: - return PySeries.new_series_list( - name, [v._s if v is not None else None for v in values], strict - ) + except RuntimeError: + return PySeries.new_from_any_values(name, values, strict=strict) - elif python_dtype == PySeries: - return PySeries.new_series_list(name, values, strict) - else: - constructor = py_type_to_constructor(python_dtype) - if constructor == PySeries.new_object: - try: - srs = PySeries.new_from_any_values(name, values, strict) - if _check_for_numpy(python_dtype, check_type=False) and isinstance( - np.bool_(True), np.generic - ): - dtype = numpy_char_code_to_dtype(np.dtype(python_dtype).char) - return srs.cast(dtype, strict=strict) - else: - return srs - - except RuntimeError: - return PySeries.new_from_any_values(name, values, strict=strict) - - return _construct_series_with_fallbacks( - constructor, name, values, dtype, strict=strict - ) + return _construct_series_with_fallbacks( + constructor, name, values, dtype, strict=strict + ) def _construct_series_with_fallbacks( diff --git a/py-polars/tests/unit/constructors/test_series.py b/py-polars/tests/unit/constructors/test_series.py index aa5f0b3e7241..d34c594355ca 100644 --- a/py-polars/tests/unit/constructors/test_series.py +++ b/py-polars/tests/unit/constructors/test_series.py @@ -88,3 +88,12 @@ def test_large_timedelta(dtype: pl.DataType | None) -> None: # Microsecond precision is lost expected = [timedelta.min, timedelta.max - timedelta(microseconds=999)] assert s.to_list() == expected + + +def test_array_large_u64() -> None: + u64_max = 2**64 - 1 + values = [[u64_max]] + dtype = pl.Array(pl.UInt64, 1) + s = pl.Series(values, dtype=dtype) + assert s.dtype == dtype + assert s.to_list() == values