docs(python): Expand docstrings for to_numpy methods (pola-rs#16394)

Wouittone · Jun 22, 2024 · d936479 · d936479
1 parent eddc944
commit d936479
Show file tree

Hide file tree

Showing 2 changed files with 101 additions and 29 deletions.
diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py
@@ -1498,13 +1498,23 @@ def to_numpy(
         structured: bool = False,  # noqa: FBT001
         *,
         order: IndexOrder = "fortran",
-        allow_copy: bool = True,
         writable: bool = False,
+        allow_copy: bool = True,
         use_pyarrow: bool | None = None,
     ) -> np.ndarray[Any, Any]:
         """
         Convert this DataFrame to a NumPy ndarray.
 
+        This operation copies data only when necessary. The conversion is zero copy when
+        all of the following hold:
+
+        - The DataFrame is fully contiguous in memory, with all Series back-to-back and
+          all Series consisting of a single chunk.
+        - The data type is an integer or float.
+        - The DataFrame contains no null values.
+        - The `order` parameter is set to `fortran` (default).
+        - The `writable` parameter is set to `False` (default).
+
         Parameters
         ----------
         structured
@@ -1519,13 +1529,13 @@ def to_numpy(
             However, the C-like order might be more appropriate to use for downstream
             applications to prevent cloning data, e.g. when reshaping into a
             one-dimensional array.
-        allow_copy
-            Allow memory to be copied to perform the conversion. If set to `False`,
-            causes conversions that are not zero-copy to fail.
         writable
             Ensure the resulting array is writable. This will force a copy of the data
             if the array was created without copy, as the underlying Arrow data is
             immutable.
+        allow_copy
+            Allow memory to be copied to perform the conversion. If set to `False`,
+            causes conversions that are not zero-copy to fail.
 
         use_pyarrow
             Use `pyarrow.Array.to_numpy
@@ -1538,6 +1548,48 @@ def to_numpy(
 
         Examples
         --------
+        Numeric data without nulls can be converted without copying data in some cases.
+        The resulting array will not be writable.
+
+        >>> df = pl.DataFrame({"a": [1, 2, 3]})
+        >>> arr = df.to_numpy()
+        >>> arr
+        array([[1],
+               [2],
+               [3]])
+        >>> arr.flags.writeable
+        False
+
+        Set `writable=True` to force data copy to make the array writable.
+
+        >>> df.to_numpy(writable=True).flags.writeable
+        True
+
+        If the DataFrame contains different numeric data types, the resulting data type
+        will be the supertype. This requires data to be copied. Integer types with
+        nulls are cast to a float type with `nan` representing a null value.
+
+        >>> df = pl.DataFrame({"a": [1, 2, None], "b": [4.0, 5.0, 6.0]})
+        >>> df.to_numpy()
+        array([[ 1.,  4.],
+               [ 2.,  5.],
+               [nan,  6.]])
+
+        Set `allow_copy=False` to raise an error if data would be copied.
+
+        >>> s.to_numpy(allow_copy=False)  # doctest: +SKIP
+        Traceback (most recent call last):
+        ...
+        RuntimeError: copy not allowed: cannot convert to a NumPy array without copying data
+
+        Polars defaults to F-contiguous order. Use `order="c"` to force the resulting
+        array to be C-contiguous.
+
+        >>> df.to_numpy(order="c").flags.c_contiguous
+        True
+
+        DataFrames with mixed types will result in an array with an object dtype.
+
         >>> df = pl.DataFrame(
         ...     {
         ...         "foo": [1, 2, 3],
@@ -1546,28 +1598,18 @@ def to_numpy(
         ...     },
         ...     schema_overrides={"foo": pl.UInt8, "bar": pl.Float32},
         ... )
-
-        Export to a standard 2D numpy array.
-
         >>> df.to_numpy()
         array([[1, 6.5, 'a'],
                [2, 7.0, 'b'],
                [3, 8.5, 'c']], dtype=object)
 
-        Export to a structured array, which can better-preserve individual
-        column data, such as name and dtype...
+        Set `structured=True` to convert to a structured array, which can better
+        preserve individual column data such as name and data type.
 
         >>> df.to_numpy(structured=True)
         array([(1, 6.5, 'a'), (2, 7. , 'b'), (3, 8.5, 'c')],
               dtype=[('foo', 'u1'), ('bar', '<f4'), ('ham', '<U1')])
-
-        ...optionally going on to view as a record array:
-
-        >>> import numpy as np
-        >>> df.to_numpy(structured=True).view(np.recarray)
-        rec.array([(1, 6.5, 'a'), (2, 7. , 'b'), (3, 8.5, 'c')],
-                  dtype=[('foo', 'u1'), ('bar', '<f4'), ('ham', '<U1')])
-        """
+        """  # noqa: W505
         if use_pyarrow is not None:
             issue_deprecation_warning(
                 "The `use_pyarrow` parameter for `DataFrame.to_numpy` is deprecated."

diff --git a/py-polars/polars/series/series.py b/py-polars/polars/series/series.py
@@ -4427,13 +4427,13 @@ def to_numpy(
         """
         Convert this Series to a NumPy ndarray.
 
-        This operation may copy data, but is completely safe. Note that:
+        This operation copies data only when necessary. The conversion is zero copy when
+        all of the following hold:
 
-        - Data which is purely numeric AND without null values is not cloned
-        - Floating point `nan` values can be zero-copied
-        - Booleans cannot be zero-copied
-
-        To ensure that no data is copied, set `allow_copy=False`.
+        - The data type is an integer, float, `Datetime`, `Duration`, or `Array`.
+        - The Series contains no null values.
+        - The Series consists of a single chunk.
+        - The `writable` parameter is set to `False` (default).
 
         Parameters
         ----------
@@ -4466,13 +4466,43 @@ def to_numpy(
 
         Examples
         --------
-        >>> s = pl.Series("a", [1, 2, 3])
+        Numeric data without nulls can be converted without copying data.
+        The resulting array will not be writable.
+
+        >>> s = pl.Series([1, 2, 3], dtype=pl.Int8)
         >>> arr = s.to_numpy()
-        >>> arr  # doctest: +IGNORE_RESULT
-        array([1, 2, 3], dtype=int64)
-        >>> type(arr)
-        <class 'numpy.ndarray'>
-        """
+        >>> arr
+        array([1, 2, 3], dtype=int8)
+        >>> arr.flags.writeable
+        False
+
+        Set `writable=True` to force data copy to make the array writable.
+
+        >>> s.to_numpy(writable=True).flags.writeable
+        True
+
+        Integer Series containing nulls will be cast to a float type with `nan`
+        representing a null value. This requires data to be copied.
+
+        >>> s = pl.Series([1, 2, None], dtype=pl.UInt16)
+        >>> s.to_numpy()
+        array([ 1.,  2., nan], dtype=float32)
+
+        Set `allow_copy=False` to raise an error if data would be copied.
+
+        >>> s.to_numpy(allow_copy=False)  # doctest: +SKIP
+        Traceback (most recent call last):
+        ...
+        RuntimeError: copy not allowed: cannot convert to a NumPy array without copying data
+
+        Series of data type `Array` and `Struct` will result in an array with more than
+        one dimension.
+
+        >>> s = pl.Series([[1, 2, 3], [4, 5, 6]], dtype=pl.Array(pl.Int64, 3))
+        >>> s.to_numpy()
+        array([[1, 2, 3],
+               [4, 5, 6]])
+        """  # noqa: W505
         if zero_copy_only is not None:
             issue_deprecation_warning(
                 "The `zero_copy_only` parameter for `Series.to_numpy` is deprecated."