diff --git a/.gitignore b/.gitignore index be92b9c482..28e08b3686 100644 --- a/.gitignore +++ b/.gitignore @@ -32,4 +32,4 @@ log/ # docs autogen -/docs/source/api_docs/*/ +/docs/source/api_docs/doc_gen/ diff --git a/daft/__init__.py b/daft/__init__.py index 8ad5025a18..e18ac11455 100644 --- a/daft/__init__.py +++ b/daft/__init__.py @@ -77,7 +77,7 @@ class daft: from_ray_dataset, ) from daft.dataframe import DataFrame -from daft.datatype import DataType +from daft.datatype import DataType, ImageMode from daft.expressions import col, lit from daft.io import from_glob_path, read_csv, read_json, read_parquet from daft.series import Series @@ -98,6 +98,7 @@ class daft: "DataFrame", "col", "DataType", + "ImageMode", "lit", "Series", "register_viz_hook", diff --git a/daft/daft.pyi b/daft/daft.pyi index 159c3bf7c6..739171cc77 100644 --- a/daft/daft.pyi +++ b/daft/daft.pyi @@ -6,31 +6,45 @@ class ImageMode(Enum): Supported image modes for Daft's image type. """ - # 8-bit grayscale + #: 8-bit grayscale L: int - # 8-bit grayscale + alpha + + #: 8-bit grayscale + alpha LA: int - # 8-bit RGB + + #: 8-bit RGB RGB: int - # 8-bit RGB + alpha + + #: 8-bit RGB + alpha RGBA: int - # 16-bit grayscale + + #: 16-bit grayscale L16: int - # 16-bit grayscale + alpha + + #: 16-bit grayscale + alpha LA16: int - # 16-bit RGB + + #: 16-bit RGB RGB16: int - # 16-bit RGB + alpha + + #: 16-bit RGB + alpha RGBA16: int - # 32-bit floating RGB + + #: 32-bit floating RGB RGB32F: int - # 32-bit floating RGB + alpha + + #: 32-bit floating RGB + alpha RGBA32F: int @staticmethod def from_mode_string(mode: str) -> ImageMode: """ Create an ImageMode from its string representation. + + Args: + mode: String representation of the mode. This is the same as the enum + attribute name, e.g. ``ImageMode.from_mode_string("RGB")`` would + return ``ImageMode.RGB``. """ ... 
diff --git a/daft/datatype.py b/daft/datatype.py index 539e0c20d0..b682d4084b 100644 --- a/daft/datatype.py +++ b/daft/datatype.py @@ -176,6 +176,28 @@ def embedding(cls, name: str, dtype: DataType, size: int) -> DataType: def image( cls, mode: str | ImageMode | None = None, height: int | None = None, width: int | None = None ) -> DataType: + """Create an Image DataType: image arrays contain (height, width, channel) ndarrays of pixel values. + + Each image in the array has an :class:`~daft.ImageMode`, which describes the pixel dtype (e.g. uint8) and + the number of image channels/bands and their logical interpretation (e.g. RGB). + + If the height, width, and mode are the same for all images in the array, specifying them when constructing + this type is advised, since that will allow Daft to create a more optimized physical representation + of the image array. + + If the height, width, or mode may vary across images in the array, leaving these fields unspecified when + creating this type will cause Daft to represent this image array as a heterogeneous collection of images, + where each image can have a different mode, height, and width. This is much more flexible, but will result + in a less compact representation and may make some operations less efficient. + + Args: + mode: The mode of the image. By default, this is inferred from the underlying data. + If height and width are specified, the mode must also be specified. + height: The height of the image. By default, this is inferred from the underlying data. + Must be specified if the width is specified. + width: The width of the image. By default, this is inferred from the underlying data. + Must be specified if the height is specified.
+ """ if isinstance(mode, str): mode = ImageMode.from_mode_string(mode) if height is not None and width is not None: diff --git a/docs/source/api_docs/context.rst b/docs/source/api_docs/context.rst index 5c10a46f9a..e7d94488c8 100644 --- a/docs/source/api_docs/context.rst +++ b/docs/source/api_docs/context.rst @@ -8,7 +8,7 @@ Control the execution backend that Daft will run on by calling these functions o .. autosummary:: :nosignatures: - :toctree: configuration_functions + :toctree: doc_gen/configuration_functions daft.context.set_runner_py daft.context.set_runner_ray diff --git a/docs/source/api_docs/dataframe.rst b/docs/source/api_docs/dataframe.rst index 0b72f1e229..5c7145aca4 100644 --- a/docs/source/api_docs/dataframe.rst +++ b/docs/source/api_docs/dataframe.rst @@ -5,7 +5,7 @@ DataFrame .. autosummary:: :nosignatures: - :toctree: dataframe_methods + :toctree: doc_gen/dataframe_methods DataFrame @@ -26,7 +26,7 @@ Manipulating Columns .. autosummary:: :nosignatures: - :toctree: dataframe_methods + :toctree: doc_gen/dataframe_methods daft.DataFrame.select daft.DataFrame.with_column @@ -41,7 +41,7 @@ Filtering Rows .. autosummary:: :nosignatures: - :toctree: dataframe_methods + :toctree: doc_gen/dataframe_methods daft.DataFrame.distinct daft.DataFrame.where @@ -54,7 +54,7 @@ Reordering .. autosummary:: :nosignatures: - :toctree: dataframe_methods + :toctree: doc_gen/dataframe_methods daft.DataFrame.sort daft.DataFrame.repartition @@ -66,7 +66,7 @@ Combining .. autosummary:: :nosignatures: - :toctree: dataframe_methods + :toctree: doc_gen/dataframe_methods daft.DataFrame.join @@ -81,7 +81,7 @@ Aggregations .. autosummary:: :nosignatures: - :toctree: dataframe_methods + :toctree: doc_gen/dataframe_methods daft.DataFrame.groupby daft.DataFrame.sum @@ -102,7 +102,7 @@ Materialization .. autosummary:: :nosignatures: - :toctree: dataframe_methods + :toctree: doc_gen/dataframe_methods daft.DataFrame.collect @@ -113,7 +113,7 @@ Visualization .. 
autosummary:: :nosignatures: - :toctree: dataframe_methods + :toctree: doc_gen/dataframe_methods daft.DataFrame.show @@ -127,7 +127,7 @@ Writing Data .. autosummary:: :nosignatures: - :toctree: dataframe_methods + :toctree: doc_gen/dataframe_methods daft.DataFrame.write_parquet daft.DataFrame.write_csv @@ -139,7 +139,7 @@ Integrations .. autosummary:: :nosignatures: - :toctree: dataframe_methods + :toctree: doc_gen/dataframe_methods daft.DataFrame.to_pandas daft.DataFrame.to_ray_dataset @@ -150,7 +150,7 @@ Schema and Lineage .. autosummary:: :nosignatures: - :toctree: dataframe_methods + :toctree: doc_gen/dataframe_methods daft.DataFrame.explain daft.DataFrame.schema diff --git a/docs/source/api_docs/datatype.rst b/docs/source/api_docs/datatype.rst index 33a498c350..bd4cf3e1cc 100644 --- a/docs/source/api_docs/datatype.rst +++ b/docs/source/api_docs/datatype.rst @@ -5,7 +5,7 @@ DataTypes .. autosummary:: :nosignatures: - :toctree: datatype_methods + :toctree: doc_gen/datatype_methods daft.DataType @@ -30,7 +30,7 @@ Numeric .. autosummary:: :nosignatures: - :toctree: datatype_methods + :toctree: doc_gen/datatype_methods daft.DataType.int8 daft.DataType.int16 @@ -51,7 +51,7 @@ Logical .. autosummary:: :nosignatures: - :toctree: datatype_methods + :toctree: doc_gen/datatype_methods daft.DataType.bool @@ -63,7 +63,7 @@ Strings .. autosummary:: :nosignatures: - :toctree: datatype_methods + :toctree: doc_gen/datatype_methods daft.DataType.binary daft.DataType.string @@ -76,7 +76,7 @@ Temporal .. autosummary:: :nosignatures: - :toctree: datatype_methods + :toctree: doc_gen/datatype_methods daft.DataType.date @@ -88,7 +88,7 @@ Nested .. autosummary:: :nosignatures: - :toctree: datatype_methods + :toctree: doc_gen/datatype_methods daft.DataType.list daft.DataType.fixed_size_list @@ -100,7 +100,7 @@ Python .. autosummary:: :nosignatures: - :toctree: datatype_methods + :toctree: doc_gen/datatype_methods daft.DataType.python @@ -115,7 +115,7 @@ Machine Learning .. 
autosummary:: :nosignatures: - :toctree: datatype_methods + :toctree: doc_gen/datatype_methods daft.DataType.embedding @@ -124,15 +124,25 @@ Computer Vision .. autosummary:: :nosignatures: - :toctree: datatype_methods + :toctree: doc_gen/datatype_methods daft.DataType.image +.. autosummary:: + :nosignatures: + + ImageMode + Miscellaneous ^^^^^^^^^^^^^ .. autosummary:: :nosignatures: - :toctree: datatype_methods + :toctree: doc_gen/datatype_methods daft.DataType.null + +.. toctree:: + :hidden: + + datatype_image_mode/daft.ImageMode diff --git a/docs/source/api_docs/datatype_image_mode/daft.ImageMode.from_mode_string.rst b/docs/source/api_docs/datatype_image_mode/daft.ImageMode.from_mode_string.rst new file mode 100644 index 0000000000..c0b9a4392f --- /dev/null +++ b/docs/source/api_docs/datatype_image_mode/daft.ImageMode.from_mode_string.rst @@ -0,0 +1,6 @@ +daft.ImageMode.from\_mode\_string +================================= + +.. currentmodule:: daft + +.. automethod:: ImageMode.from_mode_string \ No newline at end of file diff --git a/docs/source/api_docs/datatype_image_mode/daft.ImageMode.rst b/docs/source/api_docs/datatype_image_mode/daft.ImageMode.rst new file mode 100644 index 0000000000..6699b6f221 --- /dev/null +++ b/docs/source/api_docs/datatype_image_mode/daft.ImageMode.rst @@ -0,0 +1,27 @@ +daft.ImageMode +============== + +.. currentmodule:: daft + +.. autoclass:: ImageMode + + .. autosummary:: + :toctree: + + ~ImageMode.from_mode_string + + .. rubric:: Variants + + .. autosummary:: + + ~ImageMode.L + ~ImageMode.LA + ~ImageMode.RGB + ~ImageMode.RGBA + ~ImageMode.L16 + ~ImageMode.LA16 + ~ImageMode.RGB16 + ~ImageMode.RGBA16 + ~ImageMode.RGB32F + ~ImageMode.RGBA32F + \ No newline at end of file diff --git a/docs/source/api_docs/expressions.rst b/docs/source/api_docs/expressions.rst index ff2ab700c0..a61825a5f1 100644 --- a/docs/source/api_docs/expressions.rst +++ b/docs/source/api_docs/expressions.rst @@ -5,7 +5,7 @@ Expressions .. 
autosummary:: :nosignatures: - :toctree: expression_methods + :toctree: doc_gen/expression_methods daft.expressions.Expression @@ -14,7 +14,7 @@ Expression Constructors .. autosummary:: :nosignatures: - :toctree: expression_methods + :toctree: doc_gen/expression_methods daft.DataFrame.__getitem__ daft.expressions.col @@ -31,7 +31,7 @@ Numeric Operations on numbers (floats and integers) .. autosummary:: - :toctree: expression_methods + :toctree: doc_gen/expression_methods daft.expressions.Expression.__abs__ daft.expressions.Expression.__add__ @@ -46,7 +46,7 @@ Logical Operations on logical expressions (True/False booleans) .. autosummary:: - :toctree: expression_methods + :toctree: doc_gen/expression_methods daft.expressions.Expression.__invert__ daft.expressions.Expression.__and__ @@ -61,7 +61,7 @@ Comparisons Comparing expressions and values, returning a logical expression .. autosummary:: - :toctree: expression_methods + :toctree: doc_gen/expression_methods daft.expressions.Expression.__lt__ daft.expressions.Expression.__le__ @@ -83,7 +83,7 @@ Operations on strings, accessible through the ``Expression.float`` method access Example: ``e1.float.is_nan()`` .. autosummary:: - :toctree: expression_methods + :toctree: doc_gen/expression_methods daft.expressions.expressions.ExpressionFloatNamespace.is_nan @@ -97,7 +97,7 @@ Operations on strings, accessible through the ``Expression.str`` method accessor Example: ``e1.str.concat(e2)`` .. autosummary:: - :toctree: expression_methods + :toctree: doc_gen/expression_methods daft.expressions.expressions.ExpressionStringNamespace.concat daft.expressions.expressions.ExpressionStringNamespace.contains @@ -116,7 +116,7 @@ Example: ``e.dt.day()`` .. autosummary:: :nosignatures: - :toctree: expression_methods + :toctree: doc_gen/expression_methods daft.expressions.expressions.ExpressionDatetimeNamespace.day daft.expressions.expressions.ExpressionDatetimeNamespace.month @@ -134,7 +134,7 @@ Example: ``e.url.download()`` .. 
autosummary:: :nosignatures: - :toctree: expression_methods + :toctree: doc_gen/expression_methods daft.expressions.expressions.ExpressionUrlNamespace.download @@ -149,7 +149,7 @@ Example: ``e.image.resize()`` .. autosummary:: :nosignatures: - :toctree: expression_methods + :toctree: doc_gen/expression_methods daft.expressions.expressions.ExpressionImageNamespace.resize daft.expressions.expressions.ExpressionImageNamespace.decode @@ -160,7 +160,7 @@ Changing Column Names/Types .. autosummary:: :nosignatures: - :toctree: expression_methods + :toctree: doc_gen/expression_methods daft.expressions.Expression.alias daft.expressions.Expression.cast @@ -170,6 +170,6 @@ Running Python Functions .. autosummary:: :nosignatures: - :toctree: expression_methods + :toctree: doc_gen/expression_methods daft.expressions.Expression.apply diff --git a/docs/source/api_docs/groupby.rst b/docs/source/api_docs/groupby.rst index 2e0513165f..342132e218 100644 --- a/docs/source/api_docs/groupby.rst +++ b/docs/source/api_docs/groupby.rst @@ -9,7 +9,7 @@ Calling ``DataFrame.groupby`` returns a ``GroupedDataFrame`` object which is a v .. autosummary:: :nosignatures: - :toctree: dataframe_methods + :toctree: doc_gen/dataframe_methods daft.dataframe.dataframe.GroupedDataFrame.sum daft.dataframe.dataframe.GroupedDataFrame.mean diff --git a/docs/source/api_docs/input_output.rst b/docs/source/api_docs/input_output.rst index df5ba0cd75..cec6ddb678 100644 --- a/docs/source/api_docs/input_output.rst +++ b/docs/source/api_docs/input_output.rst @@ -15,7 +15,7 @@ Python Objects .. autosummary:: :nosignatures: - :toctree: io_functions + :toctree: doc_gen/io_functions daft.from_pylist daft.from_pydict @@ -26,12 +26,12 @@ Arrow .. autosummary:: :nosignatures: - :toctree: io_functions + :toctree: doc_gen/io_functions .. autosummary:: :nosignatures: - :toctree: io_functions + :toctree: doc_gen/io_functions daft.from_arrow daft.DataFrame.to_arrow @@ -41,7 +41,7 @@ Pandas .. 
autosummary:: :nosignatures: - :toctree: io_functions + :toctree: doc_gen/io_functions daft.from_pandas daft.DataFrame.to_pandas @@ -51,7 +51,7 @@ File Paths .. autosummary:: :nosignatures: - :toctree: io_functions + :toctree: doc_gen/io_functions daft.from_glob_path @@ -65,7 +65,7 @@ Parquet .. autosummary:: :nosignatures: - :toctree: io_functions + :toctree: doc_gen/io_functions daft.read_parquet daft.DataFrame.write_parquet @@ -75,7 +75,7 @@ CSV .. autosummary:: :nosignatures: - :toctree: io_functions + :toctree: doc_gen/io_functions daft.read_csv daft.DataFrame.write_csv @@ -85,7 +85,7 @@ JSON .. autosummary:: :nosignatures: - :toctree: io_functions + :toctree: doc_gen/io_functions daft.read_json @@ -99,7 +99,7 @@ Ray Datasets .. autosummary:: :nosignatures: - :toctree: io_functions + :toctree: doc_gen/io_functions daft.from_ray_dataset daft.DataFrame.to_ray_dataset @@ -109,7 +109,7 @@ Dask .. autosummary:: :nosignatures: - :toctree: io_functions + :toctree: doc_gen/io_functions daft.from_dask_dataframe daft.DataFrame.to_dask_dataframe diff --git a/docs/source/learn/user_guides/datatypes.rst b/docs/source/learn/user_guides/datatypes.rst index 3177edd051..3f0e163e60 100644 --- a/docs/source/learn/user_guides/datatypes.rst +++ b/docs/source/learn/user_guides/datatypes.rst @@ -111,8 +111,8 @@ Complex Types Daft supports many more interesting complex DataTypes, for example: -* :meth:`~daft.DataType.embedding()`: Lower-dimensional vector representation of data (e.g. words) -* :meth:`~daft.DataType.image()`: NHWC images +* :meth:`DataType.embedding() <daft.DataType.embedding>`: Lower-dimensional vector representation of data (e.g. words) +* :meth:`DataType.image() <daft.DataType.image>`: NHWC images Daft abstracts away the in-memory representation of your data and provides kernels for many common operations on top of these data types. For supported image operations see the :ref:`image expressions API reference `.
diff --git a/src/datatypes/image_mode.rs b/src/datatypes/image_mode.rs index 8a039d002c..d0e8c5331c 100644 --- a/src/datatypes/image_mode.rs +++ b/src/datatypes/image_mode.rs @@ -13,16 +13,16 @@ use crate::{ /// Supported image modes for Daft's image type. /// -/// L - 8-bit grayscale -/// LA - 8-bit grayscale + alpha -/// RGB - 8-bit RGB -/// RGBA - 8-bit RGB + alpha -/// L16 - 16-bit grayscale -/// LA16 - 16-bit grayscale + alpha -/// RGB16 - 16-bit RGB -/// RGBA16 - 16-bit RGB + alpha -/// RGB32F - 32-bit floating RGB -/// RGBA32F - 32-bit floating RGB + alpha +/// | L - 8-bit grayscale +/// | LA - 8-bit grayscale + alpha +/// | RGB - 8-bit RGB +/// | RGBA - 8-bit RGB + alpha +/// | L16 - 16-bit grayscale +/// | LA16 - 16-bit grayscale + alpha +/// | RGB16 - 16-bit RGB +/// | RGBA16 - 16-bit RGB + alpha +/// | RGB32F - 32-bit floating RGB +/// | RGBA32F - 32-bit floating RGB + alpha #[allow(clippy::upper_case_acronyms)] #[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize, Hash, FromPrimitive)] #[cfg_attr(feature = "python", pyclass)] @@ -43,6 +43,11 @@ pub enum ImageMode { #[pymethods] impl ImageMode { /// Create an ImageMode from its string representation. + /// + /// Args: + /// mode: String representation of the mode. This is the same as the enum + /// attribute name, e.g. ``ImageMode.from_mode_string("RGB")`` would + /// return ``ImageMode.RGB``. #[staticmethod] pub fn from_mode_string(mode: &str) -> PyResult { Self::from_str(mode).map_err(|e| PyValueError::new_err(e.to_string()))