From 57746b594a22bb91e2e43f319197c2fab8bc33e9 Mon Sep 17 00:00:00 2001 From: Thomas Zeutschler Date: Fri, 6 Sep 2024 21:37:00 +0200 Subject: [PATCH 1/2] slice refactored --- docs/{use-cases.md => best-practise.md} | 0 docs/class-dimension.md | 2 +- docs/class-measure.md | 2 +- mkdocs.yml | 2 +- pages/404.html | 4 +- pages/404/index.html | 4 +- pages/advanced-usage/index.html | 4 +- pages/basic-usage/index.html | 4 +- pages/best-practises/index.html | 4 +- pages/blog001_why_cpd/index.html | 4 +- pages/class-cube/index.html | 4 +- pages/class-dimension/index.html | 18 +- pages/class-measure/index.html | 18 +- pages/class-pandas-ext/index.html | 4 +- pages/class-schema/index.html | 4 +- pages/contributing/index.html | 4 +- pages/faq/index.html | 10 +- pages/index.html | 11 +- pages/installation/index.html | 4 +- pages/license/index.html | 4 +- pages/method-cubed/index.html | 4 +- pages/readme_mkdocs/index.html | 4 +- pages/release-notes/index.html | 4 +- pages/search/search_index.json | 634 +++++++++++++++++++++++- pages/sitemap.xml | 10 +- pages/sitemap.xml.gz | Bin 396 -> 392 bytes pages/thomas/index.html | 4 +- 27 files changed, 697 insertions(+), 74 deletions(-) rename docs/{use-cases.md => best-practise.md} (100%) diff --git a/docs/use-cases.md b/docs/best-practise.md similarity index 100% rename from docs/use-cases.md rename to docs/best-practise.md diff --git a/docs/class-dimension.md b/docs/class-dimension.md index 0c17cda..a859f8f 100644 --- a/docs/class-dimension.md +++ b/docs/class-dimension.md @@ -1,4 +1,4 @@ -# SalesDataModel class +# Dimension class ::: cubedpandas.schema.Dimension diff --git a/docs/class-measure.md b/docs/class-measure.md index 784ae67..61c9a79 100644 --- a/docs/class-measure.md +++ b/docs/class-measure.md @@ -1,4 +1,4 @@ -# SalesDataModel class +# Measure class ::: cubedpandas.schema.Measure diff --git a/mkdocs.yml b/mkdocs.yml index 1687f2d..ef10d6b 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -111,7 +111,7 @@ plugins: nav: - Home: - 
'Welcome': 'index.md' - - 'Use Cases': 'use-cases.md' + - 'Best Practise': 'best-practise.md' - 'FAQ': 'faq.md' - 'Installation': 'installation.md' - 'Basic Usage': 'basic-usage.md' diff --git a/pages/404.html b/pages/404.html index 865aead..775c32b 100644 --- a/pages/404.html +++ b/pages/404.html @@ -388,11 +388,11 @@
  • - + - Use Cases + Best Practise diff --git a/pages/404/index.html b/pages/404/index.html index 4b28025..4ad41e4 100644 --- a/pages/404/index.html +++ b/pages/404/index.html @@ -395,11 +395,11 @@
  • - + - Use Cases + Best Practise diff --git a/pages/advanced-usage/index.html b/pages/advanced-usage/index.html index 4073032..b068142 100644 --- a/pages/advanced-usage/index.html +++ b/pages/advanced-usage/index.html @@ -404,11 +404,11 @@
  • - + - Use Cases + Best Practise diff --git a/pages/basic-usage/index.html b/pages/basic-usage/index.html index 4ee9e6a..5089b62 100644 --- a/pages/basic-usage/index.html +++ b/pages/basic-usage/index.html @@ -404,11 +404,11 @@
  • - + - Use Cases + Best Practise diff --git a/pages/best-practises/index.html b/pages/best-practises/index.html index a3b5d9d..693d609 100644 --- a/pages/best-practises/index.html +++ b/pages/best-practises/index.html @@ -404,11 +404,11 @@
  • - + - Use Cases + Best Practise diff --git a/pages/blog001_why_cpd/index.html b/pages/blog001_why_cpd/index.html index 082c8b0..a2e79e3 100644 --- a/pages/blog001_why_cpd/index.html +++ b/pages/blog001_why_cpd/index.html @@ -395,11 +395,11 @@
  • - + - Use Cases + Best Practise diff --git a/pages/class-cube/index.html b/pages/class-cube/index.html index 11c90b8..6eb6278 100644 --- a/pages/class-cube/index.html +++ b/pages/class-cube/index.html @@ -404,11 +404,11 @@
  • - + - Use Cases + Best Practise diff --git a/pages/class-dimension/index.html b/pages/class-dimension/index.html index ca460ed..3965848 100644 --- a/pages/class-dimension/index.html +++ b/pages/class-dimension/index.html @@ -90,9 +90,9 @@
    - - - + + + Skip to content @@ -404,11 +404,11 @@
  • - + - Use Cases + Best Practise @@ -1273,14 +1273,10 @@
    - - - - - -

    SalesDataModel class

    +

    Dimension class

    diff --git a/pages/class-measure/index.html b/pages/class-measure/index.html index 52ffcf9..70b51d6 100644 --- a/pages/class-measure/index.html +++ b/pages/class-measure/index.html @@ -90,9 +90,9 @@
    - - - + + + Skip to content @@ -404,11 +404,11 @@
  • - + - Use Cases + Best Practise @@ -1057,14 +1057,10 @@
    - - - - - -

    SalesDataModel class

    +

    Measure class

    diff --git a/pages/class-pandas-ext/index.html b/pages/class-pandas-ext/index.html index ce1f8bf..3be3725 100644 --- a/pages/class-pandas-ext/index.html +++ b/pages/class-pandas-ext/index.html @@ -395,11 +395,11 @@
  • - + - Use Cases + Best Practise diff --git a/pages/class-schema/index.html b/pages/class-schema/index.html index 22ac345..e6b892a 100644 --- a/pages/class-schema/index.html +++ b/pages/class-schema/index.html @@ -404,11 +404,11 @@
  • - + - Use Cases + Best Practise diff --git a/pages/contributing/index.html b/pages/contributing/index.html index 91ffe1b..a787085 100644 --- a/pages/contributing/index.html +++ b/pages/contributing/index.html @@ -401,11 +401,11 @@
  • - + - Use Cases + Best Practise diff --git a/pages/faq/index.html b/pages/faq/index.html index 229ab87..59c265e 100644 --- a/pages/faq/index.html +++ b/pages/faq/index.html @@ -13,9 +13,9 @@ - - - + + + @@ -404,11 +404,11 @@
  • - + - Use Cases + Best Practise diff --git a/pages/index.html b/pages/index.html index aa1d240..efe77f1 100644 --- a/pages/index.html +++ b/pages/index.html @@ -13,10 +13,9 @@ - - - - + + + @@ -493,11 +492,11 @@
  • - + - Use Cases + Best Practise diff --git a/pages/installation/index.html b/pages/installation/index.html index 6525b78..d718cac 100644 --- a/pages/installation/index.html +++ b/pages/installation/index.html @@ -404,11 +404,11 @@
  • - + - Use Cases + Best Practise diff --git a/pages/license/index.html b/pages/license/index.html index 8a2f491..9e1d177 100644 --- a/pages/license/index.html +++ b/pages/license/index.html @@ -401,11 +401,11 @@
  • - + - Use Cases + Best Practise diff --git a/pages/method-cubed/index.html b/pages/method-cubed/index.html index e988e00..f965cc3 100644 --- a/pages/method-cubed/index.html +++ b/pages/method-cubed/index.html @@ -404,11 +404,11 @@
  • - + - Use Cases + Best Practise diff --git a/pages/readme_mkdocs/index.html b/pages/readme_mkdocs/index.html index 7aae1b2..76176db 100644 --- a/pages/readme_mkdocs/index.html +++ b/pages/readme_mkdocs/index.html @@ -395,11 +395,11 @@
  • - + - Use Cases + Best Practise diff --git a/pages/release-notes/index.html b/pages/release-notes/index.html index 597d07b..844e794 100644 --- a/pages/release-notes/index.html +++ b/pages/release-notes/index.html @@ -399,11 +399,11 @@
  • - + - Use Cases + Best Practise diff --git a/pages/search/search_index.json b/pages/search/search_index.json index 7d7eb89..eb60fcb 100644 --- a/pages/search/search_index.json +++ b/pages/search/search_index.json @@ -1 +1,633 @@ -{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"CubedPandas Documentation","text":"

    Welcome to the CubedPandas Documentation site. Here you will find all the information you need to get started with CubedPandas, a powerful and easy-to-use Python library for working with Pandas dataframes.

    Note

    CubedPandas is in an early stage of development, your Ideas, Issues and Feedback are very welcome to make CubedPandas even more awesome. Many thanks!

    "},{"location":"#olap-comfort-meets-pandas-power","title":"OLAP comfort meets Pandas power!","text":"

    CubedPandas offers a new easy, fast & fun approach to navigate and analyze Pandas dataframes. CubedPandas is inspired by the powerful concepts of OLAP (Online Analytical Processing) and MDX (Multi-Dimensional Expressions) and aims to bring the comfort and power of OLAP to Pandas dataframes.

    For novice users, CubedPandas can be a great help to get started with Pandas, as it hides some of the complexity and verbosity of Pandas dataframes. For experienced users, CubedPandas can be a productivity booster, as it allows you to write more compact, readable and maintainable code. Just to give you a first idea, this Pandas code

    # Pandas: calculate the total revenue of all hybrid Audi cars\nvalue = df.loc[(df['make'] == 'Audi') & (df['engine'] == 'hybrid'), 'price'].sum()\n

    turns into this CubedPandas code

    # CubedPandas: calculate the total revenue of all hybrid Audi cars\nvalue = df.cubed.Audi.hybrid.price\n

    As CubedPandas neither duplicates data nor modifies the underlying dataframe, and does not add any performance penalty - in some cases it can even boost Pandas performance by factors - it can be used in production without any concerns and should be of great help in many use cases.

    In Jupyter notebooks, CubedPandas will really start to shine. For further information, please visit the CubedPandas Documentation or try the included samples.

    "},{"location":"#getting-started","title":"Getting Started","text":"

    CubedPandas is available on pypi.org (https://pypi.org/project/cubedpandas/) and can be installed by

    pip install cubedpandas\n

    Using CubedPandas is as simple as wrapping any Pandas dataframe into a cube like this:

    import pandas as pd\nfrom cubedpandas import cubed\n\n# Create a dataframe with some sales data\ndf = pd.DataFrame({\"product\":  [\"Apple\",  \"Pear\",   \"Banana\", \"Apple\",  \"Pear\",   \"Banana\"],\n                   \"channel\":  [\"Online\", \"Online\", \"Online\", \"Retail\", \"Retail\", \"Retail\"],\n                   \"customer\": [\"Peter\",  \"Peter\",  \"Paul\",   \"Paul\",   \"Mary\",   \"Mary\"  ],\n                   \"mailing\":  [True,     False,    True,     False,    True,     False   ],\n                   \"revenue\":  [100,      150,      300,      200,      250,      350     ],\n                   \"cost\":     [50,       90,       150,      100,      150,      175     ]})\n\ncdf = cubed(df)  # Wrap your dataframe into a cube and start using it!\n

    CubedPandas automatically infers a multi-dimensional schema from your Pandas dataframe which defines a virtual Cube over the dataframe. By default, numeric columns of the dataframe are considered as Measures - the numeric values to analyse & aggregate - all other columns are considered as Dimensions - to filter, navigate and view the data. The individual values in a dimension column are called the Members of the dimension. In the example above, column channel becomes a dimension with the two members Online and Retail, revenue and cost are our measures.

    Although rarely required, you can also define your own schema. Schemas are quite powerful and flexible, as they will allow you to define dimensions and measures, aliases and (planned for upcoming releases) also custom aggregations, business logic, number formatting, linked cubes (star-schemas) and much more.

    "},{"location":"#context-please-so-i-will-give-you-data","title":"Context please, so I will give you data!","text":"

    One key feature of CubedPandas is easy & intuitive access to individual Data Cells in multi-dimensional data space. To do so, you'll need to define a multi-dimensional Context so CubedPandas will evaluate, aggregate (sum by default) and return the requested value from the underlying dataframe.

    Context objects behave like normal numbers (float, int), so you can use them directly in arithmetic operations. In the following examples, all addresses will refer to exactly the same rows from the dataframe and thereby all return the same value of 100.

    # Let Pandas set the scene...\na = df.loc[(df[\"product\"] == \"Apple\") & (df[\"channel\"] == \"Online\") & (df[\"customer\"] == \"Peter\"), \"revenue\"].sum()\n\n# Can we do better with CubedPandas? \nb = cdf[\"product:Apple\", \"channel:Online\", \"customer:Peter\"].revenue  # explicit, readable, flexible and fast  \nc = cdf.product[\"Apple\"].channel[\"Online\"].customer[\n    \"Peter\"].revenue  # ...better, if column names are Python-compliant  \nd = cdf.product.Apple.channel.Online.customer.Peter.revenue  # ...even better, if member names are Python-compliant\n\n# If there are no ambiguities in your dataframe - what can be easily checked - then you can use this shorthand forms:\ne = cdf[\"Online\", \"Apple\", \"Peter\", \"revenue\"]\nf = cdf.Online.Apple.Peter.revenue\ng = cdf.Online.Apple.Peter  # as 'revenue' is the default (first) measure of the cube, it can be omitted\n\nassert a == b == c == d == e == f == g == 100\n

    Context objects also act as filters on the underlying dataframe. So you can also use CubedPandas for fast and easy filtering only, e.g. like this:

    df = df.cubed.product[\"Apple\"].channel[\"Online\"].df\ndf = df.cubed.Apple.Online.df  # short form, if column names are Python-compliant and there are no ambiguities\n
    "},{"location":"#pivot-drill-down-slice-dice","title":"Pivot, Drill-Down, Slice & Dice","text":"

    The Pandas pivot table is a very powerful tool. Unfortunately, it is quite verbose and very hard to master. CubedPandas offers the slice method to create pivot tables in a more intuitive and easy way, e.g. by default

    # Let's create a simple pivot table with the revenue for dimensions products and channels\ncdf.slice(rows=\"product\", columns=\"channel\", measures=\"revenue\")\n

    For further information, samples and a complete feature list as well as valuable tips and tricks, please visit the CubedPandas Documentation.

    "},{"location":"#your-feedback-ideas-and-support-are-very-welcome","title":"Your feedback, ideas and support are very welcome!","text":"

    Please help improve and extend CubedPandas with your feedback & ideas and use the CubedPandas GitHub Issues to request new features and report bugs. For general questions, discussions and feedback, please use the CubedPandas GitHub Discussions.

    If you have fallen in love with CubedPandas or find it otherwise valuable, please consider becoming a sponsor of the CubedPandas project so we can push the project forward faster and make CubedPandas even more awesome.

    ...happy cubing!

    "},{"location":"404/","title":"404","text":""},{"location":"404/#sorry-well-add-that-later","title":"Sorry, we'll add that later...","text":"

    The current focus of development is on the core features of CubedPandas. Once finished I'll work on this documentation...

    !!! Note: To stay tuned for updates and new releases of CubedPandas, please add a watch to the CubedPandas GitHub repo.

    Thomas

    "},{"location":"advanced-usage/","title":"Advanced Usage","text":""},{"location":"advanced-usage/#sorry-well-add-that-later","title":"Sorry, we'll add that later...","text":"

    The current focus of development is on the core features of CubedPandas. Once finished I'll work on this documentation...

    !!! Note: To stay tuned for updates and new releases of CubedPandas, please add a watch to the CubedPandas GitHub repo.

    Thomas

    "},{"location":"basic-usage/","title":"Basic Usage","text":""},{"location":"basic-usage/#sorry-well-add-that-later","title":"Sorry, we'll add that later...","text":"

    The current focus of development is on the core features of CubedPandas. Once finished I'll work on this documentation...

    !!! Note: To stay tuned for updates and new releases of CubedPandas, please add a watch to the CubedPandas GitHub repo.

    Thomas

    "},{"location":"become-a-sponsor/","title":"Become a sponsor","text":""},{"location":"become-a-sponsor/#sorry-well-add-that-later","title":"Sorry, we'll add that later...","text":"

    The current focus of development is on the core features of CubedPandas. Once finished I'll work on this documentation...

    !!! Note: To stay tuned for updates and new releases of CubedPandas, please add a watch to the CubedPandas GitHub repo.

    Thomas

    "},{"location":"best-practises/","title":"Best Practises","text":""},{"location":"best-practises/#sorry-well-add-that-later","title":"Sorry, we'll add that later...","text":"

    The current focus of development is on the core features of CubedPandas. Once finished I'll work on this documentation...

    !!! Note: To stay tuned for updates and new releases of CubedPandas, please add a watch to the CubedPandas GitHub repo.

    Thomas

    "},{"location":"blog001_why_cpd/","title":"Blog001 why cpd","text":""},{"location":"blog001_why_cpd/#sorry-well-add-that-later","title":"Sorry, we'll add that later...","text":"

    The current focus of development is on the core features of CubedPandas. Once finished I'll work on this documentation...

    !!! Note: To stay tuned for updates and new releases of CubedPandas, please add a watch to the CubedPandas GitHub repo.

    Thomas

    "},{"location":"class-context/","title":"Generator class","text":"

    Bases: SupportsFloat

    A context represents a multi-dimensional data context or area from within a cube. Context objects can be used to navigate and access the data of a cube and thereby the underlying dataframe.

    Cells behave like Python floats and return a numeric aggregation of the underlying data. They are intended to be used in mathematical operations.

    Samples

    cdf = cubed(df) value = cdf.A + cdf.B / 2 200 cdf.A *= 2

    "},{"location":"class-context/#cubedpandas.context.context.Context.function","title":"function: ContextFunction property","text":"

    Returns:

    • ContextFunction \u2013

      The aggregation function that will be applied to the current context.

    "},{"location":"class-context/#cubedpandas.context.context.Context.value","title":"value property writable","text":"

    Returns:

    • \u2013

      The sum value of the current context from the underlying cube.

    "},{"location":"class-context/#cubedpandas.context.context.Context.numeric_value","title":"numeric_value: float property","text":"

    Returns:

    • float \u2013

      The numerical value of the current context from the underlying cube.

    "},{"location":"class-context/#cubedpandas.context.context.Context.cube","title":"cube: Cube property","text":"

    Returns:

    • Cube \u2013

      The Cube object the Context belongs to.

    "},{"location":"class-context/#cubedpandas.context.context.Context.dimension","title":"dimension: Dimension property","text":"

    Returns:

    • Dimension \u2013

      The Dimension object the Context belongs to.

    "},{"location":"class-context/#cubedpandas.context.context.Context.parent","title":"parent: Context property","text":"

    Returns:

    • Context \u2013

      The parent Context of the current Context. If the current Context is the root Context of the cube,

    • Context \u2013

      then the parent Context will be None.

    "},{"location":"class-context/#cubedpandas.context.context.Context.df","title":"df: pd.DataFrame property","text":"

    Returns: A new Pandas dataframe with all columns of the underlying dataframe of the Cube, but only with the rows that are represented by the current context.

    The returned dataframe is always a copy of the original dataframe, even if the context is not filtering any rows from the underlying dataframe. The returned dataframe can be used for further processing outside the cube.

    "},{"location":"class-context/#cubedpandas.context.context.Context.address","title":"address: any property","text":"

    Returns:

    • any \u2013

      The partial address of the context, as defined by the user

    • any \u2013

      This does not include the addresses defined by predecessor

    • any \u2013

      cells down to the cube.

    "},{"location":"class-context/#cubedpandas.context.context.Context.cube_address","title":"cube_address: str property","text":"

    Returns:

    • str \u2013

      The full address of the context, including all predecessor

    • str \u2013

      cells down to the cube.

    "},{"location":"class-context/#cubedpandas.context.context.Context.measure","title":"measure: Measure property writable","text":"

    Returns:

    • Measure \u2013

      The Measure object the Context is currently referring to.

    • Measure \u2013

      The measure refers to a column in the underlying dataframe

    • Measure \u2013

      that is used to calculate the value of the context.

    "},{"location":"class-context/#cubedpandas.context.context.Context.row_mask","title":"row_mask: np.ndarray | None property","text":"

    Returns:

    • ndarray | None \u2013

      The row mask of the context. The row mask is represented by a Numpy ndarray

    • ndarray | None \u2013

      of the indexes of the rows represented by the current context. The row mask can be used

    • ndarray | None \u2013

      for subsequent processing of the underlying dataframe outside the cube.

    "},{"location":"class-context/#cubedpandas.context.context.Context.member_mask","title":"member_mask: np.ndarray | None property","text":"

    Returns:

    • ndarray | None \u2013

      The member mask of the context. If the context refers to a member or a set of members from a dimension,

    • ndarray | None \u2013

      then a Numpy ndarray containing the indexes of the rows representing the members is returned.

    • ndarray | None \u2013

      None is returned otherwise.

    • ndarray | None \u2013

      The row mask can be used for subsequent processing of the underlying dataframe outside the cube.

    "},{"location":"class-context/#cubedpandas.context.context.Context.row_mask_inverse","title":"row_mask_inverse: np.ndarray property","text":"

    Returns:

    • ndarray \u2013

      The inverted row mask of the context. The inverted row mask is represented by a Numpy ndarray

    • ndarray \u2013

      of the indexes of the rows NOT represented by the current context. The inverted row mask

    • ndarray \u2013

      can be used for subsequent processing of the underlying dataframe outside the cube.

    "},{"location":"class-context/#cubedpandas.context.context.Context.__init__","title":"__init__(cube, address, parent=None, row_mask=None, member_mask=None, measure=None, dimension=None, function=ContextFunction.SUM, resolve=True, filtered=False, dynamic_attribute=False)","text":"

    Initializes a new Context object. For internal use only. Raises: ValueError: If the address is invalid and does not refer to a dimension, member or measure of the cube.

    "},{"location":"class-context/#cubedpandas.context.context.Context.set_value","title":"set_value(value, allocation_function=ContextAllocation.DISTRIBUTE)","text":"

    Writes a value to the current context of the cube down to the underlying dataframe. The allocation method can be chosen.

    Parameters:

    • value \u2013

      The value to be written to the cube.

    • allocation_function (ContextAllocation, default: DISTRIBUTE ) \u2013

      The allocation function to be used for writing the value to the cube.

    Returns: The new value of the current context from the underlying cube.

    "},{"location":"class-context/#cubedpandas.context.context.Context.top","title":"top(n)","text":"

    Returns the top n members of the current context. Args: n: The number of top members to be returned. Returns: A list of the top n members of the current context.

    "},{"location":"class-context/#cubedpandas.context.context.Context.bottom","title":"bottom(n)","text":"

    Returns the bottom n members of the current context. Args: n: The number of bottom members to be returned. Returns: A list of the bottom n members of the current context.

    "},{"location":"class-context/#cubedpandas.context.context.Context.__getattr__","title":"__getattr__(name)","text":"

    Dynamically resolves member from the cube and predecessor cells.

    "},{"location":"class-context/#cubedpandas.context.context.Context.__getitem__","title":"__getitem__(address)","text":"

    Returns a nested context of the cube for a given address. Subsequent nested cells can be seen as subsequent filters upon the underlying dataframe.

    Parameters:

    • address \u2013

      A valid cube address. Please refer to the documentation for further details.

    Returns:

    • \u2013

      A Context object that represents the cube data related to the address

    • \u2013

      and all predecessor cells down to the cube.

    Raises:

    • ValueError \u2013

      If the address is not valid or can not be resolved.

    "},{"location":"class-context/#cubedpandas.context.context.Context.__setitem__","title":"__setitem__(address, value)","text":"

    Sets a value for a given address in the cube. Args: address: A valid cube address. Please refer to the documentation for further details. value: The value to be set for the data represented by the address. Raises: PermissionError: If write back is attempted on a read-only Cube.

    "},{"location":"class-context/#cubedpandas.context.context.Context.__delitem__","title":"__delitem__(address)","text":"

    Deletes the records represented by the given address from the underlying dataframe of the cube. Args: address: A valid cube address. Please refer to the documentation for further details. Raises: PermissionError: If write back is attempted on a read-only Cube.

    "},{"location":"class-context/#cubedpandas.context.context.Context.slice","title":"slice(rows=None, columns=None, config=None)","text":"

    Returns a new slice for the context. A slice represents a table-alike view to data in the cube. Typically, a slice has rows, columns and filters, comparable to an Excel PivotTable. Useful for printing in Jupyter, visual data analysis and reporting purposes. Slices can be easily 'navigated' by setting and changing rows, columns and filters.

    Please refer to the documentation of the Slice class for further details.

    Parameters:

    • rows \u2013

      The rows of the slice. Can be one or more dimensions with or without a member definition, or no dimension.

    • columns \u2013

      The columns of the slice. Can be one or more dimensions with or without a member definition, or no dimension.

    • filters \u2013

      The filters of the slice. Can be one or more dimensions with or without a member definition, or no dimension.

    • config \u2013

      (optional) A slice configuration as a dictionary, a json string or a path to an existing file containing the configuration. Slice configurations can be used to define a more complex layout. Please refer to the documentation of the Slice class for further details.

    Samples

    cdf = cubed(df) cdf.slice(rows=\"product\", columns=\"region\", filters={\"year\": 2020})

    "},{"location":"class-context/#cubedpandas.context.context.Context.slice--year-2000","title":"year: 2000","text":""},{"location":"class-context/#cubedpandas.context.context.Context.slice--all-north-south","title":"| | (all) | North | South |","text":"

    | (all) | 550 | 300 | 250 | | Apple | 200 | 100 | 100 | | Banana | 350 | 200 | 150 |

    "},{"location":"class-context/#cubedpandas.context.context.Context.filter","title":"filter(filter)","text":"

    Filters the current context by a given filter expression, criteria or callable function. Args: filter: The filter expression, criteria or callable function to be used for filtering the context. Returns: A new context with the filtered data.

    "},{"location":"class-cube/","title":"Generator class","text":"

    Wraps a Pandas dataframe into a cube to provide convenient multi-dimensional access to the underlying dataframe for easy aggregation, filtering, slicing, reporting and data manipulation and write back. A schema, that defines the dimensions and measures of the Cube, can either be inferred automatically from the underlying dataframe (default) or defined explicitly.

    "},{"location":"class-cube/#cubedpandas.cube.Cube.settings","title":"settings: CubeSettings property","text":"

    Returns:

    • CubeSettings \u2013

      The settings of the Cube.

    "},{"location":"class-cube/#cubedpandas.cube.Cube.measures","title":"measures: MeasureCollection property","text":"

    Returns:

    • MeasureCollection \u2013

      The measures available within or defined for the Cube.

    "},{"location":"class-cube/#cubedpandas.cube.Cube.ambiguities","title":"ambiguities: Ambiguities property","text":"

    Returns:

    • Ambiguities \u2013

      An Ambiguities object that provides information about ambiguous data types in the underlying dataframe.

    "},{"location":"class-cube/#cubedpandas.cube.Cube.linked_cubes","title":"linked_cubes: CubeLinks property","text":"

    Returns:

    • CubeLinks \u2013

      A list of linked cubes that are linked to this cube.

    "},{"location":"class-cube/#cubedpandas.cube.Cube.schema","title":"schema: Schema property","text":"

    Returns:

    • Schema \u2013

      The Schema of the Cube which defines the dimensions and measures of the Cube.

    "},{"location":"class-cube/#cubedpandas.cube.Cube.df","title":"df: pd.DataFrame property","text":"

    Returns: The underlying Pandas dataframe of the Cube.

    "},{"location":"class-cube/#cubedpandas.cube.Cube.dimensions","title":"dimensions: DimensionCollection property","text":"

    Returns:

    • DimensionCollection \u2013

      The dimensions available through the Cube.

    "},{"location":"class-cube/#cubedpandas.cube.Cube.size_in_bytes","title":"size_in_bytes: int property","text":"

    Returns: The size in bytes allocated by the Cube object instance. The memory allocation by the underlying dataframe is not included.

    "},{"location":"class-cube/#cubedpandas.cube.Cube.__init__","title":"__init__(df, schema=None, infer_schema=True, exclude=None, read_only=True, ignore_member_key_errors=True, ignore_case=True, ignore_key_errors=True, caching=CachingStrategy.LAZY, caching_threshold=EAGER_CACHING_THRESHOLD, eager_evaluation=True)","text":"

    Wraps a Pandas dataframe into a cube to provide convenient multi-dimensional access to the underlying dataframe for easy aggregation, filtering, slicing, reporting and data manipulation and write back.

    Parameters:

    • df (DataFrame) \u2013

      The Pandas dataframe to be wrapped into the CubedPandas Cube object.

    • schema \u2013

      (optional) A schema that defines the dimensions and measures of the Cube. If not provided, the schema will be inferred from the dataframe if parameter infer_schema is set to True. For further details please refer to the documentation of the Schema class. Default value is None.

    • infer_schema (bool, default: True ) \u2013

      (optional) If no schema is provided and infer_schema is set to True, a suitable schema will be inferred from the underlying dataframe. All numerical columns will be treated as measures, all other columns as dimensions. If this behaviour is not desired, a schema must be provided. Default value is True.

    • exclude (str | list | tuple | None, default: None ) \u2013

      (optional) Defines the columns that should be excluded from the cube if no schema is provided. If a column is excluded, it will not be part of the schema and can not be accessed through the cube. Excluded columns will be ignored during schema inference. Default value is None.

    • read_only (bool, default: True ) \u2013

      (optional) Defines if write backs to the underlying dataframe are permitted. If read_only is set to True, write back attempts will raise a PermissionError. If read_only is set to False, write backs are permitted and will be pushed back to the underlying dataframe. Default value is True.

    • ignore_case (bool, default: True ) \u2013

      (optional) If set to True, the case of member names will be ignored, 'Apple' and 'apple' will be treated as the same member. If set to False, member names are case-sensitive, 'Apple' and 'apple' will be treated as different members. Default value is True.

    • ignore_key_errors (bool, default: True ) \u2013

      (optional) If set to True, key errors for members of dimensions will be ignored and cell values will return 0.0 or None if no matching record exists. If set to False, key errors will be raised as exceptions when accessing cell values for non-existing members. Default value is True.

    • caching (CachingStrategy, default: LAZY ) \u2013

      (optional) A caching strategy to be applied for accessing the cube. The recommended value for almost all use cases is CachingStrategy.LAZY, which caches dimension members on first access. Caching can be beneficial for performance, but may also consume more memory. To cache all dimension members eagerly (on initialization of the cube), set this parameter to CachingStrategy.EAGER. Please refer to the documentation of 'CachingStrategy' for more information. Default value is CachingStrategy.LAZY.

    • caching_threshold (int, default: EAGER_CACHING_THRESHOLD ) \u2013

      (optional) The threshold as 'number of members' for EAGER caching only. If the number of distinct members in a dimension is below this threshold, the dimension will be cached eagerly, if caching is set to CachingStrategy.EAGER or CachingStrategy.FULL. Above this threshold, the dimension will be cached lazily. Default value is EAGER_CACHING_THRESHOLD, equivalent to 256 unique members per dimension.

    • eager_evaluation (bool, default: True ) \u2013

      (optional) If set to True, the cube will evaluate the context eagerly, i.e. when the context is created. Eager evaluation is recommended for most use cases, as it simplifies debugging and error handling. If set to False, the cube will evaluate the context lazily, i.e. only when the value of a context is accessed/requested.

    Returns:

    • \u2013

      A new Cube object that wraps the dataframe.

    Raises:

    • PermissionError \u2013

      If writeback is attempted on a read-only Cube.

    • ValueError \u2013

      If the schema is not valid or does not match the dataframe or if invalid dimension, member, measure or address arguments are provided.

    Examples:

    >>> df = pd.value([{\"product\": [\"A\", \"B\", \"C\"]}, {\"value\": [1, 2, 3]}])\n>>> cdf = cubed(df)\n>>> cdf[\"product:B\"]\n2\n
    "},{"location":"class-cube/#cubedpandas.cube.Cube.__len__","title":"__len__()","text":"

    Returns:

    • \u2013

      The number of records in the underlying dataframe of the Cube.

    "},{"location":"class-cube/#cubedpandas.cube.Cube.clear_cache","title":"clear_cache()","text":"

    Clears the cache of the Cube for all dimensions.

    "},{"location":"class-cube/#cubedpandas.cube.Cube.__getattr__","title":"__getattr__(name)","text":"

    Dynamically resolves dimensions, measure or member from the cube. This enables a more natural access to the cube data using the Python dot notation.

    If the name is not a valid Python identifier and contains special characters or whitespaces or start with numbers, then the slicer method needs to be used to resolve the name, e.g., if 12 data % is the name of a column or value in a dataframe, then cube[\"12 data %\"] needs to be used to return the dimension, measure or column.

    Parameters:

    • name \u2013

      Existing Name of a dimension, member or measure in the cube.

    Returns:

    • Context | CubeContext \u2013

      A Cell object that represents the cube data related to the address.

    Samples

    cdf = cubed(df) cdf.Online.Apple.cost 50

    "},{"location":"class-cube/#cubedpandas.cube.Cube.__getitem__","title":"__getitem__(address)","text":"

    Returns a cell of the cube for a given address. Args: address: A valid cube address. Please refer to the documentation for further details.

    Returns:

    • Context \u2013

      A Cell object that represents the cube data related to the address.

    Raises:

    • ValueError \u2013

      If the address is not valid or can not be resolved.

    "},{"location":"class-cube/#cubedpandas.cube.Cube.__setitem__","title":"__setitem__(address, value)","text":"

    Sets a value for a given address in the cube. Args: address: A valid cube address. Please refer to the documentation for further details. value: The value to be set for the data represented by the address. Raises: PermissionError: If write back is attempted on a read-only Cube.

    "},{"location":"class-cube/#cubedpandas.cube.Cube.__delitem__","title":"__delitem__(address)","text":"

    Deletes the records represented by the given address from the underlying dataframe of the cube. Args: address: A valid cube address. Please refer to the documentation for further details. Raises: PermissionError: If write back is attempted on a read-only Cube.

    "},{"location":"class-cube/#cubedpandas.cube.Cube.slice","title":"slice(rows=None, columns=None, config=None)","text":"

    Returns a new slice for the cube. A slice represents a table-alike view to data in the cube. Typically, a slice has rows, columns and filters, comparable to an Excel PivotTable. Useful for printing in Jupyter, visual data analysis and reporting purposes. Slices can be easily 'navigated' by setting and changing rows, columns and filters.

    Please refer to the documentation of the Slice class for further details.

    Samples

    cdf = cubed(df) cdf.slice(rows=\"product\", columns=\"region\", filters={\"year\": 2020})

    "},{"location":"class-cube/#cubedpandas.cube.Cube.slice--year-2000","title":"year: 2000","text":""},{"location":"class-cube/#cubedpandas.cube.Cube.slice--all-north-south","title":"| | (all) | North | South |","text":"

    | (all) | 550 | 300 | 250 | | Apple | 200 | 100 | 100 | | Banana | 350 | 200 | 150 |

    "},{"location":"class-dimension/","title":"SalesDataModel class","text":"

    Bases: Iterable, ABC

    Represents a dimension of a cube, mapped to a column in the underlying Pandas dataframe.

    "},{"location":"class-dimension/#cubedpandas.schema.Dimension.df","title":"df: pd.DataFrame property","text":"

    Returns the underlying Pandas dataframe the dimension/column refers to.

    "},{"location":"class-dimension/#cubedpandas.schema.Dimension.members","title":"members: list property","text":"

    Returns the list of members of the dimension.

    "},{"location":"class-dimension/#cubedpandas.schema.Dimension.member_set","title":"member_set: set property","text":"

    Returns the set of members of the dimension.

    "},{"location":"class-dimension/#cubedpandas.schema.Dimension.column","title":"column property","text":"

    Returns the column name in the underlying Pandas dataframe the dimension refers to.

    "},{"location":"class-dimension/#cubedpandas.schema.Dimension.name","title":"name property","text":"

    Returns the name (column name in the underlying Pandas dataframe) of the dimension.

    "},{"location":"class-dimension/#cubedpandas.schema.Dimension.dtype","title":"dtype property","text":"

    Returns the Pandas data type of the dimension column.

    "},{"location":"class-dimension/#cubedpandas.schema.Dimension.__init__","title":"__init__(df, column, caching=CachingStrategy.LAZY)","text":"

    Initializes a new Dimension from a Pandas dataframe and a column name.

    "},{"location":"class-dimension/#cubedpandas.schema.Dimension.__getattr__","title":"__getattr__(name)","text":"

    Dynamically resolves a Filter based on member names from the dimension. This enables a more natural access to the cube data using the Python dot notation.

    Member names need to be valid Python identifier/variable name. CubedPandas applies the following rules to resolve member names: - If a member name is also a valid Python identifier, it can be used directly. e.g., Apple - Member name resolving is case-insensitive, e.g., apple will resolve Apple. - White spaces in member names are replaced by underscores, e.g., best_offer will resolve best offer. - Leading numbers in a member name are replaced by underscores, e.g., _2_cute will resolve 2 cute. - Leading and trailing underscores are ignored/removed, e.g., hello will resolve hello. - All other special characters are removed, e.g., 12/4 cars is the same as 124_cars.

    • If the name is not a valid Python identifier (e.g. contains special characters), the slicer method needs to be used to resolve the member name. e.g., 12/4 cars is a valid name for a value

    If the name is not a valid Python identifier (e.g. contains special characters), the slicer method needs to be used to resolve the member name. e.g., 12/4 cars is a valid name for a value in a Pandas dataframe column, but not a valid Python identifier/variable name, hence dimension[\"12/4 cars\"] needs to be used to return the member.

    Parameters:

    • name \u2013

      Name of a member or measure in the cube.

    Returns:

    • \u2013

      A Cell object that represents the cube data related to the address.

    Samples

    cdf = cubed(df) cdf.Online.Apple.cost 50

    "},{"location":"class-dimension/#cubedpandas.schema.Dimension.clear_cache","title":"clear_cache()","text":"

    Clears the cache of the Dimension.

    "},{"location":"class-dimension/#cubedpandas.schema.Dimension.wildcard_filter","title":"wildcard_filter(pattern)","text":"

    Returns a list of members that match the given wildcard pattern.

    Parameters:

    • pattern \u2013

      A wildcard pattern to filter the dimension members.

    Returns:

    • (bool, list) \u2013

      A new DimensionFilter object.

    "},{"location":"class-dimension/#cubedpandas.schema.Dimension.count","title":"count(member)","text":"

    Returns the number of rows in the underlying dataframe where the dimension column contains the given member.

    "},{"location":"class-dimension/#cubedpandas.schema.Dimension.choice","title":"choice()","text":"

    Return a random member from the dimension.

    See https://docs.python.org/3/library/random.html#random.choice for more information.

    Returns:

    • \u2013

      Return a random member from the dimension.

    "},{"location":"class-dimension/#cubedpandas.schema.Dimension.choices","title":"choices(k=1, weights=None, cum_weights=None)","text":"

    Return a k sized list of members chosen from the dimension (with replacement).

    See https://docs.python.org/3/library/random.html#random.choices for more information.

    Returns:

    • \u2013

      Return a k sized list of members chosen from the dimension (with replacement).

    "},{"location":"class-dimension/#cubedpandas.schema.Dimension.sample","title":"sample(k=1, counts=None)","text":"

    Return a k sized list of unique members chosen from the dimension (without replacement).

    See https://docs.python.org/3/library/random.html#random.sample for more information.

    Returns:

    • \u2013

      Return a k sized list of unique members chosen from the dimension (without replacement).

    "},{"location":"class-measure/","title":"SalesDataModel class","text":"

    Represents a measure within a Cube. Each measure is mapped to a column in the underlying Pandas dataframe.

    "},{"location":"class-measure/#cubedpandas.schema.Measure.column","title":"column property","text":"

    Returns the column name in underlying Pandas dataframe the measure refers to.

    "},{"location":"class-measure/#cubedpandas.schema.Measure.df","title":"df: pd.DataFrame property","text":"

    Returns the underlying Pandas dataframe of the cube.

    "},{"location":"class-pandas-ext/","title":"Extension to Pandas","text":"

    lorem ipsum

    A Pandas extension that provides the CubedPandas 'cubed' accessor for Pandas dataframes.

    "},{"location":"class-pandas-ext/#cubedpandas.pandas_extension.CubedPandasAccessor.cube","title":"cube property","text":"

    Wraps a Pandas dataframe into a cube to provide convenient multi-dimensional access to the underlying dataframe for easy aggregation, filtering, slicing, reporting and data manipulation and write back.

    Parameters:

    • df \u2013

      The Pandas dataframe to be wrapped into the CubedPandas Cube object.

    • schema \u2013

      (optional) A schema that defines the dimensions and measures of the Cube. If not provided, the schema will be inferred from the dataframe if parameter infer_schema is set to True. For further details please refer to the documentation of the Schema class. Default value is None.

    • infer_schema \u2013

      (optional) If no schema is provided and infer_schema is set to True, a suitable schema will be inferred from the underlying dataframe. All numerical columns will be treated as measures, all other columns as dimensions. If this behaviour is not desired, a schema must be provided. Default value is True.

    • exclude \u2013

      (optional) Defines the columns that should be excluded from the cube if no schema is provided. If a column is excluded, it will not be part of the schema and can not be accessed through the cube. Excluded columns will be ignored during schema inference. Default value is None.

    • read_only \u2013

      (optional) Defines if write backs to the underlying dataframe are permitted. If read_only is set to True, write back attempts will raise a PermissionError. If read_only is set to False, write backs are permitted and will be pushed back to the underlying dataframe. Default value is True.

    • ignore_case \u2013

      (optional) If set to True, the case of member names will be ignored, 'Apple' and 'apple' will be treated as the same member. If set to False, member names are case-sensitive, 'Apple' and 'apple' will be treated as different members. Default value is True.

    • ignore_key_errors \u2013

      (optional) If set to True, key errors for members of dimensions will be ignored and cell values will return 0.0 or None if no matching record exists. If set to False, key errors will be raised as exceptions when accessing cell values for non-existing members. Default value is True.

    • caching \u2013

      (optional) A caching strategy to be applied for accessing the cube. The recommended value for almost all use cases is CachingStrategy.LAZY, which caches dimension members on first access. Caching can be beneficial for performance, but may also consume more memory. To cache all dimension members eagerly (on initialization of the cube), set this parameter to CachingStrategy.EAGER. Please refer to the documentation of 'CachingStrategy' for more information. Default value is CachingStrategy.LAZY.

    • caching_threshold \u2013

      (optional) The threshold as 'number of members' for EAGER caching only. If the number of distinct members in a dimension is below this threshold, the dimension will be cached eagerly, if caching is set to CachingStrategy.EAGER or CachingStrategy.FULL. Above this threshold, the dimension will be cached lazily. Default value is EAGER_CACHING_THRESHOLD, equivalent to 256 unique members per dimension.

    • eager_evaluation \u2013

      (optional) If set to True, the cube will evaluate the context eagerly, i.e. when the context is created. Eager evaluation is recommended for most use cases, as it simplifies debugging and error handling. If set to False, the cube will evaluate the context lazily, i.e. only when the value of a context is accessed/requested.

    Returns:

    • \u2013

      A new Cube object that wraps the dataframe.

    Raises:

    • PermissionError \u2013

      If writeback is attempted on a read-only Cube.

    • ValueError \u2013

      If the schema is not valid or does not match the dataframe or if invalid dimension, member, measure or address arguments are provided.

    Examples:

    >>> df = pd.value([{\"product\": [\"A\", \"B\", \"C\"]}, {\"value\": [1, 2, 3]}])\n>>> cdf = cubed(df)\n>>> cdf[\"product:B\"]\n
    "},{"location":"class-schema/","title":"SalesDataModel class","text":"

    Defines a multidimensional schema, for cell-based data access to a Pandas dataframe using a Cube.

    The schema defines the dimensions and measures of the cube and can be either inferred from the underlying Pandas dataframe automatically or defined explicitly. The schema can be validated against the Pandas dataframe to ensure the schema is valid for the table.

    "},{"location":"class-schema/#cubedpandas.schema.Schema.dimensions","title":"dimensions: DimensionCollection property","text":"

    Returns the dimensions of the schema.

    "},{"location":"class-schema/#cubedpandas.schema.Schema.measures","title":"measures: MeasureCollection property","text":"

    Returns the measures of the schema.

    "},{"location":"class-schema/#cubedpandas.schema.Schema.__init__","title":"__init__(df=None, schema=None, caching=CachingStrategy.LAZY)","text":"

    Initializes a new schema for a Cube upon a given Pandas dataframe. If the dataframe is not provided, the schema needs to be built manually and can also not be validated against the Pandas dataframe.

    For building a schema manually, you can either create a new schema from scratch or you can load, extend and modify an existing schema as defined by parameter schema. The parameter schema can either be another Schema object, a Python dictionary containing valid schema information, a json string containing valid schema information or a file name or path to a json file containing valid schema information.

    :param df: (optional) the Pandas dataframe to build the schema from or for. :param schema: (optional) a schema to initialize the Schema with. The parameter schema can either be another Schema object, a Python dictionary containing valid schema information, a json string containing valid schema information or a file name or path to a json file containing valid schema information. :param caching: The caching strategy to be used for the Cube. Default is CachingStrategy.LAZY. Please refer to the documentation of 'CachingStrategy' for more information.

    "},{"location":"class-schema/#cubedpandas.schema.Schema.validate","title":"validate(df)","text":"

    Validates the schema against an existing Pandas dataframe.

    If returned True, the schema is valid for the given Pandas dataframe and can be used to access its data. Otherwise, the schema is not valid and will or may lead to errors when accessing its data.

    :param df: The Pandas dataframe to validate the schema against.

    :return: Returns True if the schema is valid for the given Pandas dataframe, otherwise False.

    "},{"location":"class-schema/#cubedpandas.schema.Schema.infer_schema","title":"infer_schema(exclude=None)","text":"

    Infers a multidimensional schema from the Pandas dataframe of the Schema or another Pandas dataframe by analyzing the columns of the table and their contents.

    This process can be time-consuming for large tables. For such cases, it is recommended to infer the schema only from a sample of the records by setting parameter 'sample_records' to True. By default, the schema is inferred from and validated against all records.

    The inference process tries to identify the dimensions and their hierarchies of the cube as well as the measures of the cube. If no schema can be inferred, an exception is raised.

    By default, string, datetime and boolean columns are assumed to be dimension columns and numerical columns are assumed to be measures for cube computations. By default, all columns of the Pandas dataframe will be used to infer the schema. However, a subset of columns can be specified to infer the schema from. The subset needs to contain at least two columns, one for a single dimension and one for a single measure.

    For more complex tables it is possible or even likely that the resulting schema does not match your expectations or requirements. For such cases, you will need to build your schema manually. Please refer to the documentation for further details on how to build a schema manually.

    :param exclude: (optional) a list of either column names or ordinal column ids to exclude when inferring the schema.

    :return: Returns the inferred schema.

    "},{"location":"class-schema/#cubedpandas.schema.Schema.from_dict","title":"from_dict(dictionary) classmethod","text":"

    Creates a new schema from a dictionary containing schema information for a Cube. Please refer to the documentation for further details on valid schema definitions.

    :param dictionary: The dictionary containing the schema information. :return: Returns a new schema object. :exception: Raises an exception if the schema information is not valid or incomplete.

    "},{"location":"class-schema/#cubedpandas.schema.Schema.from_json","title":"from_json(json_string) classmethod","text":"

    Creates a new schema from a json string containing schema information for a Cube. If the json string is not valid and does not refer to a file that contains a valid schema in json format, an exception is raised. Please refer to the documentation for further details on valid schema definitions.

    :param json_string: The json string containing the schema information. :return: Returns a new schema object. :exception: Raises an exception if the schema information is not valid or incomplete.

    "},{"location":"class-schema/#cubedpandas.schema.Schema.to_dict","title":"to_dict()","text":"

    Converts the schema into a dictionary containing schema information for a Cube.

    :return: Returns a dictionary containing the schema information.

    "},{"location":"class-schema/#cubedpandas.schema.Schema.to_json","title":"to_json()","text":"

    Converts the schema into a json string containing schema information for a Cube.

    :return: Returns a json string containing the schema information.

    "},{"location":"class-schema/#cubedpandas.schema.Schema.save_as_json","title":"save_as_json(file_name)","text":"

    Saves the schema as a json file.

    :param file_name: The name of the file to save the schema to.

    "},{"location":"class-schema/#cubedpandas.schema.Schema.__len__","title":"__len__()","text":"

    Returns the number of dimensions of the schema.

    "},{"location":"contributing/","title":"Contributing","text":""},{"location":"contributing/#sorry-well-add-that-later","title":"Sorry, we'll add that later...","text":"

    The current focus of development is on the core features of CubedPandas. Once finished I'll work on this documentation...

    !!! Note: To stay tuned for updates and new releases of CubedPandas, please add a watch to the CubedPandas GitHub repo.

    Thomas

    "},{"location":"faq/","title":"FAQ","text":""},{"location":"faq/#sorry-well-add-that-later","title":"Sorry, we'll add that later...","text":"

    The current focus of development is on the core features of CubedPandas. Once finished I'll work on this documentation...

    !!! Note: To stay tuned for updates and new releases of CubedPandas, please add a watch to the CubedPandas GitHub repo.

    Thomas

    "},{"location":"installation/","title":"Installation","text":""},{"location":"installation/#cubedpandas-installation","title":"CubedPandas Installation","text":"

    As always with Python packages, the easiest way to install CubedPandas is by using pip.

    pip install cubedpandas\n

    After installing CubedPandas, you are ready to go.

    You will find the latest version of CubedPandas on PyPi. Alternatively, you can clone the CubedPandas GitHub repo and build it on your own or integrate it somehow into your own source code. Please check the requirements.txt file for the necessary dependencies.

    "},{"location":"license/","title":"License","text":""},{"location":"license/#sorry-well-add-that-later","title":"Sorry, we'll add that later...","text":"

    The current focus of development is on the core features of CubedPandas. Once finished I'll work on this documentation...

    !!! Note: To stay tuned for updates and new releases of CubedPandas, please add a watch to the CubedPandas GitHub repo.

    Thomas

    "},{"location":"method-cubed/","title":"Wrapping a Pandas DataFrame into a CubedPandas Cube","text":""},{"location":"method-cubed/#using-the-cubed-method","title":"Using the 'cubed()' Method","text":"

    The cubed function is the most convenient way to wrap and convert a Pandas dataframe into a CubedPandas cube. By the way, cdf is nice and short for a 'cubed data frame' following the Pandas convention of df for a 'data frame'.

    If no schema is provided when applying the cubed method, a schema will be automatically inferred from the DataFrame. By default, all numeric columns will be considered as measures, all other columns as dimensions of the cube.

    import pandas as pd\nfrom cubedpandas import cubed\n\ndf = pd.DataFrame({\"channel\": [\"Online\", \"Online\", \"Online\", \"Retail\", \"Retail\", \"Retail\"],\n                   \"product\": [\"Apple\",  \"Pear\",   \"Banana\", \"Apple\",  \"Pear\",   \"Banana\"],\n                   \"sales\":   [100,      150,      300,      200,      250,      350     ],})\ncdf = cubed(df)    \nprint(cdf.Online)  # returns 550 = 100 + 150 + 300\n

    Sometimes, e.g. if you want an integer column to be considered as a dimension not as a measure column, you need to provide a schema. Here's a simple example of how to define and use a schema, here identical to the schema that would be automatically inferred. For more information please refer to the Schema documentation.

    import pandas as pd\nfrom cubedpandas import cubed\n\ndf = pd.DataFrame({\"channel\": [\"Online\", \"Online\", \"Online\", \"Retail\", \"Retail\", \"Retail\"],\n                   \"product\": [\"Apple\",  \"Pear\",   \"Banana\", \"Apple\",  \"Pear\",   \"Banana\"],\n                   \"sales\":   [100,      150,      300,      200,      250,      350     ],})\nschema = {\"dimensions\": [{\"column\":\"channel\"}, {\"column\": \"product\"}],\n          \"measures\":   [{\"column\":\"sales\"}]}\ncdf = cubed(df, schema=schema)\nprint(cdf.Online)  # returns 550 = 100 + 150 + 300\n
    "},{"location":"method-cubed/#using-the-cubed-extension-for-python","title":"Using the 'cubed' extension for Python","text":"

    After CubedPandas has been loaded, e.g. by import cubedpandas, you can also directly use the cubed extension for Pandas. The only difference to the cubed() function is, that you need to use the cubed attribute of the Pandas DataFrame and either slice it with the [] operator or get access to the cube or any context using the . operator.

    import pandas as pd\nimport cubedpandas\n\ndf = pd.DataFrame({\"channel\": [\"Online\", \"Online\", \"Online\", \"Retail\", \"Retail\", \"Retail\"],\n                   \"product\": [\"Apple\",  \"Pear\",   \"Banana\", \"Apple\",  \"Pear\",   \"Banana\"],\n                   \"sales\":   [100,      150,      300,      200,      250,      350     ],})\n\ncdf = df.cubed.cube  # return a reference to the cube, just 'df.cubed' will not work.\n# or directly access any context the cube either by slicing with the [] operator\nx = df.cubed[\"Online\", \"Apple\", \"sales\"]\n# or by using the . operator\ny = df.cubed.Online.Apple.sales\n\nassert(x == y == 100)\n
    "},{"location":"method-cubed/#cubedpandas.common.cubed","title":"cubed(df, schema=None, infer_schema=True, exclude=None, caching=CachingStrategy.LAZY, caching_threshold=EAGER_CACHING_THRESHOLD, read_only=True)","text":"

    Wraps a Pandas dataframe into a cube to provide convenient multi-dimensional access to the underlying dataframe for easy aggregation, filtering, slicing, reporting and data manipulation and write back.

    Parameters:

    • df (DataFrame) \u2013

      The Pandas dataframe to be wrapped into the CubedPandas Cube object.

    • schema \u2013

      (optional) A schema that defines the dimensions and measures of the Cube. If not provided, the schema will be inferred from the dataframe if parameter infer_schema is set to True. For further details please refer to the documentation of the Schema class. Default value is None.

    • infer_schema (bool, default: True ) \u2013

      (optional) If no schema is provided and infer_schema is set to True, a suitable schema will be inferred from the underlying dataframe. All numerical columns will be treated as measures, all other columns as dimensions. If this behaviour is not desired, a schema must be provided. Default value is True.

    • exclude (str | list | tuple | None, default: None ) \u2013

      (optional) Defines the columns that should be excluded from the cube if no schema is provided. If a column is excluded, it will not be part of the schema and can not be accessed through the cube. Excluded columns will be ignored during schema inference. Default value is None.

    • caching (CachingStrategy, default: LAZY ) \u2013

      (optional) A caching strategy to be applied for accessing the cube. The recommended value for almost all use cases is CachingStrategy.LAZY, which caches dimension members on first access. Caching can be beneficial for performance, but may also consume more memory. To cache all dimension members eagerly (on initialization of the cube), set this parameter to CachingStrategy.EAGER. Please refer to the documentation of 'CachingStrategy' for more information. Default value is CachingStrategy.LAZY.

    • caching_threshold (int, default: EAGER_CACHING_THRESHOLD ) \u2013

      (optional) The threshold as 'number of members' for EAGER caching only. If the number of distinct members in a dimension is below this threshold, the dimension will be cached eagerly, if caching is set to CachingStrategy.EAGER or CachingStrategy.FULL. Above this threshold, the dimension will be cached lazily. Default value is EAGER_CACHING_THRESHOLD, equivalent to 256 unique members per dimension.

    • read_only (bool, default: True ) \u2013

      (optional) Defines if write backs to the underlying dataframe are permitted. If read_only is set to True, write back attempts will raise a PermissionError. If read_only is set to False, write backs are permitted and will be pushed back to the underlying dataframe. Default value is True.

    Returns:

    • \u2013

      A new Cube object that wraps the dataframe.

    Raises:

    • PermissionError \u2013

      If writeback is attempted on a read-only Cube.

    • ValueError \u2013

      If the schema is not valid or does not match the dataframe or if invalid dimension, member, measure or address arguments are provided.

    Examples:

    >>> df = pd.value([{\"product\": [\"A\", \"B\", \"C\"]}, {\"value\": [1, 2, 3]}])\n>>> cdf = cubed(df)\n>>> cdf[\"product:B\"]\n2\n
    "},{"location":"method-cubed/#cubedpandas.pandas_extension.CubedPandasAccessor","title":"CubedPandasAccessor","text":"

    A Pandas extension that provides the CubedPandas 'cubed' accessor for Pandas dataframes.

    "},{"location":"method-cubed/#cubedpandas.pandas_extension.CubedPandasAccessor.cube","title":"cube property","text":"

    Wraps a Pandas dataframe into a cube to provide convenient multi-dimensional access to the underlying dataframe for easy aggregation, filtering, slicing, reporting and data manipulation and write back.

    Parameters:

    • df \u2013

      The Pandas dataframe to be wrapped into the CubedPandas Cube object.

    • schema \u2013

      (optional) A schema that defines the dimensions and measures of the Cube. If not provided, the schema will be inferred from the dataframe if parameter infer_schema is set to True. For further details please refer to the documentation of the Schema class. Default value is None.

    • infer_schema \u2013

      (optional) If no schema is provided and infer_schema is set to True, a suitable schema will be inferred from the underlying dataframe. All numerical columns will be treated as measures, all other columns as dimensions. If this behaviour is not desired, a schema must be provided. Default value is True.

    • exclude \u2013

      (optional) Defines the columns that should be excluded from the cube if no schema is provided. If a column is excluded, it will not be part of the schema and can not be accessed through the cube. Excluded columns will be ignored during schema inference. Default value is None.

    • read_only \u2013

      (optional) Defines if write backs to the underlying dataframe are permitted. If read_only is set to True, write back attempts will raise a PermissionError. If read_only is set to False, write backs are permitted and will be pushed back to the underlying dataframe. Default value is True.

    • ignore_case \u2013

      (optional) If set to True, the case of member names will be ignored, 'Apple' and 'apple' will be treated as the same member. If set to False, member names are case-sensitive, 'Apple' and 'apple' will be treated as different members. Default value is True.

    • ignore_key_errors \u2013

      (optional) If set to True, key errors for members of dimensions will be ignored and cell values will return 0.0 or None if no matching record exists. If set to False, key errors will be raised as exceptions when accessing cell values for non-existing members. Default value is True.

    • caching \u2013

      (optional) A caching strategy to be applied for accessing the cube. The recommended value for almost all use cases is CachingStrategy.LAZY, which caches dimension members on first access. Caching can be beneficial for performance, but may also consume more memory. To cache all dimension members eagerly (on initialization of the cube), set this parameter to CachingStrategy.EAGER. Please refer to the documentation of 'CachingStrategy' for more information. Default value is CachingStrategy.LAZY.

    • caching_threshold \u2013

      (optional) The threshold as 'number of members' for EAGER caching only. If the number of distinct members in a dimension is below this threshold, the dimension will be cached eagerly, if caching is set to CachingStrategy.EAGER or CachingStrategy.FULL. Above this threshold, the dimension will be cached lazily. Default value is EAGER_CACHING_THRESHOLD, equivalent to 256 unique members per dimension.

    • eager_evaluation \u2013

      (optional) If set to True, the cube will evaluate the context eagerly, i.e. when the context is created. Eager evaluation is recommended for most use cases, as it simplifies debugging and error handling. If set to False, the cube will evaluate the context lazily, i.e. only when the value of a context is accessed/requested.

    Returns:

    • \u2013

      A new Cube object that wraps the dataframe.

    Raises:

    • PermissionError \u2013

      If writeback is attempted on a read-only Cube.

    • ValueError \u2013

      If the schema is not valid or does not match the dataframe or if invalid dimension, member, measure or address arguments are provided.

    Examples:

    >>> df = pd.value([{\"product\": [\"A\", \"B\", \"C\"]}, {\"value\": [1, 2, 3]}])\n>>> cdf = cubed(df)\n>>> cdf[\"product:B\"]\n
    "},{"location":"readme_mkdocs/","title":"CubedPandas website & documentation how-to guide","text":"

    by Thomas Zeutschler, June 2024

    "},{"location":"readme_mkdocs/#introduction","title":"Introduction","text":"

    The CubedPandas website & documentation is made with MkDocs. MkDocs is a static site generator that creates a website from markdown files and from the actual source code of the project. The CubedPandas website will be hosted on GitHub pages: https://zeutschler.github.io/cubedpandas/.

    The documentation is structured as follows:

    • Homepage - Short Intro to CubedPandas, features, installation, sample code, etc.
    • User documentation - The documentation will be used to explain the usage of the tool to the users.
    • Developer documentation - The documentation will be used to explain the architecture and the design of the tool to developers.
    • Blog - Posts and news about CubedPandas features, development, best practices, etc.
    "},{"location":"readme_mkdocs/#how-to-build-the-documentation","title":"How to build the documentation","text":"

    The documentation can be built by using the mkdocs command line tool from the root directory of the project:

    mkdocs build --clean --site-dir 'pages/'\n
    "},{"location":"readme_mkdocs/#how-to-run-and-view-the-documentation-locally","title":"How to run and view the documentation locally","text":"

    The documentation can be run and viewed locally by using the mkdocs command line tool from the root directory of the project

    mkdocs serve\n
    This will start a local web server that serves the documentation. Edits to the markdown files will be automatically reloaded in the browser. The documentation is then available at the following URL:

    http://127.0.0.1:8000/data-model-generator/\n
    To stop the local web server, press CTRL+C in the command line tool. That's all, enjoy!

    Additional information on how to use MkDocs can be found in the MkDocs documentation and in the following guide RealPython: Build Your Python Project Documentation With MkDocs.

    "},{"location":"release-notes/","title":"Release Notes","text":""},{"location":"release-notes/#sorry-well-add-that-later","title":"Sorry, we'll add that later...","text":"

    The current focus of development is on the core features of CubedPandas. Once finished I'll work on this documentation...

    !!! Note: To stay tuned for updates and new releases of CubedPandas, please add a watch to the CubedPandas GitHub repo.

    Thomas

    "},{"location":"thomas/","title":"About me","text":""},{"location":"thomas/#sorry-well-add-that-later","title":"Sorry, we'll add that later...","text":"

    The current focus of development is on the core features of CubedPandas. Once finished I'll work on this documentation...

    !!! Note: To stay tuned for updates and new releases of CubedPandas, please add a watch to the CubedPandas GitHub repo.

    Thomas

    "},{"location":"use-cases/","title":"Use Cases","text":"

    from PIL.ImageCms import profileToProfile

    "},{"location":"use-cases/#use-cases-for-cubedpandas","title":"Use Cases for CubedPandas","text":"

    CubedPandas is a general purpose library that aims to offer an easy, fast & natural approach to work with Pandas dataframes. But in some cases, CubedPandas can be particularly useful and valuable. Here are some examples.

    Note

    If you have a creative use case of CubedPandas that you would like to share, please let me know, I will then add them here. You can leave me a comment in the CubedPandas GitHub discussions, or by creating a GitHub issue.

    "},{"location":"use-cases/#novice-pandas-users","title":"Novice Pandas Users","text":"

    For novice users, CubedPandas can be a great help to get started with Pandas, as it hides some of the complexity and verbosity of Pandas dataframes. Especially for business users and citizen data analysts, who are not so familiar with Programming, CubedPandas can be much less intimidating than using Pandas.

    # Pandas code\nvalue = df.loc[(df['make'] == 'Audi') & (df['engine'] == 'hybrid'), 'price'].sum()\n\n# CubedPandas code\nvalue = df.cubed.Audi.hybrid.price\n
    "},{"location":"use-cases/#experienced-pandas-users","title":"Experienced Pandas Users","text":"

    For experienced users, CubedPandas can be a great productivity booster, as it allows you to write more compact, readable and maintainable code. Experts may use CubedPandas for filtering purposes only. Example:

    # Let's assume you have a data file with a 'changed' column, \n# containing timestamps like '2024-06-18T12:34:56'\n# To get all records that 'changed' yesterday, you could write:\ndf = pd.read_csv('data.csv').cubed.changed.yesterday.df\n
    "},{"location":"use-cases/#financial-data-analysis","title":"Financial Data Analysis","text":"

    When it's all about the aggregation of financial and business data, CubedPandas really shines. As multi-dimensional addresses are very close to our natural way of thinking, CubedPandas is a perfect fit for reporting, business intelligence and even (minimal) data warehousing.

    First, CubedPandas provides direct and intuitive access to aggregated figures, e.g.:

    c = cubed(df)\ntrucks = c.region.North_America.sbu.Trucks.sales\ndelta = trucks.this_year - trucks.last_year\n

    Second, CubedPandas can be used to create reports and pivot-tables, e.g., you can easily create a pivot table with the total sales per region and product:

    # Create a simple pivot table based on the above 'truck' filter with\n# 'salesrep' and 'customer' in the rows and the last and \n# actual month sales in the columns.\ntrucks.pivot(c.salesrep & c.customer, c.lastmonth.sales & c.actualmonth.sales)\n
    "},{"location":"use-cases/#data-quality-analysis","title":"Data Quality Analysis","text":"

    CubedPandas is also a great tool for data quality analysis. Due to the cell based data access, expected totals, missing values, duplicates, and other data quality issues can be easily checked.

    c = cubed(pd.read_csv('daily_delta.csv'))\nnan_count = c.revenue.NAN.count\ninconsistent_records = c[c.revenue_ < c.profit_]    \n
    "},{"location":"blog/","title":"Blog","text":""}]} \ No newline at end of file +{ + "config": { + "lang": [ + "en" + ], + "separator": "[\\s\\-]+", + "pipeline": [ + "stopWordFilter" + ] + }, + "docs": [ + { + "location": "", + "title": "CubedPandas Documentation", + "text": "

    Welcome to the CubedPandas Documentation site. Here you will find all the information you need to get started with CubedPandas, a powerful and easy-to-use Python library for working with Pandas dataframes.

    Note

    CubedPandas is in an early stage of development, your Ideas, Issues and Feedback are very welcome to make CubedPandas even more awesome. Many thanks!

    " + }, + { + "location": "#olap-comfort-meets-pandas-power", + "title": "OLAP comfort meets Pandas power!", + "text": "

    CubedPandas offers a new easy, fast & fun approach to navigate and analyze Pandas dataframes. CubedPandas is inspired by the powerful concepts of OLAP (Online Analytical Processing) and MDX (Multi-Dimensional Expressions) and aims to bring the comfort and power of OLAP to Pandas dataframes.

    For novice users, CubedPandas can be a great help to get started with Pandas, as it hides some of the complexity and verbosity of Pandas dataframes. For experienced users, CubedPandas can be a productivity booster, as it allows you to write more compact, readable and maintainable code. Just to give you a first idea, this Pandas code

    # Pandas: calculate the total revenue of all hybrid Audi cars\nvalue = df.loc[(df['make'] == 'Audi') & (df['engine'] == 'hybrid'), 'price'].sum()\n

    turns into this CubedPandas code

    # CubedPandas: calculate the total revenue of all hybrid Audi cars\nvalue = df.cubed.Audi.hybrid.price\n

    As CubedPandas does not duplicate data or modify the underlying dataframe and does not add any performance penalty - in some cases it can even boost Pandas performance by factors - it can be used in production without any concerns and should be of great help in many use cases.

    In Jupyter notebooks, CubedPandas will really start to shine. For further information, please visit the CubedPandas Documentation or try the included samples.

    " + }, + { + "location": "#getting-started", + "title": "Getting Started", + "text": "

    CubedPandas is available on pypi.org (https://pypi.org/project/cubedpandas/) and can be installed by

    pip install cubedpandas\n

    Using CubedPandas is as simple as wrapping any Pandas dataframe into a cube like this:

    import pandas as pd\nfrom cubedpandas import cubed\n\n# Create a dataframe with some sales data\ndf = pd.DataFrame({\"product\":  [\"Apple\",  \"Pear\",   \"Banana\", \"Apple\",  \"Pear\",   \"Banana\"],\n                   \"channel\":  [\"Online\", \"Online\", \"Online\", \"Retail\", \"Retail\", \"Retail\"],\n                   \"customer\": [\"Peter\",  \"Peter\",  \"Paul\",   \"Paul\",   \"Mary\",   \"Mary\"  ],\n                   \"mailing\":  [True,     False,    True,     False,    True,     False   ],\n                   \"revenue\":  [100,      150,      300,      200,      250,      350     ],\n                   \"cost\":     [50,       90,       150,      100,      150,      175     ]})\n\ncdf = cubed(df)  # Wrap your dataframe into a cube and start using it!\n

    CubedPandas automatically infers a multi-dimensional schema from your Pandas dataframe which defines a virtual Cube over the dataframe. By default, numeric columns of the dataframe are considered as Measures - the numeric values to analyse & aggregate - all other columns are considered as Dimensions - to filter, navigate and view the data. The individual values in a dimension column are called the Members of the dimension. In the example above, column channel becomes a dimension with the two members Online and Retail, revenue and cost are our measures.

    Although rarely required, you can also define your own schema. Schemas are quite powerful and flexible, as they will allow you to define dimensions and measures, aliases and (planned for upcoming releases) also custom aggregations, business logic, number formatting, linked cubes (star-schemas) and much more.

    " + }, + { + "location": "#context-please-so-i-will-give-you-data", + "title": "Context please, so I will give you data!", + "text": "

    One key feature of CubedPandas is an easy & intuitive access to individual Data Cells in multi-dimensional data space. To do so, you'll need to define a multi-dimensional Context so CubedPandas will evaluate, aggregate (sum by default) and return the requested value from the underlying dataframe.

    Context objects behave like normal numbers (float, int), so you can use them directly in arithmetic operations. In the following examples, all addresses will refer to the exactly same rows from the dataframe and thereby all return the same value of 100.

    # Let Pandas set the scene...\na = df.loc[(df[\"product\"] == \"Apple\") & (df[\"channel\"] == \"Online\") & (df[\"customer\"] == \"Peter\"), \"revenue\"].sum()\n\n# Can we do better with CubedPandas? \nb = cdf[\"product:Apple\", \"channel:Online\", \"customer:Peter\"].revenue  # explicit, readable, flexible and fast  \nc = cdf.product[\"Apple\"].channel[\"Online\"].customer[\n    \"Peter\"].revenue  # ...better, if column names are Python-compliant  \nd = cdf.product.Apple.channel.Online.customer.Peter.revenue  # ...even better, if member names are Python-compliant\n\n# If there are no ambiguities in your dataframe - what can be easily checked - then you can use this shorthand forms:\ne = cdf[\"Online\", \"Apple\", \"Peter\", \"revenue\"]\nf = cdf.Online.Apple.Peter.revenue\ng = cdf.Online.Apple.Peter  # as 'revenue' is the default (first) measure of the cube, it can be omitted\n\nassert a == b == c == d == e == f == g == 100\n

    Context objects also act as filters on the underlying dataframe. So you can also use CubedPandas for fast and easy filtering only, e.g. like this:

    df = df.cubed.product[\"Apple\"].channel[\"Online\"].df\ndf = df.cubed.Apple.Online.df  # short form, if column names are Python-compliant and there are no ambiguities\n
    " + }, + { + "location": "#pivot-drill-down-slice-dice", + "title": "Pivot, Drill-Down, Slice & Dice", + "text": "

    The Pandas pivot table is a very powerful tool. Unfortunately, it is quite verbose and very hard to master. CubedPandas offers the slice method to create pivot tables in a more intuitive and easy way, e.g. by default

    # Let's create a simple pivot table with the revenue for dimensions products and channels\ncdf.slice(rows=\"product\", columns=\"channel\", measures=\"revenue\")\n

    For further information, samples and a complete feature list as well as valuable tips and tricks, please visit the CubedPandas Documentation.

    " + }, + { + "location": "#your-feedback-ideas-and-support-are-very-welcome", + "title": "Your feedback, ideas and support are very welcome!", + "text": "

    Please help improve and extend CubedPandas with your feedback & ideas and use the CubedPandas GitHub Issues to request new features and report bugs. For general questions, discussions and feedback, please use the CubedPandas GitHub Discussions.

    If you have fallen in love with CubedPandas or find it otherwise valuable, please consider becoming a sponsor of the CubedPandas project so we can push the project forward faster and make CubedPandas even more awesome.

    ...happy cubing!

    " + }, + { + "location": "404/", + "title": "404", + "text": "" + }, + { + "location": "404/#sorry-well-add-that-later", + "title": "Sorry, we'll add that later...", + "text": "

    The current focus of development is on the core features of CubedPandas. Once finished I'll work on this documentation...

    !!! Note: To stay tuned for updates and new releases of CubedPandas, please add a watch to the CubedPandas GitHub repo.

    Thomas

    " + }, + { + "location": "advanced-usage/", + "title": "Advanced Usage", + "text": "" + }, + { + "location": "advanced-usage/#sorry-well-add-that-later", + "title": "Sorry, we'll add that later...", + "text": "

    The current focus of development is on the core features of CubedPandas. Once finished I'll work on this documentation...

    !!! Note: To stay tuned for updates and new releases of CubedPandas, please add a watch to the CubedPandas GitHub repo.

    Thomas

    " + }, + { + "location": "basic-usage/", + "title": "Basic Usage", + "text": "" + }, + { + "location": "basic-usage/#sorry-well-add-that-later", + "title": "Sorry, we'll add that later...", + "text": "

    The current focus of development is on the core features of CubedPandas. Once finished I'll work on this documentation...

    !!! Note: To stay tuned for updates and new releases of CubedPandas, please add a watch to the CubedPandas GitHub repo.

    Thomas

    " + }, + { + "location": "become-a-sponsor/", + "title": "Become a sponsor", + "text": "" + }, + { + "location": "become-a-sponsor/#sorry-well-add-that-later", + "title": "Sorry, we'll add that later...", + "text": "

    The current focus of development is on the core features of CubedPandas. Once finished I'll work on this documentation...

    !!! Note: To stay tuned for updates and new releases of CubedPandas, please add a watch to the CubedPandas GitHub repo.

    Thomas

    " + }, + { + "location": "best-practise/", + "title": "Best Practise", + "text": "

    from PIL.ImageCms import profileToProfile

    " + }, + { + "location": "best-practise/#use-cases-for-cubedpandas", + "title": "Use Cases for CubedPandas", + "text": "

    CubedPandas is a general purpose library that aims to offer an easy, fast & natural approach to work with Pandas dataframes. But in some cases, CubedPandas can be particularly useful and valuable. Here are some examples.

    Note

    If you have a creative use case of CubedPandas that you would like to share, please let me know, I will then add them here. You can leave me a comment in the CubedPandas GitHub discussions, or by creating a GitHub issue.

    " + }, + { + "location": "best-practise/#novice-pandas-users", + "title": "Novice Pandas Users", + "text": "

    For novice users, CubedPandas can be a great help to get started with Pandas, as it hides some of the complexity and verbosity of Pandas dataframes. Especially for business users and citizen data analysts, who are not so familiar with Programming, CubedPandas can be much less intimidating than using Pandas.

    # Pandas code\nvalue = df.loc[(df['make'] == 'Audi') & (df['engine'] == 'hybrid'), 'price'].sum()\n\n# CubedPandas code\nvalue = df.cubed.Audi.hybrid.price\n
    " + }, + { + "location": "best-practise/#experienced-pandas-users", + "title": "Experienced Pandas Users", + "text": "

    For experienced users, CubedPandas can be a great productivity booster, as it allows you to write more compact, readable and maintainable code. Experts may use CubedPandas for filtering purposes only. Example:

    # Let's assume you have a data file with a 'changed' column, \n# containing timestamps like '2024-06-18T12:34:56'\n# To get all records that 'changed' yesterday, you could write:\ndf = pd.read_csv('data.csv').cubed.changed.yesterday.df\n
    " + }, + { + "location": "best-practise/#financial-data-analysis", + "title": "Financial Data Analysis", + "text": "

    When it's all about the aggregation of financial and business data, CubedPandas really shines. As multi-dimensional addresses are very close to our natural way of thinking, CubedPandas is a perfect fit for reporting, business intelligence and even (minimal) data warehousing.

    First, CubedPandas provides direct and intuitive access to aggregated figures, e.g.:

    c = cubed(df)\ntrucks = c.region.North_America.sbu.Trucks.sales\ndelta = trucks.this_year - trucks.last_year\n

    Second, CubedPandas can be used to create reports and pivot-tables, e.g., you can easily create a pivot table with the total sales per region and product:

    # Create a simple pivot table based on the above 'truck' filter with\n# 'salesrep' and 'customer' in the rows and the last and \n# actual month sales in the columns.\ntrucks.pivot(c.salesrep & c.customer, c.lastmonth.sales & c.actualmonth.sales)\n
    " + }, + { + "location": "best-practise/#data-quality-analysis", + "title": "Data Quality Analysis", + "text": "

    CubedPandas is also a great tool for data quality analysis. Due to the cell based data access, expected totals, missing values, duplicates, and other data quality issues can be easily checked.

    c = cubed(pd.read_csv('daily_delta.csv'))\nnan_count = c.revenue.NAN.count\ninconsistent_records = c[c.revenue_ < c.profit_]    \n
    " + }, + { + "location": "best-practises/", + "title": "Best Practises", + "text": "" + }, + { + "location": "best-practises/#sorry-well-add-that-later", + "title": "Sorry, we'll add that later...", + "text": "

    The current focus of development is on the core features of CubedPandas. Once finished I'll work on this documentation...

    !!! Note: To stay tuned for updates and new releases of CubedPandas, please add a watch to the CubedPandas GitHub repo.

    Thomas

    " + }, + { + "location": "blog001_why_cpd/", + "title": "Blog001 why cpd", + "text": "" + }, + { + "location": "blog001_why_cpd/#sorry-well-add-that-later", + "title": "Sorry, we'll add that later...", + "text": "

    The current focus of development is on the core features of CubedPandas. Once finished I'll work on this documentation...

    !!! Note: To stay tuned for updates and new releases of CubedPandas, please add a watch to the CubedPandas GitHub repo.

    Thomas

    " + }, + { + "location": "class-context/", + "title": "Generator class", + "text": "

    Bases: SupportsFloat

    A context represents a multi-dimensional data context or area from within a cube. Context objects can be used to navigate and access the data of a cube and thereby the underlying dataframe.

    Cells behave like Python floats and return a numeric aggregation of the underlying data. They are intended to be used in mathematical operations.

    Samples

    cdf = cubed(df) value = cdf.A + cdf.B / 2 200 cdf.A *= 2

    " + }, + { + "location": "class-context/#cubedpandas.context.context.Context.function", + "title": "function: ContextFunction property", + "text": "

    Returns:

    • ContextFunction \u2013

      The aggregation function that will be applied to the current context.

    " + }, + { + "location": "class-context/#cubedpandas.context.context.Context.value", + "title": "value property writable", + "text": "

    Returns:

    • \u2013

      The sum value of the current context from the underlying cube.

    " + }, + { + "location": "class-context/#cubedpandas.context.context.Context.numeric_value", + "title": "numeric_value: float property", + "text": "

    Returns:

    • float \u2013

      The numerical value of the current context from the underlying cube.

    " + }, + { + "location": "class-context/#cubedpandas.context.context.Context.cube", + "title": "cube: Cube property", + "text": "

    Returns:

    • Cube \u2013

      The Cube object the Context belongs to.

    " + }, + { + "location": "class-context/#cubedpandas.context.context.Context.dimension", + "title": "dimension: Dimension property", + "text": "

    Returns:

    • Dimension \u2013

      The Dimension object the Context is currently referring to.

    " + }, + { + "location": "class-context/#cubedpandas.context.context.Context.parent", + "title": "parent: Context property", + "text": "

    Returns:

    • Context \u2013

      The parent Context of the current Context. If the current Context is the root Context of the cube,

    • Context \u2013

      then the parent Context will be None.

    " + }, + { + "location": "class-context/#cubedpandas.context.context.Context.df", + "title": "df: pd.DataFrame property", + "text": "

    Returns: Returns a new Pandas dataframe with all columns of the underlying dataframe of the Cube, but only with the rows that are represented by the current context.

    The returned dataframe is always a copy of the original dataframe, even if the context is not filtering any rows from the underlying dataframe. The returned dataframe can be used for further processing outside the cube.

    " + }, + { + "location": "class-context/#cubedpandas.context.context.Context.address", + "title": "address: any property", + "text": "

    Returns:

    • any \u2013

      The partial address of the context, as defined by the user

    • any \u2013

      This does not include the addresses defined by predecessor

    • any \u2013

      cells down to the cube.

    " + }, + { + "location": "class-context/#cubedpandas.context.context.Context.cube_address", + "title": "cube_address: str property", + "text": "

    Returns:

    • str \u2013

      The full address of the context, including all predecessor

    • str \u2013

      cells down to the cube.

    " + }, + { + "location": "class-context/#cubedpandas.context.context.Context.measure", + "title": "measure: Measure property writable", + "text": "

    Returns:

    • Measure \u2013

      The Measure object the Context is currently referring to.

    • Measure \u2013

      The measure refers to a column in the underlying dataframe

    • Measure \u2013

      that is used to calculate the value of the context.

    " + }, + { + "location": "class-context/#cubedpandas.context.context.Context.row_mask", + "title": "row_mask: np.ndarray | None property", + "text": "

    Returns:

    • ndarray | None \u2013

      The row mask of the context. The row mask is represented by a Numpy ndarray

    • ndarray | None \u2013

      of the indexes of the rows represented by the current context. The row mask can be used

    • ndarray | None \u2013

      for subsequent processing of the underlying dataframe outside the cube.

    " + }, + { + "location": "class-context/#cubedpandas.context.context.Context.member_mask", + "title": "member_mask: np.ndarray | None property", + "text": "

    Returns:

    • ndarray | None \u2013

      The member mask of the context. If the context refers to a member or a set of members from a dimension.

    • ndarray | None \u2013

      then a Numpy ndarray containing the indexes of the rows representing the members is returned.

    • ndarray | None \u2013

      None is returned otherwise.

    • ndarray | None \u2013

      The row mask can be used for subsequent processing of the underlying dataframe outside the cube.

    " + }, + { + "location": "class-context/#cubedpandas.context.context.Context.row_mask_inverse", + "title": "row_mask_inverse: np.ndarray property", + "text": "

    Returns:

    • ndarray \u2013

      The inverted row mask of the context. The inverted row mask is represented by a Numpy ndarray

    • ndarray \u2013

      of the indexes of the rows NOT represented by the current context. The inverted row mask

    • ndarray \u2013

      can be used for subsequent processing of the underlying dataframe outside the cube.

    " + }, + { + "location": "class-context/#cubedpandas.context.context.Context.__init__", + "title": "__init__(cube, address, parent=None, row_mask=None, member_mask=None, measure=None, dimension=None, function=ContextFunction.SUM, resolve=True, filtered=False, dynamic_attribute=False)", + "text": "

    Initializes a new Context object. For internal use only. Raises: ValueError: If the address is invalid and does not refer to a dimension, member or measure of the cube.

    " + }, + { + "location": "class-context/#cubedpandas.context.context.Context.set_value", + "title": "set_value(value, allocation_function=ContextAllocation.DISTRIBUTE)", + "text": "

    Writes a value to the current context of the cube down to the underlying dataframe. The allocation method can be chosen.

    Parameters:

    • value \u2013

      The value to be written to the cube.

    • allocation_function (ContextAllocation, default: DISTRIBUTE ) \u2013

      The allocation function to be used for writing the value to the cube.

    Returns: The new value of the current context from the underlying cube.

    " + }, + { + "location": "class-context/#cubedpandas.context.context.Context.top", + "title": "top(n)", + "text": "

    Returns the top n members of the current context. Args: n: The number of top members to be returned. Returns: A list of the top n members of the current context.

    " + }, + { + "location": "class-context/#cubedpandas.context.context.Context.bottom", + "title": "bottom(n)", + "text": "

    Returns the bottom n members of the current context. Args: n: The number of bottom members to be returned. Returns: A list of the bottom n members of the current context.

    " + }, + { + "location": "class-context/#cubedpandas.context.context.Context.__getattr__", + "title": "__getattr__(name)", + "text": "

    Dynamically resolves member from the cube and predecessor cells.

    " + }, + { + "location": "class-context/#cubedpandas.context.context.Context.__getitem__", + "title": "__getitem__(address)", + "text": "

    Returns a nested context of the cube and for a given address. Subsequent nested cells can be seen as subsequent filters upon the underlying dataframe.

    Parameters:

    • address \u2013

      A valid cube address. Please refer to the documentation for further details.

    Returns:

    • \u2013

      A Context object that represents the cube data related to the address

    • \u2013

      and all predecessor cells down to the cube.

    Raises:

    • ValueError \u2013

      If the address is not valid or can not be resolved.

    " + }, + { + "location": "class-context/#cubedpandas.context.context.Context.__setitem__", + "title": "__setitem__(address, value)", + "text": "

    Sets a value for a given address in the cube. Args: address: A valid cube address. Please refer to the documentation for further details. value: The value to be set for the data represented by the address. Raises: PermissionError: If write back is attempted on a read-only Cube.

    " + }, + { + "location": "class-context/#cubedpandas.context.context.Context.__delitem__", + "title": "__delitem__(address)", + "text": "

    Deletes the records represented by the given address from the underlying dataframe of the cube. Args: address: A valid cube address. Please refer to the documentation for further details. Raises: PermissionError: If write back is attempted on a read-only Cube.

    " + }, + { + "location": "class-context/#cubedpandas.context.context.Context.slice", + "title": "slice(rows=None, columns=None, config=None)", + "text": "

    Returns a new slice for the context. A slice represents a table-alike view to data in the cube. Typically, a slice has rows, columns and filters, comparable to an Excel PivotTable. Useful for printing in Jupyter, visual data analysis and reporting purposes. Slices can be easily 'navigated' by setting and changing rows, columns and filters.

    Please refer to the documentation of the Slice class for further details.

    Parameters:

    • rows \u2013

      The rows of the slice. Can be one or more dimensions with or without a member definition, or no dimension.

    • columns \u2013

      The columns of the slice. Can be one or more dimensions with or without a member definition, or no dimension.

    • filters \u2013

      The filters of the slice. Can be one or more dimensions with or without a member definition, or no dimension.

    • config \u2013

      (optional) A slice configuration as a dictionary, a json string or a path to an existing file containing the configuration. Slice configurations can be used to define a more complex layout. Please refer to the documentation of the Slice class for further details.

    Samples

    cdf = cubed(df) cdf.slice(rows=\"product\", columns=\"region\", filters={\"year\": 2020})

    " + }, + { + "location": "class-context/#cubedpandas.context.context.Context.slice--year-2000", + "title": "year: 2000", + "text": "" + }, + { + "location": "class-context/#cubedpandas.context.context.Context.slice--all-north-south", + "title": "| | (all) | North | South |", + "text": "

    | (all) | 550 | 300 | 250 | | Apple | 200 | 100 | 100 | | Banana | 350 | 200 | 150 |

    " + }, + { + "location": "class-context/#cubedpandas.context.context.Context.filter", + "title": "filter(filter)", + "text": "

    Filters the current context by a given filter expression, criteria or callable function. Args: filter: The filter expression, criteria or callable function to be used for filtering the context. Returns: A new context with the filtered data.

    " + }, + { + "location": "class-cube/", + "title": "Cube class", + "text": "

    Wraps a Pandas dataframes into a cube to provide convenient multi-dimensional access to the underlying dataframe for easy aggregation, filtering, slicing, reporting and data manipulation and write back. A schema, that defines the dimensions and measures of the Cube, can either be inferred automatically from the underlying dataframe (default) or defined explicitly.

    " + }, + { + "location": "class-cube/#cubedpandas.cube.Cube.settings", + "title": "settings: CubeSettings property", + "text": "

    Returns:

    • CubeSettings \u2013

      The settings of the Cube.

    " + }, + { + "location": "class-cube/#cubedpandas.cube.Cube.measures", + "title": "measures: MeasureCollection property", + "text": "

    Returns:

    • MeasureCollection \u2013

      The measures available within or defined for the Cube.

    " + }, + { + "location": "class-cube/#cubedpandas.cube.Cube.ambiguities", + "title": "ambiguities: Ambiguities property", + "text": "

    Returns:

    • Ambiguities \u2013

      An Ambiguities object that provides information about ambiguous data types in the underlying dataframe.

    " + }, + { + "location": "class-cube/#cubedpandas.cube.Cube.linked_cubes", + "title": "linked_cubes: CubeLinks property", + "text": "

    Returns:

    • CubeLinks \u2013

      A list of linked cubes that are linked to this cube.

    " + }, + { + "location": "class-cube/#cubedpandas.cube.Cube.schema", + "title": "schema: Schema property", + "text": "

    Returns:

    • Schema \u2013

      The Schema of the Cube which defines the dimensions and measures of the Cube.

    " + }, + { + "location": "class-cube/#cubedpandas.cube.Cube.df", + "title": "df: pd.DataFrame property", + "text": "

    Returns: The underlying Pandas dataframe of the Cube.

    " + }, + { + "location": "class-cube/#cubedpandas.cube.Cube.dimensions", + "title": "dimensions: DimensionCollection property", + "text": "

    Returns:

    • DimensionCollection \u2013

      The dimensions available through the Cube.

    " + }, + { + "location": "class-cube/#cubedpandas.cube.Cube.size_in_bytes", + "title": "size_in_bytes: int property", + "text": "

    Returns: The size in bytes allocated by the Cube object instance. The memory allocation by the underlying dataframe is not included.

    " + }, + { + "location": "class-cube/#cubedpandas.cube.Cube.__init__", + "title": "__init__(df, schema=None, infer_schema=True, exclude=None, read_only=True, ignore_member_key_errors=True, ignore_case=True, ignore_key_errors=True, caching=CachingStrategy.LAZY, caching_threshold=EAGER_CACHING_THRESHOLD, eager_evaluation=True)", + "text": "

    Wraps a Pandas dataframes into a cube to provide convenient multi-dimensional access to the underlying dataframe for easy aggregation, filtering, slicing, reporting and data manipulation and write back.

    Parameters:

    • df (DataFrame) \u2013

      The Pandas dataframe to be wrapped into the CubedPandas Cube object.

    • schema \u2013

      (optional) A schema that defines the dimensions and measures of the Cube. If not provided, the schema will be inferred from the dataframe if parameter infer_schema is set to True. For further details please refer to the documentation of the Schema class. Default value is None.

    • infer_schema (bool, default: True ) \u2013

      (optional) If no schema is provided and infer_schema is set to True, a suitable schema will be inferred from the underlying dataframe. All numerical columns will be treated as measures, all other columns as dimensions. If this behaviour is not desired, a schema must be provided. Default value is True.

    • exclude (str | list | tuple | None, default: None ) \u2013

      (optional) Defines the columns that should be excluded from the cube if no schema is provided. If a column is excluded, it will not be part of the schema and can not be accessed through the cube. Excluded columns will be ignored during schema inference. Default value is None.

    • read_only (bool, default: True ) \u2013

      (optional) Defines if write backs to the underlying dataframe are permitted. If read_only is set to True, write back attempts will raise a PermissionError. If read_only is set to False, write backs are permitted and will be pushed back to the underlying dataframe. Default value is True.

    • ignore_case (bool, default: True ) \u2013

      (optional) If set to True, the case of member names will be ignored, 'Apple' and 'apple' will be treated as the same member. If set to False, member names are case-sensitive, 'Apple' and 'apple' will be treated as different members. Default value is True.

    • ignore_key_errors (bool, default: True ) \u2013

      (optional) If set to True, key errors for members of dimensions will be ignored and cell values will return 0.0 or None if no matching record exists. If set to False, key errors will be raised as exceptions when accessing cell values for non-existing members. Default value is True.

    • caching (CachingStrategy, default: LAZY ) \u2013

      (optional) A caching strategy to be applied for accessing the cube. The recommended value for almost all use cases is CachingStrategy.LAZY, which caches dimension members on first access. Caching can be beneficial for performance, but may also consume more memory. To cache all dimension members eagerly (on initialization of the cube), set this parameter to CachingStrategy.EAGER. Please refer to the documentation of 'CachingStrategy' for more information. Default value is CachingStrategy.LAZY.

    • caching_threshold (int, default: EAGER_CACHING_THRESHOLD ) \u2013

      (optional) The threshold as 'number of members' for EAGER caching only. If the number of distinct members in a dimension is below this threshold, the dimension will be cached eagerly, if caching is set to CachingStrategy.EAGER or CachingStrategy.FULL. Above this threshold, the dimension will be cached lazily. Default value is EAGER_CACHING_THRESHOLD, equivalent to 256 unique members per dimension.

    • eager_evaluation (bool, default: True ) \u2013

      (optional) If set to True, the cube will evaluate the context eagerly, i.e. when the context is created. Eager evaluation is recommended for most use cases, as it simplifies debugging and error handling. If set to False, the cube will evaluate the context lazily, i.e. only when the value of a context is accessed/requested.

    Returns:

    • \u2013

      A new Cube object that wraps the dataframe.

    Raises:

    • PermissionError \u2013

      If writeback is attempted on a read-only Cube.

    • ValueError \u2013

      If the schema is not valid or does not match the dataframe or if invalid dimension, member, measure or address arguments are provided.

    Examples:

    >>> df = pd.DataFrame({\"product\": [\"A\", \"B\", \"C\"], \"value\": [1, 2, 3]})\n>>> cdf = cubed(df)\n>>> cdf[\"product:B\"]\n2\n
    " + }, + { + "location": "class-cube/#cubedpandas.cube.Cube.__len__", + "title": "__len__()", + "text": "

    Returns:

    • \u2013

      The number of records in the underlying dataframe of the Cube.

    " + }, + { + "location": "class-cube/#cubedpandas.cube.Cube.clear_cache", + "title": "clear_cache()", + "text": "

    Clears the cache of the Cube for all dimensions.

    " + }, + { + "location": "class-cube/#cubedpandas.cube.Cube.__getattr__", + "title": "__getattr__(name)", + "text": "

    Dynamically resolves dimensions, measure or member from the cube. This enables a more natural access to the cube data using the Python dot notation.

    If the name is not a valid Python identifier, i.e. it contains special characters or whitespaces or starts with a number, then the slicer method needs to be used to resolve the name, e.g., if 12 data % is the name of a column or value in a dataframe, then cube[\"12 data %\"] needs to be used to return the dimension, measure or column.

    Parameters:

    • name \u2013

      Existing Name of a dimension, member or measure in the cube.

    Returns:

    • Context | CubeContext \u2013

      A Cell object that represents the cube data related to the address.

    Samples

    cdf = cubed(df) cdf.Online.Apple.cost 50

    " + }, + { + "location": "class-cube/#cubedpandas.cube.Cube.__getitem__", + "title": "__getitem__(address)", + "text": "

    Returns a cell of the cube for a given address. Args: address: A valid cube address. Please refer to the documentation for further details.

    Returns:

    • Context \u2013

      A Cell object that represents the cube data related to the address.

    Raises:

    • ValueError \u2013

      If the address is not valid or can not be resolved.

    " + }, + { + "location": "class-cube/#cubedpandas.cube.Cube.__setitem__", + "title": "__setitem__(address, value)", + "text": "

    Sets a value for a given address in the cube. Args: address: A valid cube address. Please refer to the documentation for further details. value: The value to be set for the data represented by the address. Raises: PermissionError: If write back is attempted on a read-only Cube.

    " + }, + { + "location": "class-cube/#cubedpandas.cube.Cube.__delitem__", + "title": "__delitem__(address)", + "text": "

    Deletes the records represented by the given address from the underlying dataframe of the cube. Args: address: A valid cube address. Please refer to the documentation for further details. Raises: PermissionError: If write back is attempted on a read-only Cube.

    " + }, + { + "location": "class-cube/#cubedpandas.cube.Cube.slice", + "title": "slice(rows=None, columns=None, config=None)", + "text": "

    Returns a new slice for the cube. A slice represents a table-like view of the data in the cube. Typically, a slice has rows, columns and filters, comparable to an Excel PivotTable. Useful for printing in Jupyter, visual data analysis and reporting purposes. Slices can be easily 'navigated' by setting and changing rows, columns and filters.

    Please refer to the documentation of the Slice class for further details.

    Samples

    cdf = cubed(df) cdf.slice(rows=\"product\", columns=\"region\", filters={\"year\": 2020})

    " + }, + { + "location": "class-cube/#cubedpandas.cube.Cube.slice--year-2000", + "title": "year: 2000", + "text": "" + }, + { + "location": "class-cube/#cubedpandas.cube.Cube.slice--all-north-south", + "title": "| | (all) | North | South |", + "text": "

    | (all) | 550 | 300 | 250 | | Apple | 200 | 100 | 100 | | Banana | 350 | 200 | 150 |

    " + }, + { + "location": "class-dimension/", + "title": "Dimension class", + "text": "

    Bases: Iterable, ABC

    Represents a dimension of a cube, mapped to a column in the underlying Pandas dataframe.

    " + }, + { + "location": "class-dimension/#cubedpandas.schema.Dimension.df", + "title": "df: pd.DataFrame property", + "text": "

    Returns the underlying Pandas dataframe the dimension/column refers to.

    " + }, + { + "location": "class-dimension/#cubedpandas.schema.Dimension.members", + "title": "members: list property", + "text": "

    Returns the list of members of the dimension.

    " + }, + { + "location": "class-dimension/#cubedpandas.schema.Dimension.member_set", + "title": "member_set: set property", + "text": "

    Returns the set of members of the dimension.

    " + }, + { + "location": "class-dimension/#cubedpandas.schema.Dimension.column", + "title": "column property", + "text": "

    Returns the column name in the underlying Pandas dataframe the dimension refers to.

    " + }, + { + "location": "class-dimension/#cubedpandas.schema.Dimension.name", + "title": "name property", + "text": "

    Returns the name (column name in the underlying Pandas dataframe) of the dimension.

    " + }, + { + "location": "class-dimension/#cubedpandas.schema.Dimension.dtype", + "title": "dtype property", + "text": "

    Returns the Pandas data type of the dimension column.

    " + }, + { + "location": "class-dimension/#cubedpandas.schema.Dimension.__init__", + "title": "__init__(df, column, caching=CachingStrategy.LAZY)", + "text": "

    Initializes a new Dimension from a Pandas dataframe and a column name.

    " + }, + { + "location": "class-dimension/#cubedpandas.schema.Dimension.__getattr__", + "title": "__getattr__(name)", + "text": "

    Dynamically resolves a Filter based on member names from the dimension. This enables a more natural access to the cube data using the Python dot notation.

    Member names need to be valid Python identifiers/variable names. CubedPandas applies the following rules to resolve member names: - If a member name is also a valid Python identifier, it can be used directly. e.g., Apple - Member name resolving is case-insensitive, e.g., apple will resolve Apple. - White spaces in member names are replaced by underscores, e.g., best_offer will resolve best offer. - Leading numbers in a member name are replaced by underscores, e.g., _2_cute will resolve 2 cute. - Leading and trailing underscores are ignored/removed, e.g., _hello_ will resolve hello. - All other special characters are removed, e.g., 12/4 cars is the same as 124_cars.

    • If the name is not a valid Python identifier (e.g. contains special characters), the slicer method needs to be used to resolve the member name. e.g., 12/4 cars is a valid name for a value

    If the name is not a valid Python identifier (e.g. contains special characters), the slicer method needs to be used to resolve the member name. e.g., 12/4 cars is a valid name for a value in a Pandas dataframe column, but not a valid Python identifier/variable name, hence dimension[\"12/4 cars\"] needs to be used to return the member.

    Parameters:

    • name \u2013

      Name of a member or measure in the cube.

    Returns:

    • \u2013

      A Cell object that represents the cube data related to the address.

    Samples

    cdf = cubed(df) cdf.Online.Apple.cost 50

    " + }, + { + "location": "class-dimension/#cubedpandas.schema.Dimension.clear_cache", + "title": "clear_cache()", + "text": "

    Clears the cache of the Dimension.

    " + }, + { + "location": "class-dimension/#cubedpandas.schema.Dimension.wildcard_filter", + "title": "wildcard_filter(pattern)", + "text": "

    Returns a list of members that match the given wildcard pattern.

    Parameters:

    • pattern \u2013

      A wildcard pattern to filter the dimension members.

    Returns:

    • (bool, list) \u2013

      A new DimensionFilter object.

    " + }, + { + "location": "class-dimension/#cubedpandas.schema.Dimension.count", + "title": "count(member)", + "text": "

    Returns the number of rows in the underlying dataframe where the dimension column contains the given member.

    " + }, + { + "location": "class-dimension/#cubedpandas.schema.Dimension.choice", + "title": "choice()", + "text": "

    Return a random member from the dimension.

    See https://docs.python.org/3/library/random.html#random.choice for more information.

    Returns:

    • \u2013

      Return a random member from the dimension.

    " + }, + { + "location": "class-dimension/#cubedpandas.schema.Dimension.choices", + "title": "choices(k=1, weights=None, cum_weights=None)", + "text": "

    Return a k sized list of members chosen from the dimension (with replacement).

    See https://docs.python.org/3/library/random.html#random.choices for more information.

    Returns:

    • \u2013

      Return a k sized list of members chosen from the dimension (with replacement).

    " + }, + { + "location": "class-dimension/#cubedpandas.schema.Dimension.sample", + "title": "sample(k=1, counts=None)", + "text": "

    Return a k sized list of unique members chosen from the dimension (without replacement).

    See https://docs.python.org/3/library/random.html#random.sample for more information.

    Returns:

    • \u2013

      Return a k sized list of unique members chosen from the dimension (without replacement).

    " + }, + { + "location": "class-measure/", + "title": "Measure class", + "text": "

    Represents a measure within a Cube. Each measure is mapped to a column in the underlying Pandas dataframe.

    " + }, + { + "location": "class-measure/#cubedpandas.schema.Measure.column", + "title": "column property", + "text": "

    Returns the column name in underlying Pandas dataframe the measure refers to.

    " + }, + { + "location": "class-measure/#cubedpandas.schema.Measure.df", + "title": "df: pd.DataFrame property", + "text": "

    Returns the underlying Pandas dataframe of the cube.

    " + }, + { + "location": "class-pandas-ext/", + "title": "Extension to Pandas", + "text": "

    lorem ipsum

    A Pandas extension that provides the CubedPandas 'cubed' accessor for Pandas dataframes.

    " + }, + { + "location": "class-pandas-ext/#cubedpandas.pandas_extension.CubedPandasAccessor.cube", + "title": "cube property", + "text": "

    Wraps a Pandas dataframes into a cube to provide convenient multi-dimensional access to the underlying dataframe for easy aggregation, filtering, slicing, reporting and data manipulation and write back.

    Parameters:

    • df \u2013

      The Pandas dataframe to be wrapped into the CubedPandas Cube object.

    • schema \u2013

      (optional) A schema that defines the dimensions and measures of the Cube. If not provided, the schema will be inferred from the dataframe if parameter infer_schema is set to True. For further details please refer to the documentation of the Schema class. Default value is None.

    • infer_schema \u2013

      (optional) If no schema is provided and infer_schema is set to True, a suitable schema will be inferred from the underlying dataframe. All numerical columns will be treated as measures, all other columns as dimensions. If this behaviour is not desired, a schema must be provided. Default value is True.

    • exclude \u2013

      (optional) Defines the columns that should be excluded from the cube if no schema is provided. If a column is excluded, it will not be part of the schema and can not be accessed through the cube. Excluded columns will be ignored during schema inference. Default value is None.

    • read_only \u2013

      (optional) Defines if write backs to the underlying dataframe are permitted. If read_only is set to True, write back attempts will raise a PermissionError. If read_only is set to False, write backs are permitted and will be pushed back to the underlying dataframe. Default value is True.

    • ignore_case \u2013

      (optional) If set to True, the case of member names will be ignored, 'Apple' and 'apple' will be treated as the same member. If set to False, member names are case-sensitive, 'Apple' and 'apple' will be treated as different members. Default value is True.

    • ignore_key_errors \u2013

      (optional) If set to True, key errors for members of dimensions will be ignored and cell values will return 0.0 or None if no matching record exists. If set to False, key errors will be raised as exceptions when accessing cell values for non-existing members. Default value is True.

    • caching \u2013

      (optional) A caching strategy to be applied for accessing the cube. The recommended value for almost all use cases is CachingStrategy.LAZY, which caches dimension members on first access. Caching can be beneficial for performance, but may also consume more memory. To cache all dimension members eagerly (on initialization of the cube), set this parameter to CachingStrategy.EAGER. Please refer to the documentation of 'CachingStrategy' for more information. Default value is CachingStrategy.LAZY.

    • caching_threshold \u2013

      (optional) The threshold as 'number of members' for EAGER caching only. If the number of distinct members in a dimension is below this threshold, the dimension will be cached eagerly, if caching is set to CachingStrategy.EAGER or CachingStrategy.FULL. Above this threshold, the dimension will be cached lazily. Default value is EAGER_CACHING_THRESHOLD, equivalent to 256 unique members per dimension.

    • eager_evaluation \u2013

      (optional) If set to True, the cube will evaluate the context eagerly, i.e. when the context is created. Eager evaluation is recommended for most use cases, as it simplifies debugging and error handling. If set to False, the cube will evaluate the context lazily, i.e. only when the value of a context is accessed/requested.

    Returns:

    • \u2013

      A new Cube object that wraps the dataframe.

    Raises:

    • PermissionError \u2013

      If writeback is attempted on a read-only Cube.

    • ValueError \u2013

      If the schema is not valid or does not match the dataframe or if invalid dimension, member, measure or address arguments are provided.

    Examples:

    >>> df = pd.DataFrame({\"product\": [\"A\", \"B\", \"C\"], \"value\": [1, 2, 3]})\n>>> cdf = cubed(df)\n>>> cdf[\"product:B\"]\n
    " + }, + { + "location": "class-schema/", + "title": "Schema class", + "text": "

    Defines a multidimensional schema, for cell-based data access to a Pandas dataframe using a Cube.

    The schema defines the dimensions and measures of the cube and can be either inferred from the underlying Pandas dataframe automatically or defined explicitly. The schema can be validated against the Pandas dataframe to ensure the schema is valid for the table.

    " + }, + { + "location": "class-schema/#cubedpandas.schema.Schema.dimensions", + "title": "dimensions: DimensionCollection property", + "text": "

    Returns the dimensions of the schema.

    " + }, + { + "location": "class-schema/#cubedpandas.schema.Schema.measures", + "title": "measures: MeasureCollection property", + "text": "

    Returns the measures of the schema.

    " + }, + { + "location": "class-schema/#cubedpandas.schema.Schema.__init__", + "title": "__init__(df=None, schema=None, caching=CachingStrategy.LAZY)", + "text": "

    Initializes a new schema for a Cube upon a given Pandas dataframe. If the dataframe is not provided, the schema needs to be built manually and can also not be validated against the Pandas dataframe.

    For building a schema manually, you can either create a new schema from scratch or you can load, extend and modify an existing schema as defined by parameter schema. The parameter schema can either be another Schema object, a Python dictionary containing valid schema information, a json string containing valid schema information or a file name or path to a json file containing valid schema information.

    :param df: (optional) the Pandas dataframe to build the schema from or for. :param schema: (optional) a schema to initialize the Schema with. The parameter schema can either be another Schema object, a Python dictionary containing valid schema information, a json string containing valid schema information or a file name or path to a json file containing valid schema information. :param caching: The caching strategy to be used for the Cube. Default is CachingStrategy.LAZY. Please refer to the documentation of 'CachingStrategy' for more information.

    " + }, + { + "location": "class-schema/#cubedpandas.schema.Schema.validate", + "title": "validate(df)", + "text": "

    Validates the schema against an existing Pandas dataframe.

    If returned True, the schema is valid for the given Pandas dataframe and can be used to access its data. Otherwise, the schema is not valid and will or may lead to errors when accessing its data.

    :param df: The Pandas dataframe to validate the schema against.

    :return: Returns True if the schema is valid for the given Pandas dataframe, otherwise False.

    " + }, + { + "location": "class-schema/#cubedpandas.schema.Schema.infer_schema", + "title": "infer_schema(exclude=None)", + "text": "

    Infers a multidimensional schema from the Pandas dataframe of the Schema or another Pandas dataframe by analyzing the columns of the table and their contents.

    This process can be time-consuming for large tables. For such cases, it is recommended to infer the schema only from a sample of the records by setting parameter 'sample_records' to True. By default, the schema is inferred from and validated against all records.

    The inference process tries to identify the dimensions and their hierarchies of the cube as well as the measures of the cube. If no schema can be inferred, an exception is raised.

    By default, string, datetime and boolean columns are assumed to be dimension columns and numerical columns are assumed to be measures for cube computations. By default, all columns of the Pandas dataframe will be used to infer the schema. However, a subset of columns can be specified to infer the schema from. The subset needs to contain at least two columns, one for a single dimension and one for a single measure.

    For more complex tables it is possible or even likely that the resulting schema does not match your expectations or requirements. For such cases, you will need to build your schema manually. Please refer to the documentation for further details on how to build a schema manually.

    :param exclude: (optional) a list of either column names or ordinal column ids to exclude when inferring the schema.

    :return: Returns the inferred schema.

    " + }, + { + "location": "class-schema/#cubedpandas.schema.Schema.from_dict", + "title": "from_dict(dictionary) classmethod", + "text": "

    Creates a new schema from a dictionary containing schema information for a Cube. Please refer to the documentation for further details on valid schema definitions.

    :param dictionary: The dictionary containing the schema information. :return: Returns a new schema object. :exception: Raises an exception if the schema information is not valid or incomplete.

    " + }, + { + "location": "class-schema/#cubedpandas.schema.Schema.from_json", + "title": "from_json(json_string) classmethod", + "text": "

    Creates a new schema from a json string containing schema information for a Cube. If the json string is not valid and does not refer to a file that contains a valid schema in json format, an exception is raised. Please refer to the documentation for further details on valid schema definitions.

    :param json_string: The json string containing the schema information. :return: Returns a new schema object. :exception: Raises an exception if the schema information is not valid or incomplete.

    " + }, + { + "location": "class-schema/#cubedpandas.schema.Schema.to_dict", + "title": "to_dict()", + "text": "

    Converts the schema into a dictionary containing schema information for a Cube.

    :return: Returns a dictionary containing the schema information.

    " + }, + { + "location": "class-schema/#cubedpandas.schema.Schema.to_json", + "title": "to_json()", + "text": "

    Converts the schema into a json string containing schema information for a Cube.

    :return: Returns a json string containing the schema information.

    " + }, + { + "location": "class-schema/#cubedpandas.schema.Schema.save_as_json", + "title": "save_as_json(file_name)", + "text": "

    Saves the schema as a json file.

    :param file_name: The name of the file to save the schema to.

    " + }, + { + "location": "class-schema/#cubedpandas.schema.Schema.__len__", + "title": "__len__()", + "text": "

    Returns the number of dimensions of the schema.

    " + }, + { + "location": "contributing/", + "title": "Contributing", + "text": "" + }, + { + "location": "contributing/#sorry-well-add-that-later", + "title": "Sorry, we'll add that later...", + "text": "

    The current focus of development is on the core features of CubedPandas. Once finished I'll work on this documentation...

    !!! Note: To stay tuned for updates and new releases of CubedPandas, please add a watch to the CubedPandas GitHub repo.

    Thomas

    " + }, + { + "location": "faq/", + "title": "FAQ", + "text": "" + }, + { + "location": "faq/#sorry-well-add-that-later", + "title": "Sorry, we'll add that later...", + "text": "

    The current focus of development is on the core features of CubedPandas. Once finished I'll work on this documentation...

    !!! Note: To stay tuned for updates and new releases of CubedPandas, please add a watch to the CubedPandas GitHub repo.

    Thomas

    " + }, + { + "location": "installation/", + "title": "Installation", + "text": "" + }, + { + "location": "installation/#cubedpandas-installation", + "title": "CubedPandas Installation", + "text": "

    As always with Python packages, the easiest way to install CubedPandas is by using pip.

    pip install cubedpandas\n

    After installing CubedPandas, you are ready to go.

    You will find the latest version of CubedPandas on PyPI. Alternatively, you can clone the CubedPandas GitHub repo and build it on your own or somehow integrate it into your own source code. Please check the requirements.txt file for the necessary dependencies.

    " + }, + { + "location": "license/", + "title": "License", + "text": "" + }, + { + "location": "license/#sorry-well-add-that-later", + "title": "Sorry, we'll add that later...", + "text": "

    The current focus of development is on the core features of CubedPandas. Once finished I'll work on this documentation...

    !!! Note: To stay tuned for updates and new releases of CubedPandas, please add a watch to the CubedPandas GitHub repo.

    Thomas

    " + }, + { + "location": "method-cubed/", + "title": "Wrapping a Pandas DataFrame into a CubedPandas Cube", + "text": "" + }, + { + "location": "method-cubed/#using-the-cubed-method", + "title": "Using the 'cubed()' Method", + "text": "

    The cubed function is the most convenient way to wrap and convert a Pandas dataframe into a CubedPandas cube. By the way, cdf is nice and short for a 'cubed data frame' following the Pandas convention of df for a 'data frame'.

    If no schema is provided when applying the cubed method, a schema will be automatically inferred from the DataFrame. By default, all numeric columns will be considered as measures, all other columns as dimensions of the cube.

    import pandas as pd\nfrom cubedpandas import cubed\n\ndf = pd.DataFrame({\"channel\": [\"Online\", \"Online\", \"Online\", \"Retail\", \"Retail\", \"Retail\"],\n                   \"product\": [\"Apple\",  \"Pear\",   \"Banana\", \"Apple\",  \"Pear\",   \"Banana\"],\n                   \"sales\":   [100,      150,      300,      200,      250,      350     ],})\ncdf = cubed(df)    \nprint(cdf.Online)  # returns 550 = 100 + 150 + 300\n

    Sometimes, e.g. if you want an integer column to be considered as a dimension rather than as a measure column, you need to provide a schema. Here's a simple example of how to define and use a schema; in this case it is identical to the schema that would be automatically inferred. For more information please refer to the Schema documentation.

    import pandas as pd\nfrom cubedpandas import cubed\n\ndf = pd.DataFrame({\"channel\": [\"Online\", \"Online\", \"Online\", \"Retail\", \"Retail\", \"Retail\"],\n                   \"product\": [\"Apple\",  \"Pear\",   \"Banana\", \"Apple\",  \"Pear\",   \"Banana\"],\n                   \"sales\":   [100,      150,      300,      200,      250,      350     ],})\nschema = {\"dimensions\": [{\"column\":\"channel\"}, {\"column\": \"product\"}],\n          \"measures\":   [{\"column\":\"sales\"}]}\ncdf = cubed(df, schema=schema)\nprint(cdf.Online)  # returns 550 = 100 + 150 + 300\n
    " + }, + { + "location": "method-cubed/#using-the-cubed-extension-for-python", + "title": "Using the 'cubed' extension for Python", + "text": "

    After CubedPandas has been loaded, e.g. by import cubedpandas, you can also directly use the cubed extension for Pandas. The only difference from the cubed() function is that you need to use the cubed attribute of the Pandas DataFrame and either slice it with the [] operator or get access to the cube or any context using the . operator.

    import pandas as pd\nimport cubedpandas\n\ndf = pd.DataFrame({\"channel\": [\"Online\", \"Online\", \"Online\", \"Retail\", \"Retail\", \"Retail\"],\n                   \"product\": [\"Apple\",  \"Pear\",   \"Banana\", \"Apple\",  \"Pear\",   \"Banana\"],\n                   \"sales\":   [100,      150,      300,      200,      250,      350     ],})\n\ncdf = df.cubed.cube  # return a reference to the cube, just 'df.cubed' will not work.\n# or directly access any context the cube either by slicing with the [] operator\nx = df.cubed[\"Online\", \"Apple\", \"sales\"]\n# or by using the . operator\ny = df.cubed.Online.Apple.sales\n\nassert(x == y == 100)\n
    " + }, + { + "location": "method-cubed/#cubedpandas.common.cubed", + "title": "cubed(df, schema=None, infer_schema=True, exclude=None, caching=CachingStrategy.LAZY, caching_threshold=EAGER_CACHING_THRESHOLD, read_only=True)", + "text": "

    Wraps a Pandas dataframe into a cube to provide convenient multi-dimensional access to the underlying dataframe for easy aggregation, filtering, slicing, reporting and data manipulation and write back.

    Parameters:

    • df (DataFrame) \u2013

      The Pandas dataframe to be wrapped into the CubedPandas Cube object.

    • schema \u2013

      (optional) A schema that defines the dimensions and measures of the Cube. If not provided, the schema will be inferred from the dataframe if parameter infer_schema is set to True. For further details please refer to the documentation of the Schema class. Default value is None.

    • infer_schema (bool, default: True ) \u2013

      (optional) If no schema is provided and infer_schema is set to True, a suitable schema will be inferred from the underlying dataframe. All numerical columns will be treated as measures, all other columns as dimensions. If this behaviour is not desired, a schema must be provided. Default value is True.

    • exclude (str | list | tuple | None, default: None ) \u2013

      (optional) Defines the columns that should be excluded from the cube if no schema is provided. If a column is excluded, it will not be part of the schema and cannot be accessed through the cube. Excluded columns will be ignored during schema inference. Default value is None.

    • caching (CachingStrategy, default: LAZY ) \u2013

      (optional) A caching strategy to be applied for accessing the cube. The recommended value for almost all use cases is CachingStrategy.LAZY, which caches dimension members on first access. Caching can be beneficial for performance, but may also consume more memory. To cache all dimension members eagerly (on initialization of the cube), set this parameter to CachingStrategy.EAGER. Please refer to the documentation of 'CachingStrategy' for more information. Default value is CachingStrategy.LAZY.

    • caching_threshold (int, default: EAGER_CACHING_THRESHOLD ) \u2013

      (optional) The threshold as 'number of members' for EAGER caching only. If the number of distinct members in a dimension is below this threshold, the dimension will be cached eagerly, if caching is set to CachingStrategy.EAGER or CachingStrategy.FULL. Above this threshold, the dimension will be cached lazily. Default value is EAGER_CACHING_THRESHOLD, equivalent to 256 unique members per dimension.

    • read_only (bool, default: True ) \u2013

      (optional) Defines if write backs to the underlying dataframe are permitted. If read_only is set to True, write back attempts will raise a PermissionError. If read_only is set to False, write backs are permitted and will be pushed back to the underlying dataframe. Default value is True.

    Returns:

    • \u2013

      A new Cube object that wraps the dataframe.

    Raises:

    • PermissionError \u2013

      If writeback is attempted on a read-only Cube.

    • ValueError \u2013

      If the schema is not valid or does not match the dataframe or if invalid dimension, member, measure or address arguments are provided.

    Examples:

    >>> df = pd.value([{\"product\": [\"A\", \"B\", \"C\"]}, {\"value\": [1, 2, 3]}])\n>>> cdf = cubed(df)\n>>> cdf[\"product:B\"]\n2\n
    " + }, + { + "location": "method-cubed/#cubedpandas.pandas_extension.CubedPandasAccessor", + "title": "CubedPandasAccessor", + "text": "

    A Pandas extension that provides the CubedPandas 'cubed' accessor for Pandas dataframes.

    " + }, + { + "location": "method-cubed/#cubedpandas.pandas_extension.CubedPandasAccessor.cube", + "title": "cube property", + "text": "

    Wraps a Pandas dataframe into a cube to provide convenient multi-dimensional access to the underlying dataframe for easy aggregation, filtering, slicing, reporting and data manipulation and write back.

    Parameters:

    • df \u2013

      The Pandas dataframe to be wrapped into the CubedPandas Cube object.

    • schema \u2013

      (optional) A schema that defines the dimensions and measures of the Cube. If not provided, the schema will be inferred from the dataframe if parameter infer_schema is set to True. For further details please refer to the documentation of the Schema class. Default value is None.

    • infer_schema \u2013

      (optional) If no schema is provided and infer_schema is set to True, a suitable schema will be inferred from the underlying dataframe. All numerical columns will be treated as measures, all other columns as dimensions. If this behaviour is not desired, a schema must be provided. Default value is True.

    • exclude \u2013

      (optional) Defines the columns that should be excluded from the cube if no schema is provided. If a column is excluded, it will not be part of the schema and cannot be accessed through the cube. Excluded columns will be ignored during schema inference. Default value is None.

    • read_only \u2013

      (optional) Defines if write backs to the underlying dataframe are permitted. If read_only is set to True, write back attempts will raise a PermissionError. If read_only is set to False, write backs are permitted and will be pushed back to the underlying dataframe. Default value is True.

    • ignore_case \u2013

      (optional) If set to True, the case of member names will be ignored, 'Apple' and 'apple' will be treated as the same member. If set to False, member names are case-sensitive, 'Apple' and 'apple' will be treated as different members. Default value is True.

    • ignore_key_errors \u2013

      (optional) If set to True, key errors for members of dimensions will be ignored and cell values will return 0.0 or None if no matching record exists. If set to False, key errors will be raised as exceptions when accessing cell values for non-existing members. Default value is True.

    • caching \u2013

      (optional) A caching strategy to be applied for accessing the cube. The recommended value for almost all use cases is CachingStrategy.LAZY, which caches dimension members on first access. Caching can be beneficial for performance, but may also consume more memory. To cache all dimension members eagerly (on initialization of the cube), set this parameter to CachingStrategy.EAGER. Please refer to the documentation of 'CachingStrategy' for more information. Default value is CachingStrategy.LAZY.

    • caching_threshold \u2013

      (optional) The threshold as 'number of members' for EAGER caching only. If the number of distinct members in a dimension is below this threshold, the dimension will be cached eagerly, if caching is set to CachingStrategy.EAGER or CachingStrategy.FULL. Above this threshold, the dimension will be cached lazily. Default value is EAGER_CACHING_THRESHOLD, equivalent to 256 unique members per dimension.

    • eager_evaluation \u2013

      (optional) If set to True, the cube will evaluate the context eagerly, i.e. when the context is created. Eager evaluation is recommended for most use cases, as it simplifies debugging and error handling. If set to False, the cube will evaluate the context lazily, i.e. only when the value of a context is accessed/requested.

    Returns:

    • \u2013

      A new Cube object that wraps the dataframe.

    Raises:

    • PermissionError \u2013

      If writeback is attempted on a read-only Cube.

    • ValueError \u2013

      If the schema is not valid or does not match the dataframe or if invalid dimension, member, measure or address arguments are provided.

    Examples:

    >>> df = pd.value([{\"product\": [\"A\", \"B\", \"C\"]}, {\"value\": [1, 2, 3]}])\n>>> cdf = cubed(df)\n>>> cdf[\"product:B\"]\n
    " + }, + { + "location": "readme_mkdocs/", + "title": "CubedPandas website & documentation how-to guide", + "text": "

    by Thomas Zeutschler, June 2024

    " + }, + { + "location": "readme_mkdocs/#introduction", + "title": "Introduction", + "text": "

    The CubedPandas website & documentation is made with MkDocs. MkDocs is a static site generator that creates a website from markdown files and from the actual source code of the project. The CubedPandas website will be hosted on GitHub pages: https://zeutschler.github.io/cubedpandas/.

    The documentation is structured as follows:

    • Homepage - Short Intro to CubedPandas, features, installation, sample code, etc.
    • User documentation - The documentation will be used to explain the usage of the tool to the users.
    • Developer documentation - The documentation will be used to explain the architecture and the design of the tool to developers.
    • Blog - Posts and news about CubedPandas features, development, best practices, etc.
    " + }, + { + "location": "readme_mkdocs/#how-to-build-the-documentation", + "title": "How to build the documentation", + "text": "

    The documentation can be built by using the mkdocs command line tool from the root directory of the project:

    mkdocs build --clean --site-dir 'pages/'\n
    " + }, + { + "location": "readme_mkdocs/#how-to-run-and-view-the-documentation-locally", + "title": "How to run and view the documentation locally", + "text": "

    The documentation can be run and viewed locally by using the mkdocs command line tool from the root directory of the project

    mkdocs serve\n
    This will start a local web server that serves the documentation. Edits to the markdown files will be automatically reloaded in the browser. The documentation is then available at the following URL:

    http://127.0.0.1:8000/data-model-generator/\n
    To stop the local web server, press CTRL+C in the command line tool. That's all, enjoy!

    Additional information on how to use MkDocs can be found in the MkDocs documentation and in the following guide RealPython: Build Your Python Project Documentation With MkDocs.

    " + }, + { + "location": "release-notes/", + "title": "Release Notes", + "text": "" + }, + { + "location": "release-notes/#sorry-well-add-that-later", + "title": "Sorry, we'll add that later...", + "text": "

    The current focus of development is on the core features of CubedPandas. Once finished I'll work on this documentation...

    !!! Note: To stay tuned for updates and new releases of CubedPandas, please add a watch to the CubedPandas GitHub repo.

    Thomas

    " + }, + { + "location": "thomas/", + "title": "About me", + "text": "" + }, + { + "location": "thomas/#sorry-well-add-that-later", + "title": "Sorry, we'll add that later...", + "text": "

    The current focus of development is on the core features of CubedPandas. Once finished I'll work on this documentation...

    !!! Note: To stay tuned for updates and new releases of CubedPandas, please add a watch to the CubedPandas GitHub repo.

    Thomas

    " + }, + { + "location": "blog/", + "title": "Blog", + "text": "" + } + ] +} \ No newline at end of file diff --git a/pages/sitemap.xml b/pages/sitemap.xml index 27cdb65..e65a3b3 100644 --- a/pages/sitemap.xml +++ b/pages/sitemap.xml @@ -25,6 +25,11 @@ 2024-09-06 daily + + https://zeutschler.github.io/cubedpandas/best-practise/ + 2024-09-06 + daily + https://zeutschler.github.io/cubedpandas/best-practises/ 2024-09-06 @@ -105,11 +110,6 @@ 2024-09-06 daily - - https://zeutschler.github.io/cubedpandas/use-cases/ - 2024-09-06 - daily - https://zeutschler.github.io/cubedpandas/blog/ 2024-09-06 diff --git a/pages/sitemap.xml.gz b/pages/sitemap.xml.gz index d0862d6bd95b6ff2d92a8e2626cbc2a340bfed19..441638a168831dc9417e48e439016f2643e61ac5 100644 GIT binary patch delta 331 zcmV-R0kr;%1Be5V9e>B~JO%1KO+jcHQz^GS!FEP&V%li=C~-<*PrpFX#C_Vqxg@q6 z=cg$8+o|^#XP0M;WP;xntFkJV=vC0hce~>A<6XQK`({y((GsR*4)(Mw`jmz@DW_>F z$s}|zP#Iz;N%h$v`Ebgr+!oDZna3@`ytrq?Eyl7lsUO?Y1gXXr z^#ESybhW&NKko)gF6d@mtv90Dis~hYJbX`Kst4cUG2*GwVC=P)Po9y}Eq{%dgD|u< zbXB!FO#Sts2F-w2<4tIZabZ zCZU6Y$`CtAs?Q6O52dWiZP6^2S#AO5*Lr^zbWdig2jAf_;;GSK?6sCpnvv8kf8n9EHgr|B zI!yibpa#u=SmoVGsNfS`5+B~IVFRm;Lw_q^Y=|8Kjgij@&kMon2G6|+2i%+*qG`vJ zTP_B;M>sK{G@cSz3yDWoutwz!#vva#OnuPz`oN+3Mg;Bf;J$QF#Ddr6(tyH;#G)U| hm%*|4p^f=a74T_t{~z-{4dhRK^8-*-4+jJg008(zp&$SN diff --git a/pages/thomas/index.html b/pages/thomas/index.html index 0d53635..b409dab 100644 --- a/pages/thomas/index.html +++ b/pages/thomas/index.html @@ -401,11 +401,11 @@
  • - + - Use Cases + Best Practise From 277a67fb1283f40ac2acb6db7e13af783bc34837 Mon Sep 17 00:00:00 2001 From: Thomas Zeutschler Date: Fri, 6 Sep 2024 21:38:09 +0200 Subject: [PATCH 2/2] slice refactored --- .idea/cubedpandas.iml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.idea/cubedpandas.iml b/.idea/cubedpandas.iml index 2d145dd..ffa8ad3 100644 --- a/.idea/cubedpandas.iml +++ b/.idea/cubedpandas.iml @@ -4,8 +4,6 @@ - -