From b6ede7bfd1190088e02559298c4f30a853762d54 Mon Sep 17 00:00:00 2001 From: Julio Campagnolo Date: Sun, 15 Oct 2023 10:17:49 -0300 Subject: [PATCH 01/32] updated docstrings --- dbastable/_sqldb.py | 265 +++++++++++++++++++++++++++++++++++++------- 1 file changed, 228 insertions(+), 37 deletions(-) diff --git a/dbastable/_sqldb.py b/dbastable/_sqldb.py index bd9a380..0e5ede2 100644 --- a/dbastable/_sqldb.py +++ b/dbastable/_sqldb.py @@ -56,7 +56,7 @@ def _dict2row(self, table, row, add_columns=False): def _add_data_dict(self, table, data, add_columns=False, skip_sanitize=False): - """Add data sotred in a dict to the table.""" + """Add data sotred as columns in a dict to the table.""" # sanitization of keys will be done in the methods below row_list = self._dict2row(table, row=data, add_columns=add_columns) @@ -103,6 +103,10 @@ def add_rows(self, table, data, add_columns=False, skip_sanitize=False): as column names. add_columns : bool (optional) If True, add missing columns to the table. + skip_sanitize: bool (optional) + If True, skip the sanitization of the data. Use this if the data + is already sanitized. Use with caution, as it may cause problems + with your data. """ self._check_table(table) if isinstance(data, (list, tuple)): @@ -132,7 +136,15 @@ def add_rows(self, table, data, add_columns=False, skip_sanitize=False): f'Not {type(data)}.') def delete_row(self, table, index): - """Delete a row from the table.""" + """Delete a row from the table. + + Parameters + ---------- + table: str + Name of the table to delete the row. + index: int + Index of the row to delete. + """ self._check_table(table) row = self._fix_row_index(index, len(self[table])) comm = f"DELETE FROM {table} WHERE {_ID_KEY}={row+1};" @@ -140,13 +152,39 @@ def delete_row(self, table, index): self._update_indexes(table) def get_row(self, table, index): - """Get a row from the table.""" + """Get a row from the table. + + Parameters + ---------- + table: str + Name of the table to get the row. 
+ index: int + Index of the row to get. + + Returns + ------- + res : `~dbastable._sqldb.SQLRow` + The row object viewer. + """ self._check_table(table) index = self._fix_row_index(index, len(self[table])) return SQLRow(self, table, index) def set_row(self, table, row, data): - """Set a row in the table.""" + """Set a row in the table. + + Parameters + ---------- + table: str + Name of the table to set the row. + row: int + Index of the row to set. + data: dict, list or `~numpy.ndarray` + Data to set in the row. If dict, keys are column names, + if list, the order of the values is the same as the order of + the column names. If `~numpy.ndarray`, dtype names are interpreted + as column names. + """ row = self._fix_row_index(row, self.count(table)) colnames = self.column_names(table) @@ -170,24 +208,35 @@ def set_row(self, table, row, data): class _ColumnAccessorMixin: - """Access and manipulate columns.""" - _columns_cache = None + """Access and manipulate columns. + + Notes + ----- + - the column cache will be stored in the same variable as the table cache. + """ def column_names(self, table, do_not_decode=False): - """Get the column names of the table.""" - self._check_table(table) + """Get the column names of the table. - # if cache is not initialized, initialize it - if self._columns_cache is None: - self._columns_cache = {} + Parameters + ---------- + table: str + Name of the table to get the column names. + do_not_decode: bool (optional) + If True, do not decode Base32 encoded column names. This is useful + to get the real column names in the database. If False, the + decoded names are returned. 
+ """ + # this will initialize the cache if needed + self._check_table(table) # if names are cached, use the cached names - if table in self._columns_cache.keys(): + if self._table_cache[table] is not None: if do_not_decode: - return self._columns_cache[table] + return self._table_cache[table] return [self._decode_b32(i) if i.startswith(_B32_COL_PREFIX) else i - for i in self._columns_cache[table]] + for i in self._table_cache[table]] # we get the column names from the cursor descriptor, so we need to # select a line @@ -213,12 +262,22 @@ def column_names(self, table, do_not_decode=False): columns.append(i[0].lower()) # add columns to the cache - self._columns_cache[table] = columns + self._table_cache[table] = columns return columns def add_column(self, table, column, data=None): - """Add a column to a table.""" + """Add a column to a table. + + Parameters + ---------- + table: str + Name of the table to add the column. + column: str + Name of the column to add. + data: list (optional) + List of values to add to the column. If None, no data is added. + """ self._check_table(table) # check if the original column name is already in the table @@ -236,14 +295,22 @@ def add_column(self, table, column, data=None): self.execute(comm) # add column to the cache - self._columns_cache[table].append(col) + self._table_cache[table].append(col) # adding the data to the table if data is not None: self.set_column(table, column, data) def delete_column(self, table, column): - """Delete a column from a table.""" + """Delete a column from a table. + + Parameters + ---------- + table: str + Name of the table to delete the column. + column: str + Name of the column to delete. 
+ """ self._check_table(table) if column in (_ID_KEY, 'table', 'default'): @@ -257,7 +324,7 @@ def delete_column(self, table, column): self.execute(comm) # remove column from the cache - self._columns_cache[table].remove(column) + self._table_cache[table].remove(column) def set_column(self, table, column, data): """Set a column in the table.""" @@ -292,7 +359,7 @@ def get_column(self, table, column): class _TableAccessorMixin: """Access and manipulate tables.""" - _table_cache = None + _table_cache = None # a dictionary to store table and column names @property def table_names(self): @@ -300,9 +367,10 @@ def table_names(self): # use a cache for table names. Avoid querying the database every time if self._table_cache is None: comm = "SELECT name FROM sqlite_master WHERE type='table';" - self._table_cache = [i[0] for i in self.execute(comm) - if i[0] != 'sqlite_sequence'] - return list(self._table_cache) + tables = [i[0] for i in self.execute(comm) + if i[0] != 'sqlite_sequence'] + self._table_cache = {t: None for t in tables} + return list(self._table_cache.keys()) def _check_table(self, table): """Check if the table exists in the database.""" @@ -310,7 +378,19 @@ def _check_table(self, table): raise KeyError(f'Table "{table}" does not exist.') def add_table(self, table, columns=None, data=None): - """Create a table in database.""" + """Create a table in database. + + Parameters + ---------- + table : str + Name of the table to create. + columns : list (optional) + List of column names to create in the table. If None, no columns + are created. + data : list (optional) + List of rows to add to the table. If None, no rows are added. + Each row is a list of values in the same order as the columns. 
+ """ self.logger.debug('Initializing "%s" table.', table) if table in self.table_names: raise ValueError('table {table} already exists.') @@ -331,22 +411,39 @@ def add_table(self, table, columns=None, data=None): self.execute(comm) # add table to the cache - self._table_cache.append(table) + self._table_cache[table] = None if data is not None: self.add_rows(table, data, add_columns=True) def drop_table(self, table): - """Drop a table from the database.""" + """Drop a table from the database. + + Parameters + ---------- + table : str + Name of the table to drop. + """ self._check_table(table) comm = f"DROP TABLE {table};" self.execute(comm) # remove table from the cache - self._table_cache.remove(table) + del self._table_cache[table] def get_table(self, table): - """Get a table from the database.""" + """Get a table from the database. + + Parameters + ---------- + table : str + Name of the table to get. + + Returns + ------- + res : `~dbastable._sqldb.SQLTable` + The table object viewer. + """ self._check_table(table) return SQLTable(self, table) @@ -355,13 +452,40 @@ class _ItemAccessorMixin: """Access and manipulate items.""" def get_item(self, table, column, row): - """Get an item from the table.""" + """Get an item from the table. + + Parameters + ---------- + table: str + Name of the table to get the item. + column: str + Name of the column to get the item. + row: int + Index of the row to get the item. + + Returns + ------- + res : object + The item value in the table. + """ self._check_table(table) row = self._fix_row_index(row, len(self[table])) return self.get_column(table, column)[row] def set_item(self, table, column, row, value): - """Set a value in a cell.""" + """Set a value in a cell. + + Parameters + ---------- + table: str + Name of the table to set the item. + column: str + Name of the column to set the item. + row: int + Index of the row to set the item. + value: object + Value to set in the cell. 
+ """ row = self._fix_row_index(row, self.count(table)) col = self._get_column_name(table, column) value = self._sanitize_value(value) @@ -406,13 +530,31 @@ def __init__(self, db=None, autocommit=True, logger=None, self.autocommit = autocommit self.logger = logger or logging.getLogger(__name__) self._allow_b32_colnames = allow_b32_colnames + + # use the sqlite3 trace callback to log all sql commands self._con.set_trace_callback(lambda x: self.logger.debug('executing sql: %s', x.replace('\n', ' '))) def execute(self, command, arguments=None): - """Execute a SQL command in the database.""" + """Execute a SQL command in the database. + + Parameters + ---------- + command : str + SQL command to execute. + arguments : list or tuple (optional) + Arguments to pass to the command. A '?' in the command will be + replaced by the argument. If None, no arguments are passed. + + Returns + ------- + res : list + List of tuples with the results of the command. + """ try: + # sqlite3 has problems with None arguments + # so we should not pass any arguments if None if arguments is None: self._cur.execute(command) else: @@ -427,7 +569,21 @@ def execute(self, command, arguments=None): return res def executemany(self, command, arguments): - """Execute a SQL command in the database.""" + """Execute a SQL command in the database using multiple entries. + + Parameters + ---------- + command : str + SQL command to execute. + arguments : list of lists or tuples + Sequence of argument sets. The command is executed once for each + set, with every '?' in the command replaced by an argument. + + Returns + ------- + res : list + List of tuples with the results of the command. + """ try: self._cur.executemany(command, arguments) res = self._cur.fetchall() @@ -444,7 +600,22 @@ def commit(self): self._con.commit() def count(self, table, where=None): - """Get the number of rows in the table.""" + """Get the number of rows in the table. 
+ + Parameters + ---------- + table: str + Name of the table to count from. + where : dict (optional) + Dictionary of conditions to count rows. Keys are column names, + values are values to compare. All rows equal to the values will + be counted. If None, all rows are counted. + + Returns + ------- + res : int + Number of rows in the table. + """ self._check_table(table) comm = "SELECT COUNT(*) FROM " comm += f"{table} " @@ -460,16 +631,28 @@ def select(self, table, columns=None, where=None, order=None, limit=None, Parameters ---------- + table: str + Name of the table to select from. columns : list (optional) List of columns to select. If None, select all columns. where : dict (optional) Dictionary of conditions to select rows. Keys are column names, - values are values to compare. All rows equal to the values will - be selected. If None, all rows are selected. + values are values to compare. If it is a dict of values, all rows + equal to the values will be selected. If it is a dict of + `~dbastable.where.Where` objects, the conditions will be combined + with the AND operator. If None, all rows are selected. order : str (optional) Column name to order by. limit : int (optional) Number of rows to select. + offset : int (optional) + Number of rows to skip before selecting. + + Returns + ------- + res : list + List of tuples with the selected rows. Each row values will be + returned in a tuple in the same order as the columns. """ self._check_table(table) if columns is None: @@ -517,7 +700,14 @@ def select(self, table, columns=None, where=None, order=None, limit=None, return res def copy(self, indexes=None): - """Get a copy of the database.""" + """Get a copy of the database. + + Parameters + ---------- + indexes : dict, optional + A dictionary of table names and the indexes of the rows in each + table to copy. If None, all rows are copied. 
+ """ return self.__copy__(indexes=indexes) @property @@ -592,7 +782,8 @@ def __copy__(self, indexes=None): Parameters ---------- indexes : dict, optional - A dictionary of table names and their indexes to copy. + A dictionary of table names and the indexes of the rows in each + table to copy. If None, all rows are copied. Returns ------- From 2eaf059169402fff831653a4e66ad26d51ce06cc Mon Sep 17 00:00:00 2001 From: Julio Campagnolo Date: Sun, 15 Oct 2023 10:42:36 -0300 Subject: [PATCH 02/32] add dependencies for docs --- .github/workflows/docs.yml | 42 ++++++++++++++++++++++++++++++++++++++ pyproject.toml | 2 +- 2 files changed, 43 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/docs.yml diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml new file mode 100644 index 0000000..355ba29 --- /dev/null +++ b/.github/workflows/docs.yml @@ -0,0 +1,42 @@ +name: Documentation Building + +on: + pull_request: + push: + branches: [ main ] + workflow_dispatch: + schedule: + - cron: 0 7 * * 1 # 7 A.M. 
mon + +concurrency: + group: docs-${{ github.ref }} + cancel-in-progress: true + +permissions: + contents: read + +jobs: + tests: + name: Docs Building + runs-on: ubuntu-latest + if: "!(contains(github.event.head_commit.message, '[skip ci]') || contains(github.event.head_commit.message, '[ci skip]'))" + + steps: + - uses: actions/checkout@v3 + with: + fetch-depth: 0 + - name: Set up Python 3.11 + uses: actions/setup-python@v4 + with: + python-version: "3.11" + cache: pip + cache-dependency-path: '**/pyproject.toml' + - name: Install base dependencies + run: | + python -m pip install --upgrade --quiet pip setuptools + - name: Install package dependencies + run: pip install -U .[docs] + - name: Build with Sphinx + run: | + cd docs + make html diff --git a/pyproject.toml b/pyproject.toml index cea406f..46ca439 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ dependencies = ["numpy"] [project.optional-dependencies] test = ["coverage", "astropy"] -docs = ["sphinx", "pydata-sphinx-theme"] +docs = ["sphinx", "pydata-sphinx-theme", "sphinx-automodapi", "numpydoc"] [project.urls] Homepage = "https://github.com/juliotux/dbastable" From b99fea7d40f11ff0c98594428810b01894006b51 Mon Sep 17 00:00:00 2001 From: Julio Campagnolo Date: Sun, 15 Oct 2023 10:42:55 -0300 Subject: [PATCH 03/32] first scketch of the docs --- docs/api.rst | 5 +++++ docs/conf.py | 10 +++++++--- docs/index.rst | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 64 insertions(+), 3 deletions(-) create mode 100644 docs/api.rst diff --git a/docs/api.rst b/docs/api.rst new file mode 100644 index 0000000..d0ade1b --- /dev/null +++ b/docs/api.rst @@ -0,0 +1,5 @@ +.. 
automodapi:: dbastable + :members: + :undoc-members: + :show-inheritance: + :inherited-members: diff --git a/docs/conf.py b/docs/conf.py index 159e61d..5055da5 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -10,11 +10,12 @@ extensions = [ 'sphinx.ext.intersphinx', + 'sphinx.ext.extlinks', 'sphinx.ext.todo', + 'sphinx.ext.mathjax', 'sphinx_automodapi.automodapi', 'numpydoc', - 'IPython.sphinxext.ipython_console_highlighting', - 'IPython.sphinxext.ipython_directive' + 'matplotlib.sphinxext.plot_directive' ] todo_include_todos = True @@ -55,5 +56,8 @@ default_role = 'py:obj' intersphinx_mapping = { "python": ("https://docs.python.org/3/", None), - "numpy": ("https://docs.scipy.org/doc/numpy/", None) + "numpy": ("https://docs.scipy.org/doc/numpy/", None), + "matplotlib": ("https://matplotlib.org/", None), + "astropy": ('http://docs.astropy.org/en/latest/', None), + "pandas": ('https://pandas.pydata.org/pandas-docs/stable/', None) } diff --git a/docs/index.rst b/docs/index.rst index e69de29..aaa4de3 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -0,0 +1,52 @@ +DBasTable +--------- + +Handle SQLite database tables just as if they were `Numpy structured arrays `_, `~astropy.table.Table` or `~pandas.DataFrame`. + +.. code-block:: python + + >>> from dbastable import SQLDatabase + >>> db = SQLDatabase('test.db', autocommit=True) + >>> db.add_table('table1') + >>> db['table1']['col1'] = [1, 2, 3, 4] + >>> db['table1']['col2'] = ['a', 'b', 'c', 'd'] + >>> print(db['table1'][2].values) + (3, 'c') + + +Installation +------------ + +The easiest way to install dbastable is via `pip `_:: + + pip install dbastable + +Alternatively, you can clone the repository and install it manually:: + + git clone + cd dbastable + pip install -U . 
+ +or + + pip install -U git+https://github.com/juliotux/dbastable + + +Documentation +------------- + +The documentation is available at https://dbastable.readthedocs.io/en/latest/ + + +License +------- + +`dbastable` is licensed under the terms of the `MIT license `_. See the file "LICENSE" for information on the history of this software, terms & conditions for usage, and a DISCLAIMER OF ALL WARRANTIES. + +API Reference +------------- + +.. toctree:: + :maxdepth: 1 + + api From 6eb3fbe020bec515ecfad59a45e32d5ce3625177 Mon Sep 17 00:00:00 2001 From: Julio Campagnolo Date: Sun, 15 Oct 2023 10:44:04 -0300 Subject: [PATCH 04/32] gitignore generated files --- .gitignore | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.gitignore b/.gitignore index 09aa1b0..d39d16d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,10 @@ .vscode version.py +# docs generated files +docs/_build +docs/api + # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] From 0f92cf413a82202ab0dafc96298cc82221f478b7 Mon Sep 17 00:00:00 2001 From: Julio Campagnolo Date: Sun, 15 Oct 2023 10:45:57 -0300 Subject: [PATCH 05/32] removed matplotlib sphinx dependency --- docs/conf.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 5055da5..7a319a9 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -14,8 +14,7 @@ 'sphinx.ext.todo', 'sphinx.ext.mathjax', 'sphinx_automodapi.automodapi', - 'numpydoc', - 'matplotlib.sphinxext.plot_directive' + 'numpydoc' ] todo_include_todos = True From 16c7599494441d1a9bbf147a7de1cb5e86e0f813 Mon Sep 17 00:00:00 2001 From: Julio Campagnolo Date: Sun, 15 Oct 2023 10:50:45 -0300 Subject: [PATCH 06/32] rtd building --- .readthedocs.yaml | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 .readthedocs.yaml diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 0000000..638a967 --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,30 @@ +# .readthedocs.yaml +# Read 
the Docs configuration file +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +# Required +version: 2 + +build: + os: ubuntu-22.04 + tools: + python: "3.11" + +# Build documentation in the "docs/" directory with Sphinx +sphinx: + configuration: docs/conf.py + +# Optionally build your docs in additional formats such as PDF and ePub +formats: + - pdf + - epub + +# Optional but recommended, declare the Python requirements required +# to build your documentation +# See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html +python: + install: + - method: pip + path: . + extra_requirements: + - docs From b2df75b9719d0354d4b924c4e3cc7bdc05b57e87 Mon Sep 17 00:00:00 2001 From: Julio Campagnolo Date: Sun, 15 Oct 2023 11:19:44 -0300 Subject: [PATCH 07/32] updated general readme --- README.md | 40 +++++++++++++++++++++++++++++++++++++++- docs/index.rst | 16 ++++++++++++++++ 2 files changed, 55 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 7d2185e..01f441e 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # DBasTable -[![Unit Tests](https://github.com/juliotux/DBasTable/actions/workflows/unittests.yml/badge.svg)](https://github.com/juliotux/DBasTable/actions/workflows/unittests.yml) [![codecov](https://codecov.io/gh/juliotux/DBasTable/graph/badge.svg?token=r9kulm3ANZ)](https://codecov.io/gh/juliotux/DBasTable) +[![Unit Tests](https://github.com/juliotux/DBasTable/actions/workflows/unittests.yml/badge.svg)](https://github.com/juliotux/DBasTable/actions/workflows/unittests.yml) [![codecov](https://codecov.io/gh/juliotux/DBasTable/graph/badge.svg?token=r9kulm3ANZ)](https://codecov.io/gh/juliotux/DBasTable) [![Documentation Status](https://readthedocs.org/projects/dbastable/badge/?version=latest)](https://dbastable.readthedocs.io/en/latest/?badge=latest) A simplier way to access SQLite tables, just like Numpy structured arrarys or Pandas dataframes. 
@@ -29,3 +29,41 @@ As menioned above, we intended to perform only simple operations with this packa I'm not a SQL master, nor a digital security guru. I'm an astrophysicist that do some python. So, if you want to use it, use with care. # Install and Documentation + +## Installation + +The easiest way to install dbastable is via [pip](https://pip.pypa.io/en/stable/) + +``` +pip install dbastable +``` + +Alternatively, you can clone the repository and install it manually: + +``` +git clone +cd dbastable +pip install -U . +``` + +or + +``` +pip install -U git+https://github.com/juliotux/dbastable +``` + +Development version is also available in pip: + +``` +pip install -U --pre dbastable +``` + + +## Documentation + + +The documentation is available at https://dbastable.readthedocs.io/en/latest/ + +# License + +`dbastable` is licensed under the terms of the [MIT license](https://opensource.org/license/mit/). See the file `LICENSE` for information on the history of this software, terms & conditions for usage, and a DISCLAIMER OF ALL WARRANTIES. diff --git a/docs/index.rst b/docs/index.rst index aaa4de3..3f7c41c 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -13,6 +13,22 @@ Handle SQLite database tables just as if they were `Numpy structured arrays >> print(db['table1'][2].values) (3, 'c') +Its design is based mainly in the way you interact with [Numpy structured arrays](https://numpy.org/doc/stable/user/basics.rec.html), [Pandas dataframes](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html) and [Astropy Tables](https://docs.astropy.org/en/stable/table/index.html), while keep the possibility of non-volatile and in-disk data storage. + +What is it for? +=============== + +The design of this package was made for *very simple interactions* with Python's sqlite implementation, so you can interact with sqlite databases without know SQL commands. 
So, if you need to work with simple tables, composed by columns of data in standard python formats and don't want to perform SQL queries manually, maybe this package is for you. + +What is it *NOT* for? +===================== + +As mentioned above, we intended to perform only simple operations with this package. So, *we intentionally limited the functionality*. Do not expect to perform complex queries here. This package is for simplicity. There are several alternatives that are more feature-complete. + +Do not use in large production +============================== + +I'm not a SQL master, nor a digital security guru. I'm an astrophysicist who does some Python. So, if you want to use it, use with care. Installation ------------ From 88a13317ffd71397fdac48bfdf7d3fc3cd1c2524 Mon Sep 17 00:00:00 2001 From: Julio Campagnolo Date: Sun, 15 Oct 2023 11:27:13 -0300 Subject: [PATCH 08/32] just more general comments --- docs/index.rst | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/docs/index.rst b/docs/index.rst index 3f7c41c..037383b 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -13,7 +13,7 @@ Handle SQLite database tables just as if they were `Numpy structured arrays >> print(db['table1'][2].values) (3, 'c') -Its design is based mainly in the way you interact with [Numpy structured arrays](https://numpy.org/doc/stable/user/basics.rec.html), [Pandas dataframes](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html) and [Astropy Tables](https://docs.astropy.org/en/stable/table/index.html), while keep the possibility of non-volatile and in-disk data storage. +Its design is based mainly on the way you interact with `Numpy structured arrays <https://numpy.org/doc/stable/user/basics.rec.html>`_, while keeping the possibility of non-volatile and in-disk data storage. What is it for? =============== @@ -43,10 +43,13 @@ Alternatively, you can clone the repository and install it manually:: cd dbastable pip install -U . 
-or +or:: pip install -U git+https://github.com/juliotux/dbastable +Development version is also available at pip:: + + pip install -U --pre dbastable Documentation ------------- From d928816dd435631a3312a4369aca5e3beb93b2c5 Mon Sep 17 00:00:00 2001 From: Julio Campagnolo Date: Sun, 15 Oct 2023 11:29:19 -0300 Subject: [PATCH 09/32] api: remove inheritance diagram --- docs/api.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/api.rst b/docs/api.rst index d0ade1b..b52d122 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -1,5 +1,5 @@ .. automodapi:: dbastable :members: :undoc-members: - :show-inheritance: :inherited-members: + :no-inheritance-diagram: From 3e0f147a2b80c3bbc27101d9e9e120e7f748193c Mon Sep 17 00:00:00 2001 From: Julio Campagnolo Date: Sun, 15 Oct 2023 11:43:02 -0300 Subject: [PATCH 10/32] improve docstrings of SQLTable --- dbastable/_viewers.py | 157 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 136 insertions(+), 21 deletions(-) diff --git a/dbastable/_viewers.py b/dbastable/_viewers.py index 0b88048..7114d3d 100644 --- a/dbastable/_viewers.py +++ b/dbastable/_viewers.py @@ -4,18 +4,17 @@ class SQLTable: - """Handle an SQL table operations interfacing with the DB.""" + """Handle an SQL table operations interfacing with the DB. - def __init__(self, db, name): - """Initialize the table. + Parameters + ---------- + db : SQLDatabase + The parent database object. + name : str + The name of the table in the database. + """ - Parameters - ---------- - db : SQLDatabase - The parent database object. - name : str - The name of the table in the database. - """ + def __init__(self, db, name): self._db = db self._name = name @@ -41,7 +40,31 @@ def values(self): return self.select() def select(self, **kwargs): - """Select rows from the table.""" + """Select rows from the table. See `~dbastable.SQLDatabase.select`. + + Parameters + ---------- + columns : list (optional) + List of columns to select. If None, select all columns. 
+ where : dict (optional) + Dictionary of conditions to select rows. Keys are column names, + values are values to compare. If it is a dict of values, all rows + equal to the values will be selected. If it is a dict of + `~dbastable.where.Where` objects, the conditions will be combined + with the AND operator. If None, all rows are selected. + order : str (optional) + Column name to order by. + limit : int (optional) + Number of rows to select. + offset : int (optional) + Number of rows to skip before selecting. + + Returns + ------- + res : list + List of tuples with the selected rows. Each row values will be + returned in a tuple in the same order as the columns. + """ return self._db.select(self._name, **kwargs) def as_table(self): @@ -55,40 +78,132 @@ def as_table(self): names=self.column_names) def add_column(self, name, data=None): - """Add a column to the table.""" + """Add a column to the table. See `~dbastable.SQLDatabase.add_column`. + + Parameters + ---------- + name : str + Column name. + data : list (optional) + """ self._db.add_column(self._name, name, data=data) def add_rows(self, data, add_columns=False): - """Add a row to the table.""" - # If keymappging is used, only dict and list + """Add a row to the table. See `~dbastable.SQLDatabase.add_rows`. + + Parameters + ---------- + data : dict, list or `~numpy.ndarray` + Data to add to the table. If dict, keys are column names, + if list, the order of the values is the same as the order of + the column names. If `~numpy.ndarray`, dtype names are interpreted + as column names. + add_columns : bool (optional) + If True, add missing columns to the table. + """ self._db.add_rows(self._name, data, add_columns=add_columns) def get_column(self, column): - """Get a given column from the table.""" + """Get a given column from the table. + See `~dbastable.SQLDatabase.get_column`. + + Parameters + ---------- + column : str + Column name. + + Returns + ------- + res : `~dbastable.SQLColumn` + Column viewer object. 
+ """ return self._db.get_column(self._name, column) def get_row(self, row): - """Get a given row from the table.""" + """Get a given row from the table. + See `~dbastable.SQLDatabase.get_row`. + + Parameters + ---------- + row : int + Row index. + + Returns + ------- + res : `~dbastable.SQLRow` + Row viewer object. + """ return self._db.get_row(self._name, row) def set_column(self, column, data): - """Set a given column in the table.""" + """Set a given column in the table. + See `~dbastable.SQLDatabase.set_column`. + + Parameters + ---------- + column : str + Column name. + data : list + List of values to set. + """ self._db.set_column(self._name, column, data) def set_row(self, row, data): - """Set a given row in the table.""" + """Set a given row in the table. + See `~dbastable.SQLDatabase.set_row`. + + Parameters + ---------- + row : int + Row index. + data : dict, list or `~numpy.ndarray` + Data to set in the row. If dict, keys are column names, + if list, the order of the values is the same as the order of + the column names. If `~numpy.ndarray`, dtype names are interpreted + as column names. + """ self._db.set_row(self._name, row, data) def delete_column(self, column): - """Delete a given column from the table.""" + """Delete a given column from the table. + See `~dbastable.SQLDatabase.delete_column`. + + Parameters + ---------- + column : str + Column name. + """ self._db.delete_column(self._name, column) def delete_row(self, row): - """Delete all rows from the table.""" + """Delete a given row from the table. + See `~dbastable.SQLDatabase.delete_row`. + + Parameters + ---------- + row : int + Row index. + """ self._db.delete_row(self._name, row) def index_of(self, where): - """Get the index of the rows that match the given condition.""" + """Get the index of the rows that match the given condition. + See `~dbastable.SQLDatabase.index_of`. + + Parameters + ---------- + where : dict + Dictionary of conditions to select rows. 
Keys are column names, + values are values to compare. If it is a dict of values, all rows + equal to the values will be selected. If it is a dict of + `~dbastable.where.Where` objects, the conditions will be combined + with the AND operator. If None, all rows are selected. + + Returns + ------- + res : list + List of row indexes. + """ return self._db.index_of(self._name, where) def _resolve_tuple(self, key): From 3dd95a9568121eba906a49cec3fd895f4f93a99d Mon Sep 17 00:00:00 2001 From: Julio Campagnolo Date: Sun, 15 Oct 2023 11:44:36 -0300 Subject: [PATCH 11/32] improved docstrings of SQLRow and SQLColumn --- dbastable/_viewers.py | 32 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/dbastable/_viewers.py b/dbastable/_viewers.py index 7114d3d..efb7d3a 100644 --- a/dbastable/_viewers.py +++ b/dbastable/_viewers.py @@ -280,10 +280,7 @@ def __repr__(self): class SQLColumn: - """Handle an SQL column operations interfacing with the DB.""" - - def __init__(self, db, table, name): - """Initialize the column. + """Handle an SQL column operations interfacing with the DB. Parameters ---------- @@ -292,8 +289,9 @@ def __init__(self, db, table, name): table : str The name of the table in the database. name : str - The column name in the table. - """ + The column name in the table.""" + + def __init__(self, db, table, name): self._db = db self._table = table self._name = name @@ -355,20 +353,18 @@ def __repr__(self): class SQLRow: - """Handle and SQL table row interfacing with the DB.""" + """Handle and SQL table row interfacing with the DB. - def __init__(self, db, table, row): - """Initialize the row. + Parameters + ---------- + db : SQLDatabase + The parent database object. + table : str + The name of the table in the database. + row : int + The row index in the table.""" - Parameters - ---------- - db : SQLDatabase - The parent database object. - table : str - The name of the table in the database. 
- row : int - The row index in the table. - """ + def __init__(self, db, table, row): self._db = db self._table = table self._row = row From 9a06aa7072b8488a38b74fcddfc833baab655343 Mon Sep 17 00:00:00 2001 From: Julio Campagnolo Date: Sun, 15 Oct 2023 11:54:19 -0300 Subject: [PATCH 12/32] add structure to general usage --- docs/general_usage.rst | 0 docs/index.rst | 13 ++++++++++++- docs/viewer_classes.rst | 0 docs/where_statements.rst | 0 4 files changed, 12 insertions(+), 1 deletion(-) create mode 100644 docs/general_usage.rst create mode 100644 docs/viewer_classes.rst create mode 100644 docs/where_statements.rst diff --git a/docs/general_usage.rst b/docs/general_usage.rst new file mode 100644 index 0000000..e69de29 diff --git a/docs/index.rst b/docs/index.rst index 037383b..6f79224 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,7 +1,7 @@ DBasTable --------- -Handle SQLite database tables just as if they were `Numpy structured arrays `_, `~astropy.table.Table` or `~pandas.DataFrame`. +Handle SQLite database tables just as if they were `Numpy structured arrays `_, `~astropy.table.Table` or `~pandas.DataFrame`. This package is intended to be a simple interface to SQLite databases, so you can interact with them without knowing SQL commands. .. code-block:: python @@ -56,6 +56,17 @@ Documentation The documentation is available at https://dbastable.readthedocs.io/en/latest/ +Usage +----- + +For a general usage guide, see the following sections: + +.. 
toctree:: + :maxdepth: 1 + + general_usage + viewer_classes + where_statements License ------- diff --git a/docs/viewer_classes.rst b/docs/viewer_classes.rst new file mode 100644 index 0000000..e69de29 diff --git a/docs/where_statements.rst b/docs/where_statements.rst new file mode 100644 index 0000000..e69de29 From 785ec9b39ec1a252dabbf36e6dd9acd7b12c62e7 Mon Sep 17 00:00:00 2001 From: Julio Campagnolo Date: Sun, 15 Oct 2023 12:12:02 -0300 Subject: [PATCH 13/32] more structure chnges --- docs/general_usage.rst | 2 ++ docs/index.rst | 1 + docs/nonconformant.rst | 2 ++ docs/viewer_classes.rst | 2 ++ docs/where_statements.rst | 2 ++ 5 files changed, 9 insertions(+) create mode 100644 docs/nonconformant.rst diff --git a/docs/general_usage.rst b/docs/general_usage.rst index e69de29..65c0843 100644 --- a/docs/general_usage.rst +++ b/docs/general_usage.rst @@ -0,0 +1,2 @@ +General Usage Guide +------------------- diff --git a/docs/index.rst b/docs/index.rst index 6f79224..07b5564 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -67,6 +67,7 @@ For a general usage guide, see the following sections: general_usage viewer_classes where_statements + nonconformant License ------- diff --git a/docs/nonconformant.rst b/docs/nonconformant.rst new file mode 100644 index 0000000..703eb98 --- /dev/null +++ b/docs/nonconformant.rst @@ -0,0 +1,2 @@ +Non-Conformant Column Names +--------------------------- diff --git a/docs/viewer_classes.rst b/docs/viewer_classes.rst index e69de29..3946930 100644 --- a/docs/viewer_classes.rst +++ b/docs/viewer_classes.rst @@ -0,0 +1,2 @@ +Viewer Classes +-------------- diff --git a/docs/where_statements.rst b/docs/where_statements.rst index e69de29..f87490e 100644 --- a/docs/where_statements.rst +++ b/docs/where_statements.rst @@ -0,0 +1,2 @@ +Where Statements and Queries +---------------------------- From 25d4afe4cf2e83871e8959bda3640a60234e2d06 Mon Sep 17 00:00:00 2001 From: Julio Campagnolo Date: Sun, 15 Oct 2023 12:20:40 -0300 Subject: [PATCH 
14/32] update sanitizer to more sql standards --- dbastable/_sanitizer.py | 3 ++- dbastable/tests/test_sanitizer.py | 5 +++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/dbastable/_sanitizer.py b/dbastable/_sanitizer.py index b76c3f5..342d792 100644 --- a/dbastable/_sanitizer.py +++ b/dbastable/_sanitizer.py @@ -36,7 +36,8 @@ def _sanitize_key(self, key): # TODO: check for protected names if key.startswith(_B32_COL_PREFIX) or key == _ID_KEY: raise ValueError(f'{key} uses a protected name.') - if len([ch for ch in key if not ch.isalnum() and ch != '_']) != 0: + if len([ch for ch in key if not ch.isalnum() and ch != '_']) != 0 \ + or key[0].isdigit(): # if a name is invalid, encode it in base32 and add a prefix # if it is allowed if self._allow_b32_colnames: diff --git a/dbastable/tests/test_sanitizer.py b/dbastable/tests/test_sanitizer.py index 924660b..9da8261 100644 --- a/dbastable/tests/test_sanitizer.py +++ b/dbastable/tests/test_sanitizer.py @@ -96,6 +96,11 @@ def test_sanitize_dict(self): sanit = s._sanitize_colnames(d) self.assertEqual(sanit, {k.lower(): v for k, v in d.items()}) + def test_first_digit_error(self): + s = _Sanitizer(False) + with self.assertRaisesRegex(ValueError, 'Invalid'): + s._sanitize_colnames('2test') + class TestSanitizeGetColumnName(TestCaseWithNumpyCompare): def test_get_column_name(self): From 214fae8fe38fed402ddac6688ca2f3053ab06a50 Mon Sep 17 00:00:00 2001 From: Julio Campagnolo Date: Sun, 15 Oct 2023 13:01:47 -0300 Subject: [PATCH 15/32] fix b32 padding --- dbastable/_sanitizer.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dbastable/_sanitizer.py b/dbastable/_sanitizer.py index 342d792..5e25e4e 100644 --- a/dbastable/_sanitizer.py +++ b/dbastable/_sanitizer.py @@ -1,5 +1,6 @@ import numpy as np import base64 +import math from ._def import _B32_COL_PREFIX, _ID_KEY @@ -9,8 +10,8 @@ def _colname_to_b32_decode(string): # ensure remove the prefix string = string.lstrip(_B32_COL_PREFIX) # 
base32 need 8 chars padding - remainder = len(string) % 8 - string += '='*(8-remainder) + pad = math.ceil(len(string) / 8) * 8 - len(string) + string += '='*pad return string From b0e3812c52a48dc3e342aaec354ff2b823aebd77 Mon Sep 17 00:00:00 2001 From: Julio Campagnolo Date: Sun, 15 Oct 2023 13:05:22 -0300 Subject: [PATCH 16/32] test length of b32 encoding --- dbastable/tests/test_sanitizer.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/dbastable/tests/test_sanitizer.py b/dbastable/tests/test_sanitizer.py index 9da8261..ce39310 100644 --- a/dbastable/tests/test_sanitizer.py +++ b/dbastable/tests/test_sanitizer.py @@ -101,6 +101,14 @@ def test_first_digit_error(self): with self.assertRaisesRegex(ValueError, 'Invalid'): s._sanitize_colnames('2test') + def test_b32_padding(self): + s = _Sanitizer(True) + for i in range(64): + bs = '-'+'a'*i + encoded = s._encode_b32(bs) + decoded = s._decode_b32(encoded) + self.assertEqual(bs, decoded) + class TestSanitizeGetColumnName(TestCaseWithNumpyCompare): def test_get_column_name(self): From 07dc93e3289708bb57af054a8cb7f823561d9bc8 Mon Sep 17 00:00:00 2001 From: Julio Campagnolo Date: Sun, 15 Oct 2023 13:06:51 -0300 Subject: [PATCH 17/32] non-conformant names docs --- docs/nonconformant.rst | 76 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) diff --git a/docs/nonconformant.rst b/docs/nonconformant.rst index 703eb98..92823ea 100644 --- a/docs/nonconformant.rst +++ b/docs/nonconformant.rst @@ -1,2 +1,78 @@ Non-Conformant Column Names --------------------------- + +By default, any operation that interfaces with column names will only accept names that conform to the SQLite names standard. This means that column names must begin with a letter, and can only contain letters, numbers, and underscores. If you try to access a column that does not conform to this standard, you will get an error. + +.. 
code-block:: python + + >>> from dbastable import SQLDatabase + >>> db = SQLDatabase() + >>> db.add_table('my_table') + >>> db.add_column('my_table', '1test') + ... + ValueError: Invalid column name: 1test. + >>> db.add_column('my_table', 'test column') + ... + ValueError: Invalid column name: test column. + >>> db.add_column('my_table', 'test!with!exclamation!points') + ... + ValueError: Invalid column name: test!with!exclamation!points. + +However, you can force the database to accept these non-conformant column names by setting the ``allow_b32_colnames`` parameter to ``True`` when you create the database. This will enable the non-conformant names to be encoded to `Base32 encoding `_, and decoded back to their original names when you access them. This name encoding will be done by `base64.b32encode` and `base64.b32decode` from the Python standard library automatically. + +When using this, the real column name will be ``__b32__`` followed by the Base32-encoded name, already stripped of the ``=`` padding added by the encoder. This will make the string unreadable by humans, but it will be decoded back to the original name when you access it. For example, if you have a column named ``1test``, it will be encoded to ``__b32__GF2GK43U``, and if you have a column named ``test column``, it will be encoded to ``__b32__ORSXG5BAMNXWY5LNNY``. So, if you want to access the database outside of Python, it is recommended that you *do not use this feature*. Also, every time you use this database, remember to set the ``allow_b32_colnames`` parameter to ``True`` to properly access the column names. + +If you are using `~dbastable.SQLDatabase`, the access of the columns will be fully transparent to the user and you will not need to worry about the encoding and decoding of the column names. + +.. 
code-block:: python + + >>> db = SQLDatabase(allow_b32_colnames=True) + >>> db.add_table('my_table') + >>> db.add_column('my_table', '1test', data=[1, 2, 3]) + >>> db.add_column('my_table', 'test column', data=[1, 2, 3]) + >>> db.add_column('my_table', 'test!with!exclamation!points', data=[1, 2, 3]) + >>> db.column_names('my_table') + ['1test', 'test column', 'test!with!exclamation!points'] + +If you want to access the original column names, you can use the ``do_not_decode`` parameter of `~dbastable.SQLDatabase.column_names` to get the encoded names. + +.. code-block:: python + + >>> db.column_names('my_table', do_not_decode=True) + ['__b32__GF2GK43U', + '__b32__ORSXG5BAMNXWY5LNNY', + '__b32__ORSXG5BBO5UXI2BBMV4GG3DBNVQXI2LPNYQXA33JNZ2HG'] + +If the column does not need to be encoded, it will not be encoded. + +.. code-block:: python + + >>> db.add_column('my_table', 'test') + >>> db.column_names('my_table') + ['1test', 'test column', 'test!with!exclamation!points', 'test'] + >>> db.column_names('my_table', do_not_decode=True) + ['__b32__GF2GK43U', + '__b32__ORSXG5BAMNXWY5LNNY', + '__b32__ORSXG5BBO5UXI2BBMV4GG3DBNVQXI2LPNYQXA33JNZ2HG', + 'test'] + +Any column access method will work *ONLY* with the original names, so you do not need to worry about the encoded names. + +.. code-block:: python + + >>> db['my_table']['1test'].values + [1, 2, 3] + >>> db.get_column('my_table', 'test column').values + [1, 2, 3] + >>> db.select('my_table', columns=['test!with!exclamation!points']) + [(1,), (2,), (3,)] + +Where statements also use original column names, and not the encoded ones. + +.. 
code-block:: python + + >>> from dbastable import Where + >>> db.select('my_table', where={'test!with!exclamation!points': 1}) + [(1, 1, 1)] + >>> db.select('my_table', where=Where('1test', '>', 1)) + [(2, 2, 2), (3, 3, 3)] From ba75cb07571a39e03cec38ae89b0c9c1d7e82240 Mon Sep 17 00:00:00 2001 From: Julio Campagnolo Date: Sun, 15 Oct 2023 13:07:02 -0300 Subject: [PATCH 18/32] general usage title --- docs/general_usage.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/general_usage.rst b/docs/general_usage.rst index 65c0843..7590469 100644 --- a/docs/general_usage.rst +++ b/docs/general_usage.rst @@ -1,2 +1,4 @@ General Usage Guide ------------------- + + From dcd9c18a51e24e993a1cbc4abb843e0767cdd27a Mon Sep 17 00:00:00 2001 From: Julio Campagnolo Date: Sun, 15 Oct 2023 13:28:37 -0300 Subject: [PATCH 19/32] run pytest on docs --- .github/workflows/docs.yml | 24 ++++++++++++++++++++++++ docs/nonconformant.rst | 6 +++--- 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 355ba29..c5cdfd3 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -40,3 +40,27 @@ jobs: run: | cd docs make html + + doctest: + name: Documentation Testing + runs-on: ubuntu-latest + if: "!(contains(github.event.head_commit.message, '[skip ci]') || contains(github.event.head_commit.message, '[ci skip]'))" + + steps: + - uses: actions/checkout@v3 + with: + fetch-depth: 0 + - name: Set up Python 3.11 + uses: actions/setup-python@v4 + with: + python-version: "3.11" + cache: pip + cache-dependency-path: '**/pyproject.toml' + - name: Install base dependencies + run: python -m pip install --upgrade --quiet pip setuptools + - name: Install pytest + run: python -m pip install --upgrade --quite pytest pytest-doctestplus + - name: Install package dependencies + run: pip install -U .[docs] .[test] + - name: Run pytest on docs + run: pytest --doctest-rst --doctest-plus docs diff --git 
a/docs/nonconformant.rst b/docs/nonconformant.rst index 92823ea..4fa2dec 100644 --- a/docs/nonconformant.rst +++ b/docs/nonconformant.rst @@ -8,13 +8,13 @@ By default, any operation that interfaces with column names will only accept nam >>> from dbastable import SQLDatabase >>> db = SQLDatabase() >>> db.add_table('my_table') - >>> db.add_column('my_table', '1test') + >>> db.add_column('my_table', '1test') # doctest: +SKIP ... ValueError: Invalid column name: 1test. - >>> db.add_column('my_table', 'test column') + >>> db.add_column('my_table', 'test column') # doctest: +SKIP ... ValueError: Invalid column name: test column. - >>> db.add_column('my_table', 'test!with!exclamation!points') + >>> db.add_column('my_table', 'test!with!exclamation!points') # doctest: +SKIP ... ValueError: Invalid column name: test!with!exclamation!points. From 0ebf4d0aacc9844d0d965d8edc28ebce2dc9bdf5 Mon Sep 17 00:00:00 2001 From: Julio Campagnolo Date: Sun, 15 Oct 2023 13:31:09 -0300 Subject: [PATCH 20/32] update docs action --- .github/workflows/docs.yml | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index c5cdfd3..bf9aaed 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -32,10 +32,9 @@ jobs: cache: pip cache-dependency-path: '**/pyproject.toml' - name: Install base dependencies - run: | - python -m pip install --upgrade --quiet pip setuptools + run: pip install -U -q pip setuptools - name: Install package dependencies - run: pip install -U .[docs] + run: pip install -U -q .[docs] - name: Build with Sphinx run: | cd docs @@ -57,9 +56,9 @@ jobs: cache: pip cache-dependency-path: '**/pyproject.toml' - name: Install base dependencies - run: python -m pip install --upgrade --quiet pip setuptools + run: pip install -U -q pip setuptools - name: Install pytest - run: python -m pip install --upgrade --quite pytest pytest-doctestplus + run: pip install -U -q pytest pytest-doctestplus - 
name: Install package dependencies run: pip install -U .[docs] .[test] - name: Run pytest on docs From a3e7bd38e6aab28853e5266071fb1fe758652cf7 Mon Sep 17 00:00:00 2001 From: Julio Campagnolo Date: Sun, 15 Oct 2023 13:43:31 -0300 Subject: [PATCH 21/32] ignore files that may be created during tests --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index d39d16d..9e69ec2 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,7 @@ .vscode version.py +# files that may be created during tests +*.db # docs generated files docs/_build From 7824d8b9146a1687903be16548b8711fdb20fb6c Mon Sep 17 00:00:00 2001 From: Julio Campagnolo Date: Sun, 15 Oct 2023 13:43:48 -0300 Subject: [PATCH 22/32] SQLTable: __setitem__ with add_column --- dbastable/_viewers.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/dbastable/_viewers.py b/dbastable/_viewers.py index efb7d3a..a3814f7 100644 --- a/dbastable/_viewers.py +++ b/dbastable/_viewers.py @@ -245,7 +245,11 @@ def __setitem__(self, key, value): if isinstance(key, int): self.set_row(key, value) elif isinstance(key, str): - self.set_column(key, value) + try: + self.set_column(key, value) + except KeyError: + # if the column does not exist, add it + self.add_column(key, data=value) elif isinstance(key, tuple): if len(key) not in (1, 2): raise KeyError(f'{key}') From 30caa0c75405453eb277852a48e5aa283f265b26 Mon Sep 17 00:00:00 2001 From: Julio Campagnolo Date: Sun, 15 Oct 2023 13:48:16 -0300 Subject: [PATCH 23/32] fix nonconformant doctests --- .../tests/test_non_conformant_columns.py | 25 +++++++++++++++++++ docs/nonconformant.rst | 4 +-- 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/dbastable/tests/test_non_conformant_columns.py b/dbastable/tests/test_non_conformant_columns.py index 0dbd97d..a2efd54 100644 --- a/dbastable/tests/test_non_conformant_columns.py +++ b/dbastable/tests/test_non_conformant_columns.py @@ -144,3 +144,28 @@ def 
test_add_row_dict_add_missing_columns(self): db.add_rows('test', {'TEST!1 ': 5, 'TEST@ 2': 8}) self.assertEqual(db['test']['test!1 '].values, [4, 5]) self.assertEqual(db['test']['test@ 2'].values, [7, 8]) + + def test_add_columns_with_data(self): + from dbastable import SQLDatabase + db = SQLDatabase(allow_b32_colnames=True) + db.add_table('my_table') + db.add_column('my_table', '1test', data=[1, 2, 3]) + db.add_column('my_table', 'test column', data=[1, 2, 3]) + db.add_column('my_table', 'test!with!exclamation!points', data=[1, 2, 3]) + db.add_column('my_table', 'test@with@at@signs', data=[1, 2, 3]) + + self.assertEqual(db['my_table']['1test'].values, [1, 2, 3]) + self.assertEqual(db['my_table']['test column'].values, [1, 2, 3]) + self.assertEqual(db['my_table']['test!with!exclamation!points'].values, [1, 2, 3]) + self.assertEqual(db['my_table']['test@with@at@signs'].values, [1, 2, 3]) + + db.add_column('my_table', 'test') + + self.assertEqual(db['my_table']['1test'].values, [1, 2, 3]) + self.assertEqual(db['my_table']['test column'].values, [1, 2, 3]) + self.assertEqual(db['my_table']['test!with!exclamation!points'].values, [1, 2, 3]) + self.assertEqual(db['my_table']['test@with@at@signs'].values, [1, 2, 3]) + + sel = db.select('my_table', where=Where('1test', '>', 1)) + self.assertEqual(sel, [(2, 2, 2, 2, None), (3, 3, 3, 3, None)]) + diff --git a/docs/nonconformant.rst b/docs/nonconformant.rst index 4fa2dec..4c97587 100644 --- a/docs/nonconformant.rst +++ b/docs/nonconformant.rst @@ -73,6 +73,6 @@ Where statements also use original column names, and not the encoded ones. 
>>> from dbastable import Where >>> db.select('my_table', where={'test!with!exclamation!points': 1}) - [(1, 1, 1)] + [(1, 1, 1, None)] >>> db.select('my_table', where=Where('1test', '>', 1)) - [(2, 2, 2), (3, 3, 3)] + [(2, 2, 2, None), (3, 3, 3, None)] From 8420cbae4b047638deda8d01f776c6ba9302f49e Mon Sep 17 00:00:00 2001 From: Julio Campagnolo Date: Sun, 15 Oct 2023 13:52:39 -0300 Subject: [PATCH 24/32] fix more tests --- dbastable/tests/test_viewers.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/dbastable/tests/test_viewers.py b/dbastable/tests/test_viewers.py index e9135bc..0606cf9 100644 --- a/dbastable/tests/test_viewers.py +++ b/dbastable/tests/test_viewers.py @@ -444,8 +444,9 @@ def test_table_setitem_str(self): self.assertEqual(table.column_names, ['a', 'b']) self.assertEqualArray(table.values, expect) - with self.assertRaises(KeyError): - table['c'] = np.arange(10, 20) + # if the column does not exists, it will be created + table['c'] = np.arange(10, 20) + self.assertEqualArray(table['c'].values, np.arange(10, 20)) def test_table_setitem_tuple(self): db = self.db @@ -462,8 +463,10 @@ def test_table_setitem_tuple(self): self.assertEqual(table.column_names, ['a', 'b']) self.assertEqualArray(table.values, expect) - with self.assertRaises(KeyError): - table[('c',)] = np.arange(10, 20) + # if the column does not exists, it will be created + table[('c',)] = np.arange(10, 20) + self.assertEqualArray(table['c'].values, np.arange(10, 20)) + with self.assertRaises(IndexError): table[(11,)] = np.arange(10, 20) @@ -487,8 +490,10 @@ def test_table_setitem_tuple_multiple(self): expect[[2, 7], 1] = -888 self.assertEqualArray(table.values, expect) - with self.assertRaises(KeyError): - table[('c',)] = np.arange(10, 20) + # if the column does not exists, it will be created + table[('c',)] = np.arange(10, 20) + self.assertEqualArray(table['c'].values, np.arange(10, 20)) + with self.assertRaises(IndexError): table[(11,)] = 
np.arange(10, 20) with self.assertRaises(KeyError): From 102d0ad0184f249e16a62305205334fc3d79b8ab Mon Sep 17 00:00:00 2001 From: Julio Campagnolo Date: Sun, 15 Oct 2023 13:53:49 -0300 Subject: [PATCH 25/32] complete skip publish if not at main --- .github/workflows/python-publish.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml index 42bd17b..44cdcde 100644 --- a/.github/workflows/python-publish.yml +++ b/.github/workflows/python-publish.yml @@ -19,6 +19,7 @@ jobs: publish-pypi: name: Build and publish to PyPi runs-on: ubuntu-latest + if: github.ref == 'refs/heads/main' steps: - uses: actions/checkout@master with: @@ -41,7 +42,6 @@ jobs: --wheel --outdir dist/ - name: Publish package to PyPi - if: github.ref == 'refs/heads/main' uses: pypa/gh-action-pypi-publish@release/v1 with: password: ${{ secrets.PYPI_API_TOKEN }} From 86d9e559f47aa2636b01632fdac3cb74339e2662 Mon Sep 17 00:00:00 2001 From: Julio Campagnolo Date: Sun, 15 Oct 2023 13:55:09 -0300 Subject: [PATCH 26/32] fix pep8 in tests --- dbastable/tests/test_non_conformant_columns.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/dbastable/tests/test_non_conformant_columns.py b/dbastable/tests/test_non_conformant_columns.py index a2efd54..94129c2 100644 --- a/dbastable/tests/test_non_conformant_columns.py +++ b/dbastable/tests/test_non_conformant_columns.py @@ -151,20 +151,25 @@ def test_add_columns_with_data(self): db.add_table('my_table') db.add_column('my_table', '1test', data=[1, 2, 3]) db.add_column('my_table', 'test column', data=[1, 2, 3]) - db.add_column('my_table', 'test!with!exclamation!points', data=[1, 2, 3]) + db.add_column('my_table', 'test!with!exclamation!points', + data=[1, 2, 3]) db.add_column('my_table', 'test@with@at@signs', data=[1, 2, 3]) self.assertEqual(db['my_table']['1test'].values, [1, 2, 3]) self.assertEqual(db['my_table']['test column'].values, [1, 2, 
3]) - self.assertEqual(db['my_table']['test!with!exclamation!points'].values, [1, 2, 3]) - self.assertEqual(db['my_table']['test@with@at@signs'].values, [1, 2, 3]) + self.assertEqual(db['my_table']['test!with!exclamation!points'].values, + [1, 2, 3]) + self.assertEqual(db['my_table']['test@with@at@signs'].values, + [1, 2, 3]) db.add_column('my_table', 'test') self.assertEqual(db['my_table']['1test'].values, [1, 2, 3]) self.assertEqual(db['my_table']['test column'].values, [1, 2, 3]) - self.assertEqual(db['my_table']['test!with!exclamation!points'].values, [1, 2, 3]) - self.assertEqual(db['my_table']['test@with@at@signs'].values, [1, 2, 3]) + self.assertEqual(db['my_table']['test!with!exclamation!points'].values, + [1, 2, 3]) + self.assertEqual(db['my_table']['test@with@at@signs'].values, + [1, 2, 3]) sel = db.select('my_table', where=Where('1test', '>', 1)) self.assertEqual(sel, [(2, 2, 2, 2, None), (3, 3, 3, 3, None)]) From 717293a0ff86f04eb829a0af0412f8551ed474cc Mon Sep 17 00:00:00 2001 From: Julio Campagnolo Date: Sun, 15 Oct 2023 14:14:13 -0300 Subject: [PATCH 27/32] raise tests --- dbastable/_sanitizer.py | 2 +- docs/conf.py | 2 +- docs/nonconformant.rst | 15 +++++++++------ 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/dbastable/_sanitizer.py b/dbastable/_sanitizer.py index 5e25e4e..37e642f 100644 --- a/dbastable/_sanitizer.py +++ b/dbastable/_sanitizer.py @@ -43,7 +43,7 @@ def _sanitize_key(self, key): # if it is allowed if self._allow_b32_colnames: return self._encode_b32(key) - raise ValueError(f'Invalid column name: {key}.') + raise ValueError(f'Invalid column name: {key}') return key.lower() def _sanitize_colnames(self, data): diff --git a/docs/conf.py b/docs/conf.py index 7a319a9..3db9213 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -47,7 +47,7 @@ htmlhelp_basename = 'dbastable' html_theme_options = { "show_prev_next": False, - "footer_items": ["copyright", "sphinx-version", "theme-version"] + "footer_items": ["copyright", 
"sphinx-version"] } autosummary_generate = True diff --git a/docs/nonconformant.rst b/docs/nonconformant.rst index 4c97587..7823755 100644 --- a/docs/nonconformant.rst +++ b/docs/nonconformant.rst @@ -8,15 +8,18 @@ By default, any operation that interfaces with column names will only accept nam >>> from dbastable import SQLDatabase >>> db = SQLDatabase() >>> db.add_table('my_table') - >>> db.add_column('my_table', '1test') # doctest: +SKIP + >>> db.add_column('my_table', '1test') # doctest: +IGNORE_EXCEPTION_DETAIL + Traceback (most recent call last): ... - ValueError: Invalid column name: 1test. - >>> db.add_column('my_table', 'test column') # doctest: +SKIP + ValueError: Invalid column name: 1test + >>> db.add_column('my_table', 'test column') # doctest: +IGNORE_EXCEPTION_DETAIL + Traceback (most recent call last): ... - ValueError: Invalid column name: test column. - >>> db.add_column('my_table', 'test!with!exclamation!points') # doctest: +SKIP + ValueError: Invalid column name: test column + >>> db.add_column('my_table', 'test!with!exclamation!points') # doctest: +IGNORE_EXCEPTION_DETAIL + Traceback (most recent call last): ... - ValueError: Invalid column name: test!with!exclamation!points. + ValueError: Invalid column name: test!with!exclamation!points However, you can force the database to accept these non-conformant column names by setting the ``allow_b32_colnames`` parameter to ``True`` when you create the database. This will enable the non-conformant names to be encoded to `Base32 encoding `_, and decoded back to their original names when you access them. This name encoding will be done by `base64.b32encode` and `base64.b32decode` from the Python standard library automatically. 
From 8b7d63f75fc5cbbdb91acd42c054f0e2eda8cd28 Mon Sep 17 00:00:00 2001 From: Julio Campagnolo Date: Wed, 18 Oct 2023 18:22:44 -0300 Subject: [PATCH 28/32] changed some reprs and buggy behavior --- dbastable/_sqldb.py | 2 -- dbastable/_viewers.py | 5 +++-- dbastable/tests/test_viewers.py | 4 ++-- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/dbastable/_sqldb.py b/dbastable/_sqldb.py index 0e5ede2..87d3a05 100644 --- a/dbastable/_sqldb.py +++ b/dbastable/_sqldb.py @@ -398,8 +398,6 @@ def add_table(self, table, columns=None, data=None): comm = f"CREATE TABLE '{table}'" comm += f" (\n{_ID_KEY} INTEGER PRIMARY KEY AUTOINCREMENT" - if columns is not None and data is not None: - raise ValueError('cannot specify both columns and data.') if columns is not None: comm += ",\n" for i, name in enumerate(columns): diff --git a/dbastable/_viewers.py b/dbastable/_viewers.py index a3814f7..46ff8f7 100644 --- a/dbastable/_viewers.py +++ b/dbastable/_viewers.py @@ -279,7 +279,7 @@ def __repr__(self): s = f"{self.__class__.__name__} '{self.name}'" s += f" in database '{self.db}':" s += f"({len(self.column_names)} columns x {len(self)} rows)\n" - s += '\n'.join(self.as_table().__repr__().split('\n')[1:]) + s += str(self.as_table()) return s @@ -351,7 +351,8 @@ def __contains__(self, item): def __repr__(self): """Get a string representation of the column.""" - s = f"{self.__class__.__name__} {self._name} in table '{self._table}'" + s = f"{self.__class__.__name__} '{self._name}'" + s += f" in table '{self._table}'" s += f" ({len(self)} rows)" return s diff --git a/dbastable/tests/test_viewers.py b/dbastable/tests/test_viewers.py index 0606cf9..762bde5 100644 --- a/dbastable/tests/test_viewers.py +++ b/dbastable/tests/test_viewers.py @@ -197,7 +197,7 @@ def test_table_repr(self): expect = "SQLTable 'test' in database ':memory:':" expect += "(2 columns x 10 rows)\n" - expect += '\n'.join(table.as_table().__repr__().split('\n')[1:]) + expect += str(table.as_table()) 
self.assertIsInstance(table, SQLTable) self.assertEqual(repr(table), expect) @@ -574,7 +574,7 @@ def test_column_repr(self): db = self.db table = db['test'] column = table['a'] - self.assertEqual(repr(column), "SQLColumn a in table 'test' (10 rows)") + self.assertEqual(repr(column), "SQLColumn 'a' in table 'test' (10 rows)") def test_column_contains(self): db = self.db From 3b48c8f288b01d4600caf95fd5deed642d18863a Mon Sep 17 00:00:00 2001 From: Julio Campagnolo Date: Wed, 18 Oct 2023 18:22:57 -0300 Subject: [PATCH 29/32] general usage documentation --- docs/general_usage.rst | 361 +++++++++++++++++++++++++++++++++++++++- docs/viewer_classes.rst | 4 +- 2 files changed, 363 insertions(+), 2 deletions(-) diff --git a/docs/general_usage.rst b/docs/general_usage.rst index 7590469..99c94aa 100644 --- a/docs/general_usage.rst +++ b/docs/general_usage.rst @@ -1,4 +1,363 @@ General Usage Guide -------------------- +=================== +`dbastable` intends to create and manipulate tables in SQLite databases in the same way you interact with `~astropy.table.Table` from the `~astropy` package. +But, as an SQLite database can contain numerous tables, you need first to create an `~dbastable.SQLDatabase` object, which will be the entry point to the database. This database can be created in memory or on disk. If you pass a filename to the `~dbastable.SQLDatabase` constructor, the database will be created on disk. If you pass ``':memory:'`` as filename, the database will be created in memory. + +.. code-block:: python + + >>> from dbastable import SQLDatabase + >>> # this will create a database on disk + >>> db = SQLDatabase('mydatabase.db') + >>> # this will create a database in memory + >>> db = SQLDatabase(':memory:') + +Another useful argument is the ``autocommit``, which enables `~sqlite3` to commit changes automatically after each command. If you set it to ``False``, you will need to call the ``commit()`` method to commit changes to the database. 
While ``autocommit`` reduces the need for manual method calls, it can reduce the performance of the database if a lot of small operations are done. + +Data Creation, Manipulation and Deletion +---------------------------------------- + +Creating Tables +^^^^^^^^^^^^^^^ + +Once you have a database, you can create a table in it. The creation is performed by the `SQLDatabase.add_table` method. This method takes a table name as the main argument. + +.. code-block:: python + + >>> # this creates a table called 'table_1' in the database + >>> db.add_table('table_1') + >>> # to access the created table, you can use the '[]' operator + >>> db['table_1'] + SQLTable 'table_1' in database ':memory:':(0 columns x 0 rows) + + +You can also create a table with a list of columns and/or data. The ``columns`` can be created as a list of strings, which are the column names. The ``data`` is a list of tuples, where each tuple represents a row of data. The rows must be ordered in the same way as the columns. + +.. code-block:: python + + >>> # this creates a table called 'table_with_data' with 2 columns and 2 rows + >>> db.add_table('table_with_data', columns=['name', 'birth'], + ... data=[('Someone', 2001), ('No one', 2002)]) + >>> db['table_with_data'] + SQLTable 'table_with_data' in database ':memory:':(2 columns x 2 rows) + name birth + ------- ----- + Someone 2001 + No one 2002 + +.. code-block:: python + + >>> # this creates a table called 'table_with_columns' with 2 columns but no data + >>> db.add_table('table_with_columns', columns=['name', 'birth']) + >>> db['table_with_columns'] + SQLTable 'table_with_columns' in database ':memory:':(2 columns x 0 rows) + name birth + ---- ----- + +Adding Columns and Data +^^^^^^^^^^^^^^^^^^^^^^^ + +To add columns to your table, there is a method called `SQLDatabase.add_column`. The main arguments are the table name and the column name. You can also pass a ``data`` argument, which is a list of data to be added to the column. 
The data must be ordered in the same way as the rows of the already existing table. If no data is passed, the column will be created with ``None`` values. + +.. code-block:: python + + >>> # column 'email' will be filled with None + >>> db.add_column('table_with_data', 'email') + >>> db.add_column('table_with_data', 'age', data=[22, 21]) + >>> db['table_with_data'] + SQLTable 'table_with_data' in database ':memory:':(4 columns x 2 rows) + name birth email age + ------- ----- ----- --- + Someone 2001 None 22 + No one 2002 None 21 + +You can also create new columns using ``__setitem__`` features. + +.. code-block:: python + + >>> # create a new column 'email' with some data + >>> db['table_with_columns']['email'] = ['test@test.org'] + >>> db['table_with_columns'] + SQLTable 'table_with_columns' in database ':memory:':(3 columns x 1 rows) + name birth email + ---- ----- ------------- + None None test@test.org + +Adding Rows to the Table +^^^^^^^^^^^^^^^^^^^^^^^^ + +To add rows to the table, you can use the `SQLDatabase.add_rows` method. A ``table`` name must be passed as the first argument. The ``data`` of the rows can be one of two types: + +* A `list` of `tuple`, where each tuple represents a row of data. The rows must be ordered in the same way as the columns. + + .. code-block:: python + + >>> # this will add 2 rows to the table using a list of tuples + >>> db.add_rows('table_with_data', [('Someone else', 2003, 'test@yes.no', 20), + ... ('Another one', 2004, 'test@no.yes', 19)]) + >>> db['table_with_data'] + SQLTable 'table_with_data' in database ':memory:':(4 columns x 4 rows) + name birth email age + ------------ ----- ----------- --- + Someone 2001 None 22 + No one 2002 None 21 + Someone else 2003 test@yes.no 20 + Another one 2004 test@no.yes 19 + +* A `dict`, where each key represents the data of a column. All the elements must be scalars or 1d-lists with the same length. + + .. 
code-block:: python + + >>> # this will add 2 rows to the table using a dict + >>> db.add_rows('table_with_data', {'name': ['Mr. Mr', 'Mrs. Mrs'], + ... 'birth': [1990, 1991], + ... 'email': [None, 'test@test.test'], + ... 'age': [30, 29]}) + >>> db['table_with_data'] + SQLTable 'table_with_data' in database ':memory:':(4 columns x 6 rows) + name birth email age + ------------ ----- -------------- --- + Someone 2001 None 22 + No one 2002 None 21 + Someone else 2003 test@yes.no 20 + Another one 2004 test@no.yes 19 + Mr. Mr 1990 None 30 + Mrs. Mrs 1991 test@test.test 29 + +When adding a row with a dict, you can optionally choose to add the missing columns to the table. This is done by passing ``add_columns=True`` to the `SQLDatabase.add_rows` method. The missing columns will be created with ``None`` values. + +.. code-block:: python + + >>> db.add_rows('table_with_data', {'name': 'From Future', 'birth': 3000, + ... 'flying_cars': 2}, add_columns=True) + >>> db['table_with_data'] + SQLTable 'table_with_data' in database ':memory:':(5 columns x 7 rows) + name birth email age flying_cars + ------------ ----- -------------- ---- ----------- + Someone 2001 None 22 None + No one 2002 None 21 None + Someone else 2003 test@yes.no 20 None + Another one 2004 test@no.yes 19 None + Mr. Mr 1990 None 30 None + Mrs. Mrs 1991 test@test.test 29 None + From Future 3000 None None 2 + +Accessing the Data +^^^^^^^^^^^^^^^^^^ + +The access of data was designed to use as much as possible the ``[]`` (`__getitem__`) operator. But, as SQLite interacts with the data in a very different way, we developed a series of :doc:`viewer classes <viewer_classes>` that will interface the queries in the database. These classes are `~dbastable.SQLTable`, `~dbastable.SQLColumn` and `~dbastable.SQLRow`. These three viewer classes are also returned by the `SQLDatabase.get_table`, `SQLDatabase.get_column` and `SQLDatabase.get_row` methods.
+ +When working with the ``[]`` operator, it's mandatory that the first element of the item to get is a table name, just like we did above to show the results. But you can access a column, a row or a cell in the same way. + +To get a table: + +.. code-block:: python + + >>> # get the table + >>> table = db['table_with_data'] + >>> print(table) # doctest: +ELLIPSIS + SQLTable 'table_with_data' in database ':memory:':(5 columns x 7 rows) + ... + +To work with a column of a table: + +.. code-block:: python + + >>> col = db['table_with_data', 'name'] + >>> print(col) + SQLColumn 'name' in table 'table_with_data' (7 rows) + >>> print(col.values) + ['Someone', 'No one', 'Someone else', 'Another one', 'Mr. Mr', 'Mrs. Mrs', 'From Future'] + +.. code-block:: python + + >>> # or, you can get it from the SQLTable directly + >>> col = table['name'] + >>> print(col) + SQLColumn 'name' in table 'table_with_data' (7 rows) + >>> print(col.values) + ['Someone', 'No one', 'Someone else', 'Another one', 'Mr. Mr', 'Mrs. Mrs', 'From Future'] + +To work with a row of a table: + +.. code-block:: python + + >>> row = db['table_with_data', 0] + >>> print(row) + SQLRow 0 in table 'table_with_data' {'name': 'Someone', 'birth': 2001, 'email': None, 'age': 22, 'flying_cars': None} + >>> print(row.values) + ('Someone', 2001, None, 22, None) + +.. code-block:: python + + >>> # or, you can get it from the SQLTable directly + >>> row = table[3] + >>> print(row) + SQLRow 3 in table 'table_with_data' {'name': 'Another one', 'birth': 2004, 'email': 'test@no.yes', 'age': 19, 'flying_cars': None} + +When the depth level of the `__getitem__` reaches a cell, only the value is returned and not a full new viewer object. So, when you call for a row of a column or a column of a row, you will get the value of the cell. + +.. code-block:: python + + >>> print(db['table_with_data', 0, 'name']) + Someone + >>> print(db['table_with_data', 'birth', 2]) + 2003 + >>> # Also, you can operate rows as slices!
+ >>> print(db['table_with_data', 'name', 1:3]) + ['No one', 'Someone else'] + +Changing the Data +^^^^^^^^^^^^^^^^^ + +In the same way, you can use the ``[]`` operator (`__setitem__`) to set data for cells, entire columns or entire rows. This operator, as mentioned earlier, can also be used to create new columns. It will perform the data setting by calling `SQLDatabase.set_item`, `SQLDatabase.set_column` or `SQLDatabase.set_row` methods. + +.. code-block:: python + + >>> # everyone has a flying car now! + >>> db['table_with_data', 'flying_cars'] = [2, 1, 3, 4, 1, 2, 6] + >>> # Someone else was born in 1995 + >>> db['table_with_data', 2] = ('Someone else', 1995, 'someone@noplace.org', 28, 1) + >>> # Mr. Mr now has an email + >>> db['table_with_data', 4, 'email'] = 'mistermister@nomail.com' + >>> print(db['table_with_data']) + SQLTable 'table_with_data' in database ':memory:':(5 columns x 7 rows) + name birth email age flying_cars + ------------ ----- ----------------------- ---- ----------- + Someone 2001 None 22 2 + No one 2002 None 21 1 + Someone else 1995 someone@noplace.org 28 1 + Another one 2004 test@no.yes 19 4 + Mr. Mr 1990 mistermister@nomail.com 30 1 + Mrs. Mrs 1991 test@test.test 29 2 + From Future 3000 None None 6 + +It's not possible to change the name of a column or a table. If you need to do that, you will need to create a new column or table with the new name and copy the data from the old one. Also, it's not possible to change all the data of a table at once. If you need to do that, you will need to drop the table and create a new one with the same name. + +Deleting The Data +^^^^^^^^^^^^^^^^^ + +You can delete (drop) a table, delete columns or rows. This is done by calling the `SQLDatabase.drop_table`, `SQLDatabase.delete_column` or `SQLDatabase.delete_row` methods. The `del` methods were not implemented due to data integrity concerns. So, by calling these methods directly you are more aware of what you are doing. + +..
code-block:: python + + >>> # we don't need the table_with_columns anymore + >>> db.drop_table('table_with_columns') + >>> # I don't care if they have a flying car or not + >>> db.delete_column('table_with_data', 'flying_cars') + >>> # From Future returned to future + >>> db.delete_row('table_with_data', 6) + >>> print(db['table_with_data']) + SQLTable 'table_with_data' in database ':memory:':(4 columns x 6 rows) + name birth email age + ------------ ----- ----------------------- --- + Someone 2001 None 22 + No one 2002 None 21 + Someone else 1995 someone@noplace.org 28 + Another one 2004 test@no.yes 19 + Mr. Mr 1990 mistermister@nomail.com 30 + Mrs. Mrs 1991 test@test.test 29 + +Additional Features and Properties +---------------------------------- + +The `~dbastable.SQLDatabase` class has some additional features and properties that can be useful. You can interact with the table in a more SQL-like way, but without the need to write SQL commands. You can also get the list of tables in the database and the list of columns in a table, or the length of a table. + +Properties +^^^^^^^^^^ + +The two main properties that you may use to interact with the database are `SQLDatabase.table_names` and `SQLDatabase.column_names`. + +To get the list of tables in the database, you can use the `SQLDatabase.table_names` property. This property returns a list of strings, where each string is a table name. The first time you call it, it will query the database to get the list of tables and then build an internal cache with the names. The next time you call it, it will use the cache instead of querying the database again. Also, modifying the table also leads to changes in the cache. *Because of this, do not change the database externally while the db is opened here!* This reduces the need of querying the database every time you need the list of tables, which speeds up the process a lot. + +..
code-block:: python + + >>> # let's add some more tables to get a list of tables + >>> db.add_table('table_2') + >>> db.add_table('table_3') + >>> # get the list of tables in the database + >>> db.table_names + ['table_1', 'table_with_data', 'table_2', 'table_3'] + +A similar method is used to get the list of columns in a table. The `SQLDatabase.column_names` method receives a table name and returns the list of the columns in that table. It also caches the names to speed up the process, so *the same warning mentioned earlier is also valid here*. + +.. code-block:: python + + >>> # get the list of columns in a table + >>> db.column_names('table_with_data') + ['name', 'birth', 'email', 'age'] + +SQL Methods +^^^^^^^^^^^ + +There are two high level methods that you may want to use: `SQLDatabase.select` and `SQLDatabase.count`. + +select +"""""" + +`SQLDatabase.select` performs the ``SELECT`` operation inside the database. However, you do not need to write the command yourself. You pass a bunch of pre-defined arguments to the method and it will build the command for you. The main arguments are: + +* ``table``: The name of the table to be selected. +* ``columns``: A list of columns to be selected. If not passed, all the columns will be selected. +* ``where``: Conditions to select a row or not. +* ``order``: Columns to be used to sort the rows. +* ``limit`` and ``offset``: Limits the number of rows to be selected. + +Only values will be returned, so do not expect a full table. The result will be a list of tuples, with each tuple being a row of data. + +Note that not all features of SQL ``SELECT`` can be used. This is intentional to keep the method simple. If you need more complex queries, you need to write the commands yourself and use the proper functions described below. + +.. code-block:: python + + >>> # select all the columns from the table + >>> db.select('table_with_data', columns=['name', 'birth', 'age'], + ... order=['birth'], limit=4, offset=1) + [('Mrs.
Mrs', 1991, 29), ('Someone else', 1995, 28), + ('Someone', 2001, 22), ('No one', 2002, 21)] + +:doc:`Where Statements <where_statements>` can be used. A better description is found in the link, but here is a quick example: + +.. code-block:: python + + >>> from dbastable import Where + >>> # select all the columns from the table where age is greater than 25 + >>> db.select('table_with_data', columns=['name', 'birth', 'age'], + ... where=Where('age', '>', 25), order=['age']) + [('Someone else', 1995, 28), ('Mrs. Mrs', 1991, 29), ('Mr. Mr', 1990, 30)] + +count +""""" + +`SQLDatabase.count` performs the ``COUNT`` operation inside the database. In addition to the table name, it just receives a ``where`` argument, which is a :doc:`Where Statement <where_statements>`. + +.. code-block:: python + + >>> # count the number of rows in the table + >>> db.count('table_with_data') + 6 + >>> # count the number of rows where age is greater than 25 + >>> db.count('table_with_data', where=Where('age', '>', 25)) + 3 + +execute and executemany +""""""""""""""""""""""" + +If you have an SQL command that you manually wrote and want to execute, you can use the `SQLDatabase.execute` and `SQLDatabase.executemany` methods. They are the lower level methods called by all other methods and wrap `~sqlite3.Cursor.execute` and `~sqlite3.Cursor.executemany` methods. The first one is used to execute a single command, while the second one is used to execute a command multiple times, with different parameters. Both accept the `arguments` parameter if you want to use the ``?`` `placeholder `_ in the command and avoid `SQL injection hacks `_. `Learn more with Little Bob `_. + +The direct use of these methods may not be needed, since we already handle almost all operations you may need. But, if you need a very specific case, like `SUM` columns, you can use them. + +..
code-block:: python + + >>> # sum the age of all people in the table + >>> db.execute('SELECT SUM(age) FROM table_with_data') + [(149,)] + >>> # sum the age of all people in the table where age is greater than 25 + >>> db.execute('SELECT SUM(age) FROM table_with_data WHERE age > ?', + ... arguments=[25]) + [(87,)] + +commit +"""""" + +The `SQLDatabase.commit` method is used to apply changes to the database. When a database is changed, `~sqlite3` does not change the original database by default. It saves the changes to a separate diff file and only merges it when the ``commit`` command is called. It's called automatically if ``autocommit`` is set to ``True``. If not, you will need to call it manually. diff --git a/docs/viewer_classes.rst b/docs/viewer_classes.rst index 3946930..2014c57 100644 --- a/docs/viewer_classes.rst +++ b/docs/viewer_classes.rst @@ -1,2 +1,4 @@ +.. _viewer: + Viewer Classes --------------- +============== From 69a54b4e09b7fa8e37c26e5448214ae460d55702 Mon Sep 17 00:00:00 2001 From: Julio Campagnolo Date: Wed, 18 Oct 2023 19:18:04 -0300 Subject: [PATCH 30/32] SQLRow: mimic dict keys and items --- dbastable/_viewers.py | 4 +--- dbastable/tests/test_viewers.py | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/dbastable/_viewers.py b/dbastable/_viewers.py index 46ff8f7..067c56d 100644 --- a/dbastable/_viewers.py +++ b/dbastable/_viewers.py @@ -395,19 +395,17 @@ def index(self): """Get the index of the current row.""" return self._row - @property def keys(self): """Get the keys of the current row.""" return self.column_names - @property def items(self): """Get the items of the current row.""" return zip(self.column_names, self.values) def as_dict(self): """Get the row as a dict.""" - return dict(self.items) + return dict(self.items()) def __getitem__(self, key): """Get a column from the row.""" diff --git a/dbastable/tests/test_viewers.py b/dbastable/tests/test_viewers.py index 762bde5..0c145ea 100644 ---
a/dbastable/tests/test_viewers.py +++ b/dbastable/tests/test_viewers.py @@ -24,7 +24,7 @@ def test_row_basic_properties(self): self.assertEqual(row.table, 'test') self.assertEqual(row.index, 0) self.assertEqual(row.column_names, ['a', 'b']) - self.assertEqual(row.keys, ['a', 'b']) + self.assertEqual(row.keys(), ['a', 'b']) self.assertEqual(row.values, (10, 20)) self.assertIsInstance(row.values, tuple) self.assertEqual(row.as_dict(), {'a': 10, 'b': 20}) From 514e58c68d83235550912932f1b33c5dd03d9648 Mon Sep 17 00:00:00 2001 From: Julio Campagnolo Date: Wed, 18 Oct 2023 19:25:49 -0300 Subject: [PATCH 31/32] viewer classes docs --- docs/viewer_classes.rst | 134 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 132 insertions(+), 2 deletions(-) diff --git a/docs/viewer_classes.rst b/docs/viewer_classes.rst index 2014c57..527810f 100644 --- a/docs/viewer_classes.rst +++ b/docs/viewer_classes.rst @@ -1,4 +1,134 @@ -.. _viewer: - Viewer Classes ============== + +The planned interaction with the database was only possible through the creation of 3 viewer classes. These classes act like a `view of Numpy arrays `_, but interfacing with all the SQL commands `~sqlite3` needs. So, they make possible the use of all `__getitem__` and `__setitem__` methods (``[]`` operator) inside a database. + +These classes do not store any data themselves. Instead, they just store references of rows, columns or tables to query and access the database. So, any changes in the data inside the database also affect the data returned by the viewer classes. + +Also, it is not recommended to create them manually. Let the `~dbastable.SQLDatabase` create them for you. + +`~dbastable.SQLTable` +--------------------- + +`~dbastable.SQLTable` is a class that can be used to view and manage a single table inside the database. It is returned with the ``[table]`` operator of `~dbastable.SQLDatabase` or with the `SQLDatabase.get_table` method. + +..
code-block:: python + + >>> from dbastable import SQLDatabase + >>> db = SQLDatabase() + >>> db.add_table('table1', columns=['name', 'age'], + ... data=[['John', 20], ['Mary', 30]]) + >>> table = db['table1'] + >>> print(table) + SQLTable 'table1' in database 'None':(2 columns x 2 rows) + name age + ---- --- + John 20 + Mary 30 + +It performs almost any important operation of `~dbastable.SQLDatabase`, but without the ``table`` argument in all of those methods. For example, ``add_rows``, ``delete_row``, ``add_column``, ``delete_column``, ``select``, ``index_of`` and more. More details about these functions can be found in the `~dbastable.SQLTable` API documentation. + +One important thing to mention is how the ``[]`` operator works in this class. It can be used to access a single row, column or cell inside the table. If an `int` number is used in the operator, it will return a `~dbastable.SQLRow` object. If a `str` is used, it will return a `~dbastable.SQLColumn` object. If a tuple of `int` and `str` is used, it will return a single cell value. No other combination is supported, so you cannot access multiple rows or columns at the same time. + +.. code-block:: python + + >>> # int returns a row + >>> print(table[0]) + SQLRow 0 in table 'table1' {'name': 'John', 'age': 20} + >>> # str returns a column + >>> print(table['name']) + SQLColumn 'name' in table 'table1' (2 rows) + >>> # tuple of int and str returns a cell + >>> print(table[0, 'name']) + John + >>> print(table['name', 1]) + Mary + + +`~dbastable.SQLRow` and `~dbastable.SQLColumn` +---------------------------------------------- + +`~dbastable.SQLRow` and `~dbastable.SQLColumn` are classes that can be used to access and change data of a single row or column inside the database. They are able to query data from the database, change data of a single cell, but they don't inherit methods like ``add_rows`` or ``delete_row``. So you can't change the structure of the table using these classes.
For this, you need to use `~dbastable.SQLTable`. + +They are returned with ``[row]`` and ``[column]`` operators of `~dbastable.SQLTable` or with `SQLTable.get_row` and `SQLTable.get_column` methods. + +.. code-block:: python + + >>> row = table[0] + >>> print(row) + SQLRow 0 in table 'table1' {'name': 'John', 'age': 20} + >>> column = table['name'] + >>> print(column) + SQLColumn 'name' in table 'table1' (2 rows) + +.. code-block:: python + + >>> print(table.get_row(0)) + SQLRow 0 in table 'table1' {'name': 'John', 'age': 20} + >>> print(table.get_column('name')) + SQLColumn 'name' in table 'table1' (2 rows) + +Also, they can be directly created with `~dbastable.SQLDatabase.get_row` and `~dbastable.SQLDatabase.get_column` methods. + +.. code-block:: python + + >>> row = db.get_row('table1', 0) + >>> print(row) + SQLRow 0 in table 'table1' {'name': 'John', 'age': 20} + >>> column = db.get_column('table1', 'name') + >>> print(column) + SQLColumn 'name' in table 'table1' (2 rows) + +Both classes have a ``values`` property, which returns a sequence (a tuple for rows, a list for columns) containing copies of the data from the given row or column. It is important to remember that the values returned are not linked anymore with the original database and any change in this sequence will not cause changes in the database. + +.. code-block:: python + + >>> print(row.values) + ('John', 20) + >>> print(column.values) + ['John', 'Mary'] + +Also, both classes are iterable using `for` loops. The iteration is always performed in the order of the table. + +.. code-block:: python + + >>> for value in row: + ... print(value) + John + 20 + >>> for value in column: + ... print(value) + John + Mary + +The usage of `in` comparison statement is also possible. It will return ``True`` if the given value is in the row or column values. + +..
code-block:: python + + >>> print('Mary' in row) + False + >>> print('Mary' in column) + True + +`~dbastable.SQLRow` Specifics +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +`~dbastable.SQLRow` has other special features that make it act like a `dict`. First of all, you can create a dictionary from it using the ``as_dict`` method. + +.. code-block:: python + + >>> print(row.as_dict()) + {'name': 'John', 'age': 20} + +Also, it is iterable just like dicts using the ``keys`` and ``items`` methods. + +.. code-block:: python + + >>> for key in row.keys(): + ... print(key) + name + age + >>> for key, value in row.items(): + ... print(key, value) + name John + age 20 From ea0e5fac65bd557005df60e18aca62f5d3e150ba Mon Sep 17 00:00:00 2001 From: Julio Campagnolo Date: Wed, 18 Oct 2023 20:30:10 -0300 Subject: [PATCH 32/32] where statements doc --- docs/nonconformant.rst | 2 +- docs/where_statements.rst | 58 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 58 insertions(+), 2 deletions(-) diff --git a/docs/nonconformant.rst b/docs/nonconformant.rst index 7823755..32b45f1 100644 --- a/docs/nonconformant.rst +++ b/docs/nonconformant.rst @@ -1,5 +1,5 @@ Non-Conformant Column Names ---------------------------- +=========================== By default, any operation that interfaces with column names will only accept names that conform to the SQLite names standard. This means that column names must begin with a letter, and can only contain letters, numbers, and underscores. If you try to access a column that does not conform to this standard, you will get an error. diff --git a/docs/where_statements.rst b/docs/where_statements.rst index f87490e..a35b0e6 100644 --- a/docs/where_statements.rst +++ b/docs/where_statements.rst @@ -1,2 +1,58 @@ Where Statements and Queries ----------------------------- +============================ + +Any method that receives a ``where`` argument, like `SQLDatabase.select` or `SQLDatabase.count` will work in the same way.
To support a wide range of queries, the ``where`` argument can be a dictionary or a `~dbastable.Where` object. + +The dictionary method is the simplest, and is the one used in the examples above. The dictionary keys are the column names, and the values are the values to match. When using a dictionary, equality is always assumed. + +.. code-block:: python + + >>> # let's create a db to play with + >>> from dbastable import SQLDatabase + >>> db = SQLDatabase() + >>> db.add_table('table1', columns=['id', 'name', 'value']) + >>> db.add_rows('table1', [[1, 'foo', 10], + ... [2, 'bar', 20], + ... [3, 'baz', 15], + ... [4, 'qux', 20], + ... [5, 'tux', 10]]) + >>> db.select('table1', columns='name', where={'value': 20}) + [('bar',), ('qux',)] + +Multiple statements are supported. They will be combined using the ``AND`` operator. ``OR`` is not supported. + +.. code-block:: python + + >>> db.select('table1', columns='name', where={'value': 20, 'id': 4}) + [('qux',)] + +The `~dbastable.Where` object allows for more complex queries. It supports the following operators: + +* ``=``: equality +* ``!=``: inequality +* ``<``: less than +* ``<=``: less than or equal to +* ``>``: greater than +* ``>=``: greater than or equal to +* ``LIKE``: SQL ``LIKE`` operator +* ``IN``: SQL ``IN`` operator +* ``NOT IN``: SQL ``NOT IN`` operator +* ``IS``: SQL ``IS`` operator +* ``IS NOT``: SQL ``IS NOT`` operator +* ``BETWEEN``: SQL ``BETWEEN`` operator +* ``NOT BETWEEN``: SQL ``NOT BETWEEN`` operator + +This `~dbastable.Where` object can be used directly as the ``where`` argument, or as a value in a `dict`. Also, if a list is passed to the ``where`` argument, it must be a list of `~dbastable.Where` objects that will be combined with ``AND``. + +.. code-block:: python + + >>> from dbastable import Where + >>> db.select('table1', columns='name', + ... where={'value': Where('value', '>', 15)}) + [('bar',), ('qux',)] + >>> db.select('table1', columns='name', + ... where=[Where('value', 'IN', [10, 15]), + ...
Where('id', '>', 3)]) + [('tux',)] + >>> db.select('table1', where=Where('value', 'BETWEEN', [15, 25])) + [(2, 'bar', 20), (3, 'baz', 15), (4, 'qux', 20)]