
Merge pull request #2723 from centerofci/0.1.1
Release 0.1.1
pavish committed Mar 23, 2023
2 parents 06abb3c + ef06744 commit a813767
Showing 112 changed files with 1,490 additions and 457 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/handle-required-checks.yml
@@ -5,11 +5,11 @@ name: handle-required-checks
on:
push:
paths-ignore:
- mathesar_ui/*
- 'mathesar_ui/**'
- '**.py'
pull_request:
paths-ignore:
- mathesar_ui/*
- 'mathesar_ui/**'
- '**.py'
jobs:
lint:
19 changes: 2 additions & 17 deletions .github/workflows/run-lint-audit-tests-ui.yml
@@ -3,17 +3,14 @@ name: UI - Lint, Audit and Tests
on:
push:
paths:
- mathesar_ui/*
- 'mathesar_ui/**'
pull_request:
paths:
- mathesar_ui/*
- 'mathesar_ui/**'

jobs:
format:
runs-on: ubuntu-latest
# We only want to run on external PRs, since internal PRs are covered by "push"
# This prevents this from running twice on internal PRs
if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository
defaults:
run:
working-directory: ./mathesar_ui
@@ -36,9 +33,6 @@ jobs:

lint:
runs-on: ubuntu-latest
# We only want to run on external PRs, since internal PRs are covered by "push"
# This prevents this from running twice on internal PRs
if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository
defaults:
run:
working-directory: ./mathesar_ui
@@ -61,9 +55,6 @@ jobs:

typecheck:
runs-on: ubuntu-latest
# We only want to run on external PRs, since internal PRs are covered by "push"
# This prevents this from running twice on internal PRs
if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository
defaults:
run:
working-directory: ./mathesar_ui
@@ -86,9 +77,6 @@ jobs:

audit:
runs-on: ubuntu-latest
# We only want to run on external PRs, since internal PRs are covered by "push"
# This prevents this from running twice on internal PRs
if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository
defaults:
run:
working-directory: ./mathesar_ui
@@ -120,9 +108,6 @@ jobs:

tests:
runs-on: ubuntu-latest
# We only want to run on external PRs, since internal PRs are covered by "push"
# This prevents this from running twice on internal PRs
if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository
defaults:
run:
working-directory: ./mathesar_ui
4 changes: 2 additions & 2 deletions .github/workflows/test-docs.yml
@@ -3,10 +3,10 @@ name: test-docs
on:
push:
paths:
- docs/*
- 'docs/**'
pull_request:
paths:
- docs/*
- 'docs/**'

jobs:
deploy:
2 changes: 1 addition & 1 deletion README.md
@@ -10,7 +10,7 @@
</p>

<p align="center">
<a href="https://mathesar.org?ref=github-readme" target="_blank">Website</a> • <a href="https://docs.mathesar.org?ref=github-readme" target="_blank">Docs</a> • <a href="https://demo.mathesar.org?ref=github-readme" target="_blank">Live Demo</a> • <a href="https://wiki.mathesar.org/en/community/matrix" target="_blank">Matrix (chat)</a> • <a href="https://wiki.mathesar.org/" target="_blank">Wiki</a>
<a href="https://mathesar.org?ref=github-readme" target="_blank">Website</a> • <a href="https://docs.mathesar.org?ref=github-readme" target="_blank">Docs</a> • <a href="https://demo.mathesar.org?ref=github-readme" target="_blank">Live Demo</a> • <a href="https://wiki.mathesar.org/en/community/matrix" target="_blank">Matrix (chat)</a> • <a href="https://discord.gg/enaKqGn5xx" target="_blank">Discord</a> • <a href="https://wiki.mathesar.org/" target="_blank">Wiki</a>
</p>


3 changes: 3 additions & 0 deletions config/settings/common_settings.py
@@ -37,6 +37,7 @@ def pipe_delim(pipe_string):
"django.contrib.contenttypes",
"django.contrib.sessions",
"django.contrib.messages",
"whitenoise.runserver_nostatic",
"django.contrib.staticfiles",
"rest_framework",
"django_filters",
@@ -46,6 +47,7 @@ def pipe_delim(pipe_string):

MIDDLEWARE = [
"django.middleware.security.SecurityMiddleware",
"whitenoise.middleware.WhiteNoiseMiddleware",
"django.contrib.sessions.middleware.SessionMiddleware",
"django.middleware.common.CommonMiddleware",
"django.middleware.csrf.CsrfViewMiddleware",
@@ -221,6 +223,7 @@ def pipe_delim(pipe_string):
# https://vitejs.dev/guide/assets.html
# https://vitejs.dev/guide/backend-integration.html
STATICFILES_DIRS = [MATHESAR_UI_SOURCE_LOCATION, MATHESAR_STATIC_NON_CODE_FILES_LOCATION] if MATHESAR_MODE == 'DEVELOPMENT' else [MATHESAR_UI_BUILD_LOCATION, MATHESAR_STATIC_NON_CODE_FILES_LOCATION]
STATICFILES_STORAGE = "whitenoise.storage.CompressedManifestStaticFilesStorage"

# Accounts
AUTH_USER_MODEL = 'mathesar.User'
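
The three WhiteNoise additions above are split across separate hunks; consolidated, the wiring looks roughly like the sketch below (other settings entries elided). Per WhiteNoise's own guidance, the middleware is placed directly after `SecurityMiddleware`.

```python
# Sketch of the WhiteNoise wiring added in this diff (surrounding entries elided).

INSTALLED_APPS = [
    # ...
    "whitenoise.runserver_nostatic",  # let WhiteNoise handle static files even under runserver
    "django.contrib.staticfiles",
    # ...
]

MIDDLEWARE = [
    "django.middleware.security.SecurityMiddleware",
    "whitenoise.middleware.WhiteNoiseMiddleware",  # directly after SecurityMiddleware
    # ...
]

# Serve compressed, content-hashed static files in production.
STATICFILES_STORAGE = "whitenoise.storage.CompressedManifestStaticFilesStorage"
```
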
4 changes: 4 additions & 0 deletions db/columns/base.py
@@ -138,6 +138,10 @@ def table_oid(self):
@property
def is_default(self):
default_def = DEFAULT_COLUMNS.get(self.name, False)
try:
self.type.python_type
except NotImplementedError:
return False
return (
default_def
and self.type.python_type == default_def[TYPE]().python_type
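
For context on the guard above: SQLAlchemy type objects raise `NotImplementedError` from `python_type` when no Python-level type is defined for them, so the property now short-circuits to `False` instead of crashing. A minimal illustration of the failure mode (the `CustomType` class is hypothetical, purely for demonstration):

```python
from sqlalchemy.types import Integer, UserDefinedType


class CustomType(UserDefinedType):
    # Hypothetical type that does not define python_type.
    def get_col_spec(self, **kw):
        return "CUSTOMTYPE"


Integer().python_type     # <class 'int'>
CustomType().python_type  # raises NotImplementedError
```
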
68 changes: 47 additions & 21 deletions db/columns/operations/infer_types.py
@@ -5,6 +5,7 @@

from db.columns.exceptions import DagCycleError
from db.columns.operations.alter import alter_column_type
from db.columns.operations.select import determine_whether_column_contains_data
from db.tables.operations.select import get_oid_from_table, reflect_table
from db.types.base import PostgresType, MathesarCustomType, get_available_known_db_types
from db.metadata import get_empty_metadata
@@ -18,19 +19,18 @@
PostgresType.BOOLEAN: [],
MathesarCustomType.EMAIL: [],
PostgresType.INTERVAL: [],
PostgresType.NUMERIC: [
PostgresType.BOOLEAN,
],
PostgresType.NUMERIC: [],
PostgresType.TEXT: [
PostgresType.BOOLEAN,
PostgresType.DATE,
PostgresType.NUMERIC,
MathesarCustomType.MATHESAR_MONEY,
PostgresType.TIMESTAMP_WITHOUT_TIME_ZONE,
PostgresType.TIMESTAMP_WITH_TIME_ZONE,
# We only infer to TIME_WITHOUT_TIME_ZONE as time zones don't make much sense
# without additional date information. See postgres documentation for further
# details: https://www.postgresql.org/docs/13/datatype-datetime.html
# We only infer to TIME_WITHOUT_TIME_ZONE as time zones don't make much
# sense without additional date information. See postgres documentation
# for further details:
# https://www.postgresql.org/docs/13/datatype-datetime.html
PostgresType.TIME_WITHOUT_TIME_ZONE,
PostgresType.INTERVAL,
MathesarCustomType.EMAIL,
@@ -41,26 +41,45 @@
}


def infer_column_type(schema, table_name, column_name, engine, depth=0, type_inference_dag=None, metadata=None, columns_might_have_defaults=True):
def infer_column_type(
schema,
table_name,
column_name,
engine,
depth=0,
type_inference_dag=None,
metadata=None,
columns_might_have_defaults=True,
):
"""
Attempts to cast the column to the best type for it, given the mappings defined in TYPE_INFERENCE_DAG
and _get_type_classes_mapped_to_dag_nodes. Returns the resulting column type's class.
Attempt to cast the column to the best type for it.
Returns the resulting column type's class.
Algorithm:
1. reflect the column's type class;
2. use _get_type_classes_mapped_to_dag_nodes to map it to a TYPE_INFERENCE_DAG key;
3. look up the sequence of types referred to by that key on the TYPE_INFERENCE_DAG;
- if there's no such key on the TYPE_INFERENCE_DAG dict, or if its value is an empty
list, return the current column type's class;
4. iterate through that sequence of types trying to alter the column's type to them;
- if the column's type is altered successfully, break iteration and return the output
of running infer_column_type again (trigger tail recursion);
- if none of the column type alterations succeed, return the current column type's
class.
1. Check for any data in the column.
- If the column is empty, return the column's current type
class.
2. Reflect the column's type class.
3. Use _get_type_classes_mapped_to_dag_nodes to map it to a
TYPE_INFERENCE_DAG key.
4. Look up the sequence of types referred to by that key on the
TYPE_INFERENCE_DAG.
- If there's no such key on the TYPE_INFERENCE_DAG dict, or if
its value is an empty list, return the current column's type
class.
5. Iterate through that sequence of types trying to alter the
column's type to them.
- If the column's type is altered successfully, break
iteration and return the output of running infer_column_type
again (trigger tail recursion).
- If none of the column type alterations succeed, return the
current column's type class.
"""
metadata = metadata if metadata else get_empty_metadata()

if type_inference_dag is None:
type_inference_dag = TYPE_INFERENCE_DAG
metadata = metadata if metadata else get_empty_metadata()
if depth > MAX_INFERENCE_DAG_DEPTH:
raise DagCycleError("The type_inference_dag likely has a cycle")
type_classes_to_dag_nodes = _get_type_classes_mapped_to_dag_nodes(engine)
@@ -71,11 +90,18 @@ def infer_column_type(schema, table_name, column_name, engine, depth=0, type_inf
column_name=column_name,
metadata=metadata,
)
table_oid = get_oid_from_table(table_name, schema, engine)
column_contains_data = determine_whether_column_contains_data(
table_oid, column_name, engine, metadata
)
# We short-circuit in this case since we can't infer type without data.
if not column_contains_data:
return column_type_class

# a DAG node will be a DatabaseType Enum
dag_node = type_classes_to_dag_nodes.get(column_type_class)
logger.debug(f"dag_node: {dag_node}")
types_to_cast_to = type_inference_dag.get(dag_node, [])
table_oid = get_oid_from_table(table_name, schema, engine)
for db_type in types_to_cast_to:
try:
with engine.begin() as conn:
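
A rough usage sketch of the reworked signature, based only on the parameters shown in this hunk (the connection string, schema, and names are placeholders):

```python
from sqlalchemy import create_engine

from db.columns.operations.infer_types import infer_column_type

engine = create_engine("postgresql://user:pass@localhost/mydb")  # placeholder DSN

# Returns the SQLAlchemy type class the column ends up with.
# With the new short-circuit, an empty column simply keeps its current type.
inferred_type = infer_column_type(
    schema="public",
    table_name="my_table",
    column_name="my_column",
    engine=engine,
)
print(inferred_type)
```
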
16 changes: 15 additions & 1 deletion db/columns/operations/select.py
@@ -1,7 +1,7 @@
import warnings

from pglast import Node, parse_sql
from sqlalchemy import and_, asc, cast, select, text
from sqlalchemy import and_, asc, cast, select, text, exists

from db.columns.exceptions import DynamicDefaultWarning
from db.tables.operations.select import reflect_table_from_oid
@@ -151,6 +151,20 @@ def get_column_default_dict(table_oid, attnum, engine, metadata, connection_to_u
return {"value": default_value, "is_dynamic": is_dynamic}


def determine_whether_column_contains_data(
table_oid, column_name, engine, metadata, connection_to_use=None
):
"""
Given a column, return True if it contains data, False otherwise.
"""
sa_table = reflect_table_from_oid(
table_oid, engine, metadata=metadata, connection_to_use=connection_to_use,
)
sel = select(exists(1).where(sa_table.columns[column_name] != None)) # noqa
contains_data = execute_statement(engine, sel, connection_to_use).scalar()
return contains_data


def get_column_from_oid_and_attnum(table_oid, attnum, engine, metadata, connection_to_use=None):
sa_table = reflect_table_from_oid(table_oid, engine, metadata=metadata, connection_to_use=connection_to_use)
column_name = get_column_name_from_attnum(table_oid, attnum, engine, metadata=metadata, connection_to_use=connection_to_use)
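
A sketch of how the new helper might be called (table and column names are placeholders); the `exists` construct it builds compiles to roughly `SELECT EXISTS (SELECT 1 FROM my_table WHERE my_column IS NOT NULL)`:

```python
from sqlalchemy import create_engine

from db.columns.operations.select import determine_whether_column_contains_data
from db.metadata import get_empty_metadata
from db.tables.operations.select import get_oid_from_table

engine = create_engine("postgresql://user:pass@localhost/mydb")  # placeholder DSN
metadata = get_empty_metadata()

table_oid = get_oid_from_table("my_table", "public", engine)

# True if at least one row has a non-NULL value in the column.
has_data = determine_whether_column_contains_data(table_oid, "my_column", engine, metadata)
print(has_data)
```
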
64 changes: 64 additions & 0 deletions db/identifiers.py
@@ -0,0 +1,64 @@
import hashlib


def truncate_if_necessary(identifier):
"""
Takes an identifier and returns it, truncating it if it is too long. The truncated version
will end with a hash of the passed identifier, so column name collisions should be very
rare.
Iteratively removes characters from the end of the identifier, until the resulting string, with
the suffix hash of the identifier appended, is short enough that it doesn't need to be truncated
anymore. Whitespace is trimmed from the truncated identifier before appending the suffix.
"""
assert type(identifier) is str
if not is_identifier_too_long(identifier):
return identifier
right_side = "-" + _get_truncation_hash(identifier)
identifier_length = len(identifier)
assert len(right_side) < identifier_length # Sanity check
range_of_num_of_chars_to_remove = range(1, identifier_length)
for num_of_chars_to_remove in range_of_num_of_chars_to_remove:
left_side = identifier[:num_of_chars_to_remove * -1]
left_side = left_side.rstrip()
truncated_identifier = left_side + right_side
if not is_identifier_too_long(truncated_identifier):
return truncated_identifier
raise Exception(
"Acceptable truncation not found; should never happen."
)


def is_identifier_too_long(identifier):
postgres_identifier_size_limit = 63
size = _get_size_of_identifier_in_bytes(identifier)
return size > postgres_identifier_size_limit


def _get_truncation_hash(identifier):
"""
Produces an 8-character string hash of the passed identifier.
Uses the blake2s hash function, since it's well regarded and better suited to short digests
than blake2b. We want the digest to be short so that it doesn't take up too much of the
truncated identifier it will be appended to.
"""
h = hashlib.blake2s(digest_size=4)
bytes = _get_identifier_in_bytes(identifier)
h.update(bytes)
return h.hexdigest()


def _get_size_of_identifier_in_bytes(s):
bytes = _get_identifier_in_bytes(s)
return len(bytes)


def _get_identifier_in_bytes(s):
"""
As far as I can tell, the Postgres doc below [0] says that UTF-8 supports all languages;
therefore, different server locale configurations should not break this.
[0] https://www.postgresql.org/docs/13/multibyte.html
"""
return s.encode('utf-8')
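
A quick sketch of the intended behavior (the long column name is made up): identifiers at or under Postgres's 63-byte limit pass through unchanged, while longer ones are cut down and suffixed with a dash plus the 8-character blake2s hash of the original name.

```python
from db.identifiers import truncate_if_necessary

short_name = "customer_email"
long_name = "a_really_long_column_name_" + "x" * 80  # well over 63 bytes

print(truncate_if_necessary(short_name))  # unchanged: 'customer_email'

truncated = truncate_if_necessary(long_name)
print(len(truncated.encode("utf-8")) <= 63)  # True: fits the Postgres identifier limit
print(truncated[-9:])                        # '-' followed by the 8-character hash suffix
```
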
7 changes: 7 additions & 0 deletions db/records/operations/delete.py
@@ -8,3 +8,10 @@ def delete_record(table, engine, id_value):
query = delete(table).where(primary_key_column == id_value)
with engine.begin() as conn:
return conn.execute(query)


def bulk_delete_records(table, engine, id_values):
primary_key_column = get_primary_key_column(table)
query = delete(table).where(primary_key_column.in_(id_values))
with engine.begin() as conn:
return conn.execute(query)
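
A hedged usage sketch of the new bulk delete helper (the table definition and DSN are illustrative; any SQLAlchemy Table with a primary key should work the same way `delete_record` does):

```python
from sqlalchemy import Column, Integer, MetaData, Table, Text, create_engine

from db.records.operations.delete import bulk_delete_records

engine = create_engine("postgresql://user:pass@localhost/mydb")  # placeholder DSN
metadata = MetaData()
orders = Table(
    "orders", metadata,
    Column("id", Integer, primary_key=True),
    Column("note", Text),
)

# Issues a single statement along the lines of: DELETE FROM orders WHERE id IN (1, 2, 3)
result = bulk_delete_records(orders, engine, [1, 2, 3])
print(result.rowcount)  # number of rows deleted
```
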
22 changes: 16 additions & 6 deletions db/records/operations/sort.py
@@ -44,27 +44,37 @@ def _build_order_by_all_columns_clause(relation):
To be used when we have failed to find any other ordering criteria,
since ordering by all columns is inherently inefficient.
Note the filtering out of internal columns. Before applying this fix, psycopg was throwing an error
like "could not identify an ordering operator for type json", because we were trying to
sort by an internal column like `__mathesar_group_metadata`, which has type `json`, which
requires special handling to be sorted. The problem is bypassed by not attempting to sort on
internal columns.
Note that some columns are filtered out, namely internal columns and non-orderable columns.
See the docstrings of the corresponding helper functions for details.
"""
return [
{'field': col, 'direction': 'asc'}
for col
in relation.columns
if not _is_internal_column(col)
if _is_col_orderable(col) and not _is_internal_column(col)
]


def _is_internal_column(col):
"""
Columns that Mathesar adds for its own purposes and does not expose to the user. We don't want
to sort by these.
This check might not be exhaustive; take care.
"""
return col.name == '__mathesar_group_metadata'


def _is_col_orderable(col):
"""
Some columns are not orderable (or at least don't have an unambiguous way to define order
without additional logic). We only want to order by orderable columns.
"""
data_type = col.type
non_orderable_type = ['Binary', 'LargeBinary', 'PickleType', 'ARRAY', 'JSON', 'JSONB']
return str(data_type) not in non_orderable_type


def apply_relation_sorting(relation, sort_spec):
order_by_list = [
_get_sorted_column_obj_from_spec(relation, spec) for spec in sort_spec
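
To illustrate the new orderability check (a sketch with a made-up table): a JSON-typed column would be skipped by the default all-columns ordering, while plain scalar columns are kept, because the string form of its SQLAlchemy type appears in the non-orderable list.

```python
from sqlalchemy import JSON, Column, Integer, MetaData, Table, Text

metadata = MetaData()
example = Table(
    "example", metadata,
    Column("id", Integer),
    Column("name", Text),
    Column("payload", JSON),  # not orderable without extra logic
)

non_orderable_type = ['Binary', 'LargeBinary', 'PickleType', 'ARRAY', 'JSON', 'JSONB']
for col in example.columns:
    print(col.name, str(col.type) not in non_orderable_type)
# id True
# name True
# payload False
```
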
