
Merge pull request #2723 from centerofci/0.1.1
Release 0.1.1
pavish committed Mar 23, 2023
2 parents 06abb3c + ef06744 commit a813767
Showing 112 changed files with 1,490 additions and 457 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/handle-required-checks.yml
@@ -5,11 +5,11 @@ name: handle-required-checks
on:
push:
paths-ignore:
- mathesar_ui/*
- 'mathesar_ui/**'
- '**.py'
pull_request:
paths-ignore:
- mathesar_ui/*
- 'mathesar_ui/**'
- '**.py'
jobs:
lint:
19 changes: 2 additions & 17 deletions .github/workflows/run-lint-audit-tests-ui.yml
@@ -3,17 +3,14 @@ name: UI - Lint, Audit and Tests
on:
push:
paths:
- mathesar_ui/*
- 'mathesar_ui/**'
pull_request:
paths:
- mathesar_ui/*
- 'mathesar_ui/**'

jobs:
format:
runs-on: ubuntu-latest
# We only want to run on external PRs, since internal PRs are covered by "push"
# This prevents this from running twice on internal PRs
if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository
defaults:
run:
working-directory: ./mathesar_ui
@@ -36,9 +33,6 @@ jobs:

lint:
runs-on: ubuntu-latest
# We only want to run on external PRs, since internal PRs are covered by "push"
# This prevents this from running twice on internal PRs
if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository
defaults:
run:
working-directory: ./mathesar_ui
@@ -61,9 +55,6 @@ jobs:

typecheck:
runs-on: ubuntu-latest
# We only want to run on external PRs, since internal PRs are covered by "push"
# This prevents this from running twice on internal PRs
if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository
defaults:
run:
working-directory: ./mathesar_ui
@@ -86,9 +77,6 @@ jobs:

audit:
runs-on: ubuntu-latest
# We only want to run on external PRs, since internal PRs are covered by "push"
# This prevents this from running twice on internal PRs
if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository
defaults:
run:
working-directory: ./mathesar_ui
@@ -120,9 +108,6 @@ jobs:

tests:
runs-on: ubuntu-latest
# We only want to run on external PRs, since internal PRs are covered by "push"
# This prevents this from running twice on internal PRs
if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository
defaults:
run:
working-directory: ./mathesar_ui
4 changes: 2 additions & 2 deletions .github/workflows/test-docs.yml
@@ -3,10 +3,10 @@ name: test-docs
on:
push:
paths:
- docs/*
- 'docs/**'
pull_request:
paths:
- docs/*
- 'docs/**'

jobs:
deploy:
2 changes: 1 addition & 1 deletion README.md
@@ -10,7 +10,7 @@
</p>

<p align="center">
<a href="https://mathesar.org?ref=github-readme" target="_blank">Website</a> • <a href="https://docs.mathesar.org?ref=github-readme" target="_blank">Docs</a> • <a href="https://demo.mathesar.org?ref=github-readme" target="_blank">Live Demo</a> • <a href="https://wiki.mathesar.org/en/community/matrix" target="_blank">Matrix (chat)</a> • <a href="https://wiki.mathesar.org/" target="_blank">Wiki</a>
<a href="https://mathesar.org?ref=github-readme" target="_blank">Website</a> • <a href="https://docs.mathesar.org?ref=github-readme" target="_blank">Docs</a> • <a href="https://demo.mathesar.org?ref=github-readme" target="_blank">Live Demo</a> • <a href="https://wiki.mathesar.org/en/community/matrix" target="_blank">Matrix (chat)</a> • <a href="https://discord.gg/enaKqGn5xx" target="_blank">Discord</a> • <a href="https://wiki.mathesar.org/" target="_blank">Wiki</a>
</p>


3 changes: 3 additions & 0 deletions config/settings/common_settings.py
@@ -37,6 +37,7 @@ def pipe_delim(pipe_string):
"django.contrib.contenttypes",
"django.contrib.sessions",
"django.contrib.messages",
"whitenoise.runserver_nostatic",
"django.contrib.staticfiles",
"rest_framework",
"django_filters",
@@ -46,6 +47,7 @@ def pipe_delim(pipe_string):

MIDDLEWARE = [
"django.middleware.security.SecurityMiddleware",
"whitenoise.middleware.WhiteNoiseMiddleware",
"django.contrib.sessions.middleware.SessionMiddleware",
"django.middleware.common.CommonMiddleware",
"django.middleware.csrf.CsrfViewMiddleware",
@@ -221,6 +223,7 @@ def pipe_delim(pipe_string):
# https://vitejs.dev/guide/assets.html
# https://vitejs.dev/guide/backend-integration.html
STATICFILES_DIRS = [MATHESAR_UI_SOURCE_LOCATION, MATHESAR_STATIC_NON_CODE_FILES_LOCATION] if MATHESAR_MODE == 'DEVELOPMENT' else [MATHESAR_UI_BUILD_LOCATION, MATHESAR_STATIC_NON_CODE_FILES_LOCATION]
STATICFILES_STORAGE = "whitenoise.storage.CompressedManifestStaticFilesStorage"

# Accounts
AUTH_USER_MODEL = 'mathesar.User'
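
The three WhiteNoise additions above are split across separate hunks; consolidated, the wiring looks roughly like the sketch below (other settings entries elided). Per WhiteNoise's own guidance, the middleware is placed directly after `SecurityMiddleware`.

```python
# Sketch of the WhiteNoise wiring added in this diff (surrounding entries elided).

INSTALLED_APPS = [
    # ...
    "whitenoise.runserver_nostatic",  # let WhiteNoise handle static files even under runserver
    "django.contrib.staticfiles",
    # ...
]

MIDDLEWARE = [
    "django.middleware.security.SecurityMiddleware",
    "whitenoise.middleware.WhiteNoiseMiddleware",  # directly after SecurityMiddleware
    # ...
]

# Serve compressed, content-hashed static files in production.
STATICFILES_STORAGE = "whitenoise.storage.CompressedManifestStaticFilesStorage"
```
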
4 changes: 4 additions & 0 deletions db/columns/base.py
@@ -138,6 +138,10 @@ def table_oid(self):
@property
def is_default(self):
default_def = DEFAULT_COLUMNS.get(self.name, False)
try:
self.type.python_type
except NotImplementedError:
return False
return (
default_def
and self.type.python_type == default_def[TYPE]().python_type
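
For context on the guard above: SQLAlchemy type objects raise `NotImplementedError` from `python_type` when no Python-level type is defined for them, so the property now short-circuits to `False` instead of crashing. A minimal illustration of the failure mode (the `CustomType` class is hypothetical, purely for demonstration):

```python
from sqlalchemy.types import Integer, UserDefinedType


class CustomType(UserDefinedType):
    # Hypothetical type that does not define python_type.
    def get_col_spec(self, **kw):
        return "CUSTOMTYPE"


Integer().python_type     # <class 'int'>
CustomType().python_type  # raises NotImplementedError
```
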
68 changes: 47 additions & 21 deletions db/columns/operations/infer_types.py
@@ -5,6 +5,7 @@

from db.columns.exceptions import DagCycleError
from db.columns.operations.alter import alter_column_type
from db.columns.operations.select import determine_whether_column_contains_data
from db.tables.operations.select import get_oid_from_table, reflect_table
from db.types.base import PostgresType, MathesarCustomType, get_available_known_db_types
from db.metadata import get_empty_metadata
@@ -18,19 +19,18 @@
PostgresType.BOOLEAN: [],
MathesarCustomType.EMAIL: [],
PostgresType.INTERVAL: [],
PostgresType.NUMERIC: [
PostgresType.BOOLEAN,
],
PostgresType.NUMERIC: [],
PostgresType.TEXT: [
PostgresType.BOOLEAN,
PostgresType.DATE,
PostgresType.NUMERIC,
MathesarCustomType.MATHESAR_MONEY,
PostgresType.TIMESTAMP_WITHOUT_TIME_ZONE,
PostgresType.TIMESTAMP_WITH_TIME_ZONE,
# We only infer to TIME_WITHOUT_TIME_ZONE as time zones don't make much sense
# without additional date information. See postgres documentation for further
# details: https://www.postgresql.org/docs/13/datatype-datetime.html
# We only infer to TIME_WITHOUT_TIME_ZONE as time zones don't make much
# sense without additional date information. See postgres documentation
# for further details:
# https://www.postgresql.org/docs/13/datatype-datetime.html
PostgresType.TIME_WITHOUT_TIME_ZONE,
PostgresType.INTERVAL,
MathesarCustomType.EMAIL,
@@ -41,26 +41,45 @@
}


def infer_column_type(schema, table_name, column_name, engine, depth=0, type_inference_dag=None, metadata=None, columns_might_have_defaults=True):
def infer_column_type(
schema,
table_name,
column_name,
engine,
depth=0,
type_inference_dag=None,
metadata=None,
columns_might_have_defaults=True,
):
"""
Attempts to cast the column to the best type for it, given the mappings defined in TYPE_INFERENCE_DAG
and _get_type_classes_mapped_to_dag_nodes. Returns the resulting column type's class.
Attempt to cast the column to the best type for it.
Returns the resulting column type's class.
Algorithm:
1. reflect the column's type class;
2. use _get_type_classes_mapped_to_dag_nodes to map it to a TYPE_INFERENCE_DAG key;
3. look up the sequence of types referred to by that key on the TYPE_INFERENCE_DAG;
- if there's no such key on the TYPE_INFERENCE_DAG dict, or if its value is an empty
list, return the current column type's class;
4. iterate through that sequence of types trying to alter the column's type to them;
- if the column's type is altered successfully, break iteration and return the output
of running infer_column_type again (trigger tail recursion);
- if none of the column type alterations succeed, return the current column type's
class.
1. Check for any data in the column.
- If the column is empty, return the column's current type
class.
2. Reflect the column's type class.
3. Use _get_type_classes_mapped_to_dag_nodes to map it to a
TYPE_INFERENCE_DAG key.
4. Look up the sequence of types referred to by that key on the
TYPE_INFERENCE_DAG.
- If there's no such key on the TYPE_INFERENCE_DAG dict, or if
its value is an empty list, return the current column's type
class.
5. Iterate through that sequence of types trying to alter the
column's type to them.
- If the column's type is altered successfully, break
iteration and return the output of running infer_column_type
again (trigger tail recursion).
- If none of the column type alterations succeed, return the
current column's type class.
"""
metadata = metadata if metadata else get_empty_metadata()

if type_inference_dag is None:
type_inference_dag = TYPE_INFERENCE_DAG
metadata = metadata if metadata else get_empty_metadata()
if depth > MAX_INFERENCE_DAG_DEPTH:
raise DagCycleError("The type_inference_dag likely has a cycle")
type_classes_to_dag_nodes = _get_type_classes_mapped_to_dag_nodes(engine)
@@ -71,11 +90,18 @@ def infer_column_type(schema, table_name, column_name, engine, depth=0, type_inf
column_name=column_name,
metadata=metadata,
)
table_oid = get_oid_from_table(table_name, schema, engine)
column_contains_data = determine_whether_column_contains_data(
table_oid, column_name, engine, metadata
)
# We short-circuit in this case since we can't infer type without data.
if not column_contains_data:
return column_type_class

# a DAG node will be a DatabaseType Enum
dag_node = type_classes_to_dag_nodes.get(column_type_class)
logger.debug(f"dag_node: {dag_node}")
types_to_cast_to = type_inference_dag.get(dag_node, [])
table_oid = get_oid_from_table(table_name, schema, engine)
for db_type in types_to_cast_to:
try:
with engine.begin() as conn:
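
A rough usage sketch of the reworked signature, based only on the parameters shown in this hunk (the connection string, schema, and names are placeholders):

```python
from sqlalchemy import create_engine

from db.columns.operations.infer_types import infer_column_type

engine = create_engine("postgresql://user:pass@localhost/mydb")  # placeholder DSN

# Returns the SQLAlchemy type class the column ends up with.
# With the new short-circuit, an empty column simply keeps its current type.
inferred_type = infer_column_type(
    schema="public",
    table_name="my_table",
    column_name="my_column",
    engine=engine,
)
print(inferred_type)
```
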
16 changes: 15 additions & 1 deletion db/columns/operations/select.py
@@ -1,7 +1,7 @@
import warnings

from pglast import Node, parse_sql
from sqlalchemy import and_, asc, cast, select, text
from sqlalchemy import and_, asc, cast, select, text, exists

from db.columns.exceptions import DynamicDefaultWarning
from db.tables.operations.select import reflect_table_from_oid
@@ -151,6 +151,20 @@ def get_column_default_dict(table_oid, attnum, engine, metadata, connection_to_u
return {"value": default_value, "is_dynamic": is_dynamic}


def determine_whether_column_contains_data(
table_oid, column_name, engine, metadata, connection_to_use=None
):
"""
Given a column, return True if it contains data, False otherwise.
"""
sa_table = reflect_table_from_oid(
table_oid, engine, metadata=metadata, connection_to_use=connection_to_use,
)
sel = select(exists(1).where(sa_table.columns[column_name] != None)) # noqa
contains_data = execute_statement(engine, sel, connection_to_use).scalar()
return contains_data


def get_column_from_oid_and_attnum(table_oid, attnum, engine, metadata, connection_to_use=None):
sa_table = reflect_table_from_oid(table_oid, engine, metadata=metadata, connection_to_use=connection_to_use)
column_name = get_column_name_from_attnum(table_oid, attnum, engine, metadata=metadata, connection_to_use=connection_to_use)
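
A sketch of how the new helper might be called (table and column names are placeholders); the `exists` construct it builds compiles to roughly `SELECT EXISTS (SELECT 1 FROM my_table WHERE my_column IS NOT NULL)`:

```python
from sqlalchemy import create_engine

from db.columns.operations.select import determine_whether_column_contains_data
from db.metadata import get_empty_metadata
from db.tables.operations.select import get_oid_from_table

engine = create_engine("postgresql://user:pass@localhost/mydb")  # placeholder DSN
metadata = get_empty_metadata()

table_oid = get_oid_from_table("my_table", "public", engine)

# True if at least one row has a non-NULL value in the column.
has_data = determine_whether_column_contains_data(table_oid, "my_column", engine, metadata)
print(has_data)
```
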
64 changes: 64 additions & 0 deletions db/identifiers.py
@@ -0,0 +1,64 @@
import hashlib


def truncate_if_necessary(identifier):
"""
Takes an identifier and returns it, truncating it if it is too long. The truncated version
will end with a hash of the passed identifier, so column name collisions should be very
rare.
Iteratively removes characters from the end of the identifier, until the resulting string, with
the suffix hash of the identifier appended, is short enough that it doesn't need to be truncated
anymore. Whitespace is trimmed from the truncated identifier before appending the suffix.
"""
assert type(identifier) is str
if not is_identifier_too_long(identifier):
return identifier
right_side = "-" + _get_truncation_hash(identifier)
identifier_length = len(identifier)
assert len(right_side) < identifier_length # Sanity check
range_of_num_of_chars_to_remove = range(1, identifier_length)
for num_of_chars_to_remove in range_of_num_of_chars_to_remove:
left_side = identifier[:num_of_chars_to_remove * -1]
left_side = left_side.rstrip()
truncated_identifier = left_side + right_side
if not is_identifier_too_long(truncated_identifier):
return truncated_identifier
raise Exception(
"Acceptable truncation not found; should never happen."
)


def is_identifier_too_long(identifier):
postgres_identifier_size_limit = 63
size = _get_size_of_identifier_in_bytes(identifier)
return size > postgres_identifier_size_limit


def _get_truncation_hash(identifier):
"""
Produces an 8-character string hash of the passed identifier.
Uses the blake2s hash function, since it's well regarded and better suited to short digests
than blake2b. We want the digest to be short so that it doesn't take up too much of the
truncated identifier it will be appended to.
"""
h = hashlib.blake2s(digest_size=4)
bytes = _get_identifier_in_bytes(identifier)
h.update(bytes)
return h.hexdigest()


def _get_size_of_identifier_in_bytes(s):
bytes = _get_identifier_in_bytes(s)
return len(bytes)


def _get_identifier_in_bytes(s):
"""
As far as I can tell, the Postgres doc below [0] says that UTF-8 supports all languages;
therefore, different server locale configurations should not break this.
[0] https://www.postgresql.org/docs/13/multibyte.html
"""
return s.encode('utf-8')
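
A quick sketch of the intended behavior (the long column name is made up): identifiers at or under Postgres's 63-byte limit pass through unchanged, while longer ones are cut down and suffixed with a dash plus the 8-character blake2s hash of the original name.

```python
from db.identifiers import truncate_if_necessary

short_name = "customer_email"
long_name = "a_really_long_column_name_" + "x" * 80  # well over 63 bytes

print(truncate_if_necessary(short_name))  # unchanged: 'customer_email'

truncated = truncate_if_necessary(long_name)
print(len(truncated.encode("utf-8")) <= 63)  # True: fits the Postgres identifier limit
print(truncated[-9:])                        # '-' followed by the 8-character hash suffix
```
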
7 changes: 7 additions & 0 deletions db/records/operations/delete.py
@@ -8,3 +8,10 @@ def delete_record(table, engine, id_value):
query = delete(table).where(primary_key_column == id_value)
with engine.begin() as conn:
return conn.execute(query)


def bulk_delete_records(table, engine, id_values):
primary_key_column = get_primary_key_column(table)
query = delete(table).where(primary_key_column.in_(id_values))
with engine.begin() as conn:
return conn.execute(query)
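
A hedged usage sketch of the new bulk delete helper (the table definition and DSN are illustrative; any SQLAlchemy Table with a primary key should work the same way `delete_record` does):

```python
from sqlalchemy import Column, Integer, MetaData, Table, Text, create_engine

from db.records.operations.delete import bulk_delete_records

engine = create_engine("postgresql://user:pass@localhost/mydb")  # placeholder DSN
metadata = MetaData()
orders = Table(
    "orders", metadata,
    Column("id", Integer, primary_key=True),
    Column("note", Text),
)

# Issues a single statement along the lines of: DELETE FROM orders WHERE id IN (1, 2, 3)
result = bulk_delete_records(orders, engine, [1, 2, 3])
print(result.rowcount)  # number of rows deleted
```
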
22 changes: 16 additions & 6 deletions db/records/operations/sort.py
@@ -44,27 +44,37 @@ def _build_order_by_all_columns_clause(relation):
To be used when we have failed to find any other ordering criteria,
since ordering by all columns is inherently inefficient.
Note the filtering out of internal columns. Before applying this fix, psycopg was throwing an error
like "could not identify an ordering operator for type json", because we were trying to
sort by an internal column like `__mathesar_group_metadata`, which has type `json`, which
requires special handling to be sorted. The problem is bypassed by not attempting to sort on
internal columns.
Note that some columns are filtered out, namely internal columns and non-orderable columns.
See the docstrings of the corresponding helper functions for details.
"""
return [
{'field': col, 'direction': 'asc'}
for col
in relation.columns
if not _is_internal_column(col)
if _is_col_orderable(col) and not _is_internal_column(col)
]


def _is_internal_column(col):
"""
Columns that Mathesar adds for its own purposes and does not expose to the user. We don't want
to sort by these.
This check might not be exhaustive; take care.
"""
return col.name == '__mathesar_group_metadata'


def _is_col_orderable(col):
"""
Some columns are not orderable (or at least don't have an unambiguous way to define order
without additional logic). We only want to order by orderable columns.
"""
data_type = col.type
non_orderable_type = ['Binary', 'LargeBinary', 'PickleType', 'ARRAY', 'JSON', 'JSONB']
return str(data_type) not in non_orderable_type


def apply_relation_sorting(relation, sort_spec):
order_by_list = [
_get_sorted_column_obj_from_spec(relation, spec) for spec in sort_spec
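
To illustrate the new orderability check (a sketch with a made-up table): a JSON-typed column would be skipped by the default all-columns ordering, while plain scalar columns are kept, because the string form of its SQLAlchemy type appears in the non-orderable list.

```python
from sqlalchemy import JSON, Column, Integer, MetaData, Table, Text

metadata = MetaData()
example = Table(
    "example", metadata,
    Column("id", Integer),
    Column("name", Text),
    Column("payload", JSON),  # not orderable without extra logic
)

non_orderable_type = ['Binary', 'LargeBinary', 'PickleType', 'ARRAY', 'JSON', 'JSONB']
for col in example.columns:
    print(col.name, str(col.type) not in non_orderable_type)
# id True
# name True
# payload False
```
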
