Skip to content

Commit

Permalink
feat: Switch to mysqlclient from pymysql (#8)
Browse files Browse the repository at this point in the history
Switch to mysqlclient from pymysql
Fix failing tests and run linting
  • Loading branch information
sebastianswms authored Aug 24, 2023
1 parent 376cb3e commit 3ce8679
Show file tree
Hide file tree
Showing 10 changed files with 492 additions and 152 deletions.
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,13 @@ Install from GitHub:
pipx install git+https://github.com/MeltanoLabs/tap-mysql.git@main
```

Note that you will also need to install the system dependencies required to build mysqlclient. Example installation commands for Debian/Ubuntu:

```bash
sudo apt-get update
sudo apt-get install pkg-config libmysqlclient-dev
```

## Configuration

### Accepted Config Options
Expand Down
470 changes: 372 additions & 98 deletions poetry.lock

Large diffs are not rendered by default.

18 changes: 17 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,26 @@ license = "Apache-2.0"
[tool.poetry.dependencies]
python = "<3.12,>=3.8.1"
singer-sdk = { version="^0.30.0" }
pymysql = "^1.1.0"
mysqlclient = "^2.2.0"
fs-s3fs = { version = "^1.1.1", optional = true }
sshtunnel = "0.4.0"

[tool.poetry.dev-dependencies]
pytest = "^7.3.2"
tox = "^3.24.4"
flake8 = "^5.0.4"
flake8-docstrings = "^1.7.0"
black = "23.1.0"
mypy = "^1.0"
isort = "^5.11.5"
remote-pdb="2.1.0"

[tool.isort]
profile = "black"
multi_line_output = 3 # Vertical Hanging Indent
src_paths = "tap_mysql"


[tool.poetry.group.dev.dependencies]
pytest = "^7.2.1"
faker = ">=18.5.1,<20.0.0"
Expand Down
2 changes: 1 addition & 1 deletion tap_mysql/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def patched_conform(
elem: Any, # noqa: ANN401
property_schema: dict,
) -> Any: # noqa: ANN401
"""Overrides Singer SDK type conformance to prevent dates turning into datetimes.
"""Override Singer SDK type conformance to prevent dates turning into datetimes.
Converts a primitive (i.e. not object or array) to a json compatible type.
Expand Down
23 changes: 15 additions & 8 deletions tap_mysql/tap.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import atexit
import io
import signal
import sys
from functools import cached_property
from typing import Any, Mapping, cast

Expand All @@ -28,7 +29,7 @@ def __init__(
*args: tuple,
**kwargs: dict,
) -> None:
"""Constructor.
"""Construct a MySQL tap.
Should use JSON Schema instead
See https://github.com/MeltanoLabs/tap-postgres/issues/141
Expand Down Expand Up @@ -162,7 +163,7 @@ def get_sqlalchemy_url(self, config: Mapping[str, Any]) -> str:
return cast(str, config["sqlalchemy_url"])

sqlalchemy_url = URL.create(
drivername="mysql+pymysql",
drivername="mysql+mysqldb",
username=config["user"],
password=config["password"],
host=config["host"],
Expand All @@ -189,15 +190,19 @@ def connector(self) -> MySQLConnector:
config=dict(self.config),
sqlalchemy_url=url.render_as_string(hide_password=False),
)

def guess_key_type(self, key_data: str) -> paramiko.PKey:
"""Guess the type of the private key.
We are duplicating some logic from the ssh_tunnel package here,
we could try to use their function instead.
Args:
key_data: The private key data to guess the type of.
Returns:
The private key object.
Raises:
ValueError: If the key type could not be determined.
"""
Expand All @@ -209,7 +214,7 @@ def guess_key_type(self, key_data: str) -> paramiko.PKey:
):
try:
key = key_class.from_private_key(io.StringIO(key_data)) # type: ignore[attr-defined] # noqa: E501
except paramiko.SSHException:
except paramiko.SSHException: # noqa: PERF203
continue
else:
return key
Expand All @@ -219,10 +224,12 @@ def guess_key_type(self, key_data: str) -> paramiko.PKey:

def ssh_tunnel_connect(self, *, ssh_config: dict[str, Any], url: URL) -> URL:
"""Connect to the SSH Tunnel and swap the URL to use the tunnel.
Args:
ssh_config: The SSH Tunnel configuration
url: The original URL to connect to.
Returns:
Returns:
The new URL to connect to, using the tunnel.
"""
self.ssh_tunnel: SSHTunnelForwarder = SSHTunnelForwarder(
Expand Down Expand Up @@ -251,14 +258,14 @@ def clean_up(self) -> None:
self.logger.info("Shutting down SSH Tunnel")
self.ssh_tunnel.stop()

def catch_signal(self, signum, frame) -> None:
def catch_signal(self, signum, frame) -> None: # noqa: ANN001 ARG002
"""Catch signals and exit cleanly.
Args:
signum: The signal number
frame: The current stack frame
"""
exit(1) # Calling this to be sure atexit is called, so clean_up gets called

sys.exit(1) # Calling this to be sure atexit is called, so clean_up gets called

@property
def catalog_dict(self) -> dict:
Expand Down
42 changes: 9 additions & 33 deletions tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,19 +16,19 @@
from tap_mysql.tap import TapMySQL

from .test_replication_key import TABLE_NAME, TapTestReplicationKey
from .test_selected_columns_only import (
TABLE_NAME_SELECTED_COLUMNS_ONLY,
TapTestSelectedColumnsOnly,
)

SAMPLE_CONFIG = {
"start_date": pendulum.datetime(2022, 11, 1).to_iso8601_string(),
"sqlalchemy_url": "mysql+pymysql://root:password@localhost:3306/melty",
# Using 127.0.0.1 instead of localhost because of mysqlclient dialect.
# See: https://stackoverflow.com/questions/72294279/how-to-connect-to-mysql-databas-using-github-actions
"sqlalchemy_url": "mysql+mysqldb://root:password@127.0.0.1:3306/melty",
}

NO_SQLALCHEMY_CONFIG = {
"start_date": pendulum.datetime(2022, 11, 1).to_iso8601_string(),
"host": "localhost",
# Using 127.0.0.1 instead of localhost because of mysqlclient dialect.
# See: https://stackoverflow.com/questions/72294279/how-to-connect-to-mysql-databas-using-github-actions
"host": "127.0.0.1",
"port": 3306,
"user": "root",
"password": "password",
Expand Down Expand Up @@ -73,10 +73,6 @@ def teardown_test_table(table_name, sqlalchemy_url):
tests=[TapTestReplicationKey],
)

custom_test_selected_columns_only = suites.TestSuite(
kind="tap",
tests=[TapTestSelectedColumnsOnly],
)

TapMySQLTest = get_tap_test_class(
tap_class=TapMySQL,
Expand All @@ -93,15 +89,6 @@ def teardown_test_table(table_name, sqlalchemy_url):
)


# creating testing instance for isolated table in mysql
TapMySQLTestSelectedColumnsOnly = get_tap_test_class(
tap_class=TapMySQL,
config=SAMPLE_CONFIG,
catalog="tests/resources/data_selected_columns_only.json",
custom_suites=[custom_test_selected_columns_only],
)


class TestTapMySQL(TapMySQLTest):
table_name = TABLE_NAME
sqlalchemy_url = SAMPLE_CONFIG["sqlalchemy_url"]
Expand All @@ -124,17 +111,6 @@ def resource(self):
teardown_test_table(self.table_name, self.sqlalchemy_url)


class TestTapMySQLSelectedColumnsOnly(TapMySQLTestSelectedColumnsOnly):
table_name = TABLE_NAME_SELECTED_COLUMNS_ONLY
sqlalchemy_url = SAMPLE_CONFIG["sqlalchemy_url"]

@pytest.fixture(scope="class")
def resource(self):
setup_test_table(self.table_name, self.sqlalchemy_url)
yield
teardown_test_table(self.table_name, self.sqlalchemy_url)


def test_temporal_datatypes():
"""Dates were being incorrectly parsed as date times (issue #171).
Expand All @@ -149,7 +125,7 @@ def test_temporal_datatypes():
table_name,
metadata_obj,
Column("column_date", DATE),
Column("column_time", TIME),
Column("column_time", TIME(timezone=False, fsp=6)),
Column("column_timestamp", DATETIME),
)
with engine.connect() as conn:
Expand All @@ -164,7 +140,7 @@ def test_temporal_datatypes():
conn.execute(insert)
tap = TapMySQL(config=SAMPLE_CONFIG)
tap_catalog = json.loads(tap.catalog_json_text)
altered_table_name = f"public_{table_name}"
altered_table_name = f"melty-{table_name}"
for stream in tap_catalog["streams"]:
if stream.get("stream") and altered_table_name not in stream["stream"]:
for metadata in stream["metadata"]:
Expand Down Expand Up @@ -221,7 +197,7 @@ def test_jsonb_json():
conn.execute(insert)
tap = TapMySQL(config=SAMPLE_CONFIG)
tap_catalog = json.loads(tap.catalog_json_text)
altered_table_name = f"public_{table_name}"
altered_table_name = f"melty-{table_name}"
for stream in tap_catalog["streams"]:
if stream.get("stream") and altered_table_name not in stream["stream"]:
for metadata in stream["metadata"]:
Expand Down
4 changes: 3 additions & 1 deletion tests/test_replication_key.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@
TABLE_NAME = "test_replication_key"
SAMPLE_CONFIG = {
"start_date": pendulum.datetime(2022, 11, 1).to_iso8601_string(),
"sqlalchemy_url": "mysql+pymysql://root:password@localhost:3307/melty",
# Using 127.0.0.1 instead of localhost because of mysqlclient dialect.
# See: https://stackoverflow.com/questions/72294279/how-to-connect-to-mysql-databas-using-github-actions
"sqlalchemy_url": f"mysql+mysqldb://root:password@127.0.0.1:3306/melty",
}


Expand Down
34 changes: 31 additions & 3 deletions tests/test_selected_columns_only.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,19 @@
# flake8: noqa
import json

import pytest
from singer_sdk.testing import get_tap_test_class, suites
from singer_sdk.testing.templates import TapTestTemplate

from tap_mysql.tap import TapMySQL

from .test_core import setup_test_table, teardown_test_table

TABLE_NAME_SELECTED_COLUMNS_ONLY = "test_selected_columns_only"
SAMPLE_CONFIG = {
"sqlalchemy_url": "mysql+pymysql://root:password@localhost:3307/melty",
# Using 127.0.0.1 instead of localhost because of mysqlclient dialect.
# See: https://stackoverflow.com/questions/72294279/how-to-connect-to-mysql-databas-using-github-actions
"sqlalchemy_url": f"mysql+mysqldb://root:password@127.0.0.1:3306/melty",
}


Expand Down Expand Up @@ -40,5 +46,27 @@ class TapTestSelectedColumnsOnly(TapTestTemplate):
name = "selected_columns_only"
table_name = TABLE_NAME_SELECTED_COLUMNS_ONLY

def test(self):
selected_columns_only_test(self.tap, self.table_name)

custom_test_selected_columns_only = suites.TestSuite(
kind="tap",
tests=[TapTestSelectedColumnsOnly],
)

# creating testing instance for isolated table in mysql
TapMySQLTestSelectedColumnsOnly = get_tap_test_class(
tap_class=TapMySQL,
config=SAMPLE_CONFIG,
catalog="tests/resources/data_selected_columns_only.json",
custom_suites=[custom_test_selected_columns_only],
)


class TestTapMySQLSelectedColumnsOnly(TapMySQLTestSelectedColumnsOnly):
table_name = TABLE_NAME_SELECTED_COLUMNS_ONLY
sqlalchemy_url = SAMPLE_CONFIG["sqlalchemy_url"]

@pytest.fixture(scope="class")
def resource(self):
setup_test_table(self.table_name, self.sqlalchemy_url)
yield
teardown_test_table(self.table_name, self.sqlalchemy_url)
9 changes: 5 additions & 4 deletions tests/test_ssh_tunnel.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

TABLE_NAME = "test_replication_key"
SAMPLE_CONFIG = {
"sqlalchemy_url": "mysql+pymysql://root:[email protected]:3306/melty",
"sqlalchemy_url": "mysql+mysqldb://root:[email protected]:3306/melty",
"ssh_tunnel": {
"enable": True,
"host": "127.0.0.1",
Expand All @@ -15,7 +15,8 @@
},
}

def test_ssh_tunnel():
"""We expect the SSH environment to already be up"""

def test_ssh_tunnel() -> None:
"""We expect the SSH environment to already be up."""
tap = TapMySQL(config=SAMPLE_CONFIG)
tap.sync_all()
tap.sync_all()
35 changes: 32 additions & 3 deletions tox.ini
Original file line number Diff line number Diff line change
@@ -1,19 +1,48 @@
# This file can be used to customize tox tests as well as other test frameworks like flake8 and mypy

[tox]
envlist = py37, py38, py39, py310, py311
envlist = py39
; envlist = py37, py38, py39
isolated_build = true

[testenv]
allowlist_externals = poetry
whitelist_externals = poetry

commands =
poetry install -v
poetry run pytest
poetry run black --check tap_mysql
poetry run flake8 tap_mysql

[testenv:pytest]
# Run the python tests.
# To execute, run `tox -e pytest`
envlist = py37, py38, py39, py310, py311
envlist = py37, py38, py39
commands =
poetry install -v
poetry run pytest

[testenv:format]
# Attempt to auto-resolve lint errors before they are raised.
# To execute, run `tox -e format`
commands =
poetry install -v
poetry run black tap_mysql
poetry run isort tap_mysql

[testenv:lint]
# Raise an error if lint and style standards are not met.
# To execute, run `tox -e lint`
commands =
poetry install -v
poetry run black --check --diff tap_mysql/
poetry run isort --check tap_mysql
poetry run flake8 tap_mysql

[flake8]
ignore = W503
max-line-length = 88
max-complexity = 10

[pydocstyle]
ignore = D105,D203,D213

0 comments on commit 3ce8679

Please sign in to comment.