Skip to content

Commit

Permalink
Chore: Add anonymous user ID in tracking events (#124)
Browse files Browse the repository at this point in the history
  • Loading branch information
aaronsteers authored Mar 12, 2024
1 parent 1e45e42 commit 2f483ec
Show file tree
Hide file tree
Showing 11 changed files with 195 additions and 5 deletions.
3 changes: 3 additions & 0 deletions .github/workflows/autofix.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ on:
repository_dispatch:
types: [autofix-command]

env:
AIRBYTE_ANALYTICS_ID: ${{ vars.AIRBYTE_ANALYTICS_ID }}

jobs:
python-autofix:
runs-on: ubuntu-latest
Expand Down
2 changes: 2 additions & 0 deletions .github/workflows/pydoc_preview.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ on:
- main
pull_request: {}

env:
AIRBYTE_ANALYTICS_ID: ${{ vars.AIRBYTE_ANALYTICS_ID }}

jobs:
preview_docs:
Expand Down
3 changes: 3 additions & 0 deletions .github/workflows/pydoc_publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ on:
# Allows you to run this workflow manually from the Actions tab
workflow_dispatch:

env:
AIRBYTE_ANALYTICS_ID: ${{ vars.AIRBYTE_ANALYTICS_ID }}

# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages
permissions:
contents: read
Expand Down
3 changes: 3 additions & 0 deletions .github/workflows/pypi_publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ on:

workflow_dispatch:

env:
AIRBYTE_ANALYTICS_ID: ${{ vars.AIRBYTE_ANALYTICS_ID }}

jobs:
build:
runs-on: ubuntu-latest
Expand Down
3 changes: 3 additions & 0 deletions .github/workflows/python_lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ on:
- main
pull_request: {}

env:
AIRBYTE_ANALYTICS_ID: ${{ vars.AIRBYTE_ANALYTICS_ID }}

jobs:
ruff-lint-check:
name: Ruff Lint Check
Expand Down
3 changes: 3 additions & 0 deletions .github/workflows/python_pytest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ on:
- main
pull_request: {}

env:
AIRBYTE_ANALYTICS_ID: ${{ vars.AIRBYTE_ANALYTICS_ID }}

jobs:
pytest-fast:
name: Pytest (Fast)
Expand Down
3 changes: 3 additions & 0 deletions .github/workflows/release_drafter.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ on:
branches:
- main

env:
AIRBYTE_ANALYTICS_ID: ${{ vars.AIRBYTE_ANALYTICS_ID }}

permissions:
contents: read

Expand Down
3 changes: 3 additions & 0 deletions .github/workflows/semantic_pr_check.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@ on:
- edited
- synchronize

env:
AIRBYTE_ANALYTICS_ID: ${{ vars.AIRBYTE_ANALYTICS_ID }}

permissions:
pull-requests: read

Expand Down
3 changes: 3 additions & 0 deletions .github/workflows/slash_command_dispatch.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ on:
issue_comment:
types: [created]

env:
AIRBYTE_ANALYTICS_ID: ${{ vars.AIRBYTE_ANALYTICS_ID }}

jobs:
slashCommandDispatch:
runs-on: ubuntu-latest
Expand Down
96 changes: 94 additions & 2 deletions airbyte/_util/telemetry.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,12 @@
from dataclasses import asdict, dataclass
from enum import Enum
from functools import lru_cache
from typing import TYPE_CHECKING, Any
from pathlib import Path
from typing import TYPE_CHECKING, Any, cast

import requests
import ulid
import yaml

from airbyte import exceptions as exc
from airbyte._util import meta
Expand All @@ -52,6 +54,10 @@
from airbyte.sources.base import Source


DEBUG = True
"""Enable debug mode for telemetry code."""


HASH_SEED = "PyAirbyte:"
"""Additional seed for randomizing one-way hashed strings."""

Expand All @@ -73,6 +79,92 @@
DO_NOT_TRACK = "DO_NOT_TRACK"
"""Environment variable to opt-out of telemetry."""

_ENV_ANALYTICS_ID = "AIRBYTE_ANALYTICS_ID"  # Allows user to override the anonymous user ID
# YAML file under the user's home directory in which the anonymous user ID is stored.
_ANALYTICS_FILE = Path.home() / ".airbyte" / "analytics.yml"
# Module-level analytics ID. Starts as None and is assigned once at import time (see below).
_ANALYTICS_ID: str | bool | None = None


def _setup_analytics() -> str | bool:
    """Set up the analytics file if it doesn't exist.

    Resolution order:

    1. If the `DO_NOT_TRACK` environment variable is set to a non-empty value, return
       `False` without reading or writing anything.
    2. If the `AIRBYTE_ANALYTICS_ID` environment variable is set, use it as the ID.
    3. Otherwise reuse the ID stored in the analytics file, if one can be read.
    4. Otherwise generate a new ULID.

    The file is (re)written whenever it is missing, unreadable, incomplete, or holds an
    ID different from the override. Failure to write the file is not fatal.

    Returns:
        The anonymous user ID, or `False` if the user has opted out.
    """
    anonymous_user_id: str | None = None
    issues: list[str] = []

    if os.environ.get(DO_NOT_TRACK):
        # User has opted out of tracking.
        return False

    if _ENV_ANALYTICS_ID in os.environ:
        # If the user has chosen to override their analytics ID, use that value and
        # remember it for future invocations.
        anonymous_user_id = os.environ[_ENV_ANALYTICS_ID]

    if not _ANALYTICS_FILE.exists():
        # This is a one-time message to inform the user that we are tracking anonymous usage stats.
        print(
            "Anonymous usage reporting is enabled. For more information or to opt out, please"
            " see https://docs.airbyte.io/pyairbyte/anonymized-usage-statistics"
        )

    if _ANALYTICS_FILE.exists():
        # Start from None so a failed parse leaves a well-defined value rather than an
        # unbound local.
        analytics: dict | None = None
        try:
            loaded = yaml.safe_load(_ANALYTICS_FILE.read_text())
        except Exception as ex:
            # Append one message; `+=` with a string would add each character separately.
            issues.append(f"File appears corrupted. Error was: {ex!s}")
        else:
            if isinstance(loaded, dict):
                analytics = loaded
            elif loaded is not None:
                # Valid YAML, but not a mapping (e.g. a bare string or a list).
                issues.append("File appears corrupted. Expected a YAML mapping.")

        if analytics and "anonymous_user_id" in analytics:
            # The analytics ID was successfully located.
            if not anonymous_user_id:
                return analytics["anonymous_user_id"]

            if anonymous_user_id == analytics["anonymous_user_id"]:
                # Values match, no need to update the file.
                return analytics["anonymous_user_id"]

            issues.append("Provided analytics ID did not match the file. Rewriting the file.")
            print(
                f"Received a user-provided analytics ID override in the '{_ENV_ANALYTICS_ID}' "
                "environment variable."
            )

    # File is missing, incomplete, or stale. Create a new one.
    anonymous_user_id = anonymous_user_id or str(ulid.ULID())
    try:
        _ANALYTICS_FILE.parent.mkdir(exist_ok=True, parents=True)
        _ANALYTICS_FILE.write_text(
            "# This file is used by PyAirbyte to track anonymous usage statistics.\n"
            "# For more information or to opt out, please see\n"
            "# - https://docs.airbyte.com/operator-guides/telemetry\n"
            f"anonymous_user_id: {anonymous_user_id}\n"
        )
    except Exception:
        # Failed to create the analytics file. Likely due to a read-only filesystem.
        # Best-effort only: the ID is still returned for this invocation.
        issues.append("Failed to write the analytics file. Check filesystem permissions.")

    if DEBUG and issues:
        nl = "\n"
        print(f"One or more issues occurred when configuring usage tracking:\n{nl.join(issues)}")

    return anonymous_user_id


def _get_analytics_id() -> str | None:
    """Return the anonymous analytics ID, or `None` when no ID should be sent.

    The module-level `_ANALYTICS_ID` is used when it is not `None`; otherwise
    `_setup_analytics()` is called to resolve the value. A result of `False`
    is reported to the caller as `None`.
    """
    analytics_id: str | bool | None = (
        _setup_analytics() if _ANALYTICS_ID is None else _ANALYTICS_ID
    )
    return None if analytics_id is False else cast(str, analytics_id)


# Resolve the analytics ID once, when this module is imported.
_ANALYTICS_ID = _get_analytics_id()


class SyncState(str, Enum):
STARTED = "started"
Expand Down Expand Up @@ -174,7 +266,7 @@ def send_telemetry(
"https://api.segment.io/v1/track",
auth=(PYAIRBYTE_APP_TRACKING_KEY, ""),
json={
"anonymousId": "airbyte-lib-user",
"anonymousId": _get_analytics_id(),
"event": "sync",
"properties": payload_props,
"timestamp": datetime.datetime.utcnow().isoformat(), # noqa: DTZ003
Expand Down
78 changes: 75 additions & 3 deletions tests/unit_tests/test_anonymous_usage_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@
import itertools
from contextlib import nullcontext as does_not_raise
import json
import os
from pathlib import Path
import re
from unittest.mock import Mock, call, patch
from unittest.mock import MagicMock, call, patch
from freezegun import freeze_time

import responses
Expand All @@ -16,8 +18,6 @@
from airbyte.version import get_version
import airbyte as ab
from airbyte._util import telemetry
import requests
import datetime


@responses.activate
Expand Down Expand Up @@ -174,3 +174,75 @@ def test_tracking(
}
)
])


def test_setup_analytics_existing_file(monkeypatch):
    """An existing analytics file supplies the ID when no override is set."""
    # Clear the override and opt-out variables so that only the file is consulted.
    for env_var in (telemetry._ENV_ANALYTICS_ID, telemetry.DO_NOT_TRACK):
        monkeypatch.delenv(env_var, raising=False)

    # Pretend the analytics file exists and holds a known ID.
    monkeypatch.setattr(Path, "exists", lambda _path: True)
    monkeypatch.setattr(Path, "read_text", lambda _path: "anonymous_user_id: test_id\n")

    resolved_id = telemetry._setup_analytics()
    assert resolved_id == "test_id"


def test_setup_analytics_missing_file(monkeypatch):
    """With no analytics file, the env override is returned and written to a new file."""
    monkeypatch.setenv(telemetry._ENV_ANALYTICS_ID, "test_id")
    monkeypatch.delenv(telemetry.DO_NOT_TRACK, raising=False)
    monkeypatch.setattr(Path, "exists", lambda x: False)

    # Stub directory creation as well as the write, so the test never touches the
    # real home directory.
    monkeypatch.setattr(Path, "mkdir", MagicMock())
    mock_write_text = MagicMock()
    monkeypatch.setattr(Path, "write_text", mock_write_text)

    assert telemetry._setup_analytics() == "test_id"

    assert mock_write_text.call_count == 1


def test_setup_analytics_read_only_filesystem(monkeypatch, capfd):
    """A failing file write must not raise, and the env override is still returned."""
    monkeypatch.setenv(telemetry._ENV_ANALYTICS_ID, "test_id")
    monkeypatch.delenv(telemetry.DO_NOT_TRACK, raising=False)
    monkeypatch.setattr(Path, "exists", lambda x: False)

    # Stub directory creation so the simulated failure comes only from `write_text`,
    # regardless of whether the real home directory is writable.
    monkeypatch.setattr(Path, "mkdir", MagicMock())
    mock_write_text = MagicMock(side_effect=PermissionError("Read-only filesystem"))
    monkeypatch.setattr(Path, "write_text", mock_write_text)

    # We should not raise an exception
    assert telemetry._setup_analytics() == "test_id"

    assert mock_write_text.call_count == 1

    # Capture print outputs
    captured = capfd.readouterr()

    # The raw exception text must not be shown to the user.
    assert "Read-only filesystem" not in captured.out


def test_setup_analytics_corrupt_file(monkeypatch):
    """An existing file without a usable ID causes a new ID to be generated and written."""
    monkeypatch.delenv(telemetry._ENV_ANALYTICS_ID, raising=False)
    monkeypatch.delenv(telemetry.DO_NOT_TRACK, raising=False)
    monkeypatch.setattr(Path, "exists", lambda x: True)
    monkeypatch.setattr(Path, "read_text", lambda x: "not-a-valid ::: yaml file\n")

    # Stub directory creation as well as the write, so the test never touches the
    # real home directory.
    monkeypatch.setattr(Path, "mkdir", MagicMock())
    mock_write_text = MagicMock()
    monkeypatch.setattr(Path, "write_text", mock_write_text)

    # No override is set, so any truthy result is a freshly generated ID.
    assert telemetry._setup_analytics()

    assert mock_write_text.call_count == 1


def test_get_analytics_id(monkeypatch):
    """A pre-populated `_ANALYTICS_ID` is returned unchanged by `_get_analytics_id()`."""
    # Seed the module-level value that the function reads.
    monkeypatch.setattr(telemetry, "_ANALYTICS_ID", "test_id")

    # Make sure neither the override nor the opt-out variable is present.
    for env_var in (telemetry._ENV_ANALYTICS_ID, telemetry.DO_NOT_TRACK):
        monkeypatch.delenv(env_var, raising=False)

    # Guard against any accidental write to the analytics file.
    write_text_stub = MagicMock()
    monkeypatch.setattr(Path, "write_text", write_text_stub)

    resolved_id = telemetry._get_analytics_id()
    assert resolved_id == "test_id"

0 comments on commit 2f483ec

Please sign in to comment.