Skip to content

Commit

Permalink
Merge pull request #14 from graphsense/feature/tron
Browse files Browse the repository at this point in the history
Changes to support tron
  • Loading branch information
soad003 authored Dec 11, 2023
2 parents 545d38b + ae0a829 commit f21223a
Show file tree
Hide file tree
Showing 49 changed files with 9,686 additions and 137 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -186,3 +186,4 @@ dmypy.json

# Pyre type checker
.pyre/
tester.py
7 changes: 6 additions & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,11 @@ repos:
rev: 23.7.0
hooks:
- id: black
exclude: |
(?x)^(
( .+_pb2_grpc.py
| .+_pb2.py)
)$
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.4.0
hooks:
Expand Down Expand Up @@ -37,4 +42,4 @@ repos:
hooks:
- id: isort
name: isort (python)
args: ["--profile", "black", "src"]
args: ["--filter-files", "--sg", "*pb2_grpc.py", "--sg", "*pb2.py", "--profile", "black", "src"]
15 changes: 13 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ SHELL := /bin/bash
PROJECT := graphsense-lib
VENV := venv
RELEASE := 'v23.09'
RELEASESEM := 'v2.0.0'
RELEASESEM := 'v2.1.0'

all: format lint test build

Expand Down Expand Up @@ -55,7 +55,18 @@ publish: build version
version:
python -m setuptools_scm

generate-tron-grpc-code:
python -m grpc_tools.protoc\
--python_out=./src/\
--grpc_python_out=./src/\
--proto_path=./src/\
./src/graphsenselib/ingest/tron/grpc/api/tron_api.proto
python -m grpc_tools.protoc\
--python_out=./src/\
--proto_path=./src/\
./src/graphsenselib/ingest/tron/grpc/core/*.proto

click-bash-completion:
_GRAPHSENSE_CLI_COMPLETE=bash_source graphsense-cli

.PHONY: all test install lint format build pre-commit docs test-all docs-latex publish tpublish tag-version click-bash-completion
.PHONY: all test install lint format build pre-commit docs test-all docs-latex publish tpublish tag-version click-bash-completion generate-tron-grpc-code
8 changes: 5 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
# graphsense-lib

[![Test and Build Status](https://github.com/graphsense/graphsense-lib/actions/workflows/run_tests.yaml/badge.svg)](https://github.com/graphsense/graphsense-lib/actions)

[![PyPI version](https://badge.fury.io/py/graphsense-lib.svg)](https://badge.fury.io/py/graphsense-lib)
[![Test and Build Status](https://github.com/graphsense/graphsense-lib/actions/workflows/run_tests.yaml/badge.svg)](https://github.com/graphsense/graphsense-lib/actions) [![PyPI version](https://badge.fury.io/py/graphsense-lib.svg)](https://badge.fury.io/py/graphsense-lib) [![Python](https://img.shields.io/pypi/pyversions/graphsense-lib)](https://pypi.org/project/graphsense-lib/) [![Downloads](https://static.pepy.tech/badge/graphsense-lib)](https://pepy.tech/project/graphsense-lib)

A central repository for python utility functions and everything that deals with the graphsense backend. Its CLI interface can be used to control important graphsense maintainance tasks.

Expand Down Expand Up @@ -244,6 +242,7 @@ or
## Development
Caution: python 3.11 is currently not supported. Please use ```python3.10```.
it is advised to use a virtual environment (venv) for development. Run the following command to initialize one
```bash
Expand Down Expand Up @@ -306,3 +305,6 @@ To apply the tags run
```bash
> make tag-version
```
### Open SSL - Errors running test suites
Some components used by graphsense-lib use OpenSSL, to provide certain hash functions. Since OpenSSL 3.0 some hash functions needed are not available anymore by default (e.g. ripemd160). This leads to errors while running the test suite. To avoid these errors, enable legacy providers in the OpenSSL config. An example of the necessary changes can be found in the "fix openssl legacy mode" step in .github/workflows/run_tests.yaml
13 changes: 13 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,16 @@ build-backend = "setuptools.build_meta"
# For smarter version schemes and other configuration options,
# check out https://github.com/pypa/setuptools_scm
version_scheme = "no-guess-dev"

[tool.black]
# include = '\.pyi?$'
# 'extend-exclude' excludes files or directories in addition to the defaults
extend-exclude = '''
# A regex preceded with ^/ will apply only to files and directories
# in the root of the project.
(
.*_pb2_grpc.py # exclude autogenerated Protocol Buffer files anywhere in the project
| .*_pb2.py # exclude autogenerated Protocol Buffer files anywhere in the project
)
'''
# exclude = ['\.pb2_grpc.py$','\.pb2.py$']
19 changes: 16 additions & 3 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ platforms = any
classifiers =
Development Status :: 4 - Beta
Programming Language :: Python
Programming Language :: Python :: 3.8
Programming Language :: Python :: 3.9
Programming Language :: Python :: 3.10
Intended Audience :: Developers
License :: OSI Approved :: MIT License
Topic :: Utilities
Expand All @@ -45,7 +48,7 @@ package_dir =
=src

# Require a min/specific Python version (comma-separated conditions)
# python_requires = >=3.9
python_requires = >=3.8, <3.11

# Add here dependencies of your project (line-separated), e.g. requests>=2.2,<3.0.
# Version specifiers like >=2.2,<3.0 avoid problems due to API changes in
Expand All @@ -66,17 +69,22 @@ install_requires =
rich>=12.6.0, <13.0
eth-event>=1.2.3, <2.0
eth-hash>=0.3.0
ethereum-etl==2.1.2
ethereum-etl~=2.3
pyarrow>=11.0.0,<12.0
chainside-btcpy==0.6.5
chainside-btcpy~=0.6
cashaddress>=1.0.6, < 1.1
tenacity~=8.2.3
graphsense-bitcoin-etl==1.5.4
base58~=2.1
typeguard~=4.1
grpcio


[options.packages.find]
where = src
exclude =
tests
src/grapsenselib/ingest/tron/protos

[options.extras_require]
# Add here additional requirements for extra features, to install with:
Expand Down Expand Up @@ -106,6 +114,7 @@ dev =
pre-commit
setuptools_scm
pdbpp
grpcio-tools

[options.entry_points]
# Add here console scripts like:
Expand Down Expand Up @@ -151,11 +160,15 @@ exclude =
build
dist
.eggs
*pb2.py
*pb2_grpc.py


[isort]
profile=black
src_paths=src,tests
skip_glob = *pb2_grpc.py
*pb2.py


[pyscaffold]
Expand Down
18 changes: 14 additions & 4 deletions src/graphsenselib/config/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,14 @@
from goodconf import Field, GoodConf
from pydantic import BaseModel, validator

from ..utils import flatten
from ..utils import first_or_default, flatten

supported_base_currencies = ["btc", "eth", "ltc", "bch", "zec", "trx"]
default_environments = ["prod", "test", "dev", "exp1", "exp2", "exp3"]
schema_types = ["utxo", "account"]
schema_types = ["utxo", "account", "account_trx"]
keyspace_types = ["raw", "transformed"]
currency_to_schema_type = {
cur: "account" if cur == "eth" or cur == "trx" else "utxo"
cur: "account_trx" if cur == "trx" else "account" if cur == "eth" else "utxo"
for cur in supported_base_currencies
}
supported_fiat_currencies = ["USD", "EUR"]
Expand All @@ -32,7 +32,7 @@
)


def get_approx_reorg_backoff_blocks(network: str, lag_in_hours: int = 2) -> int:
def get_approx_reorg_backoff_blocks(network: str, lag_in_hours: int = 1.2) -> int:
"""For imports we do not want to always catch up with the latest block
since we want to avoid reorgs lead to spurious data in the database.
Expand All @@ -52,9 +52,19 @@ class FileSink(BaseModel):

class IngestConfig(BaseModel):
node_reference: str = Field(default_factory=lambda: "")
secondary_node_references: List[str] = Field(default_factory=lambda: [])
raw_keyspace_file_sinks: Dict[str, FileSink] = Field(default_factory=lambda: {})
# raw_keyspace_file_sink_directory: str = Field(default_factory=lambda: None)

@property
def all_node_references(self) -> List[str]:
return [self.node_reference] + self.secondary_node_references

def get_first_node_reference(self, protocol: str = "http") -> Optional[str]:
return first_or_default(
self.all_node_references, lambda x: x.startswith(protocol)
)


class KeyspaceSetupConfig(BaseModel):
replication_config: str = Field(
Expand Down
1 change: 1 addition & 0 deletions src/graphsenselib/datatypes/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# flake8: noqa: F401
from .address import AddressAccount, AddressUtxo
from .common import *
from .errors import *
2 changes: 2 additions & 0 deletions src/graphsenselib/datatypes/errors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
class BadUserInputError(Exception):
pass
4 changes: 4 additions & 0 deletions src/graphsenselib/db/account.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,3 +46,7 @@ def get_addresses_in_block(self, block: int) -> Iterable:
fetch_size=10000,
)
return result


class RawDbAccountTrx(RawDbAccount):
pass
12 changes: 8 additions & 4 deletions src/graphsenselib/db/analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -1025,16 +1025,16 @@ class AnalyticsDb:
def __init__(
self, raw: KeyspaceConfig, transformed: KeyspaceConfig, db: CassandraDb
):
self._raw_keyspace = raw.keyspace_name
self._transformed_keyspace = transformed.keyspace_name
self._raw_config = raw
self._transformed_config = transformed
self._db = db
self._raw = raw.db_type(raw, db)
self._transformed = transformed.db_type(transformed, db)

def __repr__(self):
return (
f"Raw: {self._raw_keyspace}, "
f"Transformed: {self._transformed_keyspace}, "
f"Raw: {self._raw_config.keyspace_name}, "
f"Transformed: {self._transformed_config.keyspace_name}, "
f"DB: {self._db}"
)

Expand All @@ -1051,6 +1051,10 @@ def __enter__(self):
self.open()
return self

def clone(self) -> "AnalyticsDb":
db_cloned = self._db.clone()
return AnalyticsDb(self._raw_config, self._transformed_config, db_cloned)

def __exit__(self, exc_type, exc_value, tb):
self.close()

Expand Down
53 changes: 50 additions & 3 deletions src/graphsenselib/db/cassandra.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,11 @@
from functools import wraps
from typing import Iterable, List, Optional, Sequence, Union

from cassandra.cluster import Cluster
from cassandra.cluster import EXEC_PROFILE_DEFAULT, Cluster, ExecutionProfile

# Session,
from cassandra.concurrent import execute_concurrent, execute_concurrent_with_args
from cassandra.policies import RetryPolicy
from cassandra.query import (
UNSET_VALUE,
BatchStatement,
Expand All @@ -24,6 +25,44 @@
logger = logging.getLogger(__name__)


# taken from https://github.com/apache/cassandra-dtest/blob/0085d21bc687995478e338302e619e82ad4a4644/dtest.py#L88C5-L88C5 # noqa
class ReadRetryPolicy(RetryPolicy):
"""
A retry policy that retries 5 times by default, but can be configured to
retry more times.
"""

def __init__(self, max_retries=5):
self.max_retries = max_retries

def on_read_timeout(self, *args, **kwargs):
if kwargs["retry_num"] < self.max_retries:
logger.debug(
"Retrying read after timeout. Attempt #" + str(kwargs["retry_num"])
)
return (self.RETRY, None)
else:
return (self.RETHROW, None)

# def on_write_timeout(self, *args, **kwargs):
# if kwargs["retry_num"] < self.max_retries:
# logger.debug(
# "Retrying write after timeout. Attempt #" + str(kwargs["retry_num"])
# )
# return (self.RETRY, None)
# else:
# return (self.RETHROW, None)

# def on_unavailable(self, *args, **kwargs):
# if kwargs["retry_num"] < self.max_retries:
# logger.debug(
# "Retrying request after UE. Attempt #" + str(kwargs["retry_num"])
# )
# return (self.RETRY, None)
# else:
# return (self.RETHROW, None)


def normalize_cql_statement(stmt: str) -> str:
return remove_multi_whitespace(stmt.lower().strip()).rstrip(";")

Expand Down Expand Up @@ -254,15 +293,22 @@ def __init__(self, db_nodes: Iterable, default_timeout=DEFAULT_TIMEOUT) -> None:
self.prep_stmts = {}
self._session_timeout = default_timeout

def clone(self) -> "CassandraDb":
return CassandraDb(self.db_nodes, self._session_timeout)

def __repr__(self):
return f"{', '.join(self.db_nodes)}"

def connect(self):
"""Connect to given Cassandra cluster nodes."""
self.cluster = Cluster(self.db_nodes)
exec_prof = ExecutionProfile(retry_policy=ReadRetryPolicy(), request_timeout=30)
self.cluster = Cluster(
self.db_nodes,
execution_profiles={EXEC_PROFILE_DEFAULT: exec_prof},
connect_timeout=self._session_timeout,
)
try:
self.session = self.cluster.connect()
self.session.default_timeout = self._session_timeout
except Exception as e:
raise StorageError(f"Cannot connect to {self.db_nodes}") from e

Expand Down Expand Up @@ -427,6 +473,7 @@ def get_prepared_statement(self, stmt):
self.prep_stmts[hex_dig] = self.session.prepare(stmt)
return self.prep_stmts[hex_dig]

@needs_session
def ingest(
self,
table: str,
Expand Down
Loading

0 comments on commit f21223a

Please sign in to comment.