Skip to content

Commit

Permalink
add openvpn data formats & observations
Browse files Browse the repository at this point in the history
  • Loading branch information
ainghazal committed Sep 19, 2024
1 parent b8c5d50 commit 9edf29e
Show file tree
Hide file tree
Showing 11 changed files with 306 additions and 11 deletions.
40 changes: 40 additions & 0 deletions oonidata/src/oonidata/models/dataformats.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,7 @@ class DNSQuery(BaseModel):
dial_id: Optional[int] = None



@add_slots
@dataclass
class TCPConnectStatus(BaseModel):
Expand Down Expand Up @@ -365,3 +366,42 @@ class NetworkEvent(BaseModel):
# Deprecated fields
dial_id: Optional[int] = None
conn_id: Optional[int] = None


@add_slots
@dataclass
class OpenVPNHandshake(BaseModel):
    """Result of one OpenVPN handshake attempt against a single endpoint.

    Mirrors an entry of the ``openvpn_handshake`` list in the openvpn
    experiment's test_keys (see OpenVPNTestKeys).
    """

    handshake_time: float  # total handshake duration, in seconds
    endpoint: str  # endpoint descriptor the probe attempted to connect to
    ip: str # we might want to make this optional, and scrub in favor of ASN/prefix
    port: int
    transport: str  # presumably "tcp" or "udp" — TODO confirm allowed values
    provider: str  # VPN provider operating this endpoint
    t0: float  # handshake start time; presumably relative to measurement start — TODO confirm
    t: float  # handshake end time, same time base as t0
    openvpn_options: Optional[Dict[str, str]] = None  # OpenVPN config options used for this attempt
    tags: Optional[List[str]] = None
    # NOTE(review): OpenVPNNetworkEvent declares transaction_id as Optional[int];
    # confirm which type matches the wire format and make them consistent.
    transaction_id: Optional[str] = None
    failure: Failure = None  # None on success, failure string otherwise

@add_slots
@dataclass
class OpenVPNPacket(BaseModel):
    """A single OpenVPN control-channel packet as logged by the experiment.

    Embedded in OpenVPNNetworkEvent via its ``packet`` field.
    """

    operation: str  # presumably the direction, e.g. read/write — TODO confirm
    opcode: str  # OpenVPN packet opcode (symbolic name)
    id: int  # packet id; field name mirrors the data format (shadows builtin id)
    payload_size: int  # payload length in bytes
    acks: Optional[List[int]] = None  # packet ids acknowledged by this packet
    send_attempts: Optional[int] = None  # number of (re)transmissions, when sending


@add_slots
@dataclass
class OpenVPNNetworkEvent(BaseModel):
    """One entry of the ``network_events`` list produced by the openvpn
    experiment, optionally carrying the raw packet that triggered it."""

    operation: str
    stage: str  # handshake stage the event belongs to
    t: float  # event time; presumably relative to measurement start — TODO confirm
    tags: Optional[List[str]] = None
    packet: Optional[OpenVPNPacket] = None  # set for packet-level events
    # NOTE(review): OpenVPNHandshake declares transaction_id as Optional[str];
    # confirm which type matches the wire format and make them consistent.
    transaction_id: Optional[int] = None

3 changes: 3 additions & 0 deletions oonidata/src/oonidata/models/nettests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from .whatsapp import Whatsapp
from .http_invalid_request_line import HTTPInvalidRequestLine
from .http_header_field_manipulation import HTTPHeaderFieldManipulation
from .openvpn import OpenVPN

SUPPORTED_CLASSES = [
HTTPHeaderFieldManipulation,
Expand All @@ -27,6 +28,7 @@
Signal,
FacebookMessenger,
Whatsapp,
OpenVPN,
BaseMeasurement,
]
SupportedDataformats = Union[
Expand All @@ -42,6 +44,7 @@
Signal,
FacebookMessenger,
Whatsapp,
OpenVPN,
BaseMeasurement,
]

Expand Down
36 changes: 36 additions & 0 deletions oonidata/src/oonidata/models/nettests/openvpn.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
from dataclasses import dataclass
from typing import List, Optional

from ..base import BaseModel

from oonidata.compat import add_slots
from oonidata.models.dataformats import (
BaseTestKeys,
Failure,
TCPConnect,
OpenVPNHandshake,
OpenVPNNetworkEvent,
)
from oonidata.models.nettests.base_measurement import BaseMeasurement


@add_slots
@dataclass
class OpenVPNTestKeys(BaseTestKeys):
    """test_keys for the openvpn experiment.

    Aggregates the per-endpoint artifacts: low-level network events, the TCP
    connect attempts, and the OpenVPN handshake results.
    """

    success: Optional[bool] = False
    failure: Failure = None  # None on success, failure string otherwise

    network_events: Optional[List[OpenVPNNetworkEvent]] = None
    tcp_connect: Optional[List[TCPConnect]] = None
    openvpn_handshake: Optional[List[OpenVPNHandshake]] = None

    bootstrap_time: Optional[float] = None  # total bootstrap duration, seconds
    # Fix: was annotated bare `str` with a None default; the field is optional,
    # so the annotation must be Optional[str] (default value unchanged).
    tunnel: Optional[str] = None


@add_slots
@dataclass
class OpenVPN(BaseMeasurement):
    """Measurement envelope for the "openvpn" nettest."""

    # Registered test name used to dispatch raw measurements to this class.
    __test_name__ = "openvpn"

    test_keys: OpenVPNTestKeys
85 changes: 85 additions & 0 deletions oonidata/src/oonidata/models/observations.py
Original file line number Diff line number Diff line change
Expand Up @@ -383,3 +383,88 @@ class HTTPMiddleboxObservation:
hfm_diff: Optional[str] = None
hfm_failure: Optional[str] = None
hfm_success: Optional[bool] = None


@table_model(
    table_name="obs_openvpn",
    table_index=(
        # NOTE(review): "measurement_start_time" is not a field declared on this
        # class — presumably it is flattened out of measurement_meta; verify
        # against the table_model column mapping.
        "measurement_start_time",
        "measurement_uid",
        "observation_idx",
    ),
)
@dataclass
class OpenVPNObservation:
    """Flat per-endpoint observation row for the openvpn experiment,
    stored in the ``obs_openvpn`` ClickHouse table."""

    measurement_meta: MeasurementMeta

    probe_meta: ProbeMeta

    # Index of this observation within the measurement it came from.
    observation_idx: int = 0

    created_at: Optional[datetime] = None

    # NOTE(review): annotation improved from bare `datetime` to match the
    # None default; value unchanged.
    timestamp: Optional[datetime] = None

    # Fields added by the processor

    ip: str = ""
    port: int = 0
    transport: str = ""

    success: bool = False
    failure: Failure = None  # None on success, failure string otherwise

    protocol: str = ""
    variant: Optional[str] = None  # obfuscation/variant of the base protocol

    # TCP related observation
    tcp_failure: Optional[Failure] = None
    tcp_success: Optional[bool] = None
    tcp_t: Optional[float] = None

    # OpenVPN handshake observation
    openvpn_handshake_failure: Optional[Failure] = None
    openvpn_handshake_t: Optional[float] = None
    openvpn_handshake_t0: Optional[float] = None
    openvpn_bootstrap_time: Optional[float] = None

    # timing info about the handshake packets
    openvpn_handshake_hr_client_t: Optional[float] = None
    openvpn_handshake_hr_server_t: Optional[float] = None
    openvpn_handshake_clt_hello_t: Optional[float] = None
    openvpn_handshake_srv_hello_t: Optional[float] = None
    openvpn_handshake_key_exchg_n: Optional[int] = None
    openvpn_handshake_got_keys__t: Optional[float] = None
    openvpn_handshake_gen_keys__t: Optional[float] = None




@table_model(
    table_name="obs_tunnel",
    table_index= ("measurement_uid", "observation_idx", "measurement_start_time"),
)
@dataclass
class TunnelEndpointObservation:
    """Per-endpoint observation for tunnel protocols in general (of which
    OpenVPN is one family), stored in the ``obs_tunnel`` table."""

    measurement_meta: MeasurementMeta
    probe_meta: ProbeMeta

    measurement_start_time: datetime

    ip: str
    port: int
    transport: str

    # definition of success will need to change when/if we're able to gather metrics
    # through the tunnel.
    success: bool
    failure: Failure  # None on success, failure string otherwise

    protocol: str
    family: str  # protocol family the endpoint belongs to

    # indicates obfuscation or modifications from the main protocol family.
    variant: Optional[str] = None

    # any metadata about the providers behind the endpoints.
    provider: Optional[str] = None
13 changes: 7 additions & 6 deletions oonipipeline/Design.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,11 @@ needed.

### Expose a queriable low level view on measurements

Currently it's only possible to query measurement at a granuliaty which is as
fine a measurement.
Currently it's only possible to query measurement at a granularity which is as
fine as a measurement.

This means that it's only possible to answer questions which the original
designer of the experiment had already throught of.
designer of the experiment had already thought of.

On the other hand the new pipeline breaks down measurements into distinct
observations (think 1 DNS query and answer or 1 TLS handshake towards a
Expand Down Expand Up @@ -145,16 +145,17 @@ port combination.

You can run the observation generation with a clickhouse backend like so:

TODO(art): check this is correct.

```
poetry run python -m oonidata mkobs --clickhouse clickhouse://localhost/ --data-dir tests/data/datadir/ --start-day 2022-08-01 --end-day 2022-10-01 --create-tables --parallelism 20
hatch run oonipipeline --probe-cc US --test-name signal --workflow-name observations --start-at 2022-08-01 --end-at 2022-10-01
```

Here is the list of supported observations so far:

- [x] WebObservation, which has information about DNS, TCP, TLS and HTTP(s)
- [x] WebControlObservation, has the control measurements run by web connectivity (is used to generate ground truths)
- [ ] CircumventionToolObservation, still needs to be designed and implemented
(ideally we would use the same for OpenVPN, Psiphon, VanillaTor)
- [x] OpenVPNObservation, with measurements run by the openvpn experiment.

### Response body archiving

Expand Down
12 changes: 9 additions & 3 deletions oonipipeline/Readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ For historical context, these are the major revisions:
- `v1` - OONI Pipeline based on custom CLI scripts using mongodb as a backend. Used until ~2015.
- `v2` - OONI Pipeline based on [luigi](https://luigi.readthedocs.io/en/stable/). Used until ~2017.
- `v3` - OONI Pipeline based on [airflow](https://airflow.apache.org/). Used until ~2020.
- `v4` - OONI Pipeline basedon custom script and systemd units (aka fastpath). Currently in use in production.
- `v4` - OONI Pipeline based on custom script and systemd units (aka fastpath). Currently in use in production.
- `v5` - Next generation OONI Pipeline. What this readme is relevant to. Expected to become in production by Q4 2024.

## Setup
Expand Down Expand Up @@ -41,13 +41,19 @@ clickhouse server

Workflows are started by first scheduling them and then triggering a backfill operation on them. When they are scheduled they will also run on a daily basis.


```
hatch run oonipipeline schedule --probe-cc US --test-name signal --create-tables
hatch run oonipipeline schedule --probe-cc US --test-name signal
```

You can then trigger the backfill operation like so:
```
hatch run oonipipeline backfill --probe-cc US --test-name signal --workflow-name observations --start-at 2024-01-01 --end-at 2024-02-01
hatch run oonipipeline backfill --create-tables --probe-cc US --test-name signal --workflow-name observations --start-at 2024-01-01 --end-at 2024-02-01
```

If you need to re-create the database tables (because the schema has changed), you want to add the `--drop-tables` flag to the invocation:
```
hatch run oonipipeline backfill --create-tables --drop-tables --probe-cc US --test-name signal --workflow-name observations --start-at 2024-01-01 --end-at 2024-02-01
```

You will then need some workers to actually perform the task you backfilled, these can be started like so:
Expand Down
2 changes: 1 addition & 1 deletion oonipipeline/src/oonipipeline/cli/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ async def main():
@click.option(
"--analysis/--no-analysis",
default=True,
help="should we drop tables before creating them",
help="schedule analysis too",
)
def schedule(probe_cc: List[str], test_name: List[str], analysis: bool):
"""
Expand Down
2 changes: 2 additions & 0 deletions oonipipeline/src/oonipipeline/db/create_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
WebControlObservation,
WebObservation,
HTTPMiddleboxObservation,
OpenVPNObservation,
)

from .connections import ClickhouseConnection
Expand Down Expand Up @@ -170,6 +171,7 @@ def format_create_query(
table_models = [
WebObservation,
WebControlObservation,
OpenVPNObservation,
HTTPMiddleboxObservation,
WebAnalysis,
MeasurementExperimentResult,
Expand Down
Loading

0 comments on commit 9edf29e

Please sign in to comment.