From 03e6e4a26446e2ca06f383c0b0d4c047cc51afcd Mon Sep 17 00:00:00 2001 From: XmasApple Date: Thu, 7 Sep 2023 17:30:45 +0300 Subject: [PATCH 01/46] slo --- .github/workflows/slo.yml | 65 ++ .gitignore | 2 + slo/.gitignore | 5 + slo/playground/README.md | 40 ++ .../provisioning/dashboards/dashboard.yml | 6 + .../grafana/provisioning/dashboards/slo.json | 646 ++++++++++++++++++ .../provisioning/datasources/datasource.yml | 11 + .../configs/prometheus/prometheus.yml | 8 + slo/playground/docker-compose.yml | 92 +++ slo/src/Cli/Cli.cs | 174 +++++ slo/src/Cli/CliCommands.cs | 94 +++ slo/src/Cli/ConfigBinders.cs | 135 ++++ slo/src/Cli/Configs.cs | 10 + slo/src/DataGenerator.cs | 35 + slo/src/Dockerfile | 20 + slo/src/Executor.cs | 48 ++ slo/src/Jobs/Job.cs | 91 +++ slo/src/Jobs/ReadJob.cs | 25 + slo/src/Jobs/WriteJob.cs | 20 + slo/src/Program.cs | 4 + slo/src/Queries.cs | 58 ++ slo/src/README.md | 147 ++++ slo/src/RateLimitedCaller.cs | 47 ++ slo/src/Table.cs | 33 + slo/src/TokenBucket.cs | 62 ++ slo/src/slo.csproj | 19 + slo/src/src.sln | 25 + 27 files changed, 1922 insertions(+) create mode 100644 .github/workflows/slo.yml create mode 100644 slo/.gitignore create mode 100644 slo/playground/README.md create mode 100644 slo/playground/configs/grafana/provisioning/dashboards/dashboard.yml create mode 100644 slo/playground/configs/grafana/provisioning/dashboards/slo.json create mode 100644 slo/playground/configs/grafana/provisioning/datasources/datasource.yml create mode 100644 slo/playground/configs/prometheus/prometheus.yml create mode 100644 slo/playground/docker-compose.yml create mode 100644 slo/src/Cli/Cli.cs create mode 100644 slo/src/Cli/CliCommands.cs create mode 100644 slo/src/Cli/ConfigBinders.cs create mode 100644 slo/src/Cli/Configs.cs create mode 100644 slo/src/DataGenerator.cs create mode 100644 slo/src/Dockerfile create mode 100644 slo/src/Executor.cs create mode 100644 slo/src/Jobs/Job.cs create mode 100644 slo/src/Jobs/ReadJob.cs create mode 100644 slo/src/Jobs/WriteJob.cs create mode 100644 slo/src/Program.cs create mode 100644 slo/src/Queries.cs create mode 100644 slo/src/README.md create mode 100644 slo/src/RateLimitedCaller.cs create mode 100644 slo/src/Table.cs create mode 100644 slo/src/TokenBucket.cs create mode 100644 slo/src/slo.csproj create mode 100644 slo/src/src.sln diff --git a/.github/workflows/slo.yml b/.github/workflows/slo.yml new file mode 100644 index 00000000..27909df1 --- /dev/null +++ b/.github/workflows/slo.yml @@ -0,0 +1,65 @@ +on: + push: + # branches: [main] + pull_request: + branches: [main] + workflow_dispatch: + +name: SLO + +jobs: + test-slo: + concurrency: + group: slo-${{ github.ref }} + if: (!contains(github.event.pull_request.labels.*.name, 'no slo')) + + runs-on: ubuntu-latest + name: SLO test + permissions: + checks: write + pull-requests: write + contents: read + issues: write + + steps: + - name: Checkout repository + uses: actions/checkout@v3 + if: env.DOCKER_REPO != null + env: + DOCKER_REPO: ${{ secrets.SLO_DOCKER_REPO }} + + - name: Run SLO + uses: ydb-platform/slo-tests@php-version + if: env.DOCKER_REPO != null + env: + DOCKER_REPO: ${{ secrets.SLO_DOCKER_REPO }} + with: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + KUBECONFIG_B64: ${{ secrets.SLO_KUBE_CONFIG }} + AWS_CREDENTIALS_B64: ${{ secrets.SLO_AWS_CREDENTIALS }} + AWS_CONFIG_B64: ${{ secrets.SLO_AWS_CONFIG }} + DOCKER_USERNAME: ${{ secrets.SLO_DOCKER_USERNAME }} + DOCKER_PASSWORD: ${{ secrets.SLO_DOCKER_PASSWORD }} + DOCKER_REPO: ${{ secrets.SLO_DOCKER_REPO }} + DOCKER_FOLDER: ${{ secrets.SLO_DOCKER_FOLDER }} + s3_endpoint: ${{ secrets.SLO_S3_ENDPOINT }} + s3_images_folder: ${{ vars.SLO_S3_IMAGES_FOLDER }} + grafana_domain: ${{ vars.SLO_GRAFANA_DOMAIN }} + grafana_dashboard: ${{ vars.SLO_GRAFANA_DASHBOARD }} + ydb_version: 'newest' + timeBetweenPhases: 30 + shutdownTime: 30 + + language_id0: 'dotnet' + workload_path0: 'slo/src' + language0: '.NET SDK' + workload_build_context0: ../.. + workload_build_options0: -f Dockerfile + + - uses: actions/upload-artifact@v3 + if: env.DOCKER_REPO != null + env: + DOCKER_REPO: ${{ secrets.SLO_DOCKER_REPO }} + with: + name: slo-logs + path: logs/ diff --git a/.gitignore b/.gitignore index f7fb764f..98937dd9 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,7 @@ +.idea/ .vs/ .vscode/ bin/ obj/ launchSettings.json +*.user diff --git a/slo/.gitignore b/slo/.gitignore new file mode 100644 index 00000000..00868c0f --- /dev/null +++ b/slo/.gitignore @@ -0,0 +1,5 @@ +data/ +.idea/ +bin/ +obj/ + diff --git a/slo/playground/README.md b/slo/playground/README.md new file mode 100644 index 00000000..eefb5cf0 --- /dev/null +++ b/slo/playground/README.md @@ -0,0 +1,40 @@ +# SLO playground + +Playground may be used for testing SLO workloads locally + +It has several services: + +- `prometheus` - storage for metrics +- `prometheus-pushgateway` - push acceptor for prometheus +- `grafana` - provides chats for metrics +- `ydb` - local instance of ydb-database to run workload with + +## Network addresses + +- Grafana dashboard: http://localhost:3000 +- Prometheus pushgateway: http://localhost:9091 +- YDB monitoring: http://localhost:8765 +- YDB GRPC: grpc://localhost:2136 +- YDB GRPC TLS: grpcs://localhost:2135 + +## Start + +```shell +docker-compose up -d +``` + +## Stop + +```shell +docker-compose down +``` + +## Configs + +Grafana's dashboards stored in `configs/grafana/provisioning/dashboards` + +## Data + +YDB databases are not persistent + +All other data like metrics and certs stored in `data/` \ No newline at end of file diff --git a/slo/playground/configs/grafana/provisioning/dashboards/dashboard.yml b/slo/playground/configs/grafana/provisioning/dashboards/dashboard.yml new file mode 100644 index 00000000..c6784142 --- /dev/null +++ b/slo/playground/configs/grafana/provisioning/dashboards/dashboard.yml @@ -0,0 +1,6 @@ +apiVersion: 1 + +providers: + - name: 'SLO' + options: + path: /etc/grafana/provisioning/dashboards diff --git a/slo/playground/configs/grafana/provisioning/dashboards/slo.json b/slo/playground/configs/grafana/provisioning/dashboards/slo.json new file mode 100644 index 00000000..69d76bf7 --- /dev/null +++ b/slo/playground/configs/grafana/provisioning/dashboards/slo.json @@ -0,0 +1,646 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 0 + }, + "id": 12, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "builder", + "expr": "rate(oks[$__rate_interval]) > 0", + "hide": false, + "legendFormat": "({{sdk}}-{{sdkVersion}}) {{jobName}} OK", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "builder", + "expr": "rate(not_oks[$__rate_interval]) > 0", + "hide": false, + "legendFormat": "({{sdk}}-{{sdkVersion}}) {{jobName}} not OK", + "range": true, + "refId": "C" + } + ], + "title": "SLO Requests RPS", + "transformations": [], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 0 + }, + "id": 14, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "builder", + "expr": "histogram_quantile(1, rate(attempts_bucket[$__rate_interval]))", + "hide": false, + "legendFormat": "{{sdk}}-{{sdkVersion}} {{jobName}}-{{status}}", + "range": true, + "refId": "A" + } + ], + "title": "Attempts", + "transformations": [], + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 9 + }, + "id": 7, + "panels": [], + "title": "Latencies", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 10 + }, + "id": 4, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "9.3.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "builder", + "expr": "latency{jobName=\"read\", status=\"ok\"} > 0", + "legendFormat": "{{sdk}}-{{sdkVersion}}-p{{quantile}}", + "range": true, + "refId": "A" + } + ], + "title": "Read Latencies (OK)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 10 + }, + "id": 5, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "builder", + "expr": "latency{jobName=\"write\", status=\"ok\"} > 0", + "legendFormat": "{{sdk}}-{{sdkVersion}}-p{{quantile}}", + "range": true, + "refId": "A" + } + ], + "title": "Write Latencies (OK)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 18 + }, + "id": 10, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "builder", + "expr": "latency{jobName=\"read\", status=\"err\"} > 0", + "legendFormat": "{{sdk}}-{{sdkVersion}}-p{{quantile}}", + "range": true, + "refId": "A" + } + ], + "title": "Read Latencies (NOT OK)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 18 + }, + "id": 11, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "builder", + "expr": "latency{jobName=\"write\", status=\"err\"} > 0", + "legendFormat": "{{sdk}}-{{sdkVersion}}-p{{quantile}}", + "range": true, + "refId": "A" + } + ], + "title": "Write Latencies (NOT OK)", + "type": "timeseries" + } + ], + "refresh": "", + "revision": 1, + "schemaVersion": 38, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "filters": [], + "hide": 0, + "label": "", + "name": "filter", + "skipUrlSync": false, + "type": "adhoc" + } + ] + }, + "time": { + "from": "now-15m", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "SLO", + "uid": "7CzMl5t4k", + "version": 1, + "weekStart": "" +} diff --git a/slo/playground/configs/grafana/provisioning/datasources/datasource.yml b/slo/playground/configs/grafana/provisioning/datasources/datasource.yml new file mode 100644 index 00000000..0b62b9c3 --- /dev/null +++ b/slo/playground/configs/grafana/provisioning/datasources/datasource.yml @@ -0,0 +1,11 @@ +apiVersion: 1 + +datasources: + - name: prometheus + type: prometheus + access: proxy + orgId: 1 + url: http://prometheus:9090 + basicAuth: false + isDefault: true + editable: true diff --git a/slo/playground/configs/prometheus/prometheus.yml b/slo/playground/configs/prometheus/prometheus.yml new file mode 100644 index 00000000..281b390b --- /dev/null +++ b/slo/playground/configs/prometheus/prometheus.yml @@ -0,0 +1,8 @@ +global: + scrape_interval: 1s + evaluation_interval: 1s + +scrape_configs: + - job_name: 'slo' + static_configs: + - targets: ['prometheus-pushgateway:9091'] diff --git a/slo/playground/docker-compose.yml b/slo/playground/docker-compose.yml new file mode 100644 index 00000000..b53d4b72 --- /dev/null +++ b/slo/playground/docker-compose.yml @@ -0,0 +1,92 @@ +version: '2.1' + +networks: + monitor-net: + driver: bridge + +services: + prometheus: + image: prom/prometheus:v2.44.0 + container_name: prometheus + user: "$UID:$GID" + volumes: + - ./configs/prometheus:/etc/prometheus + - ../data/prometheus:/prometheus + command: + - '--config.file=/etc/prometheus/prometheus.yml' + - '--storage.tsdb.path=/prometheus' + - '--storage.tsdb.retention.time=200h' + - '--web.enable-lifecycle' + restart: unless-stopped + ports: + - "9090:9090" + networks: + - monitor-net + + prometheus-pushgateway: + image: prom/pushgateway:v1.6.0 + container_name: prometheus-pushgateway + ports: + - "9091:9091" + networks: + - monitor-net + + grafana: + image: grafana/grafana:9.5.3 + container_name: grafana + user: "$UID:$GID" + volumes: + - ./configs/grafana/provisioning:/etc/grafana/provisioning + - ../data/grafana:/var/lib/grafana + environment: + - GF_SECURITY_ADMIN_USER=admin + - GF_SECURITY_ADMIN_PASSWORD=passw0rD + restart: unless-stopped + ports: + - "3000:3000" + networks: + - monitor-net + + ydb: + image: cr.yandex/yc/yandex-docker-local-ydb:23.1 + container_name: ydb + # hostname: localhost + environment: + - GRPC_TLS_PORT=2135 + - GRPC_PORT=2136 + - MON_PORT=8765 + - YDB_USE_IN_MEMORY_PDISKS=true + - YDB_DEFAULT_LOG_LEVEL=NOTICE + ports: + - "2135:2135" + - "2136:2136" + - "8765:8765" + volumes: + - ../data/ydb_certs:/ydb_certs + networks: + - monitor-net + + slo-runner: + build: + context: ../.. + dockerfile: slo/src/Dockerfile + command: + - 'create' + - 'http://ydb:2135' + - '/local' + - '--table-name' + - 'slo-dotnet' + - '--min-partitions-count' + - '6' + - '--max-partitions-count' + - '1000' + - '--partition-size' + - '1' + - '--initial-data-count' + - '1000' + networks: + - monitor-net + depends_on: + ydb: + condition: service_healthy + diff --git a/slo/src/Cli/Cli.cs b/slo/src/Cli/Cli.cs new file mode 100644 index 00000000..589a2f84 --- /dev/null +++ b/slo/src/Cli/Cli.cs @@ -0,0 +1,174 @@ +using System.CommandLine; + +namespace slo.Cli; + +internal static class Cli +{ + private static readonly Argument EndpointArgument = new( + "endpoint", + "YDB endpoint to connect to"); + + private static readonly Argument DbArgument = new( + "db", + "YDB database to connect to"); + + private static readonly Option TableOption = new( + new[] { "-t", "--table-name" }, + () => "testingTable", + "table name to create\n "); + + private static readonly Option WriteTimeoutOption = new( + "--write-timeout", + () => 10000, + "write timeout milliseconds"); + + + private static readonly Option MinPartitionsCountOption = new( + "--min-partitions-count", + () => 6, + "minimum amount of partitions in table"); + + private static readonly Option MaxPartitionsCountOption = new( + "--max-partitions-count", + () => 1000, + "maximum amount of partitions in table"); + + private static readonly Option PartitionSizeOption = new( + "--partition-size", + () => 1, + "partition size in mb"); + + private static readonly Option InitialDataCountOption = new( + new[] { "-c", "--initial-data-count" }, + () => 1000, + "amount of initially created rows"); + + + private static readonly Option PromPgwOption = new( + "--prom-pgw", + "minimum amount of partitions in table") { IsRequired = true }; + + private static readonly Option ReportPeriodOption = new( + "--report-period", + () => 250, + "prometheus push period in milliseconds"); + + private static readonly Option ReadRpsOption = new( + "--read-rps", + () => 1000, + "read RPS"); + + private static readonly Option ReadTimeoutOption = new( + "--read-timeout", + () => 10000, + "read timeout milliseconds"); + + private static readonly Option WriteRpsOption = new( + "--write-rps", + () => 100, + "write RPS"); + + private static readonly Option TimeOption = new( + "--time", + () => 600, + "run time in seconds"); + + private static readonly Option ShutdownTimeOption = new( + "--shutdown-time", + () => 30, + "time to wait before force kill workers"); + + private static readonly Option X509CertPathOption = new( + "--cert", + result => + { + const string defaultX590Path = "/ydb-ca.pem"; + if (result.Tokens.Count == 0) + { + return new FileInfo(defaultX590Path); + } + + var filePath = result.Tokens.Single().Value; + if (File.Exists(filePath)) + { + return new FileInfo(filePath); + } + + result.ErrorMessage = "File does not exist"; + return null; + }, + true, + "Path to x509 certificate file" + ); + + private static readonly Command CreateCommand = new( + "create", + "creates table in database") + { + TableOption, + WriteTimeoutOption, + EndpointArgument, + DbArgument, + MinPartitionsCountOption, + MaxPartitionsCountOption, + PartitionSizeOption, + InitialDataCountOption, + X509CertPathOption + }; + + + private static readonly Command CleanupCommand = new( + "cleanup", + "drops table in database") + { + TableOption, + WriteTimeoutOption, + EndpointArgument, + DbArgument, + X509CertPathOption + }; + + private static readonly Command RunCommand = new( + "run", + "runs workload (read and write to table with sets RPS)") + { + TableOption, + WriteTimeoutOption, + EndpointArgument, + DbArgument, + PromPgwOption, + ReportPeriodOption, + ReadRpsOption, + ReadTimeoutOption, + WriteRpsOption, + TimeOption, + ShutdownTimeOption, + X509CertPathOption + }; + + private static readonly RootCommand RootCommand = new("SLO app") + { + CreateCommand, CleanupCommand, RunCommand + }; + + internal static async Task Run(string[] args) + { + CreateCommand.SetHandler( + async createConfig => { await CliCommands.Create(createConfig); }, + new CreateConfigBinder(EndpointArgument, DbArgument, TableOption, MinPartitionsCountOption, + MaxPartitionsCountOption, PartitionSizeOption, InitialDataCountOption, WriteTimeoutOption, + X509CertPathOption) + ); + + CleanupCommand.SetHandler( + async cleanUpConfig => { await CliCommands.CleanUp(cleanUpConfig); }, + new CleanUpConfigBinder(EndpointArgument, DbArgument, TableOption, WriteTimeoutOption, X509CertPathOption) + ); + + RunCommand.SetHandler(async runConfig => { await CliCommands.Run(runConfig); }, + new RunConfigBinder(EndpointArgument, DbArgument, TableOption, InitialDataCountOption, PromPgwOption, + ReportPeriodOption, ReadRpsOption, ReadTimeoutOption, WriteRpsOption, WriteTimeoutOption, TimeOption, + ShutdownTimeOption, X509CertPathOption)); + return await RootCommand.InvokeAsync(args); + } +} \ No newline at end of file diff --git a/slo/src/Cli/CliCommands.cs b/slo/src/Cli/CliCommands.cs new file mode 100644 index 00000000..d5b9297f --- /dev/null +++ b/slo/src/Cli/CliCommands.cs @@ -0,0 +1,94 @@ +using System.Reflection; +using System.Text.RegularExpressions; +using Prometheus; +using slo.Jobs; +using Ydb.Sdk; +using Ydb.Sdk.Table; + +namespace slo.Cli; + +public static class CliCommands +{ + internal static async Task Create(CreateConfig createConfig) + { + Console.WriteLine(createConfig); + var config = new DriverConfig( + createConfig.Endpoint, + createConfig.Db + ); + + await using var driver = await Driver.CreateInitialized(config); + + using var tableClient = new TableClient(driver); + + var executor = new Executor(tableClient); + + var table = new Table(createConfig.TableName, executor); + await table.Init(createConfig.InitialDataCount, createConfig.PartitionSize, createConfig.MinPartitionsCount, + createConfig.MaxPartitionsCount); + } + + internal static async Task CleanUp(CleanUpConfig cleanUpConfig) + { + Console.WriteLine(cleanUpConfig); + var config = new DriverConfig( + cleanUpConfig.Endpoint, + cleanUpConfig.Db + ); + + await using var driver = await Driver.CreateInitialized(config); + + using var tableClient = new TableClient(driver); + + var executor = new Executor(tableClient); + + var table = new Table(cleanUpConfig.TableName, executor); + await table.CleanUp(); + } + + internal static async Task Run(RunConfig runConfig) + { + var config = new DriverConfig( + runConfig.Endpoint, + runConfig.Db + ); + + await using var driver = await Driver.CreateInitialized(config); + + using var tableClient = new TableClient(driver); + + var executor = new Executor(tableClient); + + var table = new Table(runConfig.TableName, executor); + await table.Init(runConfig.InitialDataCount, 1, 6, 1000); + + Console.WriteLine(runConfig.PromPgw); + + using var prometheus = new MetricPusher(endpoint: runConfig.PromPgw, job: "slo"); + + prometheus.Start(); + + var duration = TimeSpan.FromSeconds(runConfig.Time); + + var readJob = new ReadJob(table, new RateLimitedCaller( + runConfig.ReadRps, + duration + )); + + var writeJob = new WriteJob(table, new RateLimitedCaller( + runConfig.WriteRps, + duration + )); + + var readThread = new Thread(readJob.Start); + var writeThread = new Thread(writeJob.Start); + + readThread.Start(); + writeThread.Start(); + + await Task.Delay(duration + TimeSpan.FromSeconds(runConfig.ShutdownTime)); + + readThread.Join(); + writeThread.Join(); + } +} \ No newline at end of file diff --git a/slo/src/Cli/ConfigBinders.cs b/slo/src/Cli/ConfigBinders.cs new file mode 100644 index 00000000..f5624eef --- /dev/null +++ b/slo/src/Cli/ConfigBinders.cs @@ -0,0 +1,135 @@ +using System.CommandLine; +using System.CommandLine.Binding; + +namespace slo.Cli; + +internal class CreateConfigBinder : BinderBase +{ + private readonly Argument _dbArgument; + private readonly Argument _endpointArgument; + private readonly Option _initialDataCountOption; + private readonly Option _maxPartitionsCountOption; + private readonly Option _minPartitionsCountOption; + private readonly Option _partitionSizeOption; + private readonly Option _tableOption; + private readonly Option _writeTimeoutOption; + private readonly Option _x590CertPathOption; + + public CreateConfigBinder(Argument endpointArgument, Argument dbArgument, + Option tableOption, Option minPartitionsCountOption, Option maxPartitionsCountOption, + Option partitionSizeOption, Option initialDataCountOption, Option writeTimeoutOption, + Option x590CertPathOption) + { + _endpointArgument = endpointArgument; + _dbArgument = dbArgument; + _tableOption = tableOption; + _minPartitionsCountOption = minPartitionsCountOption; + _maxPartitionsCountOption = maxPartitionsCountOption; + _partitionSizeOption = partitionSizeOption; + _initialDataCountOption = initialDataCountOption; + _writeTimeoutOption = writeTimeoutOption; + _x590CertPathOption = x590CertPathOption; + } + + protected override CreateConfig GetBoundValue(BindingContext bindingContext) + { + return new CreateConfig( + bindingContext.ParseResult.GetValueForArgument(_endpointArgument), + bindingContext.ParseResult.GetValueForArgument(_dbArgument), + bindingContext.ParseResult.GetValueForOption(_tableOption)!, + bindingContext.ParseResult.GetValueForOption(_minPartitionsCountOption), + bindingContext.ParseResult.GetValueForOption(_maxPartitionsCountOption), + bindingContext.ParseResult.GetValueForOption(_partitionSizeOption), + bindingContext.ParseResult.GetValueForOption(_initialDataCountOption), + bindingContext.ParseResult.GetValueForOption(_writeTimeoutOption), + bindingContext.ParseResult.GetValueForOption(_x590CertPathOption) + ); + } +} + +internal class CleanUpConfigBinder : BinderBase +{ + private readonly Argument _dbArgument; + private readonly Argument _endpointArgument; + private readonly Option _tableOption; + private readonly Option _writeTimeoutOption; + private readonly Option _x590CertPathOption; + + public CleanUpConfigBinder(Argument endpointArgument, Argument dbArgument, + Option tableOption, Option writeTimeoutOption, Option x590CertPathOption) + { + _endpointArgument = endpointArgument; + _dbArgument = dbArgument; + _tableOption = tableOption; + _writeTimeoutOption = writeTimeoutOption; + _x590CertPathOption = x590CertPathOption; + } + + protected override CleanUpConfig GetBoundValue(BindingContext bindingContext) + { + return new CleanUpConfig( + bindingContext.ParseResult.GetValueForArgument(_endpointArgument), + bindingContext.ParseResult.GetValueForArgument(_dbArgument), + bindingContext.ParseResult.GetValueForOption(_tableOption)!, + bindingContext.ParseResult.GetValueForOption(_writeTimeoutOption), + bindingContext.ParseResult.GetValueForOption(_x590CertPathOption) + ); + } +} + +internal class RunConfigBinder : BinderBase +{ + private readonly Argument _dbArgument; + private readonly Argument _endpointArgument; + private readonly Option _initialDataCountOption; + private readonly Option _promPgwOption; + private readonly Option _readRpsOption; + private readonly Option _readTimeoutOption; + private readonly Option _reportPeriodOption; + private readonly Option _shutdownTimeOption; + private readonly Option _tableOption; + private readonly Option _timeOption; + private readonly Option _writeRpsOption; + private readonly Option _writeTimeoutOption; + private readonly Option _x590CertPathOption; + + public RunConfigBinder(Argument endpointArgument, Argument dbArgument, + Option tableOption, Option initialDataCountOption, Option promPgwOption, + Option reportPeriodOption, Option readRpsOption, Option readTimeoutOption, + Option writeRpsOption, Option writeTimeoutOption, Option timeOption, + Option shutdownTimeOption, Option x590CertPathOption) + { + _endpointArgument = endpointArgument; + _dbArgument = dbArgument; + _tableOption = tableOption; + _initialDataCountOption = initialDataCountOption; + _promPgwOption = promPgwOption; + _reportPeriodOption = reportPeriodOption; + _readRpsOption = readRpsOption; + _readTimeoutOption = readTimeoutOption; + _writeRpsOption = writeRpsOption; + _writeTimeoutOption = writeTimeoutOption; + _timeOption = timeOption; + _shutdownTimeOption = shutdownTimeOption; + _x590CertPathOption = x590CertPathOption; + } + + protected override RunConfig GetBoundValue(BindingContext bindingContext) + { + return new RunConfig( + bindingContext.ParseResult.GetValueForArgument(_endpointArgument), + bindingContext.ParseResult.GetValueForArgument(_dbArgument), + bindingContext.ParseResult.GetValueForOption(_tableOption)!, + bindingContext.ParseResult.GetValueForOption(_initialDataCountOption), + bindingContext.ParseResult.GetValueForOption(_promPgwOption)!, + bindingContext.ParseResult.GetValueForOption(_reportPeriodOption), + bindingContext.ParseResult.GetValueForOption(_readRpsOption), + bindingContext.ParseResult.GetValueForOption(_readTimeoutOption), + bindingContext.ParseResult.GetValueForOption(_writeRpsOption), + bindingContext.ParseResult.GetValueForOption(_writeTimeoutOption), + bindingContext.ParseResult.GetValueForOption(_timeOption), + bindingContext.ParseResult.GetValueForOption(_shutdownTimeOption), + bindingContext.ParseResult.GetValueForOption(_x590CertPathOption) + ); + } +} \ No newline at end of file diff --git a/slo/src/Cli/Configs.cs b/slo/src/Cli/Configs.cs new file mode 100644 index 00000000..6126de75 --- /dev/null +++ b/slo/src/Cli/Configs.cs @@ -0,0 +1,10 @@ +namespace slo.Cli; + +internal record CreateConfig(string Endpoint, string Db, string TableName, int MinPartitionsCount, + int MaxPartitionsCount, int PartitionSize, int InitialDataCount, int WriteTimeout, FileInfo? X509CertPath); + +internal record CleanUpConfig(string Endpoint, string Db, string TableName, int WriteTimeout, FileInfo? X509CertPath); + +internal record RunConfig(string Endpoint, string Db, string TableName, int InitialDataCount, string PromPgw, + int ReportPeriod, int ReadRps, int ReadTimeout, int WriteRps, int WriteTimeout, int Time, int ShutdownTime, + FileInfo? X509CertPath); \ No newline at end of file diff --git a/slo/src/DataGenerator.cs b/slo/src/DataGenerator.cs new file mode 100644 index 00000000..745b712a --- /dev/null +++ b/slo/src/DataGenerator.cs @@ -0,0 +1,35 @@ +using Ydb.Sdk.Value; + +namespace slo; + +public static class DataGenerator +{ + private static readonly Random Random = new(); + + public static int MaxId { get; private set; } + + public static async Task LoadMaxId(string tableName, Executor executor) + { + var response = await executor.ExecuteDataQuery(Queries.GetLoadMaxIdQuery(tableName)); + var row = response.Result.ResultSets[0].Rows[0]; + var value = row[0]; + MaxId = (int?)value.GetOptionalUint64() ?? 0; + } + + public static Dictionary GetUpsertData() + { + MaxId++; + return new Dictionary + { + { "$id", YdbValue.MakeUint64((ulong)MaxId) }, + { + "$payload_str", + YdbValue.MakeUtf8(string.Join("", Enumerable + .Repeat(0, Random.Next(20, 40)) + .Select(_ => (char)new Random().Next(127)))) + }, + { "$payload_double", YdbValue.MakeDouble(Random.NextDouble()) }, + { "$payload_timestamp", YdbValue.MakeTimestamp(DateTime.Now) } + }; + } +} \ No newline at end of file diff --git a/slo/src/Dockerfile b/slo/src/Dockerfile new file mode 100644 index 00000000..3b582784 --- /dev/null +++ b/slo/src/Dockerfile @@ -0,0 +1,20 @@ +FROM mcr.microsoft.com/dotnet/sdk:7.0 AS build + +COPY ../ /src +RUN ls /src + + +WORKDIR /src/slo/src +RUN ls +RUN dotnet restore *.sln +RUN dotnet publish *.sln -c release -o /app --no-restore -f net6.0 + +##################### + +FROM mcr.microsoft.com/dotnet/runtime:6.0 AS run + +WORKDIR /app + +COPY --from=build /app ./ + +ENTRYPOINT ["./slo"] diff --git a/slo/src/Executor.cs b/slo/src/Executor.cs new file mode 100644 index 00000000..fb09b2ff --- /dev/null +++ b/slo/src/Executor.cs @@ -0,0 +1,48 @@ +using Prometheus; +using Ydb.Sdk.Table; +using Ydb.Sdk.Value; + +namespace slo; + +public class Executor +{ + private readonly TableClient _tableClient; + + public Executor(TableClient tableClient) + { + _tableClient = tableClient; + } + + public async Task ExecuteSchemeQuery(string query) + { + var response = await _tableClient.SessionExec( + async session => await session.ExecuteSchemeQuery(query)); + response.Status.EnsureSuccess(); + } + + public async Task ExecuteDataQuery(string query, + Dictionary? parameters = null, Histogram? histogram = null) + { + var txControl = TxControl.BeginSerializableRW().Commit(); + + var attempts = 0; + var response = await _tableClient.SessionExec( + async session => + { + attempts++; + return parameters == null + ? await session.ExecuteDataQuery( + query, + txControl) + : await session.ExecuteDataQuery( + query, + txControl, + parameters); + }); + histogram?.WithLabels(response.Status.IsSuccess ? "ok" : "err").Observe(attempts); + + response.Status.EnsureSuccess(); + + return (ExecuteDataQueryResponse)response; + } +} \ No newline at end of file diff --git a/slo/src/Jobs/Job.cs b/slo/src/Jobs/Job.cs new file mode 100644 index 00000000..6eb1ba9b --- /dev/null +++ b/slo/src/Jobs/Job.cs @@ -0,0 +1,91 @@ +using System.Diagnostics; +using Prometheus; + +namespace slo.Jobs; + +public abstract class Job +{ + private readonly Gauge _inFlightGauge; + + private readonly Summary _latencySummary; + private readonly Counter _notOkCounter; + + private readonly Counter _okCounter; + private readonly RateLimitedCaller _rateLimitedCaller; + + protected readonly Histogram AttemptsHistogram; + protected readonly Random Random = new(); + + protected readonly Table Table; + + protected Job(Table table, RateLimitedCaller rateLimitedCaller, string jobName) + { + Table = table; + _rateLimitedCaller = rateLimitedCaller; + + var metricFactory = Metrics.WithLabels(new Dictionary + { + { "jobName", jobName }, + { "sdk", "dotnet" }, + { "sdkVersion", Environment.Version.ToString() } + }); + + _okCounter = metricFactory.CreateCounter("oks", "Count of OK"); + _notOkCounter = metricFactory.CreateCounter("not_oks", "Count of not OK"); + _inFlightGauge = metricFactory.CreateGauge("in_flight", "amount of requests in flight"); + + _latencySummary = metricFactory.CreateSummary( + "latency", + "Latencies (OK)", + new[] { "status" }, + new SummaryConfiguration + { + MaxAge = TimeSpan.FromSeconds(15), + Objectives = new QuantileEpsilonPair[] + { + new(0.5, 0.05), + new(0.99, 0.005), + new(1, 0.0005) + } + } + ); + + AttemptsHistogram = metricFactory.CreateHistogram( + "attempts", + "summary of amount for request", + new[] { "status" }, + new HistogramConfiguration { Buckets = Histogram.LinearBuckets(1, 1, 10) }); + } + + public async void Start() + { + await _rateLimitedCaller.StartCalling(async () => await DoJob(), _inFlightGauge); + } + + private async Task DoJob() + { + var sw = Stopwatch.StartNew(); + _inFlightGauge.Inc(); + try + { + await PerformQuery(); + sw.Stop(); + + _latencySummary.WithLabels("ok").Observe(sw.ElapsedMilliseconds); + _okCounter.Inc(); + _inFlightGauge.Dec(); + } + catch (Exception e) + { + Console.WriteLine(e); + sw.Stop(); + + _latencySummary.WithLabels("err").Observe(sw.ElapsedMilliseconds); + _notOkCounter.Inc(); + _inFlightGauge.Dec(); + throw; + } + } + + protected abstract Task PerformQuery(); +} \ No newline at end of file diff --git a/slo/src/Jobs/ReadJob.cs b/slo/src/Jobs/ReadJob.cs new file mode 100644 index 00000000..c883e847 --- /dev/null +++ b/slo/src/Jobs/ReadJob.cs @@ -0,0 +1,25 @@ +using Ydb.Sdk.Value; + +namespace slo.Jobs; + +internal class ReadJob : Job +{ + public ReadJob(Table table, RateLimitedCaller rateLimitedCaller) : base(table, rateLimitedCaller, "read") + { + } + + + protected override async Task PerformQuery() + { + var parameters = new Dictionary + { + { "$id", YdbValue.MakeUint64((ulong)Random.Next(DataGenerator.MaxId)) } + }; + + await Table.Executor.ExecuteDataQuery( + Queries.GetReadQuery(Table.TableName), + parameters, + AttemptsHistogram + ); + } +} \ No newline at end of file diff --git a/slo/src/Jobs/WriteJob.cs b/slo/src/Jobs/WriteJob.cs new file mode 100644 index 00000000..2225db58 --- /dev/null +++ b/slo/src/Jobs/WriteJob.cs @@ -0,0 +1,20 @@ +namespace slo.Jobs; + +internal class WriteJob : Job +{ + public WriteJob(Table table, RateLimitedCaller rateLimitedCaller) : base(table, rateLimitedCaller, "write") + { + } + + + protected override async Task PerformQuery() + { + var parameters = DataGenerator.GetUpsertData(); + + await Table.Executor.ExecuteDataQuery( + Queries.GetWriteQuery(Table.TableName), + parameters, + AttemptsHistogram + ); + } +} \ No newline at end of file diff --git a/slo/src/Program.cs b/slo/src/Program.cs new file mode 100644 index 00000000..4bbd8e6c --- /dev/null +++ b/slo/src/Program.cs @@ -0,0 +1,4 @@ +using slo; +using slo.Cli; + +return await Cli.Run(args); \ No newline at end of file diff --git a/slo/src/Queries.cs b/slo/src/Queries.cs new file mode 100644 index 00000000..26d71c4c --- /dev/null +++ b/slo/src/Queries.cs @@ -0,0 +1,58 @@ +namespace slo; + +public static class Queries +{ + public static string GetCreateQuery(string tableName, int partitionSize, int minPartitionsCount, + int maxPartitionsCount) + { + return $@" +CREATE TABLE `{tableName}` ( + `hash` UINT64, + `id` UINT64, + `payload_str` UTF8, + `payload_double` DOUBLE, + `payload_timestamp` TIMESTAMP, + `payload_hash` UINT64, + PRIMARY KEY (`hash`, `id`) +) +WITH ( + AUTO_PARTITIONING_BY_SIZE = ENABLED, + AUTO_PARTITIONING_PARTITION_SIZE_MB = {partitionSize}, + AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = {minPartitionsCount}, + AUTO_PARTITIONING_MAX_PARTITIONS_COUNT = {maxPartitionsCount} +); +"; + } + + public static string GetDropQuery(string tableName) + { + return $"DROP TABLE `{tableName}`"; + } + + public static string GetLoadMaxIdQuery(string tableName) + { + return $"SELECT MAX(id) as max_id FROM `{tableName}`"; + } + + public static string GetReadQuery(string tableName) + { + return $@" +DECLARE $id AS Uint64; +SELECT id, payload_str, payload_double, payload_timestamp, payload_hash +FROM `{tableName}` +WHERE id = $id AND hash = Digest::NumericHash($id) +"; + } + + public static string GetWriteQuery(string tableName) + { + return $@" +DECLARE $id AS Uint64; +DECLARE $payload_str AS Utf8; +DECLARE $payload_double AS Double; +DECLARE $payload_timestamp AS Timestamp; +INSERT INTO `{tableName}` (id, hash, payload_str, payload_double, payload_timestamp) +VALUES ($id, Digest::NumericHash($id), $payload_str, $payload_double, $payload_timestamp) +"; + } +} \ No newline at end of file diff --git a/slo/src/README.md b/slo/src/README.md new file mode 100644 index 00000000..d4085dd7 --- /dev/null +++ b/slo/src/README.md @@ -0,0 +1,147 @@ +# SLO workload + +SLO is the type of test where app based on ydb-sdk is tested against falling YDB cluster nodes, tablets, network +(that is possible situations for distributed DBs with hundreds of nodes) + +### Usage: + +It has 3 commands: + +- `create` - creates table in database +- `cleanup` - drops table in database +- `run` - runs workload (read and write to table with sets RPS) + +### Run examples with all arguments: + +create: + +`$APP create grpcs://ydb.cool.example.com:2135 /some/folder -t tableName +-min-partitions-count 6 -max-partitions-count 1000 -partition-size 1 -с 1000 +-write-timeout 10000` + +cleanup: + +`$APP cleanup grpcs://ydb.cool.example.com:2135 /some/folder -t tableName` + +run: + +`$APP create run grpcs://ydb.cool.example.com:2135 /some/folder -t tableName +-prom-pgw http://prometheus-pushgateway:9091 -report-period 250 +-read-rps 1000 -read-timeout 10000 +-write-rps 100 -write-timeout 10000 +-time 600 -shutdown-time 30` + +## Arguments for commands: + +### create +`$APP create [options]` + +``` +Arguments: + endpoint YDB endpoint to connect to + db YDB database to connect to + +Options: + -t -table-name table name to create + + -min-partitions-count minimum amount of partitions in table + -max-partitions-count maximum amount of partitions in table + -partition-size partition size in mb + + -c -initial-data-count amount of initially created rows + + -write-timeout write timeout milliseconds +``` + +### cleanup +`$APP cleanup [options]` + +``` +Arguments: + endpoint YDB endpoint to connect to + db YDB database to connect to + +Options: + -t -table-name table name to create + + -write-timeout write timeout milliseconds +``` + +### run +`$APP run [options]` + +``` +Arguments: + endpoint YDB endpoint to connect to + db YDB database to connect to + +Options: + -t -table-name table name to create + + -initial-data-count amount of initially created rows + + -prom-pgw prometheus push gateway + -report-period prometheus push period in milliseconds + + -read-rps read RPS + -read-timeout read timeout milliseconds + + -write-rps write RPS + -write-timeout write timeout milliseconds + + -time run time in seconds + -shutdown-time time to wait before force kill workers +``` + +## Authentication + +Workload using anonymous credentials. + +## What's inside +When running `run` command, the program creates three jobs: `readJob`, `writeJob`, `metricsJob`. + +- `readJob` reads rows from the table one by one with random identifiers generated by writeJob +- `writeJob` generates and inserts rows +- `metricsJob` periodically sends metrics to Prometheus + +Table have these fields: +- `id Uint64` +- `hash Uint64 Digest::NumericHash(id)` +- `payload_str UTF8` +- `payload_double Double` +- `payload_timestamp Timestamp` +- `payload_hash Uint64` + +Primary key: `("hash", "id")` + +## Collected metrics +- `oks` - amount of OK requests +- `not_oks` - amount of not OK requests +- `inflight` - amount of requests in flight +- `latency` - summary of latencies in ms +- `attempts` - summary of amount for request + +> You must reset metrics to keep them `0` in prometheus and grafana before beginning and after ending of jobs + +In `go` it looks like that: +```go +func (m *Metrics) Reset() error { + m.oks.WithLabelValues(JobRead).Set(0) + m.oks.WithLabelValues(JobWrite).Set(0) + + m.notOks.WithLabelValues(JobRead).Set(0) + m.notOks.WithLabelValues(JobWrite).Set(0) + + m.inflight.WithLabelValues(JobRead).Set(0) + m.inflight.WithLabelValues(JobWrite).Set(0) + + m.latencies.Reset() + + m.attempts.Reset() + + return m.Push() +} +``` + +## Look at metrics in grafana +You can get dashboard used in that test [here](https://github.com/ydb-platform/slo-tests/blob/main/k8s/helms/grafana.yaml#L69) - you will need to import json into grafana. \ No newline at end of file diff --git a/slo/src/RateLimitedCaller.cs b/slo/src/RateLimitedCaller.cs new file mode 100644 index 00000000..cb8f373a --- /dev/null +++ b/slo/src/RateLimitedCaller.cs @@ -0,0 +1,47 @@ +using Prometheus; + +namespace slo; + +public class RateLimitedCaller +{ + private readonly TimeSpan _duration; + + private readonly int _rate; + private readonly TokenBucket _tokenBucket; + + public RateLimitedCaller(int rate, TimeSpan duration = default, int bunchCount = 10) + { + _rate = rate; + _duration = duration; + + _tokenBucket = new TokenBucket(rate / bunchCount, (int)(1000.0f / bunchCount)); + } + + public Task StartCalling(Func action, Gauge inFlightGauge) + { + var endTime = DateTime.Now + _duration; + + // var i = 0; + + while (_duration == default || DateTime.Now < endTime) + { + // i++; + while (inFlightGauge.Value > _rate) + Thread.Sleep(1); + while (true) + try + { + _tokenBucket.UseToken(); + // Console.WriteLine($"{DateTime.Now.Second}:{DateTime.Now.Millisecond} i={i}"); + _ = action(); + break; + } + catch (NoTokensAvailableException) + { + Thread.Sleep(1); + } + } + + return Task.CompletedTask; + } +} \ No newline at end of file diff --git a/slo/src/Table.cs b/slo/src/Table.cs new file mode 100644 index 00000000..e45dd92b --- /dev/null +++ b/slo/src/Table.cs @@ -0,0 +1,33 @@ +namespace slo; + +public class Table +{ + public readonly Executor Executor; + public readonly string TableName; + + public Table(string tableName, Executor executor) + { + TableName = tableName; + Executor = executor; + } + + public async Task Init(int initialDataCount, int partitionSize, int minPartitionsCount, int maxPartitionsCount) + { + await Executor.ExecuteSchemeQuery(Queries.GetCreateQuery(TableName, partitionSize, minPartitionsCount, + maxPartitionsCount)); + + await DataGenerator.LoadMaxId(TableName, Executor); + + var tasks = new List { Capacity = initialDataCount }; + + for (var i = 0; i < initialDataCount; i++) + tasks.Add(Executor.ExecuteDataQuery(Queries.GetWriteQuery(TableName), DataGenerator.GetUpsertData())); + + await Task.WhenAll(tasks); + } + + public async Task CleanUp() + { + await Executor.ExecuteSchemeQuery(Queries.GetDropQuery(TableName)); + } +} \ No newline at end of file diff --git a/slo/src/TokenBucket.cs b/slo/src/TokenBucket.cs new file mode 100644 index 00000000..aec680bd --- /dev/null +++ b/slo/src/TokenBucket.cs @@ -0,0 +1,62 @@ +using System.Collections.Concurrent; +using System.Runtime.Serialization; +using System.Timers; +using Timer = System.Timers.Timer; + +namespace slo; + +public record Token; + +[Serializable] +internal class NoTokensAvailableException : Exception +{ + public static NoTokensAvailableException Instance = new(); + + public NoTokensAvailableException() + { + } + + public NoTokensAvailableException(string? message) : base(message) + { + } + + public NoTokensAvailableException(string? message, Exception? innerException) : base(message, innerException) + { + } + + protected NoTokensAvailableException(SerializationInfo info, StreamingContext context) : base(info, context) + { + } +} + +public class TokenBucket +{ + private readonly int _maxTokens; + private readonly BlockingCollection _tokens; + + public TokenBucket(int maxNumberOfTokens, int refillRateMilliseconds) + { + _maxTokens = maxNumberOfTokens; + var timer = new Timer(refillRateMilliseconds); + _tokens = new BlockingCollection(); + + for (var i = 0; i < maxNumberOfTokens; i++) _tokens.Add(new Token()); + + timer.AutoReset = true; + timer.Enabled = true; + timer.Elapsed += OnTimerElapsed; + } + + private void OnTimerElapsed(object? sender, ElapsedEventArgs e) + { + var token = new Token(); + var refill = _maxTokens - _tokens.Count; + for (var i = 0; i < refill; i++) + _tokens.Add(token); + } + + public void UseToken() + { + if (!_tokens.TryTake(out _)) throw NoTokensAvailableException.Instance; + } +} \ No newline at end of file diff --git a/slo/src/slo.csproj b/slo/src/slo.csproj new file mode 100644 index 00000000..2748e03d --- /dev/null +++ b/slo/src/slo.csproj @@ -0,0 +1,19 @@ + + + + Exe + net6.0 + enable + enable + + + + + + + + + + + + diff --git a/slo/src/src.sln b/slo/src/src.sln new file mode 100644 index 00000000..fa807aa4 --- /dev/null +++ b/slo/src/src.sln @@ -0,0 +1,25 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 17 +VisualStudioVersion = 17.5.002.0 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "slo", "slo.csproj", "{72D7B0FC-A7B4-4640-9494-829881EE363A}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Any CPU = Debug|Any CPU + Release|Any CPU = Release|Any CPU + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {72D7B0FC-A7B4-4640-9494-829881EE363A}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {72D7B0FC-A7B4-4640-9494-829881EE363A}.Debug|Any CPU.Build.0 = Debug|Any CPU + {72D7B0FC-A7B4-4640-9494-829881EE363A}.Release|Any CPU.ActiveCfg = Release|Any CPU + {72D7B0FC-A7B4-4640-9494-829881EE363A}.Release|Any CPU.Build.0 = Release|Any CPU + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {A41B3ACF-3DD5-4932-B7F3-E33ADE6D062D} + EndGlobalSection +EndGlobal From ec1bc9fd7dcf110b9153437c556e89a9bed03ae8 Mon Sep 17 00:00:00 2001 From: XmasApple Date: Thu, 7 Sep 2023 18:03:22 +0300 Subject: [PATCH 02/46] add retry for `slo create` command --- .github/workflows/slo.yml | 4 ++-- slo/src/Cli/CliCommands.cs | 20 ++++++++++++++++---- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/.github/workflows/slo.yml b/.github/workflows/slo.yml index 27909df1..c5c8f4d5 100644 --- a/.github/workflows/slo.yml +++ b/.github/workflows/slo.yml @@ -1,6 +1,6 @@ on: push: - # branches: [main] + branches: [main] pull_request: branches: [main] workflow_dispatch: @@ -29,7 +29,7 @@ jobs: DOCKER_REPO: ${{ secrets.SLO_DOCKER_REPO }} - name: Run SLO - uses: ydb-platform/slo-tests@php-version + uses: ydb-platform/slo-tests@js-version if: env.DOCKER_REPO != null env: DOCKER_REPO: ${{ secrets.SLO_DOCKER_REPO }} diff --git a/slo/src/Cli/CliCommands.cs b/slo/src/Cli/CliCommands.cs index d5b9297f..cb5d5183 100644 --- a/slo/src/Cli/CliCommands.cs +++ b/slo/src/Cli/CliCommands.cs @@ -1,5 +1,3 @@ -using System.Reflection; -using System.Text.RegularExpressions; using Prometheus; using slo.Jobs; using Ydb.Sdk; @@ -24,8 +22,22 @@ internal static async Task Create(CreateConfig createConfig) var executor = new Executor(tableClient); var table = new Table(createConfig.TableName, executor); - await table.Init(createConfig.InitialDataCount, createConfig.PartitionSize, createConfig.MinPartitionsCount, - createConfig.MaxPartitionsCount); + const int maxCreateAttempts = 10; + for (var i = 0; i < maxCreateAttempts; i++) + { + try + { + await table.Init(createConfig.InitialDataCount, createConfig.PartitionSize, + createConfig.MinPartitionsCount, + createConfig.MaxPartitionsCount); + break; + } + catch (Exception e) + { + Console.WriteLine(e); + Thread.Sleep(millisecondsTimeout: 1000); + } + } } internal static async Task CleanUp(CleanUpConfig cleanUpConfig) From 0dfeb66bd63937fd28221025b7d34e53198d9869 Mon Sep 17 00:00:00 2001 From: XmasApple Date: Fri, 8 Sep 2023 17:00:15 +0300 Subject: [PATCH 03/46] try add verbosity --- slo/src/Jobs/Job.cs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/slo/src/Jobs/Job.cs b/slo/src/Jobs/Job.cs index 6eb1ba9b..2e8657e3 100644 --- a/slo/src/Jobs/Job.cs +++ b/slo/src/Jobs/Job.cs @@ -71,6 +71,7 @@ private async Task DoJob() await PerformQuery(); sw.Stop(); + Console.WriteLine($"job OK {sw.ElapsedMilliseconds}"); _latencySummary.WithLabels("ok").Observe(sw.ElapsedMilliseconds); _okCounter.Inc(); _inFlightGauge.Dec(); @@ -79,6 +80,7 @@ private async Task DoJob() { Console.WriteLine(e); sw.Stop(); + Console.WriteLine($"job ERR {sw.ElapsedMilliseconds}"); _latencySummary.WithLabels("err").Observe(sw.ElapsedMilliseconds); _notOkCounter.Inc(); From 4a0f0165c187c78eff24bd94d7b4103075c2897e Mon Sep 17 00:00:00 2001 From: XmasApple Date: Fri, 8 Sep 2023 18:09:23 +0300 Subject: [PATCH 04/46] try change gh action --- .github/workflows/slo.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/slo.yml b/.github/workflows/slo.yml index c5c8f4d5..d2624c9c 100644 --- a/.github/workflows/slo.yml +++ b/.github/workflows/slo.yml @@ -29,7 +29,7 @@ jobs: DOCKER_REPO: ${{ secrets.SLO_DOCKER_REPO }} - name: Run SLO - uses: ydb-platform/slo-tests@js-version + uses: ydb-platform/slo-tests@php-version if: env.DOCKER_REPO != null env: DOCKER_REPO: ${{ secrets.SLO_DOCKER_REPO }} From 4c932b8dea153387c1e9739c3206a43b6e61dc9f Mon Sep 17 00:00:00 2001 From: XmasApple Date: Fri, 8 Sep 2023 18:46:06 +0300 Subject: [PATCH 05/46] change `job` for `MetricPusher` --- slo/src/Cli/CliCommands.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/slo/src/Cli/CliCommands.cs b/slo/src/Cli/CliCommands.cs index cb5d5183..e0646b7d 100644 --- a/slo/src/Cli/CliCommands.cs +++ b/slo/src/Cli/CliCommands.cs @@ -76,7 +76,7 @@ internal static async Task Run(RunConfig runConfig) Console.WriteLine(runConfig.PromPgw); - using var prometheus = new MetricPusher(endpoint: runConfig.PromPgw, job: "slo"); + using var prometheus = new MetricPusher(endpoint: runConfig.PromPgw, job: "workload-dotnet"); prometheus.Start(); From ed3171fd48fa66b24bca57e4893d9d9c1da81359 Mon Sep 17 00:00:00 2001 From: XmasApple Date: Fri, 8 Sep 2023 18:58:36 +0300 Subject: [PATCH 06/46] add report period to MetricPusher --- slo/src/Cli/CliCommands.cs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/slo/src/Cli/CliCommands.cs b/slo/src/Cli/CliCommands.cs index e0646b7d..0eb66328 100644 --- a/slo/src/Cli/CliCommands.cs +++ b/slo/src/Cli/CliCommands.cs @@ -76,7 +76,8 @@ internal static async Task Run(RunConfig runConfig) Console.WriteLine(runConfig.PromPgw); - using var prometheus = new MetricPusher(endpoint: runConfig.PromPgw, job: "workload-dotnet"); + using var prometheus = new MetricPusher(endpoint: runConfig.PromPgw, job: "workload-dotnet", + intervalMilliseconds: runConfig.ReportPeriod); prometheus.Start(); From f455b4a3e32bfa9089f59c9673ce463d02ec537a Mon Sep 17 00:00:00 2001 From: XmasApple Date: Fri, 8 Sep 2023 19:58:39 +0300 Subject: [PATCH 07/46] fix wrong endpoint --- slo/playground/docker-compose.yml | 22 ++++++++++++---------- slo/src/Cli/CliCommands.cs | 2 +- slo/src/Jobs/Job.cs | 2 -- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/slo/playground/docker-compose.yml b/slo/playground/docker-compose.yml index b53d4b72..6318db74 100644 --- a/slo/playground/docker-compose.yml +++ b/slo/playground/docker-compose.yml @@ -71,19 +71,21 @@ services: context: ../.. dockerfile: slo/src/Dockerfile command: - - 'create' - - 'http://ydb:2135' + - 'run' + - 'http://ydb:2136' - '/local' + - '--prom-pgw' + - 'http://prometheus-pushgateway:9091' - '--table-name' - 'slo-dotnet' - - '--min-partitions-count' - - '6' - - '--max-partitions-count' - - '1000' - - '--partition-size' - - '1' - - '--initial-data-count' - - '1000' + # - '--min-partitions-count' + # - '6' + # - '--max-partitions-count' + # - '1000' + # - '--partition-size' + # - '1' + # - '--initial-data-count' + # - '1000' networks: - monitor-net depends_on: diff --git a/slo/src/Cli/CliCommands.cs b/slo/src/Cli/CliCommands.cs index 0eb66328..b05f2d2a 100644 --- a/slo/src/Cli/CliCommands.cs +++ b/slo/src/Cli/CliCommands.cs @@ -76,7 +76,7 @@ internal static async Task Run(RunConfig runConfig) Console.WriteLine(runConfig.PromPgw); - using var prometheus = new MetricPusher(endpoint: runConfig.PromPgw, job: "workload-dotnet", + using var prometheus = new MetricPusher(endpoint: $"{runConfig.PromPgw}/metrics", job: "workload-dotnet", intervalMilliseconds: runConfig.ReportPeriod); prometheus.Start(); diff --git a/slo/src/Jobs/Job.cs b/slo/src/Jobs/Job.cs index 2e8657e3..6eb1ba9b 100644 --- a/slo/src/Jobs/Job.cs +++ b/slo/src/Jobs/Job.cs @@ -71,7 +71,6 @@ private async Task DoJob() await PerformQuery(); sw.Stop(); - Console.WriteLine($"job OK {sw.ElapsedMilliseconds}"); _latencySummary.WithLabels("ok").Observe(sw.ElapsedMilliseconds); _okCounter.Inc(); _inFlightGauge.Dec(); @@ -80,7 +79,6 @@ private async Task DoJob() { Console.WriteLine(e); sw.Stop(); - Console.WriteLine($"job ERR {sw.ElapsedMilliseconds}"); _latencySummary.WithLabels("err").Observe(sw.ElapsedMilliseconds); _notOkCounter.Inc(); From 1b737a3a230febc35fc7edb5a2dfc994523a27f4 Mon Sep 17 00:00:00 2001 From: XmasApple Date: Mon, 11 Sep 2023 18:04:48 +0300 Subject: [PATCH 08/46] Fix prometheus-pushgateway error last push failed --- slo/src/Jobs/Job.cs | 9 +++++---- slo/src/Program.cs | 3 +-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/slo/src/Jobs/Job.cs b/slo/src/Jobs/Job.cs index 6eb1ba9b..44bf50ab 100644 --- a/slo/src/Jobs/Job.cs +++ b/slo/src/Jobs/Job.cs @@ -7,10 +7,11 @@ public abstract class Job { private readonly Gauge _inFlightGauge; + private readonly Gauge _okCounter; + private readonly Gauge _notOkCounter; + private readonly Summary _latencySummary; - private readonly Counter _notOkCounter; - private readonly Counter _okCounter; private readonly RateLimitedCaller _rateLimitedCaller; protected readonly Histogram AttemptsHistogram; @@ -30,8 +31,8 @@ protected Job(Table table, RateLimitedCaller rateLimitedCaller, string jobName) { "sdkVersion", Environment.Version.ToString() } }); - _okCounter = metricFactory.CreateCounter("oks", "Count of OK"); - _notOkCounter = metricFactory.CreateCounter("not_oks", "Count of not OK"); + _okCounter = metricFactory.CreateGauge("oks", "Count of OK"); + _notOkCounter = metricFactory.CreateGauge("not_oks", "Count of not OK"); _inFlightGauge = metricFactory.CreateGauge("in_flight", "amount of requests in flight"); _latencySummary = metricFactory.CreateSummary( diff --git a/slo/src/Program.cs b/slo/src/Program.cs index 4bbd8e6c..9f41679d 100644 --- a/slo/src/Program.cs +++ b/slo/src/Program.cs @@ -1,4 +1,3 @@ -using slo; -using slo.Cli; +using slo.Cli; return await Cli.Run(args); \ No newline at end of file From d8421cf43e69eb9cf70924f3966f3e258bbef7b6 Mon Sep 17 00:00:00 2001 From: XmasApple Date: Mon, 11 Sep 2023 18:18:55 +0300 Subject: [PATCH 09/46] fix run time --- .github/workflows/slo.yml | 3 ++- slo/src/Cli/Cli.cs | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/slo.yml b/.github/workflows/slo.yml index d2624c9c..1a8a9e0a 100644 --- a/.github/workflows/slo.yml +++ b/.github/workflows/slo.yml @@ -45,7 +45,8 @@ jobs: s3_endpoint: ${{ secrets.SLO_S3_ENDPOINT }} s3_images_folder: ${{ vars.SLO_S3_IMAGES_FOLDER }} grafana_domain: ${{ vars.SLO_GRAFANA_DOMAIN }} - grafana_dashboard: ${{ vars.SLO_GRAFANA_DASHBOARD }} + # grafana_dashboard: ${{ vars.SLO_GRAFANA_DASHBOARD }} + grafana_dashboard: dca60386-0d3d-43f5-a2af-5f3fd3e3b295 ydb_version: 'newest' timeBetweenPhases: 30 shutdownTime: 30 diff --git a/slo/src/Cli/Cli.cs b/slo/src/Cli/Cli.cs index 589a2f84..15b63bf9 100644 --- a/slo/src/Cli/Cli.cs +++ b/slo/src/Cli/Cli.cs @@ -70,7 +70,7 @@ internal static class Cli private static readonly Option TimeOption = new( "--time", - () => 600, + () => 140, "run time in seconds"); private static readonly Option ShutdownTimeOption = new( From a1d4a5b47eb20160577f457a36c7ef45feaf1900 Mon Sep 17 00:00:00 2001 From: XmasApple Date: Mon, 11 Sep 2023 18:26:02 +0300 Subject: [PATCH 10/46] Update slo.yml --- .github/workflows/slo.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/slo.yml b/.github/workflows/slo.yml index 1a8a9e0a..c5c8f4d5 100644 --- a/.github/workflows/slo.yml +++ b/.github/workflows/slo.yml @@ -29,7 +29,7 @@ jobs: DOCKER_REPO: ${{ secrets.SLO_DOCKER_REPO }} - name: Run SLO - uses: ydb-platform/slo-tests@php-version + uses: ydb-platform/slo-tests@js-version if: env.DOCKER_REPO != null env: DOCKER_REPO: ${{ secrets.SLO_DOCKER_REPO }} @@ -45,8 +45,7 @@ jobs: s3_endpoint: ${{ secrets.SLO_S3_ENDPOINT }} s3_images_folder: ${{ vars.SLO_S3_IMAGES_FOLDER }} grafana_domain: ${{ vars.SLO_GRAFANA_DOMAIN }} - # grafana_dashboard: ${{ vars.SLO_GRAFANA_DASHBOARD }} - grafana_dashboard: dca60386-0d3d-43f5-a2af-5f3fd3e3b295 + grafana_dashboard: ${{ vars.SLO_GRAFANA_DASHBOARD }} ydb_version: 'newest' timeBetweenPhases: 30 shutdownTime: 30 From c1df7ac8334dd8a6f7d206d1ad19dad9fa7d7b37 Mon Sep 17 00:00:00 2001 From: XmasApple Date: Mon, 11 Sep 2023 19:12:07 +0300 Subject: [PATCH 11/46] Remove redundant code --- slo/playground/docker-compose.yml | 9 ------- slo/src/Cli/Cli.cs | 39 +++++-------------------------- slo/src/Cli/ConfigBinders.cs | 22 +++++------------ slo/src/Cli/Configs.cs | 7 +++--- slo/src/RateLimitedCaller.cs | 9 +++---- 5 files changed, 20 insertions(+), 66 deletions(-) diff --git a/slo/playground/docker-compose.yml b/slo/playground/docker-compose.yml index 6318db74..355318d6 100644 --- a/slo/playground/docker-compose.yml +++ b/slo/playground/docker-compose.yml @@ -50,7 +50,6 @@ services: ydb: image: cr.yandex/yc/yandex-docker-local-ydb:23.1 container_name: ydb - # hostname: localhost environment: - GRPC_TLS_PORT=2135 - GRPC_PORT=2136 @@ -78,14 +77,6 @@ services: - 'http://prometheus-pushgateway:9091' - '--table-name' - 'slo-dotnet' - # - '--min-partitions-count' - # - '6' - # - '--max-partitions-count' - # - '1000' - # - '--partition-size' - # - '1' - # - '--initial-data-count' - # - '1000' networks: - monitor-net depends_on: diff --git a/slo/src/Cli/Cli.cs b/slo/src/Cli/Cli.cs index 15b63bf9..247fdd15 100644 --- a/slo/src/Cli/Cli.cs +++ b/slo/src/Cli/Cli.cs @@ -78,29 +78,6 @@ internal static class Cli () => 30, "time to wait before force kill workers"); - private static readonly Option X509CertPathOption = new( - "--cert", - result => - { - const string defaultX590Path = "/ydb-ca.pem"; - if (result.Tokens.Count == 0) - { - return new FileInfo(defaultX590Path); - } - - var filePath = result.Tokens.Single().Value; - if (File.Exists(filePath)) - { - return new FileInfo(filePath); - } - - result.ErrorMessage = "File does not exist"; - return null; - }, - true, - "Path to x509 certificate file" - ); - private static readonly Command CreateCommand = new( "create", "creates table in database") @@ -112,8 +89,7 @@ internal static class Cli MinPartitionsCountOption, MaxPartitionsCountOption, PartitionSizeOption, - InitialDataCountOption, - X509CertPathOption + InitialDataCountOption }; @@ -124,8 +100,7 @@ internal static class Cli TableOption, WriteTimeoutOption, EndpointArgument, - DbArgument, - X509CertPathOption + DbArgument }; private static readonly Command RunCommand = new( @@ -142,8 +117,7 @@ internal static class Cli ReadTimeoutOption, WriteRpsOption, TimeOption, - ShutdownTimeOption, - X509CertPathOption + ShutdownTimeOption }; private static readonly RootCommand RootCommand = new("SLO app") @@ -156,19 +130,18 @@ internal static async Task Run(string[] args) CreateCommand.SetHandler( async createConfig => { await CliCommands.Create(createConfig); }, new CreateConfigBinder(EndpointArgument, DbArgument, TableOption, MinPartitionsCountOption, - MaxPartitionsCountOption, PartitionSizeOption, InitialDataCountOption, WriteTimeoutOption, - X509CertPathOption) + MaxPartitionsCountOption, PartitionSizeOption, InitialDataCountOption, WriteTimeoutOption) ); CleanupCommand.SetHandler( async cleanUpConfig => { await CliCommands.CleanUp(cleanUpConfig); }, - new CleanUpConfigBinder(EndpointArgument, DbArgument, TableOption, WriteTimeoutOption, X509CertPathOption) + new CleanUpConfigBinder(EndpointArgument, DbArgument, TableOption, WriteTimeoutOption) ); RunCommand.SetHandler(async runConfig => { await CliCommands.Run(runConfig); }, new RunConfigBinder(EndpointArgument, DbArgument, TableOption, InitialDataCountOption, PromPgwOption, ReportPeriodOption, ReadRpsOption, ReadTimeoutOption, WriteRpsOption, WriteTimeoutOption, TimeOption, - ShutdownTimeOption, X509CertPathOption)); + ShutdownTimeOption)); return await RootCommand.InvokeAsync(args); } } \ No newline at end of file diff --git a/slo/src/Cli/ConfigBinders.cs b/slo/src/Cli/ConfigBinders.cs index f5624eef..a4b32493 100644 --- a/slo/src/Cli/ConfigBinders.cs +++ b/slo/src/Cli/ConfigBinders.cs @@ -13,12 +13,10 @@ internal class CreateConfigBinder : BinderBase private readonly Option _partitionSizeOption; private readonly Option _tableOption; private readonly Option _writeTimeoutOption; - private readonly Option _x590CertPathOption; public CreateConfigBinder(Argument endpointArgument, Argument dbArgument, Option tableOption, Option minPartitionsCountOption, Option maxPartitionsCountOption, - Option partitionSizeOption, Option initialDataCountOption, Option writeTimeoutOption, - Option x590CertPathOption) + Option partitionSizeOption, Option initialDataCountOption, Option writeTimeoutOption) { _endpointArgument = endpointArgument; _dbArgument = dbArgument; @@ -28,7 +26,6 @@ public CreateConfigBinder(Argument endpointArgument, Argument db _partitionSizeOption = partitionSizeOption; _initialDataCountOption = initialDataCountOption; _writeTimeoutOption = writeTimeoutOption; - _x590CertPathOption = x590CertPathOption; } protected override CreateConfig GetBoundValue(BindingContext bindingContext) @@ -41,8 +38,7 @@ protected override CreateConfig GetBoundValue(BindingContext bindingContext) bindingContext.ParseResult.GetValueForOption(_maxPartitionsCountOption), bindingContext.ParseResult.GetValueForOption(_partitionSizeOption), bindingContext.ParseResult.GetValueForOption(_initialDataCountOption), - bindingContext.ParseResult.GetValueForOption(_writeTimeoutOption), - bindingContext.ParseResult.GetValueForOption(_x590CertPathOption) + bindingContext.ParseResult.GetValueForOption(_writeTimeoutOption) ); } } @@ -53,16 +49,14 @@ internal class CleanUpConfigBinder : BinderBase private readonly Argument _endpointArgument; private readonly Option _tableOption; private readonly Option _writeTimeoutOption; - private readonly Option _x590CertPathOption; public CleanUpConfigBinder(Argument endpointArgument, Argument dbArgument, - Option tableOption, Option writeTimeoutOption, Option x590CertPathOption) + Option tableOption, Option writeTimeoutOption) { _endpointArgument = endpointArgument; _dbArgument = dbArgument; _tableOption = tableOption; _writeTimeoutOption = writeTimeoutOption; - _x590CertPathOption = x590CertPathOption; } protected override CleanUpConfig GetBoundValue(BindingContext bindingContext) @@ -71,8 +65,7 @@ protected override CleanUpConfig GetBoundValue(BindingContext bindingContext) bindingContext.ParseResult.GetValueForArgument(_endpointArgument), bindingContext.ParseResult.GetValueForArgument(_dbArgument), bindingContext.ParseResult.GetValueForOption(_tableOption)!, - bindingContext.ParseResult.GetValueForOption(_writeTimeoutOption), - bindingContext.ParseResult.GetValueForOption(_x590CertPathOption) + bindingContext.ParseResult.GetValueForOption(_writeTimeoutOption) ); } } @@ -91,13 +84,12 @@ internal class RunConfigBinder : BinderBase private readonly Option _timeOption; private readonly Option _writeRpsOption; private readonly Option _writeTimeoutOption; - private readonly Option _x590CertPathOption; public RunConfigBinder(Argument endpointArgument, Argument dbArgument, Option tableOption, Option initialDataCountOption, Option promPgwOption, Option reportPeriodOption, Option readRpsOption, Option readTimeoutOption, Option writeRpsOption, Option writeTimeoutOption, Option timeOption, - Option shutdownTimeOption, Option x590CertPathOption) + Option shutdownTimeOption) { _endpointArgument = endpointArgument; _dbArgument = dbArgument; @@ -111,7 +103,6 @@ public RunConfigBinder(Argument endpointArgument, Argument dbArg _writeTimeoutOption = writeTimeoutOption; _timeOption = timeOption; _shutdownTimeOption = shutdownTimeOption; - _x590CertPathOption = x590CertPathOption; } protected override RunConfig GetBoundValue(BindingContext bindingContext) @@ -128,8 +119,7 @@ protected override RunConfig GetBoundValue(BindingContext bindingContext) bindingContext.ParseResult.GetValueForOption(_writeRpsOption), bindingContext.ParseResult.GetValueForOption(_writeTimeoutOption), bindingContext.ParseResult.GetValueForOption(_timeOption), - bindingContext.ParseResult.GetValueForOption(_shutdownTimeOption), - bindingContext.ParseResult.GetValueForOption(_x590CertPathOption) + bindingContext.ParseResult.GetValueForOption(_shutdownTimeOption) ); } } \ No newline at end of file diff --git a/slo/src/Cli/Configs.cs b/slo/src/Cli/Configs.cs index 6126de75..d5ad8d95 100644 --- a/slo/src/Cli/Configs.cs +++ b/slo/src/Cli/Configs.cs @@ -1,10 +1,9 @@ namespace slo.Cli; internal record CreateConfig(string Endpoint, string Db, string TableName, int MinPartitionsCount, - int MaxPartitionsCount, int PartitionSize, int InitialDataCount, int WriteTimeout, FileInfo? X509CertPath); + int MaxPartitionsCount, int PartitionSize, int InitialDataCount, int WriteTimeout); -internal record CleanUpConfig(string Endpoint, string Db, string TableName, int WriteTimeout, FileInfo? X509CertPath); +internal record CleanUpConfig(string Endpoint, string Db, string TableName, int WriteTimeout); internal record RunConfig(string Endpoint, string Db, string TableName, int InitialDataCount, string PromPgw, - int ReportPeriod, int ReadRps, int ReadTimeout, int WriteRps, int WriteTimeout, int Time, int ShutdownTime, - FileInfo? X509CertPath); \ No newline at end of file + int ReportPeriod, int ReadRps, int ReadTimeout, int WriteRps, int WriteTimeout, int Time, int ShutdownTime); \ No newline at end of file diff --git a/slo/src/RateLimitedCaller.cs b/slo/src/RateLimitedCaller.cs index cb8f373a..a90ee3d7 100644 --- a/slo/src/RateLimitedCaller.cs +++ b/slo/src/RateLimitedCaller.cs @@ -21,18 +21,18 @@ public Task StartCalling(Func action, Gauge inFlightGauge) { var endTime = DateTime.Now + _duration; - // var i = 0; - while (_duration == default || DateTime.Now < endTime) { - // i++; while (inFlightGauge.Value > _rate) + { Thread.Sleep(1); + } + while (true) + { try { _tokenBucket.UseToken(); - // Console.WriteLine($"{DateTime.Now.Second}:{DateTime.Now.Millisecond} i={i}"); _ = action(); break; } @@ -40,6 +40,7 @@ public Task StartCalling(Func action, Gauge inFlightGauge) { Thread.Sleep(1); } + } } return Task.CompletedTask; From ab23f2822753ee650adc30fb3512b1b3103f2f2e Mon Sep 17 00:00:00 2001 From: XmasApple Date: Mon, 11 Sep 2023 21:46:30 +0300 Subject: [PATCH 12/46] Add metrics reset --- slo/src/Cli/CliCommands.cs | 15 +++++++++++---- slo/src/Jobs/Job.cs | 12 ++++++------ 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/slo/src/Cli/CliCommands.cs b/slo/src/Cli/CliCommands.cs index b05f2d2a..bdbfa61c 100644 --- a/slo/src/Cli/CliCommands.cs +++ b/slo/src/Cli/CliCommands.cs @@ -60,6 +60,13 @@ internal static async Task CleanUp(CleanUpConfig cleanUpConfig) internal static async Task Run(RunConfig runConfig) { + var promPgwEndpoint = $"{runConfig.PromPgw}/metrics"; + const string job = "workload-dotnet"; + using var httpClient = new HttpClient(); + var deleteUri = $"{promPgwEndpoint}/job/{job}"; + + await httpClient.DeleteAsync(deleteUri); + var config = new DriverConfig( runConfig.Endpoint, runConfig.Db @@ -76,8 +83,7 @@ internal static async Task Run(RunConfig runConfig) Console.WriteLine(runConfig.PromPgw); - using var prometheus = new MetricPusher(endpoint: $"{runConfig.PromPgw}/metrics", job: "workload-dotnet", - intervalMilliseconds: runConfig.ReportPeriod); + using var prometheus = new MetricPusher(promPgwEndpoint, job, intervalMilliseconds: runConfig.ReportPeriod); prometheus.Start(); @@ -98,10 +104,11 @@ internal static async Task Run(RunConfig runConfig) readThread.Start(); writeThread.Start(); - await Task.Delay(duration + TimeSpan.FromSeconds(runConfig.ShutdownTime)); - readThread.Join(); writeThread.Join(); + + await prometheus.StopAsync(); + await httpClient.DeleteAsync(deleteUri); } } \ No newline at end of file diff --git a/slo/src/Jobs/Job.cs b/slo/src/Jobs/Job.cs index 44bf50ab..0c18e658 100644 --- a/slo/src/Jobs/Job.cs +++ b/slo/src/Jobs/Job.cs @@ -7,8 +7,8 @@ public abstract class Job { private readonly Gauge _inFlightGauge; - private readonly Gauge _okCounter; - private readonly Gauge _notOkCounter; + private readonly Gauge _okGauge; + private readonly Gauge _notOkGauge; private readonly Summary _latencySummary; @@ -31,8 +31,8 @@ protected Job(Table table, RateLimitedCaller rateLimitedCaller, string jobName) { "sdkVersion", Environment.Version.ToString() } }); - _okCounter = metricFactory.CreateGauge("oks", "Count of OK"); - _notOkCounter = metricFactory.CreateGauge("not_oks", "Count of not OK"); + _okGauge = metricFactory.CreateGauge("oks", "Count of OK"); + _notOkGauge = metricFactory.CreateGauge("not_oks", "Count of not OK"); _inFlightGauge = metricFactory.CreateGauge("in_flight", "amount of requests in flight"); _latencySummary = metricFactory.CreateSummary( @@ -73,7 +73,7 @@ private async Task DoJob() sw.Stop(); _latencySummary.WithLabels("ok").Observe(sw.ElapsedMilliseconds); - _okCounter.Inc(); + _okGauge.Inc(); _inFlightGauge.Dec(); } catch (Exception e) @@ -82,7 +82,7 @@ private async Task DoJob() sw.Stop(); _latencySummary.WithLabels("err").Observe(sw.ElapsedMilliseconds); - _notOkCounter.Inc(); + _notOkGauge.Inc(); _inFlightGauge.Dec(); throw; } From 863e4326ee44a452ad66f130310175b555047644 Mon Sep 17 00:00:00 2001 From: XmasApple Date: Tue, 12 Sep 2023 18:13:21 +0300 Subject: [PATCH 13/46] refactoring --- slo/src/Cli/Cli.cs | 17 +++++++++-------- slo/src/Cli/CliCommands.cs | 12 ++++++++---- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/slo/src/Cli/Cli.cs b/slo/src/Cli/Cli.cs index 247fdd15..f19dfc63 100644 --- a/slo/src/Cli/Cli.cs +++ b/slo/src/Cli/Cli.cs @@ -82,14 +82,14 @@ internal static class Cli "create", "creates table in database") { - TableOption, - WriteTimeoutOption, EndpointArgument, DbArgument, + TableOption, MinPartitionsCountOption, MaxPartitionsCountOption, PartitionSizeOption, - InitialDataCountOption + InitialDataCountOption, + WriteTimeoutOption }; @@ -97,25 +97,26 @@ internal static class Cli "cleanup", "drops table in database") { - TableOption, - WriteTimeoutOption, EndpointArgument, - DbArgument + DbArgument, + TableOption, + WriteTimeoutOption }; private static readonly Command RunCommand = new( "run", "runs workload (read and write to table with sets RPS)") { - TableOption, - WriteTimeoutOption, EndpointArgument, DbArgument, + TableOption, + InitialDataCountOption, PromPgwOption, ReportPeriodOption, ReadRpsOption, ReadTimeoutOption, WriteRpsOption, + WriteTimeoutOption, TimeOption, ShutdownTimeOption }; diff --git a/slo/src/Cli/CliCommands.cs b/slo/src/Cli/CliCommands.cs index bdbfa61c..57490561 100644 --- a/slo/src/Cli/CliCommands.cs +++ b/slo/src/Cli/CliCommands.cs @@ -62,10 +62,6 @@ internal static async Task Run(RunConfig runConfig) { var promPgwEndpoint = $"{runConfig.PromPgw}/metrics"; const string job = "workload-dotnet"; - using var httpClient = new HttpClient(); - var deleteUri = $"{promPgwEndpoint}/job/{job}"; - - await httpClient.DeleteAsync(deleteUri); var config = new DriverConfig( runConfig.Endpoint, @@ -83,6 +79,7 @@ internal static async Task Run(RunConfig runConfig) Console.WriteLine(runConfig.PromPgw); + await MetricReset(promPgwEndpoint, job); using var prometheus = new MetricPusher(promPgwEndpoint, job, intervalMilliseconds: runConfig.ReportPeriod); prometheus.Start(); @@ -109,6 +106,13 @@ internal static async Task Run(RunConfig runConfig) writeThread.Join(); await prometheus.StopAsync(); + await MetricReset(promPgwEndpoint, job); + } + + private static async Task MetricReset(string promPgwEndpoint, string job) + { + var deleteUri = $"{promPgwEndpoint}/job/{job}"; + using var httpClient = new HttpClient(); await httpClient.DeleteAsync(deleteUri); } } \ No newline at end of file From 7a81e60891eebb89db4d615a7c93fae198faabe5 Mon Sep 17 00:00:00 2001 From: XmasApple Date: Tue, 12 Sep 2023 18:13:55 +0300 Subject: [PATCH 14/46] add create and cleanup to playground --- slo/playground/docker-compose.yml | 45 ++++++++++++++++++++++++++++--- 1 file changed, 42 insertions(+), 3 deletions(-) diff --git a/slo/playground/docker-compose.yml b/slo/playground/docker-compose.yml index 355318d6..dabdb5ba 100644 --- a/slo/playground/docker-compose.yml +++ b/slo/playground/docker-compose.yml @@ -64,8 +64,32 @@ services: - ../data/ydb_certs:/ydb_certs networks: - monitor-net + + slo-create: + build: + context: ../.. + dockerfile: slo/src/Dockerfile + command: + - 'create' + - 'http://ydb:2136' + - '/local' + - '--table-name' + - 'slo-dotnet' + - '--min-partitions-count' + - '6' + - '--max-partitions-count' + - '1000' + - '--partition-size' + - '1' + - '--initial-data-count' + - '1000' + networks: + - monitor-net + depends_on: + ydb: + condition: service_healthy - slo-runner: + slo-run: build: context: ../.. dockerfile: slo/src/Dockerfile @@ -80,6 +104,21 @@ services: networks: - monitor-net depends_on: - ydb: - condition: service_healthy + slo-create: + condition: service_completed_successfully + slo-cleanup: + build: + context: ../.. + dockerfile: slo/src/Dockerfile + command: + - 'cleanup' + - 'http://ydb:2136' + - '/local' + - '--table-name' + - 'slo-dotnet' + networks: + - monitor-net + depends_on: + slo-run: + condition: service_completed_successfully From 06f42f894e3889994e02603e5b007fe55b65eae8 Mon Sep 17 00:00:00 2001 From: XmasApple Date: Tue, 12 Sep 2023 18:16:41 +0300 Subject: [PATCH 15/46] Update `README.md` --- slo/src/README.md | 111 ++++++++++++++++++++-------------------------- 1 file changed, 47 insertions(+), 64 deletions(-) diff --git a/slo/src/README.md b/slo/src/README.md index d4085dd7..7ce80dbd 100644 --- a/slo/src/README.md +++ b/slo/src/README.md @@ -15,82 +15,76 @@ It has 3 commands: create: -`$APP create grpcs://ydb.cool.example.com:2135 /some/folder -t tableName --min-partitions-count 6 -max-partitions-count 1000 -partition-size 1 -с 1000 --write-timeout 10000` +`slo create grpcs://ydb.cool.example.com:2135 /some/folder -t tableName +--min-partitions-count 6 --max-partitions-count 1000 --partition-size 1 -с 1000 +--write-timeout 10000` cleanup: -`$APP cleanup grpcs://ydb.cool.example.com:2135 /some/folder -t tableName` +`slo cleanup grpcs://ydb.cool.example.com:2135 /some/folder -t tableName` run: -`$APP create run grpcs://ydb.cool.example.com:2135 /some/folder -t tableName --prom-pgw http://prometheus-pushgateway:9091 -report-period 250 --read-rps 1000 -read-timeout 10000 --write-rps 100 -write-timeout 10000 --time 600 -shutdown-time 30` +`slo create run grpcs://ydb.cool.example.com:2135 /some/folder -t tableName +--prom-pgw http://prometheus-pushgateway:9091 -report-period 250 +--read-rps 1000 --read-timeout 10000 +--write-rps 100 --write-timeout 10000 +--time 600 --shutdown-time 30` ## Arguments for commands: ### create -`$APP create [options]` +`slo create [ []] [options]` ``` Arguments: - endpoint YDB endpoint to connect to - db YDB database to connect to + YDB endpoint to connect to + YDB database to connect to Options: - -t -table-name table name to create - - -min-partitions-count minimum amount of partitions in table - -max-partitions-count maximum amount of partitions in table - -partition-size partition size in mb - - -c -initial-data-count amount of initially created rows - - -write-timeout write timeout milliseconds + -t, --table-name table name to create + [default: testingTable] + --min-partitions-count minimum amount of partitions in table [default: 6] + --max-partitions-count maximum amount of partitions in table [default: 1000] + --partition-size partition size in mb [default: 1] + -c, --initial-data-count amount of initially created rows [default: 1000] + --write-timeout write timeout milliseconds [default: 10000] ``` ### cleanup -`$APP cleanup [options]` +`slo cleanup [ []] [options]` ``` Arguments: - endpoint YDB endpoint to connect to - db YDB database to connect to + YDB endpoint to connect to + YDB database to connect to Options: - -t -table-name table name to create - - -write-timeout write timeout milliseconds + -t, --table-name table name to create + [default: testingTable] + --write-timeout write timeout milliseconds [default: 10000] ``` ### run -`$APP run [options]` +`slo run [ []] [options]` ``` Arguments: - endpoint YDB endpoint to connect to - db YDB database to connect to + YDB endpoint to connect to + YDB database to connect to Options: - -t -table-name table name to create - - -initial-data-count amount of initially created rows - - -prom-pgw prometheus push gateway - -report-period prometheus push period in milliseconds - - -read-rps read RPS - -read-timeout read timeout milliseconds - - -write-rps write RPS - -write-timeout write timeout milliseconds - - -time run time in seconds - -shutdown-time time to wait before force kill workers + -t, --table-name table name to create + [default: testingTable] + --prom-pgw (REQUIRED) minimum amount of partitions in table + --report-period prometheus push period in milliseconds [default: 250] + --read-rps read RPS [default: 1000] + --read-timeout read timeout milliseconds [default: 10000] + --write-rps write RPS [default: 100] + --time