chore(ci): convert flask_sqli benchmark to use Flask test client instead of gunicorn (#9253)

Follow-up from #8902 to also convert the `flask_sqli` benchmark to use the
Flask test client instead of gunicorn.

This cuts the benchmark runtime from 25min to 5min.

We should still be testing the same code paths, while avoiding the need to
spin up a subprocess/server and make network requests to it.
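
As a rough illustration (a minimal sketch, not the exact harness code — the real request loop lives in the `bm` scenarios shown in the diff below), the old path started gunicorn in a subprocess and issued real HTTP requests, while the new path drives the same WSGI app in-process through Flask's test client:

```python
# Minimal sketch of the change (names taken from the diff below; the real
# benchmark loop is more involved).
#
# Before: start gunicorn in a subprocess and hit it over the network.
#   proc = subprocess.Popen(["gunicorn", "-c", "gunicorn.conf.py"], env=env)
#   requests.post("http://0.0.0.0:8000/sqli", data={"username": "user"})
#   proc.terminate()
#
# After: exercise the same WSGI app in-process via Flask's test client.
from bm.flask_utils import create_app

app = create_app()
client = app.test_client()
resp = client.post("/sqli", data={"username": "user"})  # same view code, no subprocess or sockets
assert resp.status_code == 200
```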

We also refactored the common bits of `flask_simple` and `flask_sqli` into
shared helpers in `bm/flask_utils.py` so that both scenarios are configured
the same way.
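
Concretely, the shared configuration now lives in `FlaskScenarioMixin` (full code in the `benchmarks/bm/flask_utils.py` diff below), so a scenario only has to mix it in; roughly:

```python
import bm
from bm.flask_utils import FlaskScenarioMixin


class FlaskSimple(FlaskScenarioMixin, bm.Scenario):
    def run(self):
        # create_app() on the mixin applies the appsec/iast/telemetry env vars and the
        # tracer/profiler/debugger setup before building the Flask app, so flask_simple
        # and flask_sqli are configured identically.
        app = self.create_app()
        # ... drive `app` via its test client in the measured loop (elided here)
```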

In the benchmark results this will look like a performance improvement, but it
isn't one; it is the test itself that got faster.

## Checklist

- [x] Change(s) are motivated and described in the PR description
- [x] Testing strategy is described if automated tests are not included
in the PR
- [x] Risks are described (performance impact, potential for breakage,
maintainability)
- [x] Change is maintainable (easy to change, telemetry, documentation)
- [x] [Library release note
guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html)
are followed or label `changelog/no-changelog` is set
- [x] Documentation is included (in-code, generated user docs, [public
corp docs](https://github.com/DataDog/documentation/))
- [x] Backport labels are set (if
[applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting))
- [x] If this PR changes the public interface, I've notified
`@DataDog/apm-tees`.

## Reviewer Checklist

- [ ] Title is accurate
- [ ] All changes are related to the pull request's stated goal
- [ ] Description motivates each change
- [ ] Avoids breaking
[API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces)
changes
- [ ] Testing strategy adequately addresses listed risks
- [ ] Change is maintainable (easy to change, telemetry, documentation)
- [ ] Release note makes sense to a user of the library
- [ ] Author has acknowledged and discussed the performance implications
of this PR as reported in the benchmarks PR comment
- [ ] Backport labels are set in a manner that is consistent with the
[release branch maintenance
policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)
brettlangdon authored May 14, 2024
1 parent ab86515 commit cee0c7b
Showing 8 changed files with 160 additions and 262 deletions.
2 changes: 1 addition & 1 deletion .gitlab/benchmarks.yml
@@ -15,7 +15,7 @@ variables:
- git config --global url."https://gitlab-ci-token:${CI_JOB_TOKEN}@gitlab.ddbuild.io/DataDog/".insteadOf "https://github.com/DataDog/"
- git clone --branch dd-trace-py https://github.com/DataDog/benchmarking-platform /platform && cd /platform
- ./steps/capture-hardware-software-info.sh
- '([ $SCENARIO = "flask_simple" ] && BP_SCENARIO=$SCENARIO /benchmarking-platform-tools/bp-runner/bp-runner "$REPORTS_DIR/../.gitlab/benchmarks/bp-runner.yml" --debug -t) || ([ $SCENARIO != "flask_simple" ] && ./steps/run-benchmarks.sh)'
- '([[ $SCENARIO =~ ^flask_* ]] && BP_SCENARIO=$SCENARIO /benchmarking-platform-tools/bp-runner/bp-runner "$REPORTS_DIR/../.gitlab/benchmarks/bp-runner.yml" --debug -t) || (! [[ $SCENARIO =~ ^flask_* ]] && ./steps/run-benchmarks.sh)'
- ./steps/analyze-results.sh
- "./steps/upload-results-to-s3.sh || :"
artifacts:
202 changes: 128 additions & 74 deletions benchmarks/bm/flask_utils.py
@@ -1,81 +1,135 @@
from contextlib import contextmanager
import hashlib
import os
import subprocess

import requests
import tenacity


SERVER_URL = "http://0.0.0.0:8000/"


def _get_response():
HEADERS = {
"User-Agent": "dd-test-scanner-log",
}
r = requests.get(SERVER_URL, headers=HEADERS)
r.raise_for_status()


@tenacity.retry(
wait=tenacity.wait_fixed(1),
stop=tenacity.stop_after_attempt(30),
)
def _wait():
_get_response()


@contextmanager
def server(scenario, custom_post_response):
env = {
"PERF_TRACER_ENABLED": str(scenario.tracer_enabled),
"PERF_PROFILER_ENABLED": str(scenario.profiler_enabled),
"PERF_DEBUGGER_ENABLED": str(scenario.debugger_enabled),
"DD_APPSEC_ENABLED": str(scenario.appsec_enabled),
"DD_IAST_ENABLED": str(scenario.iast_enabled),
"DD_TELEMETRY_METRICS_ENABLED": str(scenario.telemetry_metrics_enabled),
}
# copy over current environ
env.update(os.environ)
cmd = ["gunicorn", "-c", "gunicorn.conf.py"]
proc = subprocess.Popen(
cmd,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
close_fds=True,
env=env,
import random
import sqlite3

import attr
import bm
from flask import Flask
from flask import Response
from flask import render_template_string
from flask import request

from ddtrace.debugging._probe.model import DEFAULT_CAPTURE_LIMITS
from ddtrace.debugging._probe.model import DEFAULT_SNAPSHOT_PROBE_RATE
from ddtrace.debugging._probe.model import LiteralTemplateSegment
from ddtrace.debugging._probe.model import LogLineProbe


def make_index():
rand_numbers = [random.random() for _ in range(20)]
m = hashlib.md5()
m.update(b"Insecure hash")
rand_numbers.append(m.digest())
return render_template_string(
"""
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Hello World!</title>
</head>
<body>
<section class="section">
<div class="container">
<h1 class="title">
Hello World
</h1>
<p class="subtitle">
My first website
</p>
<ul>
{% for i in rand_numbers %}
<li>{{ i }}</li>
{% endfor %}
</ul>
</div>
</section>
</body>
</html>
""",
rand_numbers=rand_numbers,
)
# make sure process has been started
assert proc.poll() is None
try:
_wait()
if scenario.post_request:
response = custom_post_response
else:
response = _get_response
yield response
finally:
proc.terminate()
proc.wait()


def post_fork(server, worker):
# Set lower defaults for ensuring profiler collect is run
if os.environ.get("PERF_PROFILER_ENABLED") == "True":


def create_app():
app = Flask(__name__)

con = sqlite3.connect(":memory:", check_same_thread=False)
cur = con.cursor()

@app.route("/")
def index():
return make_index()

@app.route("/post-view", methods=["POST"])
def post_view():
data = request.data
return data, 200

@app.route("/sqli", methods=["POST"])
def sqli():
sql = "SELECT 1 FROM sqlite_master WHERE name = '" + request.form["username"] + "'"
cur.execute(sql)
return Response("OK")

return app


@attr.s()
class FlaskScenarioMixin:
tracer_enabled = bm.var_bool()
profiler_enabled = bm.var_bool()
debugger_enabled = bm.var_bool()
appsec_enabled = bm.var_bool()
iast_enabled = bm.var_bool()
post_request = bm.var_bool()
telemetry_metrics_enabled = bm.var_bool()

def setup(self):
# Setup the environment and enable Datadog features
os.environ.update(
{"DD_PROFILING_ENABLED": "1", "DD_PROFILING_API_TIMEOUT": "0.1", "DD_PROFILING_UPLOAD_INTERVAL": "10"}
{
"DD_APPSEC_ENABLED": str(self.appsec_enabled),
"DD_IAST_ENABLED": str(self.iast_enabled),
"DD_TELEMETRY_METRICS_ENABLED": str(self.telemetry_metrics_enabled),
}
)
# This will not work with gevent workers as the gevent hub has not been
# initialized when this hook is called.
if os.environ.get("PERF_TRACER_ENABLED") == "True":
import ddtrace.bootstrap.sitecustomize # noqa:F401
if self.profiler_enabled:
os.environ.update(
{"DD_PROFILING_ENABLED": "1", "DD_PROFILING_API_TIMEOUT": "0.1", "DD_PROFILING_UPLOAD_INTERVAL": "10"}
)
if not self.tracer_enabled:
import ddtrace.profiling.auto # noqa:F401

if self.tracer_enabled:
import ddtrace.bootstrap.sitecustomize # noqa:F401

if self.debugger_enabled:
from bm.di_utils import BMDebugger

BMDebugger.enable()

def post_worker_init(worker):
# If profiling enabled but not tracer than only run auto script for profiler
if os.environ.get("PERF_PROFILER_ENABLED") == "1" and os.environ.get("PERF_TRACER_ENABLED") == "0":
import ddtrace.profiling.auto # noqa:F401
if os.environ.get("PERF_DEBUGGER_ENABLED") == "True":
from bm.di_utils import BMDebugger
# Probes are added only if the BMDebugger is enabled.
probe_id = "bm-test"
BMDebugger.add_probes(
LogLineProbe(
probe_id=probe_id,
version=0,
tags={},
source_file="scenario.py",
line=23,
template=probe_id,
segments=[LiteralTemplateSegment(probe_id)],
take_snapshot=True,
limits=DEFAULT_CAPTURE_LIMITS,
condition=None,
condition_error_rate=0.0,
rate=DEFAULT_SNAPSHOT_PROBE_RATE,
),
)

BMDebugger.enable()
def create_app(self):
self.setup()
return create_app()
122 changes: 4 additions & 118 deletions benchmarks/flask_simple/scenario.py
@@ -1,125 +1,11 @@
import hashlib
import os
import random

import bm
import bm.utils as utils
from flask import Flask
from flask import render_template_string
from flask import request

from ddtrace.debugging._probe.model import DEFAULT_CAPTURE_LIMITS
from ddtrace.debugging._probe.model import DEFAULT_SNAPSHOT_PROBE_RATE
from ddtrace.debugging._probe.model import LiteralTemplateSegment
from ddtrace.debugging._probe.model import LogLineProbe


def make_index():
rand_numbers = [random.random() for _ in range(20)]
m = hashlib.md5()
m.update(b"Insecure hash")
rand_numbers.append(m.digest())
return render_template_string(
"""
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Hello World!</title>
</head>
<body>
<section class="section">
<div class="container">
<h1 class="title">
Hello World
</h1>
<p class="subtitle">
My first website
</p>
<ul>
{% for i in rand_numbers %}
<li>{{ i }}</li>
{% endfor %}
</ul>
</div>
</section>
</body>
</html>
""",
rand_numbers=rand_numbers,
)


def create_app():
app = Flask(__name__)

@app.route("/")
def index():
return make_index()

@app.route("/post-view", methods=["POST"])
def post_view():
data = request.data
return data, 200
from bm import utils
from bm.flask_utils import FlaskScenarioMixin

return app


class FlaskSimple(bm.Scenario):
tracer_enabled = bm.var_bool()
profiler_enabled = bm.var_bool()
debugger_enabled = bm.var_bool()
appsec_enabled = bm.var_bool()
iast_enabled = bm.var_bool()
post_request = bm.var_bool()
telemetry_metrics_enabled = bm.var_bool()

class FlaskSimple(FlaskScenarioMixin, bm.Scenario):
def run(self):
# Setup the environment and enable Datadog features
os.environ.update(
{
"DD_APPSEC_ENABLED": str(self.appsec_enabled),
"DD_IAST_ENABLED": str(self.iast_enabled),
"DD_TELEMETRY_METRICS_ENABLED": str(self.telemetry_metrics_enabled),
}
)
if self.profiler_enabled:
os.environ.update(
{"DD_PROFILING_ENABLED": "1", "DD_PROFILING_API_TIMEOUT": "0.1", "DD_PROFILING_UPLOAD_INTERVAL": "10"}
)
if not self.tracer_enabled:
import ddtrace.profiling.auto # noqa:F401

if self.tracer_enabled:
import ddtrace.bootstrap.sitecustomize # noqa:F401

if self.debugger_enabled:
from bm.di_utils import BMDebugger

BMDebugger.enable()

# Probes are added only if the BMDebugger is enabled.
probe_id = "bm-test"
BMDebugger.add_probes(
LogLineProbe(
probe_id=probe_id,
version=0,
tags={},
source_file="scenario.py",
line=23,
template=probe_id,
segments=[LiteralTemplateSegment(probe_id)],
take_snapshot=True,
limits=DEFAULT_CAPTURE_LIMITS,
condition=None,
condition_error_rate=0.0,
rate=DEFAULT_SNAPSHOT_PROBE_RATE,
),
)

# Create the Flask app
app = create_app()
app = self.create_app()

# Setup the request function
if self.post_request:
23 changes: 0 additions & 23 deletions benchmarks/flask_sqli/app.py

This file was deleted.

8 changes: 0 additions & 8 deletions benchmarks/flask_sqli/gunicorn.conf.py

This file was deleted.

2 changes: 0 additions & 2 deletions benchmarks/flask_sqli/requirements_scenario.txt
@@ -1,3 +1 @@
flask==3.0.0
gunicorn==20.1.0
requests==2.31.0
