Skip to content

Commit

Permalink
chore(tracer): add dataclass for tracer flare RC configs (#8969)
Browse files Browse the repository at this point in the history
## Overview
Adds a dataclass `FlareSendRequest` that gets ingested by the
`Flare.send()` method, ensuring that we always get a valid RC config when
that method is called. `Flare.prepare()` now takes the validated
`log_level` string directly instead of the raw RC config list.

## Checklist

- [x] Change(s) are motivated and described in the PR description
- [x] Testing strategy is described if automated tests are not included
in the PR
- [x] Risks are described (performance impact, potential for breakage,
maintainability)
- [x] Change is maintainable (easy to change, telemetry, documentation)
- [x] [Library release note
guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html)
are followed or label `changelog/no-changelog` is set
- [x] Documentation is included (in-code, generated user docs, [public
corp docs](https://github.com/DataDog/documentation/))
- [x] Backport labels are set (if
[applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting))
- [x] If this PR changes the public interface, I've notified
`@DataDog/apm-tees`.

## Reviewer Checklist

- [ ] Title is accurate
- [ ] All changes are related to the pull request's stated goal
- [ ] Description motivates each change
- [ ] Avoids breaking
[API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces)
changes
- [ ] Testing strategy adequately addresses listed risks
- [ ] Change is maintainable (easy to change, telemetry, documentation)
- [ ] Release note makes sense to a user of the library
- [ ] Author has acknowledged and discussed the performance implications
of this PR as reported in the benchmarks PR comment
- [ ] Backport labels are set in a manner that is consistent with the
[release branch maintenance
policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)
  • Loading branch information
erikayasuda authored Apr 30, 2024
1 parent 52e3175 commit c46f79b
Show file tree
Hide file tree
Showing 2 changed files with 86 additions and 107 deletions.
152 changes: 70 additions & 82 deletions ddtrace/internal/flare.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import binascii
import dataclasses
import io
import json
import logging
Expand All @@ -7,9 +8,7 @@
import pathlib
import shutil
import tarfile
from typing import Any
from typing import Dict
from typing import List
from typing import Optional
from typing import Tuple

Expand All @@ -29,14 +28,22 @@
log = get_logger(__name__)


@dataclasses.dataclass
class FlareSendRequest:
    """Validated payload consumed by ``Flare.send()``.

    Building this dataclass up front guarantees ``send()`` always receives
    the fields required to upload a tracer flare, instead of digging them
    out of a raw remote-config task dict at send time.
    """

    case_id: str  # Zendesk case/ticket id the flare upload is attached to
    hostname: str  # hostname of the machine generating the flare
    email: str  # user handle associated with the support case
    source: str = "tracer_python"  # fixed identifier for this tracer implementation


class Flare:
def __init__(self, timeout_sec: int = DEFAULT_TIMEOUT_SECONDS, flare_dir: str = TRACER_FLARE_DIRECTORY):
self.original_log_level: int = logging.NOTSET
self.timeout: int = timeout_sec
self.flare_dir: pathlib.Path = pathlib.Path(flare_dir)
self.file_handler: Optional[RotatingFileHandler] = None

def prepare(self, configs: List[dict]):
def prepare(self, log_level: str):
"""
Update configurations to start sending tracer logs to a file
to be sent in a flare later.
Expand All @@ -46,90 +53,71 @@ def prepare(self, configs: List[dict]):
except Exception as e:
log.error("Failed to create %s directory: %s", self.flare_dir, e)
return
for agent_config in configs:
# AGENT_CONFIG is currently being used for multiple purposes
# We only want to prepare for a tracer flare if the config name
# starts with 'flare-log-level'
if not agent_config.get("name", "").startswith("flare-log-level"):
return

# Validate the flare log level
flare_log_level = agent_config.get("config", {}).get("log_level").upper()
flare_log_level_int = logging.getLevelName(flare_log_level)
if type(flare_log_level_int) != int:
raise TypeError("Invalid log level provided: %s", flare_log_level_int)

ddlogger = get_logger("ddtrace")
pid = os.getpid()
flare_file_path = self.flare_dir / f"tracer_python_{pid}.log"
self.original_log_level = ddlogger.level

# Set the logger level to the more verbose between original and flare
# We do this valid_original_level check because if the log level is NOTSET, the value is 0
# which is the minimum value. In this case, we just want to use the flare level, but still
# retain the original state as NOTSET/0
valid_original_level = 100 if self.original_log_level == 0 else self.original_log_level
logger_level = min(valid_original_level, flare_log_level_int)
ddlogger.setLevel(logger_level)
self.file_handler = _add_file_handler(
ddlogger, flare_file_path.__str__(), flare_log_level, TRACER_FLARE_FILE_HANDLER_NAME
)

# Create and add config file
self._generate_config_file(pid)

def send(self, configs: List[Any]):
flare_log_level_int = logging.getLevelName(log_level)
if type(flare_log_level_int) != int:
raise TypeError("Invalid log level provided: %s", log_level)

ddlogger = get_logger("ddtrace")
pid = os.getpid()
flare_file_path = self.flare_dir / f"tracer_python_{pid}.log"
self.original_log_level = ddlogger.level

# Set the logger level to the more verbose between original and flare
# We do this valid_original_level check because if the log level is NOTSET, the value is 0
# which is the minimum value. In this case, we just want to use the flare level, but still
# retain the original state as NOTSET/0
valid_original_level = (
logging.CRITICAL if self.original_log_level == logging.NOTSET else self.original_log_level
)
logger_level = min(valid_original_level, flare_log_level_int)
ddlogger.setLevel(logger_level)
self.file_handler = _add_file_handler(
ddlogger, flare_file_path.__str__(), flare_log_level_int, TRACER_FLARE_FILE_HANDLER_NAME
)

# Create and add config file
self._generate_config_file(pid)

def send(self, flare_send_req: FlareSendRequest):
"""
Revert tracer flare configurations back to original state
before sending the flare.
"""
for agent_task in configs:
# AGENT_TASK is currently being used for multiple purposes
# We only want to generate the tracer flare if the task_type is
# 'tracer_flare'
if type(agent_task) != dict or agent_task.get("task_type") != "tracer_flare":
continue
args = agent_task.get("args", {})

self.revert_configs()

# We only want the flare to be sent once, even if there are
# multiple tracer instances
lock_path = self.flare_dir / TRACER_FLARE_LOCK
if not os.path.exists(lock_path):
try:
open(lock_path, "w").close()
except Exception as e:
log.error("Failed to create %s file", lock_path)
raise e
data = {
"case_id": args.get("case_id"),
"source": "tracer_python",
"hostname": args.get("hostname"),
"email": args.get("user_handle"),
}
try:
client = get_connection(config._trace_agent_url, timeout=self.timeout)
headers, body = self._generate_payload(data)
client.request("POST", TRACER_FLARE_ENDPOINT, body, headers)
response = client.getresponse()
if response.status == 200:
log.info("Successfully sent the flare")
else:
log.error(
"Upload failed with %s status code:(%s) %s",
response.status,
response.reason,
response.read().decode(),
)
except Exception as e:
log.error("Failed to send tracer flare")
raise e
finally:
client.close()
# Clean up files regardless of success/failure
self.clean_up_files()
return
self.revert_configs()

# We only want the flare to be sent once, even if there are
# multiple tracer instances
lock_path = self.flare_dir / TRACER_FLARE_LOCK
if not os.path.exists(lock_path):
try:
open(lock_path, "w").close()
except Exception as e:
log.error("Failed to create %s file", lock_path)
raise e
try:
client = get_connection(config._trace_agent_url, timeout=self.timeout)
headers, body = self._generate_payload(flare_send_req.__dict__)
client.request("POST", TRACER_FLARE_ENDPOINT, body, headers)
response = client.getresponse()
if response.status == 200:
log.info("Successfully sent the flare to Zendesk ticket %s", flare_send_req.case_id)
else:
log.error(
"Tracer flare upload to Zendesk ticket %s failed with %s status code:(%s) %s",
flare_send_req.case_id,
response.status,
response.reason,
response.read().decode(),
)
except Exception as e:
log.error("Failed to send tracer flare to Zendesk ticket %s", flare_send_req.case_id)
raise e
finally:
client.close()
# Clean up files regardless of success/failure
self.clean_up_files()
return

def _generate_config_file(self, pid: int):
config_file = self.flare_dir / f"tracer_config_{pid}.json"
Expand Down
41 changes: 16 additions & 25 deletions tests/internal/test_tracer_flare.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,26 +11,17 @@
from ddtrace.internal.flare import TRACER_FLARE_DIRECTORY
from ddtrace.internal.flare import TRACER_FLARE_FILE_HANDLER_NAME
from ddtrace.internal.flare import Flare
from ddtrace.internal.flare import FlareSendRequest
from ddtrace.internal.logger import get_logger


DEBUG_LEVEL_INT = logging.DEBUG


class TracerFlareTests(unittest.TestCase):
mock_agent_config = [{"name": "flare-log-level", "config": {"log_level": "DEBUG"}}]
mock_agent_task = [
False,
{
"args": {
"case_id": "1111111",
"hostname": "myhostname",
"user_handle": "[email protected]",
},
"task_type": "tracer_flare",
"uuid": "d53fc8a4-8820-47a2-aa7d-d565582feb81",
},
]
mock_flare_send_request = FlareSendRequest(
case_id="1111111", hostname="myhostname", email="[email protected]"
)

def setUp(self):
self.flare_uuid = uuid.uuid4()
Expand All @@ -57,7 +48,7 @@ def test_single_process_success(self):
"""
ddlogger = get_logger("ddtrace")

self.flare.prepare(self.mock_agent_config)
self.flare.prepare("DEBUG")

file_handler = self._get_handler()
valid_logger_level = self.flare._get_valid_logger_level(DEBUG_LEVEL_INT)
Expand All @@ -70,7 +61,7 @@ def test_single_process_success(self):

# Sends request to testagent
# This just validates the request params
self.flare.send(self.mock_agent_task)
self.flare.send(self.mock_flare_send_request)

def test_single_process_partial_failure(self):
"""
Expand All @@ -83,7 +74,7 @@ def test_single_process_partial_failure(self):
# Mock the partial failure
with mock.patch("json.dump") as mock_json:
mock_json.side_effect = Exception("file issue happened")
self.flare.prepare(self.mock_agent_config)
self.flare.prepare("DEBUG")

file_handler = self._get_handler()
assert file_handler is not None
Expand All @@ -93,7 +84,7 @@ def test_single_process_partial_failure(self):
assert os.path.exists(self.flare_file_path)
assert not os.path.exists(self.config_file_path)

self.flare.send(self.mock_agent_task)
self.flare.send(self.mock_flare_send_request)

def test_multiple_process_success(self):
"""
Expand All @@ -103,10 +94,10 @@ def test_multiple_process_success(self):
num_processes = 3

def handle_agent_config():
self.flare.prepare(self.mock_agent_config)
self.flare.prepare("DEBUG")

def handle_agent_task():
self.flare.send(self.mock_agent_task)
self.flare.send(self.mock_flare_send_request)

# Create multiple processes
for _ in range(num_processes):
Expand Down Expand Up @@ -134,19 +125,19 @@ def test_multiple_process_partial_failure(self):
"""
processes = []

def do_tracer_flare(agent_config, agent_task):
self.flare.prepare(agent_config)
def do_tracer_flare(prep_request, send_request):
self.flare.prepare(prep_request)
# Assert that only one process wrote its file successfully
# We check for 2 files because it will generate a log file and a config file
assert 2 == len(os.listdir(self.flare_dir))
self.flare.send(agent_task)
self.flare.send(send_request)

# Create successful process
p = multiprocessing.Process(target=do_tracer_flare, args=(self.mock_agent_config, self.mock_agent_task))
p = multiprocessing.Process(target=do_tracer_flare, args=("DEBUG", self.mock_flare_send_request))
processes.append(p)
p.start()
# Create failing process
p = multiprocessing.Process(target=do_tracer_flare, args=(None, self.mock_agent_task))
p = multiprocessing.Process(target=do_tracer_flare, args=(None, self.mock_flare_send_request))
processes.append(p)
p.start()
for p in processes:
Expand All @@ -158,7 +149,7 @@ def test_no_app_logs(self):
file, just the tracer logs
"""
app_logger = Logger(name="my-app", level=DEBUG_LEVEL_INT)
self.flare.prepare(self.mock_agent_config)
self.flare.prepare("DEBUG")

app_log_line = "this is an app log"
app_logger.debug(app_log_line)
Expand Down

0 comments on commit c46f79b

Please sign in to comment.