# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import abc
import re
import sys
from pathlib
import Path
from threading
import Thread
import sentry_sdk
from mozversioncontrol
import (
InvalidRepoPath,
MissingUpstreamRepo,
MissingVCSTool,
get_repository_object,
)
from mach.telemetry
import is_telemetry_enabled
from mach.util
import get_state_dir
# DSN handed to sentry_sdk.init() by register_sentry(). Project page:
# https://sentry.io/organizations/mozilla/projects/mach/
_SENTRY_DSN = "https://5cfe351fb3a24e8d82c751252b48722b@o1069899.ingest.sentry.io/6250014"
class ErrorReporter(abc.ABC):
    """Interface for objects that report exceptions to error-tracking software.

    The class derives from ``abc.ABC`` because ``@abc.abstractmethod`` is only
    enforced when the metaclass is ``ABCMeta``; without it the decorator is
    silently ignored and this incomplete base class could be instantiated.
    Subclasses must implement ``report_exception``.
    """

    @abc.abstractmethod
    def report_exception(self, exception):
        """Report the exception to remote error-tracking software."""
class SentryErrorReporter(ErrorReporter):
    """Error reporter that forwards exceptions to Sentry."""

    def report_exception(self, exception):
        """Capture ``exception`` with the Sentry SDK and return the SDK's result."""
        captured = sentry_sdk.capture_exception(exception)
        return captured
class NoopErrorReporter(ErrorReporter):
    """Error reporter that discards every exception it is given.

    Used in cases where error reporting is specifically disabled, such as
    when telemetry hasn't been allowed.
    """

    def report_exception(self, exception):
        """Ignore ``exception``; nothing is reported and ``None`` is returned."""
        return None
def register_sentry(argv, settings, topsrcdir: Path):
    """Set up Sentry error reporting for this mach invocation.

    Returns a ``NoopErrorReporter`` when telemetry is not enabled for
    ``settings``. Otherwise the background check for local mach modifications
    is started, the Sentry SDK is initialised with an event filter, the
    command line is recorded as a breadcrumb and a ``SentryErrorReporter`` is
    returned.
    """
    global _is_unmodified_mach_core_thread

    if not is_telemetry_enabled(settings):
        return NoopErrorReporter()

    def before_send(event, _hint):
        # Every outgoing event goes through _process_event(), which may drop
        # it (by returning None) or scrub it.
        return _process_event(event, topsrcdir)

    # The check runs in a daemon thread; _process_event() joins the thread
    # before reading the result.
    checker = Thread(target=_is_unmodified_mach_core, args=[topsrcdir], daemon=True)
    _is_unmodified_mach_core_thread = checker
    checker.start()

    sentry_sdk.init(_SENTRY_DSN, before_send=before_send)

    command_line = " ".join(argv)
    sentry_sdk.add_breadcrumb(message="./mach {}".format(command_line))
    return SentryErrorReporter()
def _process_event(sentry_event, topsrcdir: Path):
    """Decide whether ``sentry_event`` is sent and scrub it if it is.

    Returning ``None`` causes the event to be dropped:
    https://docs.sentry.io/platforms/python/configuration/filtering/#using-beforesend
    Otherwise the scrubbed event, tagged with a "release" derived from the
    base revision, is returned.
    """
    repo = _get_repository_object(topsrcdir)
    if repo is None:
        # We don't know the repo state, so we don't know if mach files are
        # unmodified.
        return None

    base_ref = repo.base_ref_as_hg()
    if not base_ref:
        # If we don't know which revision this exception is attached to, then
        # it's not worth sending.
        return None

    # Wait for the background modification check before reading its verdict.
    _is_unmodified_mach_core_thread.join()
    if not _is_unmodified_mach_core_result:
        return None

    # Scrubbing steps, applied in this order.
    sentry_event = _settle_mach_module_id(sentry_event, topsrcdir)
    sentry_event = _patch_absolute_paths(sentry_event, topsrcdir)
    sentry_event = _delete_server_name(sentry_event, topsrcdir)

    sentry_event["release"] = "hg-rev-{}".format(base_ref)
    return sentry_event
def _settle_mach_module_id(sentry_event, _):
    """Replace generated mach command module ids with a stable placeholder.

    Sentry groups issues according to the stack frames and their associated
    "module" properties. However, one of the modules is being reported like
    "mach.commands.26a828ef5164403eaff4305ab4cb0fab" (with a generated id).
    The generated id is replaced with the static string "<generated>" so that
    grouping behaves as expected.

    Every exception listed in the event is processed, and events or
    exceptions that carry no stack frames are left untouched instead of
    raising. The event is modified in place and returned; the second argument
    is unused.
    """
    generated_module = re.compile(r"mach\.commands\.[a-f0-9]{32}")

    exceptions = (sentry_event.get("exception") or {}).get("values") or ()
    for exception in exceptions:
        frames = (exception.get("stacktrace") or {}).get("frames") or ()
        for frame in frames:
            module = frame.get("module")
            if not module:
                continue
            frame["module"] = generated_module.sub(
                "mach.commands.<generated>", module
            )
    return sentry_event
def _patch_absolute_paths(sentry_event, topsrcdir: Path):
    """Scrub machine-specific absolute paths from ``sentry_event``.

    As discussed here (https://bugzilla.mozilla.org/show_bug.cgi?id=1636251#c28),
    we remove usernames from file names on a best-effort basis. The most likely
    place for usernames to manifest in Sentry information is within absolute
    paths, such as: "/home/mitch/dev/firefox/mach".

    The state directory and the source directory are replaced with "<...>"
    placeholders and the home directory with "~". Note that this is a blanket
    find-and-replace over every string (and dict key) in the event, which may
    have ill effects if a path is, by happenstance, a substring of some other
    value within the Sentry event.

    Dicts are patched in place, lists are rebuilt; the patched event is
    returned.
    """
    # Matches one path separator of either flavour: "/" or "\".
    either_slash = r"[\/\\]"
    slash_regex = re.compile(either_slash)

    def compile_needle(target):
        # Paths in the Sentry event aren't consistent:
        # * On *nix, they're mostly forward slashes.
        # * On *nix, not all absolute paths start with a leading forward slash.
        # * On Windows, they're mostly backslashes.
        # * On Windows, `.extra."sys.argv"` uses forward slashes.
        # * The Python variables in-scope captured by the Sentry report may be
        #   inconsistent, even for a single path. For example, on
        #   Windows, Mach calculates the state_dir as "C:\Users\<user>/.mozbuild".
        #
        # So every separator in the search string matches both slash variants.
        # Everything between separators is escaped so that regex
        # metacharacters in directory names (".", "+", "(", ...) are matched
        # literally instead of mis-matching or failing to compile.
        pieces = [re.escape(piece) for piece in slash_regex.split(target)]
        if not any(pieces):
            # Nothing but separators (or an empty string): such a pattern
            # would match at every position of every string.
            return None
        if target.startswith("/"):
            # Make the initial slash optional.
            pattern = either_slash + "?" + either_slash.join(pieces[1:])
        else:
            pattern = either_slash.join(pieces)
        return re.compile(pattern, re.IGNORECASE)

    substitutions = []
    for target_path, replacement in (
        (str(get_state_dir()), "<statedir>"),
        (str(topsrcdir), "<topsrcdir>"),
        (str(Path.home()), "~"),
    ):
        # Sentry converts "vars" to their "representations". When paths are in
        # local variables on Windows, "C:\Users\MozillaUser\Desktop" becomes
        # "'C:\\Users\\MozillaUser\\Desktop'". To still catch this case, we
        # "repr" the path and scrub the beginning and end quotes, then
        # find-and-replace on that as well.
        repr_path = repr(target_path)[1:-1]
        # dict.fromkeys() drops the duplicate when both spellings are equal.
        for target in dict.fromkeys((target_path, repr_path)):
            needle = compile_needle(target)
            if needle is not None:
                substitutions.append((needle, replacement))

    def scrub(text):
        # Apply the substitutions in the order they were registered.
        for needle, replacement in substitutions:
            text = needle.sub(replacement, text)
        return text

    def recursive_patch(value):
        if isinstance(value, list):
            return [recursive_patch(item) for item in value]
        if isinstance(value, dict):
            for key in list(value):
                patched = recursive_patch(value.pop(key))
                # Only string keys can contain a path.
                value[scrub(key) if isinstance(key, str) else key] = patched
            return value
        if isinstance(value, str):
            return scrub(value)
        return value

    return recursive_patch(sentry_event)
def _delete_server_name(sentry_event, _):
    """Remove the "server_name" entry from ``sentry_event`` and return the event.

    A missing "server_name" is tolerated: raising ``KeyError`` here would
    abort the event processing for an event that simply has nothing to
    remove. The event is modified in place; the second argument is unused.
    """
    sentry_event.pop("server_name", None)
    return sentry_event
def _get_repository_object(topsrcdir: Path):
    """Return the repository object for ``topsrcdir``, or ``None``.

    When ``InvalidRepoPath`` or ``MissingVCSTool`` is raised, a warning is
    printed to stderr and ``None`` is returned instead.
    """
    try:
        repo = get_repository_object(str(topsrcdir))
    except (InvalidRepoPath, MissingVCSTool) as e:
        print(f"Warning: {e}", file=sys.stderr)
        repo = None
    return repo
def _is_unmodified_mach_core(topsrcdir: Path):
    """Record whether mach is unmodified compared to the public tree.

    To avoid submitting Sentry events for errors caused by user's
    local changes, we attempt to detect if mach (or code affecting mach)
    has been modified in the user's local state:
    * In a revision off of a "ancestor to central" revision, or:
    * In the working, uncommitted state.

    If "$topsrcdir/mach" and "*.py" haven't been touched, then we can be
    pretty confident that the Mach behaviour that caused the exception
    also exists in the public tree.

    Nothing is returned: the verdict is stored in the module-level
    ``_is_unmodified_mach_core_result``. It is ``False`` whenever the state
    cannot be determined.
    """
    global _is_unmodified_mach_core_result

    repo = _get_repository_object(topsrcdir)
    if repo is None:
        # No repository object is available, so we don't know whether the
        # mach files are unmodified.
        _is_unmodified_mach_core_result = False
        return

    try:
        files = set(repo.get_outgoing_files()) | set(repo.get_changed_files())
    except MissingUpstreamRepo:
        # If we don't know the upstream state, we don't know if the mach files
        # have been unmodified.
        _is_unmodified_mach_core_result = False
        return

    _is_unmodified_mach_core_result = not any(
        file == "mach" or file.endswith(".py") for file in files
    )
# Thread running _is_unmodified_mach_core(); assigned by register_sentry().
_is_unmodified_mach_core_thread = None
# Verdict stored by _is_unmodified_mach_core(); None until the check has run.
_is_unmodified_mach_core_result = None