# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
# some parts of this originally taken from /testing/talos/talos/output.py
"""output raptor test results""" import copy import json import os import warnings from abc import ABCMeta, abstractmethod from collections.abc import Iterable
import filters import six from logger.logger import RaptorLogger from utils import flatten
def set_browser_meta(self, browser_name, browser_version):
    """Set the browser metadata for the perfherder data.

    :param browser_name: value stored on ``self.browser_name``
    :param browser_version: value stored on ``self.browser_version``
    """
    self.browser_name, self.browser_version = browser_name, browser_version
def summarize_supporting_data(self): """
Supporting data was gathered outside of the main raptor test; it will be kept
separate from the main raptor test results. Summarize it appropriately.
supporting_data = { 'type': 'data-type', 'test': 'raptor-test-ran-when-data-was-gathered', 'unit': 'unit that the values are in', 'summarize-values': True/False, 'suite-suffix-type': True/False, 'values': { 'name': value_dict, 'nameN': value_dictN
}
}
More specifically, subtest supporting data will look like this:
We want to treat each value as a 'subtest'; andfor the overall aggregated
test result the summary value is dependent on the unit. An exception is
raised in case we don't know about the specified unit. """ if self.supporting_data isNone: return
# suite name will be name of the actual raptor test that ran, plus the type of # supporting data i.e. 'raptor-speedometer-geckoview-power'
vals = []
subtests = []
for result in self.results: if result["name"] == data_set["test"]:
suite["extraOptions"] = result["extra_options"] break
support_data_by_type[data_type]["suites"].append(suite) for measurement_name, value_info in data_set["values"].items(): # Subtests are expected to be specified in a dictionary, this # provides backwards compatibility with the old method
value = value_info ifnot isinstance(value_info, dict):
value = {"values": value_info}
new_subtest = {} if value.get("subtest-prefix-type", True):
new_subtest["name"] = data_type + "-" + measurement_name else:
new_subtest["name"] = measurement_name
if len(subtests) >= 1 and data_set.get("summarize-values", True):
suite["value"] = self.construct_summary(
vals, testname="supporting_data", unit=data_set["unit"]
)
# split the supporting data by type, there will be one # perfherder output per type for data_type in support_data_by_type:
data = support_data_by_type[data_type] if self.browser_name:
data["application"] = {"name": self.browser_name} if self.browser_version:
data["application"]["version"] = self.browser_version
self.summarized_supporting_data.append(data)
return
def output(self, test_names):
    """Output the summarized results to file and as perfherder data json.

    Results are written to ``raptor.json`` and screen captures to
    ``screenshots.html``. Both files are placed in the parent directory of
    ``MOZ_UPLOAD_DIR`` when that environment variable is set, otherwise in the
    current working directory.

    :param test_names: names of the tests that were run; every summarized
        suite name (with its last ``.``-separated component removed) has to be
        a substring of at least one of them.
    :return: tuple ``(success, total_perfdata)``. ``success`` is False when
        there are no summarized results or when a suite could not be matched
        against ``test_names``. ``total_perfdata`` is 1 when a PERFHERDER_DATA
        line was logged for the regular results and 0 otherwise.
    """
    upload_dir = os.getenv("MOZ_UPLOAD_DIR")
    if upload_dir:
        # i.e. testing/mozharness/build/raptor.json locally; in production it will
        # be at /tasks/task_*/build/ (where it will be picked up by mozharness later
        # and made into a tc artifact accessible in treeherder as perfherder-data.json)
        out_dir = os.path.dirname(upload_dir)
    else:
        out_dir = os.getcwd()
    results_path = os.path.join(out_dir, "raptor.json")
    screenshot_path = os.path.join(out_dir, "screenshots.html")

    success = True
    if self.summarized_results == {}:
        success = False
        LOG.error(
            "no summarized raptor results found for any of %s"
            % ", ".join(test_names)
        )
    else:
        for suite in self.summarized_results["suites"]:
            if "gecko-profile" in suite.get("extraOptions", []):
                LOG.info("gecko profiling enabled")
                suite["shouldAlert"] = False

            # as we do navigation, tname could end in .<alias>
            # test_names doesn't have tname, so strip the last component.
            # NOTE(review): a suite name without any "." yields an empty
            # tname here, which is a substring of every test name - confirm
            # that this is the intended matching behaviour.
            tname = ".".join(suite["name"].split(".")[:-1])

            # Since test names might have been modified, check if
            # part of the test name exists in the test_names list entries
            if not any(tname in test for test in test_names):
                success = False
                LOG.error("no summarized raptor results found for %s" % (tname))

        # NOTE(review): this writes only the top-level keys; the file is
        # rewritten with the full json dump further down.
        with open(results_path, "w") as f:
            for result in self.summarized_results:
                f.write("%s\n" % result)

    if len(self.summarized_screenshots) > 0:
        with open(screenshot_path, "w") as f:
            for result in self.summarized_screenshots:
                f.write("%s\n" % result)
        LOG.info("screen captures can be found locally at: %s" % screenshot_path)

    # now that we've checked for screen captures too, if there were no actual
    # test results we can bail out here
    if self.summarized_results == {}:
        return success, 0

    test_type = self.summarized_results["suites"][0].get("type", "")
    output_perf_data = True
    not_posting = "- not posting regular test results for perfherder"
    if test_type == "scenario":
        # if a resource-usage flag was supplied the perfherder data
        # will still be output from output_supporting_data
        LOG.info("scenario test type was run %s" % not_posting)
        output_perf_data = False

    if self.browser_name:
        self.summarized_results["application"] = {"name": self.browser_name}
        if self.browser_version:
            self.summarized_results["application"]["version"] = self.browser_version

    total_perfdata = 0
    if output_perf_data:
        # if we have supporting data i.e. power, we ONLY want those measurements
        # dumped out. TODO: Bug 1515406 - Add option to output both supplementary
        # data (i.e. power) and the regular Raptor test result
        # Both are already available as separate PERFHERDER_DATA json blobs
        if len(self.summarized_supporting_data) == 0:
            LOG.info("PERFHERDER_DATA: %s" % json.dumps(self.summarized_results))
            total_perfdata = 1
        else:
            LOG.info(
                "supporting data measurements exist - only posting those to perfherder"
            )

    # use a context manager so the results file is flushed and closed
    # deterministically instead of leaking the handle
    with open(results_path, "w") as results_file:
        json.dump(self.summarized_results, results_file, indent=2, sort_keys=True)
    LOG.info("results can also be found locally at: %s" % results_path)

    return success, total_perfdata
def output_supporting_data(self, test_names):
    """Output the already summarized supporting data.

    Supporting data was gathered outside of the main raptor test; it has already
    been summarized, now output it appropriately.

    We want to output supporting data in a completely separate perfherder json blob
    and in a corresponding file artifact. This way, supporting data can be ingested
    as its own test suite in perfherder and alerted upon if desired; kept outside of
    the test results from the actual Raptor test which was run when the supporting
    data was gathered.

    One ``raptor-<type>.json`` file is written per entry of
    ``self.summarized_supporting_data``, in the parent directory of
    ``MOZ_UPLOAD_DIR`` when that environment variable is set, otherwise in the
    current working directory.

    :param test_names: names of the tests that were run; only used for the
        error message when there is no supporting data.
    :return: tuple ``(success, total_perfdata)``; ``(False, 0)`` when there is
        no summarized supporting data, otherwise ``True`` and the number of
        PERFHERDER_DATA blobs that were logged.
    """
    if len(self.summarized_supporting_data) == 0:
        LOG.error(
            "no summarized supporting data found for %s" % ", ".join(test_names)
        )
        return False, 0

    # os.getenv() rather than os.environ[...]: an unset MOZ_UPLOAD_DIR must
    # fall back to the current working directory instead of raising KeyError
    upload_dir = os.getenv("MOZ_UPLOAD_DIR")
    if upload_dir:
        # i.e. testing/mozharness/build/raptor.json locally; in production it will
        # be at /tasks/task_*/build/ (where it will be picked up by mozharness later
        # and made into a tc artifact accessible in treeherder as perfherder-data.json)
        out_dir = os.path.dirname(upload_dir)
    else:
        out_dir = os.getcwd()

    total_perfdata = 0
    for next_data_set in self.summarized_supporting_data:
        data_type = next_data_set["suites"][0]["type"]
        results_path = os.path.join(out_dir, "raptor-%s.json" % data_type)

        # dump data to raptor-data.json artifact; the context manager makes
        # sure the file is closed before moving on to the next data set
        with open(results_path, "w") as results_file:
            json.dump(next_data_set, results_file, indent=2, sort_keys=True)

        # the output that treeherder expects to find
        LOG.info("PERFHERDER_DATA: %s" % json.dumps(next_data_set))
        LOG.info(
            "%s results can also be found locally at: %s"
            % (data_type, results_path)
        )
        total_perfdata += 1

    return True, total_perfdata
def construct_summary(self, vals, testname, unit=None): def _filter(vals, value=None): if value isNone: return [i for i, j in vals] return [i for i, j in vals if j == value]
if testname.startswith("raptor-v8_7"): return 100 * filters.geometric_mean(_filter(vals))
if testname == "speedometer3":
score = None for val, name in vals: if name == "score":
score = val if score isNone: raise Exception("Unable to find score for Speedometer 3") return score
if"speedometer"in testname:
correctionFactor = 3
results = _filter(vals) # speedometer has 16 tests, each of these are made of up 9 subtests # and a sum of the 9 values. We receive 160 values, and want to use # the 16 test values, not the sub test values. if len(results) != 160: raise Exception( "Speedometer has 160 subtests, found: %s instead" % len(results)
)
if testname.startswith("supporting_data"): ifnot unit: return sum(_filter(vals))
if unit == "%": return filters.mean(_filter(vals))
if unit in ("W", "MHz"): # For power in Watts and clock frequencies, # summarize with the sum of the averages
allavgs = [] for val, subtest in vals: if"avg"in subtest:
allavgs.append(val) if allavgs: return sum(allavgs)
raise Exception( "No average measurements found for supporting data with W, or MHz unit ."
)
if unit in ["KB", "mAh", "mWh"]: return sum(_filter(vals))
raise NotImplementedError("Unit %s not suported" % unit)
if len(vals) > 1: # pylint: disable=W1633 return round(filters.geometric_mean(_filter(vals)), 2)
def parseUnknown(self, test): # Attempt to flatten whatever we've been given # Dictionary keys will be joined by dashes, arrays represent # represent "iterations"
_subtests = {}
ifnot isinstance(test["measurements"], dict): raise Exception( "Expected a dictionary with a single entry as the name of the test. " "The value of this key should be the data."
) if test.get("custom_data", False): # If custom_data is true it means that the data was already flattened # and the test name is included in the keys (the test might have # also removed it if it's in the subtest_name_filters option). Handle this # exception by wrapping it
test["measurements"] = {test["name"]: [test["measurements"]]}
for iteration in test["measurements"][list(test["measurements"].keys())[0]]:
flattened_metrics = None ifnot test.get("custom_data", False):
flattened_metrics = flatten(iteration, ())
for metric, value in (flattened_metrics or iteration).items(): if metric in METRIC_BLOCKLIST: # TODO: Add an option in the test manifest for this continue if metric notin _subtests: # subtest not added yet, first pagecycle, so add new one
_subtests[metric] = { "unit": test["subtest_unit"], "alertThreshold": float(test["alert_threshold"]), "lowerIsBetter": test["subtest_lower_is_better"], "name": metric, "replicates": [],
}
updated_metric = value ifnot isinstance(value, Iterable):
updated_metric = [value] # pylint: disable=W1633
_subtests[metric]["replicates"].extend(
[round(x, 3) for x in updated_metric]
)
def parseSpeedometerOutput(self, test):
    """Build the subtests and summary values for a speedometer run.

    Each benchmark 'index' becomes a subtest; each pagecycle / iteration of the
    test has multiple values per index/subtest. The first element of every page
    cycle in ``test["measurements"]["speedometer"]`` is read as a mapping of
    subtest name to its replicates.

    :param test: test result dictionary; ``measurements``, ``subtest_unit``,
        ``alert_threshold`` and ``subtest_lower_is_better`` are read from it.
    :return: tuple ``(subtests, vals)``; ``subtests`` is the list of subtest
        dictionaries in reverse name order, each with the median of its
        replicates as ``value``; ``vals`` holds ``[value, name]`` pairs in the
        same order.
    """
    collected = {}
    for page_cycle in test["measurements"]["speedometer"]:
        # for each pagecycle, gather all related replicates per subtest
        for sub, replicates in page_cycle[0].items():
            if sub not in collected:
                # first pagecycle that reports this subtest, so add a new entry
                collected[sub] = {
                    "unit": test["subtest_unit"],
                    "alertThreshold": float(test["alert_threshold"]),
                    "lowerIsBetter": test["subtest_lower_is_better"],
                    "name": sub,
                    "shouldAlert": True,
                    "replicates": [],
                }
            # pylint: disable=W1633
            rounded = [round(x, 3) for x in replicates]
            collected[sub]["replicates"].extend(rounded)

    subtests = []
    vals = []
    for name in sorted(collected, reverse=True):
        entry = collected[name]
        entry["value"] = filters.median(entry["replicates"])
        subtests.append(entry)
        vals.append([entry["value"], name])

    return subtests, vals
def parseAresSixOutput(self, test): """ https://browserbench.org/ARES-6/
Every pagecycle will perform the tests from the index page
We have 4 main tests per index page:
- Air, Basic, Babylon, ML
- andfrom these 4 above, ares6 generates the Overall results
Each test has 3 subtests (firstIteration, steadyState, averageWorstCase):
- _steadyState
- _firstIteration
- _averageWorstCase
Each index page will run 5 cycles, this is set in glue.js
On GeckoView's /ARES6/index.html this is what we see for Air - First Iteration:
- on 1st test cycle : 660.80 (rounded from 660.8000000000002)
- on 2nd test cycle : 643.63 , this is coming from
(660.8000000000002 + 626.4599999999999) / 2 ,
then rounded up to a precision of 2 decimals
- on 3rd test cycle : 647.63 this is coming from
(660.8000000000002 + 626.4599999999999 + 655.6199999999999) / 3 ,
then rounded up to a precision of 2 decimals
- and so on """
_subtests = {}
data = test["measurements"]["ares6"]
for page_cycle in data: for sub, replicates in page_cycle[0].items(): # for each pagecycle, build a list of subtests and append all related replicates if sub notin _subtests: # subtest not added yet, first pagecycle, so add new one
_subtests[sub] = { "unit": test["subtest_unit"], "alertThreshold": float(test["alert_threshold"]), "lowerIsBetter": test["subtest_lower_is_better"], "name": sub, "replicates": [],
} # pylint: disable=W1633
_subtests[sub]["replicates"].extend(
[float(round(x, 3)) for x in replicates]
)
vals = [] for name, test in _subtests.items():
test["value"] = filters.mean(test["replicates"])
vals.append([test["value"], name])
_subtests = {}
data = test["measurements"]["motionmark"] for page_cycle in data:
page_cycle_results = page_cycle[0]
# TODO: this assumes a single suite is run
suite = list(page_cycle_results)[0] for sub in page_cycle_results[suite].keys(): try: # pylint: disable=W1633
replicate = round(
float(
page_cycle_results[suite][sub]["complexity"]["bootstrap"][ "median"
] if"ramp"in test["name"] else page_cycle_results[suite][sub]["frameLength"][ "average"
]
),
3,
) except TypeError as e:
LOG.warning( "[{}][{}] : {} - {}".format(suite, sub, e.__class__.__name__, e)
)
if sub notin _subtests: # subtest not added yet, first pagecycle, so add new one
_subtests[sub] = { "unit": test["subtest_unit"], "alertThreshold": float(test["alert_threshold"]), "lowerIsBetter": test["subtest_lower_is_better"], "name": sub, "replicates": [],
}
_subtests[sub]["replicates"].extend([replicate])
vals = []
subtests = []
names = list(_subtests)
names.sort(reverse=True) for name in names:
_subtests[name]["value"] = filters.median(_subtests[name]["replicates"])
subtests.append(_subtests[name])
vals.append([_subtests[name]["value"], name])
return subtests, vals
def parseYoutubePlaybackPerformanceOutput(self, test):
    """Parse the metrics for the Youtube playback performance test.

    For each video measured values for dropped and decoded frames will be
    available from the benchmark site.

    With each page cycle / iteration of the test multiple values can be present.

    Raptor will calculate the percentage of dropped frames to decoded frames.
    All those three values will then be emitted as separate sub tests.

    :param test: test result dictionary; the first key of ``measurements`` that
        contains "youtube-playback" selects the data to parse.
    :return: tuple ``(subtests, vals)``; ``vals`` only holds the subtests whose
        name ends with "X_dropped_frames".
    :raises Exception: when no measurement key contains "youtube-playback".
    """
    matching_keys = [
        key for key in test["measurements"].keys() if "youtube-playback" in key
    ]
    if len(matching_keys) == 0:
        raise Exception("No measurements found for youtube test!")
    data = test["measurements"].get(matching_keys[0])

    collected = {}

    def create_subtest_entry(
        name,
        value,
        unit=test["subtest_unit"],
        lower_is_better=test["subtest_lower_is_better"],
    ):
        # add the value to the replicates of the named subtest, creating the
        # subtest entry the first time the name shows up
        if name not in collected:
            collected[name] = {
                "name": name,
                "unit": unit,
                "lowerIsBetter": lower_is_better,
                "replicates": [],
            }
        entry = collected[name]
        entry["replicates"].append(value)

        alert_on = self.subtest_alert_on
        if alert_on is not None and name in alert_on:
            LOG.info("turning on subtest alerting for measurement type: %s" % name)
            entry["shouldAlert"] = True

    failed_tests = []
    for pagecycle in data:
        for raw_name, frames in pagecycle[0].items():
            if frames["decodedFrames"] == 0:
                failed_tests.append(
                    "%s test Failed. decodedFrames %s droppedFrames %s."
                    % (raw_name, frames["decodedFrames"], frames["droppedFrames"])
                )

            try:
                percent_dropped = (
                    float(frames["droppedFrames"]) / frames["decodedFrames"] * 100.0
                )
            except ZeroDivisionError:
                # if no frames have been decoded the playback failed completely
                percent_dropped = 100.0

            # Remove the not needed "PlaybackPerf." prefix from each test
            video = raw_name.split("PlaybackPerf", 1)[-1]
            if video.startswith("."):
                video = video[1:]

            # all three values are emitted as separate subtests
            create_subtest_entry(
                "{}_decoded_frames".format(video),
                frames["decodedFrames"],
                lower_is_better=False,
            )
            create_subtest_entry(
                "{}_dropped_frames".format(video), frames["droppedFrames"]
            )
            create_subtest_entry("{}_%_dropped_frames".format(video), percent_dropped)

    # Check if any youtube test failed and report it
    if len(failed_tests) > 0:
        for failure in failed_tests:
            LOG.warning("Youtube sub-test FAILED: %s" % failure)
        # TODO: Change this to raise Exception after we figure out the failing tests
        LOG.warning(
            "Youtube playback sub-tests failed!!! "
            "Not submitting results to perfherder!"
        )

    subtests = []
    vals = []
    for name in sorted(collected, reverse=True):
        entry = collected[name]
        # pylint: disable=W1633
        entry["value"] = round(float(filters.median(entry["replicates"])), 2)
        subtests.append(entry)
        # only include dropped_frames values, without the %_dropped_frames values
        if name.endswith("X_dropped_frames"):
            vals.append([entry["value"], name])

    return subtests, vals
def parseUnityWebGLOutput(self, test):
    """Build the subtests and summary values for a unity-webgl run.

    Example output (this is one page cycle):

    {'name': 'raptor-unity-webgl-firefox',
     'type': 'benchmark',
     'measurements': {
        'unity-webgl': [
            [
                '[{"benchmark":"Mandelbrot GPU","result":1035361},...}]'
            ]
        ]
     },
     'lower_is_better': False,
     'unit': 'score'
    }

    :param test: test result dictionary; the first element of every page cycle
        in ``test["measurements"]["unity-webgl"]`` is decoded as JSON.
    :return: tuple ``(subtests, vals)``; subtests are in reverse name order and
        carry the median of their replicates as ``value``.
    """
    collected = {}
    for page_cycle in test["measurements"]["unity-webgl"]:
        # for each pagecycle, gather all related replicates per benchmark
        for item in json.loads(page_cycle[0]):
            sub = item["benchmark"]
            if sub not in collected:
                # first pagecycle that reports this subtest, so add a new entry
                collected[sub] = {
                    "unit": test["subtest_unit"],
                    "alertThreshold": float(test["alert_threshold"]),
                    "lowerIsBetter": test["subtest_lower_is_better"],
                    "name": sub,
                    "replicates": [],
                }
            collected[sub]["replicates"].append(item["result"])

    subtests = []
    vals = []
    for name in sorted(collected, reverse=True):
        entry = collected[name]
        entry["value"] = filters.median(entry["replicates"])
        subtests.append(entry)
        vals.append([entry["value"], name])

    return subtests, vals
def parseWebaudioOutput(self, test):
    """Build the subtests and summary values for a webaudio run.

    Each benchmark 'index' becomes a subtest; each pagecycle / iteration of the
    test has multiple values per index/subtest.

    The first element of every page cycle is a JSON encoded list, i.e. this is
    the start of ONE pagecycle of webaudio:

    {u'name': u'raptor-webaudio-firefox', u'type': u'benchmark', u'measurements':
    {u'webaudio': [[u'[{"name":"Empty testcase","duration":26,"buffer":{}},
    {"name":"Simple gain test without resampling","duration":66,"buffer":{}},
    {"name":"Simple gain test without resampling (Stereo)","duration":71,"buffer":{}},
    ...

    :param test: test result dictionary; ``measurements``, ``subtest_unit``,
        ``alert_threshold`` and ``subtest_lower_is_better`` are read from it.
    :return: tuple ``(subtests, vals)``; subtests are in reverse name order and
        carry the median of their replicates as ``value``; ``vals`` holds
        ``[value, name]`` pairs in the same order.
    """
    _subtests = {}
    for page_cycle in test["measurements"]["webaudio"]:
        # for each pagecycle, gather all related replicates per test case
        for item in json.loads(page_cycle[0]):
            sub = item["name"]
            duration = item["duration"]
            if sub not in _subtests:
                # subtest not added yet, first pagecycle, so add new one
                _subtests[sub] = {
                    "unit": test["subtest_unit"],
                    "alertThreshold": float(test["alert_threshold"]),
                    "lowerIsBetter": test["subtest_lower_is_better"],
                    "name": sub,
                    "replicates": [],
                }
            # pylint: disable=W1633
            _subtests[sub]["replicates"].append(float(round(duration, 3)))

    vals = []
    subtests = []
    for name in sorted(_subtests, reverse=True):
        _subtests[name]["value"] = filters.median(_subtests[name]["replicates"])
        subtests.append(_subtests[name])
        vals.append([_subtests[name]["value"], name])

    # the leftover debug print of the subtests was dropped; nothing is
    # written to stdout here any more
    return subtests, vals
def parseWASMGodotOutput(self, test):
    """Build the subtests and summary values for a wasm-godot run.

    Example of the measurements:

        {u'wasm-godot': [
            {
              "name": "wasm-instantiate",
              "time": 349
            },{
              "name": "engine-instantiate",
              "time": 1263
            ...
            }]}

    The first element of every page cycle is iterated and each item's ``name``
    and ``time`` are read.

    :param test: test result dictionary; ``measurements``, ``subtest_unit``,
        ``alert_threshold`` and ``subtest_lower_is_better`` are read from it.
    :return: tuple ``(subtests, vals)``; subtests are in reverse name order and
        carry the median of their replicates as ``value``; ``vals`` holds
        ``[value, name]`` pairs in the same order.
    """
    _subtests = {}
    # the leftover debug print of the raw measurements was dropped; nothing
    # is written to stdout here any more
    for page_cycle in test["measurements"]["wasm-godot"]:
        # for each pagecycle, gather all related replicates per step
        for item in page_cycle[0]:
            sub = item["name"]
            if sub not in _subtests:
                # subtest not added yet, first pagecycle, so add new one
                _subtests[sub] = {
                    "unit": test["subtest_unit"],
                    "alertThreshold": float(test["alert_threshold"]),
                    "lowerIsBetter": test["subtest_lower_is_better"],
                    "name": sub,
                    "replicates": [],
                }
            _subtests[sub]["replicates"].append(item["time"])

    vals = []
    subtests = []
    for name in sorted(_subtests, reverse=True):
        _subtests[name]["value"] = filters.median(_subtests[name]["replicates"])
        subtests.append(_subtests[name])
        vals.append([_subtests[name]["value"], name])

    return subtests, vals
def parseSunspiderOutput(self, test):
    """Build the subtests and summary values for a sunspider run.

    The first element of every page cycle in
    ``test["measurements"]["sunspider"]`` is read as a mapping of subtest name
    to its replicates; replicates are rounded to 3 decimals and stored as
    floats.

    :param test: test result dictionary; ``measurements``, ``subtest_unit``,
        ``alert_threshold`` and ``subtest_lower_is_better`` are read from it.
    :return: tuple ``(subtests, vals)``; subtests are in reverse name order and
        carry the mean of their replicates as ``value``; ``vals`` holds
        ``[value, name]`` pairs in the same order.
    """
    by_name = {}
    for page_cycle in test["measurements"]["sunspider"]:
        # for each pagecycle, gather all related replicates per subtest
        for sub, replicates in page_cycle[0].items():
            if sub not in by_name:
                # first pagecycle that reports this subtest, so add a new entry
                by_name[sub] = {
                    "unit": test["subtest_unit"],
                    "alertThreshold": float(test["alert_threshold"]),
                    "lowerIsBetter": test["subtest_lower_is_better"],
                    "name": sub,
                    "replicates": [],
                }
            # pylint: disable=W1633
            as_floats = [float(round(x, 3)) for x in replicates]
            by_name[sub]["replicates"].extend(as_floats)

    subtests, vals = [], []
    for name in sorted(by_name, reverse=True):
        entry = by_name[name]
        entry["value"] = filters.mean(entry["replicates"])
        subtests.append(entry)
        vals.append([entry["value"], name])

    return subtests, vals
def parseAssortedDomOutput(self, test): # each benchmark 'index' becomes a subtest; each pagecycle / iteration # of the test has multiple values
# this is the format we receive the results in from the benchmark # i.e. this is ONE pagecycle of assorted-dom ('test' is a valid subtest name btw):
# the 'total' is provided for us from the benchmark; the overall score will be the mean of # the totals from all pagecycles; but keep all the subtest values for the logs/json
_subtests = {}
data = test["measurements"]["assorted-dom"] for pagecycle in data: for _sub, _value in pagecycle[0].items(): # build a list of subtests and append all related replicates if _sub notin _subtests: # subtest not added yet, first pagecycle, so add new one
_subtests[_sub] = { "unit": test["subtest_unit"], "alertThreshold": float(test["alert_threshold"]), "lowerIsBetter": test["subtest_lower_is_better"], "name": _sub, "replicates": [],
}
_subtests[_sub]["replicates"].extend([_value])
vals = []
subtests = []
names = list(_subtests)
names.sort(reverse=True) for name in names: # pylint: disable=W1633
_subtests[name]["value"] = float(
round(filters.median(_subtests[name]["replicates"]), 2)
)
subtests.append(_subtests[name]) # only use the 'total's to compute the overall result if name == "total":
vals.append([_subtests[name]["value"], name])
_subtests = {}
data = test["measurements"]["jetstream2"] for page_cycle in data: for sub, replicates in page_cycle[0].items(): # for each pagecycle, build a list of subtests and append all related replicates if sub notin _subtests: # subtest not added yet, first pagecycle, so add new one
_subtests[sub] = { "unit": test["subtest_unit"], "alertThreshold": float(test["alert_threshold"]), "lowerIsBetter": test["subtest_lower_is_better"], "name": sub, "replicates": [],
} # pylint: disable=W1633
_subtests[sub]["replicates"].extend(
[float(round(x, 3)) for x in replicates]
)
vals = []
subtests = []
names = list(_subtests)
names.sort(reverse=True) for name in names:
_subtests[name]["value"] = filters.mean(_subtests[name]["replicates"])
subtests.append(_subtests[name])
vals.append([_subtests[name]["value"], name])
return subtests, vals
def parseWASMMiscOutput(self, test): """
{u'wasm-misc': [
[[{u'name': u'validate', u'time': 163.44000000000005},
...
{u'name': u'__total__', u'time': 63308.434904788155}]],
...
[[{u'name': u'validate', u'time': 129.42000000000002},
{u'name': u'__total__', u'time': 63181.24089257814}]]
]} """
_subtests = {}
data = test["measurements"]["wasm-misc"] for page_cycle in data: for item in page_cycle[0]: # for each pagecycle, build a list of subtests and append all related replicates
sub = item["name"] if sub notin _subtests: # subtest not added yet, first pagecycle, so add new one
_subtests[sub] = { "unit": test["subtest_unit"], "alertThreshold": float(test["alert_threshold"]), "lowerIsBetter": test["subtest_lower_is_better"], "name": sub, "replicates": [],
}
_subtests[sub]["replicates"].append(item["time"])
vals = []
subtests = []
names = list(_subtests)
names.sort(reverse=True) for name in names:
_subtests[name]["value"] = filters.median(_subtests[name]["replicates"])
subtests.append(_subtests[name])
vals.append([_subtests[name]["value"], name])
_subtests = {}
data = test["measurements"]["matrix-react-bench"] for page_cycle in data: # Each cycle is formatted like `[[iterations, val], [iterations, val2], ...]` for iteration, val in page_cycle:
sub = f"{iteration}-iterations"
_subtests.setdefault(
sub,
{ "unit": test["subtest_unit"], "alertThreshold": float(test["alert_threshold"]), "lowerIsBetter": test["subtest_lower_is_better"], "name": sub, "replicates": [],
},
)
# The values produced are far too large for perfherder
_subtests[sub]["replicates"].append(val)
vals = []
subtests = []
names = list(_subtests)
names.sort(reverse=True) for name in names:
_subtests[name]["value"] = filters.mean(_subtests[name]["replicates"])
subtests.append(_subtests[name])
vals.append([_subtests[name]["value"], name])
for metric_name, data in test["measurements"].items(): if"perfstat-"notin metric_name and metric_name != "twitch-animation": # Only keep perfstats or the run metric continue if metric_name == "twitch-animation":
metric = "run" else:
metric = metric_name
# data is just an array with a single number for polymorphic_page_cycle in data: # Each benchmark cycle is formatted like `[val]`, perfstats # are not ifnot isinstance(polymorphic_page_cycle, list):
page_cycle = [polymorphic_page_cycle] else:
page_cycle = polymorphic_page_cycle for val in page_cycle:
_subtests.setdefault(
metric,
{ "unit": test["subtest_unit"], "alertThreshold": float(test["alert_threshold"]), "lowerIsBetter": test["subtest_lower_is_better"], "name": metric, "replicates": [],
},
)
# The values produced are far too large for perfherder
_subtests[metric]["replicates"].append(val)
vals = []
subtests = []
names = list(_subtests)
names.sort(reverse=True) for name in names:
_subtests[name]["value"] = filters.mean(_subtests[name]["replicates"])
subtests.append(_subtests[name])
vals.append([_subtests[name]["value"], name])
return subtests, vals
class RaptorOutput(PerftestOutput): """class for raptor output"""
# check if we actually have any results if len(self.results) == 0:
LOG.error("no raptor test results found for %s" % ", ".join(test_names)) return
for test in self.results:
vals = []
subtests = []
suite = { "name": test["name"], "type": test["type"], "tags": test.get("tags", []), "extraOptions": test["extra_options"], "subtests": subtests, "lowerIsBetter": test["lower_is_better"], "unit": test["unit"], "alertThreshold": float(test["alert_threshold"]),
}
# Check if optional properties have been set by the test if hasattr(test, "alert_change_type"):
suite["alertChangeType"] = test["alert_change_type"]
# if cold load add that info to the suite result dict; this will be used later # when combining the results from multiple browser cycles into one overall result if test["cold"] isTrue:
suite["cold"] = True
suite["browser_cycle"] = int(test["browser_cycle"])
suite["expected_browser_cycles"] = int(test["expected_browser_cycles"])
suite["tags"].append("cold") else:
suite["tags"].append("warm")
suites.append(suite)
# process results for pageloader type of tests if test["type"] in ("pageload", "scenario"): # each test can report multiple measurements per pageload # each measurement becomes a subtest inside the 'suite'
# this is the format we receive the results in from the pageload test # i.e. one test (subtest) in raptor-firefox-tp6:
if test["cold"] isFalse: # for warm page-load, ignore first value due to 1st pageload noise
LOG.info( "ignoring the first %s value due to initial pageload noise"
% measurement_name
)
filtered_values = filters.ignore_first(
new_subtest["replicates"], 1
) else: # for cold-load we want all the values
filtered_values = new_subtest["replicates"]
# for pageload tests that measure TTFI: TTFI is not guaranteed to be available # everytime; the raptor measure.js webext will substitute a '-1' value in the # cases where TTFI is not available, which is acceptable; however we don't want # to include those '-1' TTFI values in our final results calculations if measurement_name == "ttfi":
filtered_values = filters.ignore_negative(filtered_values) # we've already removed the first pageload value; if there aren't any more # valid TTFI values available for this pageload just remove it from results if len(filtered_values) < 1: continue
# if 'alert_on' is set for this particular measurement, then we want to set the # flag in the perfherder output to turn on alerting for this subtest if self.subtest_alert_on isnotNone: if measurement_name in self.subtest_alert_on:
LOG.info( "turning on subtest alerting for measurement type: %s"
% measurement_name
)
new_subtest["shouldAlert"] = True else: # Explicitly set `shouldAlert` to False so that the measurement # is not alerted on. Otherwise Perfherder defaults to alerting
LOG.info( "turning off subtest alerting for measurement type: %s"
% measurement_name
)
new_subtest["shouldAlert"] = False
else:
LOG.error( "output.summarize received unsupported test results type for %s"
% test["name"]
) return
suite["tags"].append(test["type"])
# for benchmarks there is generally more than one subtest in each cycle # and a benchmark-specific formula is needed to calculate the final score # we no longer summarise the page load as we alert on individual subtests # and the geometric mean was found to be of little value if len(subtests) > 1 and test["type"] != "pageload":
suite["value"] = self.construct_summary(vals, testname=test["name"])
def combine_browser_cycles(self): """
At this point the results have been summarized; however there may have been multiple
browser cycles (i.e. cold load). In which case the results have one entry for each
test for each browser cycle. For each test we need to combine the results for all
browser cycles into one results entry.
For example, this is what the summarized results suites list looks like from a test that
was run with multiple (two) browser cycles:
Need to combine those into a single entry. """ # check if we actually have any results if len(self.results) == 0:
LOG.info( "error: no raptor test results found, so no need to combine browser cycles"
) return
# first build a list of entries that need to be combined; and as we do that, mark the # original suite entry as up for deletion, so once combined we know which ones to del # note that summarized results are for all tests that were ran in the session, which # could include cold and / or warm page-load and / or benchnarks combined
suites_to_be_combined = []
combined_suites = []
for _index, suite in enumerate(self.summarized_results.get("suites", [])): if suite.get("cold") isNone: continue
# now create a new suite entry that will have all the results from # all of the browser cycles, but in one result entry for each test
combined_suites = {}
for next_suite in suites_to_be_combined:
suite_name = next_suite["details"]["name"]
browser_cycle = next_suite["details"]["browser_cycle"]
LOG.info( "combining results from browser cycle %d for %s"
% (browser_cycle, suite_name)
) if suite_name notin combined_suites: # first browser cycle so just take entire entry to start with
combined_suites[suite_name] = next_suite["details"]
LOG.info("created new combined result with intial cycle replicates") # remove the 'cold', 'browser_cycle', and 'expected_browser_cycles' info # as we don't want that showing up in perfherder data output del combined_suites[suite_name]["cold"] del combined_suites[suite_name]["browser_cycle"] del combined_suites[suite_name]["expected_browser_cycles"] else: # subsequent browser cycles, already have an entry; just add subtest replicates for next_subtest in next_suite["details"]["subtests"]: # find the existing entry for that subtest in our new combined test entry
found_subtest = False for combined_subtest in combined_suites[suite_name]["subtests"]: if combined_subtest["name"] == next_subtest["name"]: # add subtest (measurement type) replicates to the combined entry
LOG.info("adding replicates for %s" % next_subtest["name"])
combined_subtest["replicates"].extend(
next_subtest["replicates"]
)
found_subtest = True # the subtest / measurement type wasn't found in our existing combined # result entry; if it is for the same suite name add it - this could happen
--> --------------------
--> maximum size reached
--> --------------------
Messung V0.5
¤ Dauer der Verarbeitung: 0.75 Sekunden
(vorverarbeitet)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.