Commit b87a5d54 authored by Gero Vermaas, committed by Jochem Schulenklopper
Browse files

Include max memory used in metrics and show it in dashboard.

parent 7cbf5211
......@@ -5,8 +5,7 @@ import json
import sys
import os
# sys.path.append("/opt/dependencies")
sys.path.append("/var/task/build")
sys.path.insert(0, "/var/task")
import requests
AWS_LAMBDA_RUNTIME_API = os.environ.get("AWS_LAMBDA_RUNTIME_API")
......
......@@ -7,6 +7,7 @@ import boto3
from result_store import RunResultsStore
import result_comparator
import scientist_utils
logging.basicConfig()
LOGGER = logging.getLogger(__name__)
......@@ -33,7 +34,6 @@ def lambda_handler(event, context):
experiment_name,
run_id,
)
metrics = {}
candidate_details = candidates[candidate_id]
try:
......@@ -42,10 +42,12 @@ def lambda_handler(event, context):
FunctionName=candidate_details["arn"],
InvocationType="RequestResponse",
Payload=json.dumps(event["payload"]),
LogType="Tail",
)
duration = int((time.time() - start) * 1000)
metrics["duration"] = duration
metrics = scientist_utils.extract_metrics_from_log(response["LogResult"])
metrics["response_time"] = duration
# Report results of control
run_result = {}
......
......@@ -15,8 +15,7 @@
"editable": true,
"gnetId": null,
"graphTooltip": 0,
"id": 1,
"iteration": 1571682382972,
"iteration": 1576587931701,
"links": [],
"panels": [
{
......@@ -29,7 +28,7 @@
"fill": 1,
"gridPos": {
"h": 8,
"w": 9,
"w": 6,
"x": 0,
"y": 0
},
......@@ -152,14 +151,144 @@
{
"aliasColors": {},
"bars": false,
"cacheTimeout": "",
"dashLength": 10,
"dashes": false,
"datasource": "metrics",
"fill": 1,
"gridPos": {
"h": 8,
"w": 9,
"x": 9,
"w": 6,
"x": 6,
"y": 0
},
"id": 26,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "connected",
"options": {},
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"bucketAggs": [
{
"fake": true,
"field": "metric_name.keyword",
"id": "4",
"settings": {
"min_doc_count": 1,
"order": "desc",
"orderBy": "_term",
"size": "10"
},
"type": "terms"
},
{
"fake": true,
"field": "implementation_name.keyword",
"id": "5",
"settings": {
"min_doc_count": 1,
"order": "desc",
"orderBy": "_term",
"size": "10"
},
"type": "terms"
},
{
"field": "@timestamp",
"id": "2",
"settings": {
"interval": "1m",
"min_doc_count": 0,
"trimEdges": 0
},
"type": "date_histogram"
}
],
"metrics": [
{
"field": "value",
"id": "1",
"meta": {},
"settings": {},
"type": "avg"
}
],
"query": "_type:metric AND experiment_name:\"${experiment_name}\" AND metric_name:max_memory_used",
"refId": "A",
"timeField": "@timestamp"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Max memory used",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "decmbytes",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "metrics",
"fill": 1,
"gridPos": {
"h": 8,
"w": 6,
"x": 12,
"y": 0
},
"id": 14,
......@@ -719,7 +848,7 @@
"link": false,
"linkTargetBlank": true,
"linkTooltip": "",
"linkUrl": "{{SERVICE_ENDPOINT}}/showdiff?run_id=${__cell}",
"linkUrl": "https://gy486ju769.execute-api.eu-west-1.amazonaws.com/v1/showdiff?run_id=${__cell}",
"mappingType": 1,
"pattern": "run_id",
"type": "date"
......@@ -736,7 +865,7 @@
"link": true,
"linkTargetBlank": true,
"linkTooltip": "",
"linkUrl": "{{SERVICE_ENDPOINT}}/showdiff?run_id=${__cell}",
"linkUrl": "https://gy486ju769.execute-api.eu-west-1.amazonaws.com/v1/showdiff?run_id=${__cell}",
"pattern": "/Metric/",
"thresholds": [],
"type": "string",
......@@ -1124,7 +1253,7 @@
"link": false,
"linkTargetBlank": true,
"linkTooltip": "",
"linkUrl": "{{SERVICE_ENDPOINT}}/showdiff?run_id=${__cell}",
"linkUrl": "https://gy486ju769.execute-api.eu-west-1.amazonaws.com/v1/showdiff?run_id=${__cell}",
"mappingType": 1,
"pattern": "run_id",
"type": "date"
......@@ -1141,7 +1270,7 @@
"link": true,
"linkTargetBlank": true,
"linkTooltip": "",
"linkUrl": "{{SERVICE_ENDPOINT}}/showdiff?run_id=${__cell}",
"linkUrl": "https://gy486ju769.execute-api.eu-west-1.amazonaws.com/v1/showdiff?run_id=${__cell}",
"pattern": "/Metric/",
"thresholds": [],
"type": "string",
......@@ -1529,7 +1658,7 @@
"link": false,
"linkTargetBlank": true,
"linkTooltip": "",
"linkUrl": "{{SERVICE_ENDPOINT}}/showdiff?run_id=${__cell}",
"linkUrl": "https://gy486ju769.execute-api.eu-west-1.amazonaws.com/v1/showdiff?run_id=${__cell}",
"mappingType": 1,
"pattern": "run_id",
"type": "date"
......@@ -1546,7 +1675,7 @@
"link": true,
"linkTargetBlank": true,
"linkTooltip": "",
"linkUrl": "{{SERVICE_ENDPOINT}}/showdiff?run_id=${__cell}",
"linkUrl": "https://gy486ju769.execute-api.eu-west-1.amazonaws.com/v1/showdiff?run_id=${__cell}",
"pattern": "/Metric/",
"thresholds": [],
"type": "string",
......@@ -1610,8 +1739,9 @@
"allValue": null,
"current": {
"selected": true,
"text": "tutorial-round",
"value": "tutorial-round"
"tags": [],
"text": "rounding-float",
"value": "rounding-float"
},
"hide": 0,
"includeAll": false,
......@@ -1620,7 +1750,7 @@
"name": "experiment_name",
"options": [
{
"selected": true,
"selected": false,
"text": "tutorial-round",
"value": "tutorial-round"
},
......@@ -1640,7 +1770,7 @@
"value": "tutorial-pbkdf2"
},
{
"selected": false,
"selected": true,
"text": "rounding-float",
"value": "rounding-float"
},
......@@ -1703,5 +1833,5 @@
"timezone": "",
"title": "Experiments",
"uid": "experiments",
"version": 6
"version": 1
}
\ No newline at end of file
......@@ -727,24 +727,6 @@
"resolved": "https://registry.npmjs.org/arr-union/-/arr-union-3.1.0.tgz",
"integrity": "sha1-45sJrqne+Gao8gbiiK9jkZuuOcQ="
},
"array-filter": {
"version": "0.0.1",
"resolved": "https://registry.npmjs.org/array-filter/-/array-filter-0.0.1.tgz",
"integrity": "sha1-fajPLiZijtcygDWB/SH2fKzS7uw=",
"dev": true
},
"array-map": {
"version": "0.0.0",
"resolved": "https://registry.npmjs.org/array-map/-/array-map-0.0.0.tgz",
"integrity": "sha1-iKK6tz0c97zVwbEYoAP2b2ZfpmI=",
"dev": true
},
"array-reduce": {
"version": "0.0.0",
"resolved": "https://registry.npmjs.org/array-reduce/-/array-reduce-0.0.0.tgz",
"integrity": "sha1-FziZ0//Rx9k4PkR5Ul2+J4yrXys=",
"dev": true
},
"array-union": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/array-union/-/array-union-1.0.2.tgz",
......@@ -3116,12 +3098,6 @@
"graceful-fs": "^4.1.6"
}
},
"jsonify": {
"version": "0.0.0",
"resolved": "https://registry.npmjs.org/jsonify/-/jsonify-0.0.0.tgz",
"integrity": "sha1-LHS27kHZPKUbe1qu6PUDYx0lKnM=",
"dev": true
},
"jsonpath-plus": {
"version": "0.16.0",
"resolved": "https://registry.npmjs.org/jsonpath-plus/-/jsonpath-plus-0.16.0.tgz",
......@@ -4832,15 +4808,6 @@
"jsonfile": "^4.0.0",
"universalify": "^0.1.0"
}
},
"jsonfile": {
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/jsonfile/-/jsonfile-4.0.0.tgz",
"integrity": "sha1-h3Gq4HmbZAdrdmQPygWPnBDjPss=",
"dev": true,
"requires": {
"graceful-fs": "^4.1.6"
}
}
}
},
......@@ -4909,16 +4876,10 @@
"integrity": "sha1-2kL0l0DAtC2yypcoVxyxkMmO/qM="
},
"shell-quote": {
"version": "1.6.1",
"resolved": "https://registry.npmjs.org/shell-quote/-/shell-quote-1.6.1.tgz",
"integrity": "sha1-9HgZSczkAmlxJ0MOo7PFR29IF2c=",
"dev": true,
"requires": {
"array-filter": "~0.0.0",
"array-map": "~0.0.0",
"array-reduce": "~0.0.0",
"jsonify": "~0.0.0"
}
"version": "1.7.2",
"resolved": "https://registry.npmjs.org/shell-quote/-/shell-quote-1.7.2.tgz",
"integrity": "sha512-mRz/m/JVscCrkMyPqHc/bczi3OQHkLTqXHEFu0zDhK/qfv3UcOA4SVmRCLmos4bhjr9ekVQubj/R7waKapmiQg==",
"dev": true
},
"shortid": {
"version": "2.2.15",
......
......@@ -4,9 +4,9 @@
"devDependencies": {
"serverless-domain-manager": "^3.2.1",
"serverless-offline": "^3.33.0",
"serverless-python-requirements": "4.2.5",
"serverless-offline-python": "^3.21.3",
"serverless-pseudo-parameters": "^2.4.0",
"serverless-python-requirements": "^4.2.5",
"serverless-vpc-plugin": "^0.8.0"
},
"dependencies": {
......
boto3==1.9.90
boto3==1.10.39
dynamodb-json==1.3
elasticsearch>=7.0.0,<8.0.0
pyyaml==3.13
......
......@@ -15,6 +15,7 @@ LOGGER = logging.getLogger(__name__)
LOGGER.setLevel(logging.DEBUG)
CANDIDATE_COMPARE_TRIGGERED_TABLE = os.environ["CANDIDATE_COMPARE_TRIGGERED_TABLE"]
MAX_MEMORY_USED = "max_memory_used"
RESPONSE_TIME = "response_time"
REQUEST_COUNT = "request_count"
......@@ -65,13 +66,13 @@ def do_comparisons(control, candidate, comparators, metrics_publisher):
LOGGER.exception("Error while executing comparison")
def publish_response_time(metrics_publisher, run_result):
def publish_run_metric(metrics_publisher, run_result, mentric_name):
metrics_publisher.publish_value(
RESPONSE_TIME,
mentric_name,
run_result["experiment_name"],
run_result["run_type"],
run_result["implementation_name"],
int(run_result["run_metrics"]["duration"]),
int(run_result["run_metrics"][mentric_name]),
)
......@@ -86,7 +87,8 @@ def compare_experiment_results(run_id):
control_list = [result for result in run_results if result["run_type"] == "control"]
if control_list:
control = control_list[0]
publish_response_time(metrics_publisher, control)
publish_run_metric(metrics_publisher, control, RESPONSE_TIME)
publish_run_metric(metrics_publisher, control, MAX_MEMORY_USED)
candidate_list = [
result for result in run_results if result["run_type"] == "candidate"
......@@ -97,7 +99,8 @@ def compare_experiment_results(run_id):
comparators = control["comparators"]
for candidate in candidate_list:
publish_response_time(metrics_publisher, candidate)
publish_run_metric(metrics_publisher, candidate, RESPONSE_TIME)
publish_run_metric(metrics_publisher, candidate, MAX_MEMORY_USED)
LOGGER.debug("candidate: %s", candidate)
LOGGER.debug("comparators: %s", comparators)
......
......@@ -16,6 +16,7 @@ import yaml
from dynamodb_json import json_util as dyndb_json_util
from metrics_publisher import MetricsPubisher
import scientist_utils
from result_store import RunResultsStore
......@@ -103,18 +104,20 @@ def find_experiment(experiments, path):
def execute_control(experiment_run, control_arn, experiment_details, experiment_name):
LOGGER.info("invoking control")
# Run the control
metrics = {}
start_control = time.time()
response = LAMBDA_CLIENT.invoke(
FunctionName=control_arn,
InvocationType="RequestResponse",
Payload=json.dumps(experiment_run["payload"]),
LogType="Tail",
)
control_duration = int((time.time() - start_control) * 1000)
metrics = scientist_utils.extract_metrics_from_log(response["LogResult"])
LOGGER.info("Execution of control took %s ms", control_duration)
metrics["duration"] = control_duration
metrics[RESPONSE_TIME] = control_duration
response_body = response["Payload"].read().decode("utf-8")
......@@ -281,7 +284,7 @@ def lambda_handler(event, response_url):
scientist_duration = int((time.time() - start) * 1000)
metrics_publisher = MetricsPubisher()
overhead = scientist_duration - control_run_result["metrics"]["duration"]
overhead = scientist_duration - control_run_result["metrics"][RESPONSE_TIME]
metrics_publisher.publish_value(
SCIENTIST_ADDED_RESPONSE_TIME,
experiment_name,
......
import base64
import logging
import re
logging.basicConfig()
LOGGER = logging.getLogger(__name__)
LOGGER.setLevel(logging.DEBUG)

# Key under which the peak memory figure is returned to callers.
MAX_MEMORY_USED = "max_memory_used"

# Captures the number and the unit of the "Max Memory Used" field on the
# REPORT line. At least one digit and one unit character are required so a
# malformed field is reported as "not found" instead of failing inside int().
_MAX_MEMORY_PATTERN = re.compile(r"REPORT.*\tMax Memory Used: (\d+) (\w+)")


def extract_metrics_from_log(log_lines_base64):
    """Extract the peak memory usage from a base64-encoded log tail.

    Args:
        log_lines_base64: Base64-encoded log text (``str`` or ``bytes``)
            that is expected to contain a ``REPORT`` line with a
            ``Max Memory Used: <number> <unit>`` field.

    Returns:
        A dict with the single key ``MAX_MEMORY_USED`` mapped to an int.
        The number is returned as reported, except that a value reported
        in ``GB`` is multiplied by 1024.

    Raises:
        ValueError: If the decoded log contains no ``REPORT`` line with a
            ``Max Memory Used`` field.
    """
    # NOTE(review): the log tail is presumably cut at a byte offset, so it
    # may start in the middle of a multi-byte character. Replacing
    # undecodable bytes keeps the REPORT line readable instead of raising
    # UnicodeDecodeError - confirm against the Lambda Invoke documentation.
    log_lines = base64.b64decode(log_lines_base64).decode("utf-8", errors="replace")

    match = _MAX_MEMORY_PATTERN.search(log_lines)
    if match is None:
        # Fail with an explicit message rather than an AttributeError on None.
        raise ValueError("No REPORT line with 'Max Memory Used' found in log output")

    max_memory_used = int(match.group(1))
    memory_unit = match.group(2)
    LOGGER.info("Memory usage was %s %s", max_memory_used, memory_unit)

    if memory_unit == "GB":
        max_memory_used *= 1024
    return {MAX_MEMORY_USED: max_memory_used}
......@@ -12,6 +12,10 @@ plugins:
custom:
pythonRequirements:
dockerizePip: non-linux
slim: true
# Hack to make sure that boto3 lib is included in the package
noDeploy:
- xx
logRetentionInDays: 30
gateway: "test"
aws_account_id: "#{AWS::AccountId}"
......@@ -144,7 +148,6 @@ provider:
package:
exclude:
- python-3.7.5.tgz
- node_modules/** # Since none of the Lambdas use Node.
layers:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment