Commit 94c65013 authored by Tristan Maat, committed by Lachlan

Use autopep8 to fix trivial pylint issues

This mostly resolves issues with rampant inconsistent indentation and
ignored line lengths, but also replaces some comparison idioms that
are not recommended.
parent 69d6d065
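For illustration, a minimal sketch of two kinds of rewrite that appear in the diff below: replacing discouraged comparisons to singletons with plain truthiness checks, and switching eager str.format() calls inside logging to lazy %-style arguments. The variable values here are made up.

    import logging

    commits = []
    commit = "0123abcd"   # made-up placeholder values
    start = True
    test = "build-bst"

    # Before: comparison to a singleton, which pylint flags (singleton-comparison)
    if start == True:
        commits.append(commit)

    # After: plain truthiness check, as the diff rewrites it
    if start:
        commits.append(commit)

    # Before: eager formatting - the message is built even if logging discards it
    logging.error("Duplicate benchmarking test will be ignored: {}".format(test))

    # After: lazy %-style arguments, formatted only when the record is emitted
    logging.error("Duplicate benchmarking test will be ignored: %s", test)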
......@@ -17,6 +17,7 @@ variables:
JOB_NAME_REF: $CI_JOB_NAME
stages:
- checks
- clean
- test-benchmarks
- test-debian-like-project
......@@ -25,6 +26,20 @@ stages:
- test-bstgen-100-square
- deploy
# Run pylint
pylint:
stage: checks
# We only want to run pylint on shared runners
tags:
- shared
image: python:3.7-stretch
before_script:
- "pip install -r requirements.txt"
- "pip install pylint"
script:
- "find . -path ./contrib -prune -o -name '*.py' -exec pylint --rcfile=.pylintrc {} +"
when: always
# Remove previous pipeline_caching
job:
stage: clean
......
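The pylint job above prunes ./contrib and passes every remaining *.py file to a single pylint invocation. For local runs, a rough Python equivalent of that shell command might look like the sketch below; the helper name and defaults are assumptions, not part of the repository, and pylint must be installed.

    import os
    import subprocess

    def run_pylint(root=".", exclude=("./contrib",), rcfile=".pylintrc"):
        """Lint all *.py files under root in one pylint call, skipping excluded trees."""
        sources = []
        for dirpath, dirnames, filenames in os.walk(root):
            # Prune excluded directories, mirroring `find . -path ./contrib -prune`
            dirnames[:] = [d for d in dirnames
                           if os.path.join(dirpath, d) not in exclude]
            sources.extend(os.path.join(dirpath, name)
                           for name in filenames if name.endswith(".py"))
        return subprocess.run(["pylint", "--rcfile=" + rcfile] + sources,
                              check=False).returncode

    if __name__ == "__main__":
        raise SystemExit(run_pylint())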
......@@ -46,6 +46,7 @@ import digest_results
# bs_branch - the branch of buildstream being considered.
# shas - shas to be tested.
def main():
# Define all the defaults explicitly
repo_path = 'https://gitlab.com/BuildStream/buildstream.git'
......@@ -58,7 +59,6 @@ def main():
keep_results = False
keep_path = "results.json"
def make_help(message):
return message + " (Default: %(default)s)"
......@@ -152,7 +152,7 @@ def main():
else:
logging.info("Repo path resolves remotely: %s", repo_path)
repo = git.Repo.clone_from(repo_path, temp_staging_area)
except git.exc.GitError as err:
except git.exc.GitError as err: # pylint: disable=no-member
logging.error("Unable to access git repository: %s", err)
sys.exit(1)
......@@ -165,7 +165,7 @@ def main():
if commit.hexsha in shas_to_be_tested:
commits.append(commit)
shas_found.append(commit.hexsha)
except git.exc.GitCommandError as err:
except git.exc.GitCommandError as err: # pylint: disable=no-member
logging.error("Could not find commits in repository '%s' for branch '%s':\n%s",
repo_path, bs_branch, err)
sys.exit(1)
......@@ -198,7 +198,7 @@ def main():
# Add tests to run
for test in tests_to_run:
if test in test_set:
logging.error("Duplicate benchmarking test will be ignored: {}".format(test))
logging.error("Duplicate benchmarking test will be ignored: %s", test)
else:
test_set.append(test)
......@@ -209,7 +209,7 @@ def main():
results_tmp_file = os.path.join(temp_staging_area, 'tmp_result')
try:
bst_benchmarks.main.run(config_files=test_set, debug=True,
bst_benchmarks.main.run(config_files=test_set, _debug=True,
keep_images=False, reuse_images=False,
output_file=results_tmp_file)
# pylint: disable=broad-except
......@@ -228,7 +228,8 @@ def main():
# Create temporary file for results digest
tmp_error = os.path.join(temp_staging_area, 'tmp_error')
digest_results.parse(files=[results_tmp_file], output_file=tmp_output, error_file=tmp_error)
# TODO: _error_file is not actually in use
digest_results.parse(files=[results_tmp_file], output_file=tmp_output, _error_file=tmp_error)
# Write output to requested outfile
with open(tmp_output, "r") as fin:
......
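The call sites above switch to the keyword names _debug and _error_file because the corresponding parameters were renamed in the callees; by default pylint's unused-argument check ignores argument names that start with an underscore, so the rename silences the warning while the TODO about _error_file still stands. A tiny illustrative sketch of the convention, with hypothetical names:

    def handle(record, _context):
        # _context is accepted for interface compatibility but intentionally unused;
        # the leading underscore keeps pylint's unused-argument check quiet.
        return record

    print(handle({"status": "ok"}, _context=None))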
......@@ -22,11 +22,14 @@
# buildstream benchmark script
import click
import bst_benchmarks.main
import logging
import sys
import click
import bst_benchmarks.main
@click.command()
@click.option('config_files', '--config-file', '-c', type=click.Path(),
help="YAML description of tests to run, overriding the defaults.",
......@@ -41,15 +44,16 @@ import sys
@click.option('output_file', '--output-file', '-o', type=click.Path(),
help="Output file definition", default=None)
def run(config_files, debug, keep_images, reuse_images, output_file):
if debug:
logging.basicConfig(stream=sys.stderr, level=logging.DEBUG)
else:
logging.basicConfig(stream=sys.stderr, level=logging.INFO)
bst_benchmarks.main.run(config_files, debug, keep_images, reuse_images, output_file)
if debug:
logging.basicConfig(stream=sys.stderr, level=logging.DEBUG)
else:
logging.basicConfig(stream=sys.stderr, level=logging.INFO)
bst_benchmarks.main.run(config_files, debug, keep_images, reuse_images, output_file)
try:
run(prog_name="benchmark")
# pylint: disable=unexpected-keyword-arg,no-value-for-parameter
run(prog_name="benchmark")
except RuntimeError as e:
sys.stderr.write("{}\n".format(e))
sys.exit(1)
sys.stderr.write("{}\n".format(e))
sys.exit(1)
......@@ -18,48 +18,47 @@
# Authors:
# Sam Thursfield <sam.thursfield@codethink.co.uk>
import collections
import os
import psutil
import subprocess
import psutil
# Returns a dict containing all of the collected host info
def get_host_info():
return {
'total_system_memory_GiB': get_memory_info(),
'processor_info': get_processor(),
'kernel_release': get_kernel_info()}
return {
'total_system_memory_GiB': get_memory_info(),
'processor_info': get_processor(),
'kernel_release': get_kernel_info()
}
# This returns a number
def get_memory_info():
return bytes_to_gib(psutil.virtual_memory().total)
return bytes_to_gib(psutil.virtual_memory().total)
# This argument is a number
# This returns a number
def bytes_to_gib(value): # Bytes to Gibibytes
return round(value / (1024 * 1024 * 1024), 2)
return round(value / (1024 * 1024 * 1024), 2)
# This returns a utf-8 string
def get_kernel_info():
cmd = ["uname", "-r"]
output = subprocess.check_output(cmd)
return str(output, encoding="utf-8").rstrip()
cmd = ["uname", "-r"]
output = subprocess.check_output(cmd)
return str(output, encoding="utf-8").rstrip()
# This returns a string
def get_processor():
cores = 0
with open('/proc/cpuinfo') as f:
for line in f:
if line.strip():
if line.rstrip('\n').startswith('model name'):
model_name = line.rstrip('\n').split(':')[1]
cores += 1
# This returns the model name as a string
# This is done as the name cannot be reliably parsed
# Due to different vendors and models having different naming conventions
return {"cpu_model_name": model_name, "cpu_cores": cores}
cores = 0
with open('/proc/cpuinfo') as f:
for line in f:
if line.strip():
if line.rstrip('\n').startswith('model name'):
model_name = line.rstrip('\n').split(':')[1]
cores += 1
# This returns the model name as a string
# This is done as the name cannot be reliably parsed
# Due to different vendors and models having different naming conventions
return {"cpu_model_name": model_name, "cpu_cores": cores}
......@@ -31,23 +31,23 @@ import statistics
def main():
directory = '.'
output_file = 'digest.mdwn'
error_file = 'error.mdwn'
files = list()
parser = argparse.ArgumentParser()
parser.add_argument("-d", "--directory",
help="Directory containing multiple results files (*.json), default is current directory.",
type=str)
parser.add_argument(
"-d",
"--directory",
help="Directory containing multiple results files (*.json), default is current directory.",
type=str)
parser.add_argument("-s", "--specific_results",
help="Path to a specific results set",
type=str)
parser.add_argument("-o", "--output_file",
help="Output file for results digest",
type=str)
args = parser.parse_args()
if bool(args.directory):
......@@ -56,23 +56,24 @@ def main():
if entry.name.endswith(".json"):
files.append(entry.path)
else:
logging.error("Specified directory does not exist", args.directory)
logging.error("Specified directory does not exist: %s", args.directory)
sys.exit(1)
if bool(args.specific_results):
if os.path.isfile(args.specific_results):
if args.specific_results.endswith(".json"):
files.append(args.specific_results)
else:
logging.error("Specific results file does not exist", args.specific_results)
logging.error("Specific results file does not exist: %s", args.specific_results)
sys.exit(1)
if bool(args.output_file):
output_file = args.output_file
output_file = args.output_file
parse(files, output_file, error_file)
def parse(files, output_file, error_file):
def parse(files, output_file, _error_file):
resultsdict = dict([])
error_buffer = []
for entry in files:
......@@ -96,7 +97,8 @@ def parse(files, output_file, error_file):
if 'buildstream_commit_date' in version:
if version["buildstream_commit_date"]:
version_date[str(version["buildstream_commit"])] = float(version["buildstream_commit_date"])
version_date[str(version["buildstream_commit"])] = float(
version["buildstream_commit_date"])
else:
version_date[str(version["buildstream_commit"])] = measurement_time
else:
......@@ -112,13 +114,12 @@ def parse(files, output_file, error_file):
# Check if measurements have been made
if "measurements" not in result:
logging.warning("Measurement corruption in: {}".format(f.name))
error_buffer.append("Measurement corruption in: {}\n".format(f.name))
logging.warning("Measurement corruption in: %s", f.name)
error_buffer.append("Measurement corruption in: %s\n", f.name)
continue
bs_ref = None
bs_commit = None
unique_ref = result["version"]
times = []
rss_kbs = []
# Iterate measurements and add
......@@ -131,10 +132,14 @@ def parse(files, output_file, error_file):
bs_commit = result["bs-sha"]
else:
if result["bs-ref"] != bs_ref:
logging.error("Buildstream reference changed from {} to {}: ".format(bs_ref, result["bs-ref"]))
logging.error(
"Buildstream reference changed from %s to %s: ",
bs_ref, result["bs-ref"])
bs_ref = result["bs-ref"]
if result["bs-sha"] != bs_commit:
logging.error("Buildstream commit changed from {} to {}: ".format(bs_commit, result["bs-sha"]))
logging.error(
"Buildstream commit changed from %s to %s: ",
bs_commit, result["bs-sha"])
bs_commit = result["bs-sha"]
if str(version) in version_ref:
......@@ -162,7 +167,15 @@ def parse(files, output_file, error_file):
ice_key = frozenset(key.items())
# Create a value for the entry
value = [version, commit, measurement_time, average_time, average_max_rss_kb, commit_time, times_sd, rss_kbs_sd]
value = [
version,
commit,
measurement_time,
average_time,
average_max_rss_kb,
commit_time,
times_sd,
rss_kbs_sd]
# Add the value to the accumulated values for a given key
if ice_key not in resultsdict:
......@@ -171,15 +184,14 @@ def parse(files, output_file, error_file):
else:
resultsdict[ice_key].append(value)
logging.debug(str(version) + ' ' + name + ' ' + str(commit) + ' '
+ str(measurement_time) + ' ' + str(average_time) + ' '
+ str(times_sd) + ' ' + str(average_max_rss_kb) + ' '
+ str(rss_kbs_sd))
logging.debug("%s %s %s %s %s %s %s %s", version,
name, commit, measurement_time, average_time,
times_sd, average_max_rss_kb, rss_kbs_sd)
except ValueError as error:
logging.error("Error during parse of {}: {}".format(file, error))
logging.error("Error during parse of %s: %s", file, error)
except ValueError as error:
logging.error("Failure to load {} as json file".format(file, error))
logging.error("Failure to load %s as json file: %s", file, error)
with open(output_file, 'w') as results_file:
timestr = time.strftime("%Y%m%d-%H%M%S")
......@@ -190,17 +202,25 @@ def parse(files, output_file, error_file):
results_file.write('\n')
for key, value in resultsdict.items():
dict_k = dict(key)
Test_description = "Test Version: {}, Test Name: {}\n".format(dict_k['version'], dict_k['name'])
results_file.write(Test_description)
test_description = "Test Version: {}, Test Name: {}\n".format(
dict_k['version'], dict_k['name'])
results_file.write(test_description)
list_a = list(value)
list_a.sort(key=lambda x: x[1])
times = list()
average_times = list()
average_kb = list()
for data_set in list_a:
results_file.write("time:, {}, average time to complete test (s):, {}, time standard deviation:, {}, average resident set size (kb):, {}, rss standard deviation {}\n"
.format(datetime.datetime.fromtimestamp(data_set[5]), data_set[3], data_set[6], data_set[4], data_set[7]))
results_file.write('\n')
results_file.write(
"time: {}, "
"average time to complete test (s): {}, "
"time standard deviation: {}, "
"average resident set size (kb):, {}, "
"rss standard deviation: {}\n" .format(
datetime.datetime.fromtimestamp(
data_set[5]),
data_set[3],
data_set[6],
data_set[4],
data_set[7]))
results_file.write('\n')
if __name__ == "__main__":
......
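parse() above groups results by test version and name. A dict cannot be used as a dictionary key, so the code builds frozenset(key.items()) as a hashable, order-independent key and later recovers the dict for reporting. A minimal sketch of that pattern, with made-up values:

    resultsdict = {}

    key = {'version': 'debian-like', 'name': 'build'}        # made-up test identity
    value = ['debian-like', 'abc123', 1561000000.0,           # mirrors the value list above:
             42.1, 120000.0, 1560990000.0, 0.4, 350.0]        # version, commit, times, averages, sds

    ice_key = frozenset(key.items())     # hashable and insensitive to key order
    if ice_key not in resultsdict:
        resultsdict[ice_key] = [value]
    else:
        resultsdict[ice_key].append(value)

    # Recover the original dict when writing the digest
    dict_k = dict(ice_key)
    print(dict_k['version'], dict_k['name'], len(resultsdict[ice_key]))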
......@@ -18,13 +18,10 @@
# Authors:
# Lachlan Mackenzie <lachlan.mackenzie@codethink.co.uk>
import os
import logging
import shutil
import tempfile
import yaml
from distutils.file_util import copy_file
from bst_benchmarks.config import DEFAULT_BASE_DOCKER_IMAGE, DEFAULT_BUILDSTREAM_REPO
# This function generates a benchmark configuration file that allows for
# multiple buildstream commits to be benchmarked individually.
......@@ -38,28 +35,35 @@ from distutils.file_util import copy_file
# bs_path - path to the Buildstream repo (url or local directory)
# docker_path - path to the Docker Image to be used
def generate_benchmark_configuration(output_file="generated.benchmark", list_of_shas=[], docker_version="30-latest", bs_branch='master', bs_path='https://gitlab.com/BuildStream/buildstream', docker_path='registry.gitlab.com/buildstream/buildstream-docker-images/testsuite-fedora'):
def generate_benchmark_configuration(
output_file="generated.benchmark",
list_of_shas=None,
docker_version="30-latest",
bs_branch='master',
bs_path=DEFAULT_BUILDSTREAM_REPO,
docker_path=DEFAULT_BASE_DOCKER_IMAGE):
if not list_of_shas:
list_of_shas = []
# Iterate through the list of shas and populate the stubbed entries
# with sha data from the entry, then write each entry to the new file
# entry
with open(output_file, 'w') as yaml_file:
version_default = {'version_defaults' : {'base_docker_image': docker_path,
'buildstream_repo': bs_path}}
version_default = {'version_defaults': {'base_docker_image': docker_path,
'buildstream_repo': bs_path}}
yaml.dump(version_default, yaml_file, default_flow_style=False)
yaml_file.write('\n\n')
configs = []
for entry in list_of_shas:
configs.append({'name': str(entry),
'base_docker_ref': docker_version,
'buildstream_ref': bs_branch,
'buildstream_commit': str(entry),
'buildstream_commit_date': str(entry.committed_date)})
'base_docker_ref': docker_version,
'buildstream_ref': bs_branch,
'buildstream_commit': str(entry),
'buildstream_commit_date': str(entry.committed_date)})
version = {'versions': configs}
yaml.dump(version, yaml_file, default_flow_style=False)
yaml_file.write('\n\n')
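One incidental fix above replaces the mutable default argument list_of_shas=[] with a None sentinel. A default list is created once, when the function is defined, and shared by every call that omits the argument, so mutations leak between calls. A short sketch of the pitfall and the sentinel idiom the new signature uses; the function names here are made up.

    def bad(shas=[]):        # the default list is shared across calls
        shas.append("sha")
        return shas

    def good(shas=None):     # sentinel, as generate_benchmark_configuration now does
        if not shas:
            shas = []
        shas.append("sha")
        return shas

    bad()
    print(bad())             # ['sha', 'sha'] - state leaked from the first call
    good()
    print(good())            # ['sha'] - each call gets a fresh list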
......@@ -21,19 +21,21 @@
# Lachlan Mackenzie <lachlan.mackenzie@codethink.co.uk>
import argparse
import token_file_processing
import logging
import tempfile
import glob
import ntpath
import get_sha_commits
import generate_benchmark_config
import datetime
import shutil
import git
import os
import operator
import git
import token_file_processing
import generate_benchmark_config
import get_sha_commits
# This command line executable acts as the central method for configuring
# the benchmark CI run. A number of elements are passed in to check that
# configuration settings are tenable - buildstream url, buildstream branch,
......@@ -45,6 +47,7 @@ import operator
# url_path - path to the buildstream repo.
# bs_branch - the branch of buildstream being considered.
def main():
output_file = 'generated_config.benchmark'
results_files = 'results_cache/'
......@@ -98,13 +101,19 @@ def main():
if bool(args.bespoke_start_sha) and bool(args.bespoke_end_sha):
start_sha = args.bespoke_start_sha
end_sha = args.bespoke_end_sha
if not generate_bespoke_buildstream_config(output_file, url_path, bs_branch, start_sha, end_sha):
if not generate_bespoke_buildstream_config(
output_file, url_path, bs_branch, start_sha, end_sha):
os.sys.exit(1)
else:
logging.error('Bespoke setting requested but start or end SHA not specified')
os.sys.exit(1)
else:
if not generate_buildstream_config(output_file, results_files, token_file, url_path, bs_branch):
if not generate_buildstream_config(
output_file,
results_files,
token_file,
url_path,
bs_branch):
os.sys.exit(1)
......@@ -128,15 +137,17 @@ def generate_bespoke_buildstream_config(output_file, url_path, bs_branch, start_
logging.error('No commits associate with given shas, check start/end SHA values')
return False
generate_benchmark_config.generate_benchmark_configuration(output_file=output_file, list_of_shas=commits, bs_branch=bs_branch)
except Exception as err:
generate_benchmark_config.generate_benchmark_configuration(
output_file=output_file, list_of_shas=commits, bs_branch=bs_branch)
# TODO: Figure out what this exception actually means
except Exception: # pylint: disable=broad-except
logging.error('Unable to generate bespoke benchmark configuration file')
return False
return True
def generate_buildstream_config(output_file, results_path, run_token_path, url_path, bs_branch):
start_sha = ''
def generate_buildstream_config(output_file, results_path, run_token_path, url_path, bs_branch):
try:
# Create temporary staging area
temp_staging_area = tempfile.mkdtemp(prefix='temp_staging_location')
......@@ -152,7 +163,7 @@ def generate_buildstream_config(output_file, results_path, run_token_path, url_p
# Init benchmark repo based upon parent directory
bench_mark_repo = git.Repo(search_parent_directories=True)
# Requested Buildstream most recent SHA reference
# Note this could change between starting the tests and completing
# so a single reference is used
......@@ -168,12 +179,11 @@ def generate_buildstream_config(output_file, results_path, run_token_path, url_p
token_data = token_file_processing.process_token_file(run_token_path)
print(token_data)
else:
logging.error('Unable to access token file: ', run_token_path)
logging.error('Unable to access token file: %s', run_token_path)
return False
except OSError as err:
logging.error('Unable to either access or generate configuration: ', results_path)
except OSError:
logging.error('Unable to either access or generate configuration: %s', results_path)
return False
# Check if results directory exists (if it doesn't we shouldn't presume to
# create it here). Walk the .json files in the directory, if they exist
......@@ -182,41 +192,57 @@ def generate_buildstream_config(output_file, results_path, run_token_path, url_p
# possible that an aborted processing has taken place and we need to recover.
archive_result = ''
if os.path.isdir(results_path):
for dirpath, dirnames, files in os.walk(results_path):
for _dirpath, _dirnames, files in os.walk(results_path):
if files:
list_of_files = glob.glob(results_path + '*.json')
file_times = {}
for file in list_of_files:
file_times[file] = datetime.datetime.strptime(os.path.basename(file), 'results-%Y-%m-%d-%H:%M:%S.json')
file_times[file] = datetime.datetime.strptime(
os.path.basename(file), 'results-%Y-%m-%d-%H:%M:%S.json')
if file_times:
latest_file = max(file_times.items(), key=operator.itemgetter(1))[0]
archive_result = ntpath.basename(latest_file)
else:
logging.info('No results files found from: ', results_path)
logging.info('No results files found from: %s', results_path)
else:
logging.info('No files found from: ', results_path)
logging.info('No files found from: %s', results_path)
else:
logging.error('Results path does not exist, will not generate configuration: ', results_path)
logging.error(
'Results path does not exist, will not generate configuration: %s',
results_path)
return False
# Check that token file is valid and if the last result file according to the token data is consistent
# with results directory then get the sha commits that are between the two including the last one. At the
# minute if there is an inconsistency between the last results file and that specified in the token file
#
# Check that token file is valid and if the last result file
# according to the token data is consistent with results
# directory then get the sha commits that are between the two
# including the last one. At the minute if there is an
# inconsistency between the last results file and that specified
# in the token file
try:
repo_path = repo.git.rev_parse("--show-toplevel")
if token_file_processing.verify_token_data(token_data, archive_result, results_path, repo_path):
if token_data[ 'build' ][ 'result' ] != archive_result:
logging.error('Last listed result not consistent with token file: ', token_data[ 'build' ][ 'result' ], archive_result)
if token_file_processing.verify_token_data(
token_data, archive_result, results_path, repo_path):
if token_data['build']['result'] != archive_result:
logging.error('Last listed result not consistent with token file \'%s\': %s',
token_data['build']['result'], archive_result)
return False
else:
logging.error('Token file fails verification')
return False
commits = get_sha_commits.get_list_of_commits(repo_path, token_data[ 'build' ][ 'bs_branch' ], token_data[ 'build' ][ 'bs_sha' ], buildstream_sha)
generate_benchmark_config.generate_benchmark_configuration(output_file=output_file, list_of_shas=commits, bs_branch=token_data[ 'build' ][ 'bs_branch' ])
except Exception as err:
logging.error('Unable to generate benchmark configuration file: ', token_data[ 'build' ][ 'result' ], archive_result)
commits = get_sha_commits.get_list_of_commits(
repo_path,
token_data['build']['bs_branch'],
token_data['build']['bs_sha'],
buildstream_sha)
generate_benchmark_config.generate_benchmark_configuration(
output_file=output_file,
list_of_shas=commits,
bs_branch=token_data['build']['bs_branch'])
# TODO: Figure out what this exception actually means
except Exception: # pylint: disable=broad-except
logging.error('Unable to generate benchmark configuration file \'%s\': %s',
token_data['build']['result'], archive_result)
return False
return True
......
......@@ -20,15 +20,14 @@
# Authors:
# Lachlan Mackenzie <lachlan.mackenzie@codethink.co.uk>
import git
import tempfile
import argparse
import shutil
import re
import os
import logging
import sys
import git
# This function generates a list of SHAs for a given repo, branch a start commit
# which is the last commit that was processed and is prior to the 1st commit to
# be returned and the last commit which might be current head as determined at
......@@ -40,6 +39,7 @@ import sys
# latest_commit - the last commit that needs to be added to the returned
# list.
def main():
repo = "https://gitlab.com/BuildStream/buildstream.git"
branch = 'master'
......@@ -75,17 +75,18 @@ def main():
try:
commits = get_list_of_commits(repo, branch, last_commit, latest_commit)
except git.exc.GitError as err:
except git.exc.GitError as err: # pylint: disable=no-member
print("Unable to extract commits: ", err)
sys.exit(1)
except Exception as ex_err:
# TODO: Come on, surely there's a million other things that could cause this?
except Exception as ex_err: # pylint: disable=broad-except
print("Nothing to extract: ", ex_err)
sys.exit(1)
print(commits)
def get_list_of_commits(repo_path, branch, lastCommit, latestCommit):
def get_list_of_commits(repo_path, branch, last_commit, latest_commit):
commits = list()
......@@ -95,21 +96,21 @@ def get_list_of_commits(repo_path, branch, lastCommit, latestCommit):
repo = git.Repo.init(repo_path, bare=False)
else:
repo = git.Repo.clone_from(repo_path, temp_staging_area)
except git.exc.GitError as err:
logging.error("Unable to access git repository: ", err)
except git.exc.GitError as err: # pylint: disable=no-member
logging.error("Unable to access git repository: %s", err)
raise
start = False
for commit in repo.iter_commits(branch):
if commit.hexsha == latestCommit:
commits.append(commit)
start = True
if commit.hexsha == lastCommit:
break
elif commit.hexsha == lastCommit:
break
elif start == True:
commits.append(commit)
if commit.hexsha == latest_commit:
commits.append(commit)
start = True
if commit.hexsha == last_commit:
break
elif commit.hexsha == last_commit:
break
elif start:
commits.append(commit)
return commits
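A usage sketch for get_list_of_commits() as refactored above. The SHAs are placeholders, GitPython (the git module) must be installed, and the returned objects are GitPython commits:

    repo_path = "https://gitlab.com/BuildStream/buildstream.git"
    last_commit = "0000000000000000000000000000000000000000"    # last commit already benchmarked (placeholder)
    latest_commit = "1111111111111111111111111111111111111111"  # newest commit to include (placeholder)

    commits = get_list_of_commits(repo_path, "master", last_commit, latest_commit)
    for commit in commits:
        print(commit.hexsha, commit.committed_date)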
......
......@@ -22,15 +22,14 @@
import json
import os
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import logging
import argparse
import sys
import time
import datetime
import matplotlib.pyplot as plt
# Commandline executable that takes a directory containing json results files
# iterates through them and generates individual graph files of results based
# upon a given data set (denoted by test type name). Option is provided to
......@@ -38,9 +37,10 @@ import datetime
# testing) as a comparable to canonical results. Non canonical results are not
# retained for future comparison. Option is provided to limit the number of