Commit 9f087634 authored by Thorsten Simons's avatar Thorsten Simons

1.3.3 - removed gridlines from the content sheet; Fine-tuned the column width...

1.3.3 - removed gridlines from the content sheet; Fine-tuned the column width in the query sheets; made the runtime column a bit more readable; added *500_largest_size* query; some documentation additions
parent 8fe8dbf8
Release History
===============
**1.3.3 2017-10-12**
* removed gridlines from the content sheet
* fine-tuned the column width in the query sheets
* made the runtime column a bit more readable
* added *500_largest_size* query
* some documentation additions
**1.3.2 2017-10-10**
* added query runtime to content sheet in xlsx
......
......@@ -2,7 +2,7 @@ Result Interpretation
=====================
Proper interpretation of **hcprequestanalytics** results requires some good
knowledge about HCP works, as well as about http, networking and client
knowledge about how HCP works, as well as about http, networking and client
behaviour. The information in this chapter hopefully helps understanding the
results a bit.\ [#fn1]_
......@@ -96,9 +96,18 @@ the entire range of 100% of the data.
.. image:: _static/knwldg_percentile_.png
:scale: 50%
Let's take ``row 6`` as an example - it tells that the GET request with the
largest size was 581,632,000 bytes. But it also tells that 99.9% of the GET
requests are 2,550,516 Bytes or smaller (``cell Q6``). This lets us know that
the ``max(size)`` value is just a peak, appearing in the highest 0.1% of the
requests. Looking at the *500_largest_size* query result will prove that:
.. image:: _static/knwldg_size_.png
:scale: 50%
This gives a good overview, but still needs to be taken in relation with other
parameters - for example, if you have overall high latency, you might also have
overall request sizes...
overall huge request sizes...
......@@ -107,5 +116,5 @@ overall request sizes...
.. [#fn1] All queries referenced in this chapter are based on the
   built-in queries.
.. [#fn2] Taken from
`Percentile at Wikipedia <https://en.wikipedia.org/wiki/Percentile>`_
.. [#fn2] Taken from the Percentile article at
`Wikipedia <https://en.wikipedia.org/wiki/Percentile>`_
......@@ -24,7 +24,8 @@ import sys
import sqlite3
from os import cpu_count
from os.path import join, dirname
from time import time, asctime, mktime, strptime, sleep
from time import time, asctime, mktime, strptime
from datetime import datetime
from collections import OrderedDict
from concurrent.futures import ProcessPoolExecutor, as_completed, TimeoutError
from tempfile import NamedTemporaryFile
......@@ -215,7 +216,6 @@ class DB():
"""
_csv = Csv(prefix) if csvtype else Xlsx(prefix)
with ProcessPoolExecutor(max_workers=processes) as executor:
# create a list of all the queries to run
qlist = []
......@@ -258,13 +258,13 @@ class DB():
print('\t{} generated an exception: {}'.format(mps[fu], e),
flush=True)
else:
print('\t{:30}: {:.3f} seconds'.format(mps[fu], runtime, ),
print('\t{:30}: {}'.format(mps[fu], mktimestr(runtime)),
flush=True)
first = True
for rec in data:
if first:
_csv.newsheet(mps[fu], list(rec.keys()),
runtime=runtime,
runtime=mktimestr(runtime),
comment=self.queries.c.get(mps[fu],
'comment',
fallback=''))
......@@ -306,7 +306,7 @@ def runquery(db, qtitle, query):
# just open this temporary file to be able to identify which query a
# subprocess is running ;-)
with NamedTemporaryFile('w', prefix='I_am__*'+qtitle+'*__') as tmphdl:
_st = time()
_st = datetime.today()
con = sqlite3.connect(db)
con.row_factory = sqlite3.Row
con.create_aggregate("percentile", 2, PercentileFunc)
......@@ -317,10 +317,30 @@ def runquery(db, qtitle, query):
data = [row2dict(rec) for rec in cur.fetchall()]
con.close()
_end = time()-_st
_end = datetime.today()-_st
return _end, data
def mktimestr(td):
    """
    Convert a timedelta() object to a string 'h:m:s.ms'.

    Hours and minutes are shown as '__' placeholders when zero so the
    significant part of short runtimes stands out; otherwise they are
    zero-padded to two digits. Milliseconds are always three digits.
    Whole days (td.days), if any, are folded into the hours value.

    :param td: a datetime.timedelta object
    :return: the formatted string, e.g. '__:__:05.123' or '01:02:03.456'
    """
    # total whole seconds including the day component (td.seconds alone
    # only covers the sub-day remainder)
    total = td.days * 24 * 60 * 60 + td.seconds
    hours, rest = divmod(total, 60 * 60)
    mins, secs = divmod(rest, 60)
    # td.microseconds is 0..999999; integer division gives milliseconds,
    # zero-padded below (the old string-slicing approach dropped leading
    # zeroes, turning e.g. 5000 µs into '500' instead of '005')
    millis = td.microseconds // 1000
    return '{}:{}:{:02}.{:03}'.format('{:02}'.format(hours) if hours else '__',
                                      '{:02}'.format(mins) if mins else '__',
                                      secs, millis)
class PercentileFunc():
"""
Aggregate function for use with sqlite3 - calculates a given percentile.
......
......@@ -164,8 +164,16 @@ query : SELECT printf("%%s/%%s", substr(timestampstr, 4, 3),
FROM logrecs GROUP BY day, request, httpcode
freeze pane : E5
[500_largest]
comment : The records with the 500 largest requests
[500_largest_size]
comment : The records with the 500 largest requests sorted by size
query : SELECT request, httpcode, node, latency, size,
tp(size,latency) as Bytes_sec, clientip, user,
timestamp, timestampstr, path, namespace
FROM logrecs ORDER BY size DESC LIMIT 500
freeze pane : D5
[500_largest_req_httpcode_node]
comment : The records with the 500 largest requests by req, httpcode, node
query : SELECT request, httpcode, node, latency, size,
tp(size,latency) as Bytes_sec, clientip, user,
timestamp, timestampstr, path, namespace
......
......@@ -111,6 +111,7 @@ class Xlsx(Csv):
# create the Content sheet
self.contentws = self.wb.add_worksheet(name='CONTENT')
self.contentws.hide_gridlines(option=2)
def newsheet(self, name, fieldnames, runtime=0.0, comment=''):
......@@ -119,13 +120,15 @@ class Xlsx(Csv):
:param name: the files base name
:param fieldnames: a list of field names
:param runtime: the time it took to run the query (seconds)
:param runtime: the time it took to run the query (str)
:param comment: a comment to be added
"""
self.fieldnames = fieldnames
self.content[name] = {'comment': comment,
'runtime': '({:0.1f} sec.)'.format(float(runtime))}
self.colw = [len(w) for w in fieldnames]
'runtime': '{}'.format(runtime)}
# record the length of each columns title
uplift = 1.15 # uplift for bold header line
self.colw = [len(w)*uplift for w in fieldnames]
self.ws = self.wb.add_worksheet(name=name)
......@@ -150,15 +153,27 @@ class Xlsx(Csv):
:param row: a data row, matching the header
"""
vals = [row[x] for x in self.fieldnames]
_row = [row[x] for x in self.fieldnames]
row = _row
# this is done to properly set the width per column, taking in account
# thousand-seperators
clen = []
for x in range(0, len(self.fieldnames)):
try:
_s = '{:,}'.format(int(row[x]))
_ls = len(_s)
clen.append(_ls)
except ValueError:
clen.append(len(str(row[x])))
# save the max. length per row to be able to set column width later
# when we close this sheet
for x in range(0, len(self.fieldnames)):
if len(str(vals[x])) > self.colw[x]:
self.colw[x] = len(str(vals[x]))
if clen[x] > self.colw[x]:
self.colw[x] = clen[x]
self.ws.write_row(self.row, 0, vals, self.num)
self.ws.write_row(self.row, 0, row, self.num)
self.row += 1
def closesheet(self, fp=''):
......@@ -226,8 +241,8 @@ class Xlsx(Csv):
self.contentws.write(row-1, 3, 'description', hlink)
self.contentws.write(row-1, 4, '', hlink)
w_c = len('description')
self.contentws.write(row-1, 5, '(run time)', hlight)
w_r = len('(run time)')
self.contentws.write(row-1, 5, 'runtime (h:m:s.ms)', hlight)
w_r = len('runtime (h:m:s.ms)')
for q in sorted(self.content.keys()):
self.contentws.write(row, col, q, right)
......
......@@ -27,8 +27,8 @@ class Gvars:
"""
# version control
s_version = "1.3.2"
s_builddate = '2017-10-10'
s_version = "1.3.3"
s_builddate = '2017-10-12'
s_build = "{}/Sm".format(s_builddate)
s_minPython = "3.4.3"
s_description = "hcprequestanalytics"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment