Commit 8aaca294 authored by Thorsten Simons's avatar Thorsten Simons

1.3.0 - some more xlsx luxury, added more queries, added the ability to dump...

1.3.0 - some more xlsx luxury, added more queries, added the ability to dump the built-in queries to stdout, re-worked the cmd-line parameters (-d is now where it belongs to...)
parent e193f063
Release History
===============
**1.2.3 2017-09-29**
**1.3.0 2017-10-03**
* some more xlsx luxury
* added more queries
* added the ability to dump the built-in queries to stdout
* re-worked the cmd-line parameters (-d is now where it belongs to...)
**1.2.2 2017-09-26**
......
Command Syntax
==============
**hcprequestanalytics** consists of several subcommands, each used for a
specific piece of work. Use ``--help`` (or ``-h``) for details:
.. code-block:: text
:emphasize-lines: 1
$ hcprequestanalytics -h
usage: hcprequestanalytics [-h] [--version]
{load,analyze,showqueries,dumpqueries} ...
positional arguments:
{load,analyze,showqueries,dumpqueries}
load load the database
analyze analyze the database
showqueries show the available queries
dumpqueries dump the built-in queries to stdout
optional arguments:
-h, --help show this help message and exit
--version show program's version number and exit
load
----
The ``load`` subcommand loads the http gateway logs into a *sqlite3* database
file for later analytics:
.. code-block:: text
:emphasize-lines: 1
$ hcprequestanalytics load -h
usage: hcprequestanalytics load [-h] -d DB logpkg
positional arguments:
logpkg the HCP log package to process
optional arguments:
-h, --help show this help message and exit
-d DB the database file
showqueries
-----------
The ``showqueries`` subcommand shows the available queries - the ones built-in
as well as the ones added through the ``-a`` parameter:
.. code-block:: text
:emphasize-lines: 1
$ hcprequestanalytics showqueries -h
usage: hcprequestanalytics showqueries [-h] [-a ADDITIONALQUERIES] [-1]
optional arguments:
-h, --help show this help message and exit
-a ADDITIONALQUERIES a file containg addition queries (see documentation)
-1 print a concatenated list of queries, for easy cut and
paste
analyze
-------
The ``analyze`` subcommand runs queries against the database created with the
``load`` subcommand to create an xlsx file as result. Alternatively, a set of
csv files can be requested as well.
.. code-block:: text
:emphasize-lines: 1
$ hcprequestanalytics analyze -h
usage: hcprequestanalytics analyze [-h] [-a ADDITIONALQUERIES] -d DB
[-p PREFIX] [-c] [--procs PROCESSES]
[queries [queries ...]]
positional arguments:
queries a list of query names, or nothing for "all"; you can
select a group of queries by using the first few
characters followed by an asteriks ('req*' for
example)
optional arguments:
-h, --help show this help message and exit
-a ADDITIONALQUERIES a file containg addition queries (see documentation)
-d DB the database file
-p PREFIX prefix for the output files
-c create CSV files instead of a XLSX file
--procs PROCESSES no. of subprocesses to run, defaults to no. of CPUs
dumpqueries
-----------
The ``dumpqueries`` subcommand dumps the built-in queries to stdout. They can
be used as templates to build your own queries for use with the ``-a`` parameter:
.. code-block:: text
:emphasize-lines: 1
$ hcprequestanalytics dumpqueries -h
usage: hcprequestanalytics dumpqueries [-h]
optional arguments:
-h, --help show this help message and exit
This diff is collapsed.
src/docs/source/_static/xlsx.png

127 KB | W: | H:

src/docs/source/_static/xlsx.png

177 KB | W: | H:

src/docs/source/_static/xlsx.png
src/docs/source/_static/xlsx.png
src/docs/source/_static/xlsx.png
src/docs/source/_static/xlsx.png
  • 2-up
  • Swipe
  • Onion skin
......@@ -23,6 +23,7 @@ CSV files can be requested.
:maxdepth: 2
10_install
15_syntax
20_use
30_queries
80_goodtoknow
......
......@@ -35,26 +35,23 @@ def parseargs():
version="%(prog)s: {0}\n"
.format(Gvars.Version))
mp.add_argument('-d', dest='db', required=True,
help='the database file')
mp.add_argument('-a', dest='additionalqueries',
required=False,
help='a file containg addition queries '
'(see documentation)')
sp = mp.add_subparsers(dest='cmd')
# mkdbp = sp.add_parser('mkdb',
# help='setup the database')
loadp = sp.add_parser('load',
help='load the database')
loadp.add_argument('-d', dest='db', required=True,
help='the database file')
loadp.add_argument(dest='logpkg',
help='the HCP log package to process')
analyzep = sp.add_parser('analyze',
help='analyze the database')
analyzep.add_argument('-a', dest='additionalqueries',
required=False,
help='a file containg addition queries '
'(see documentation)')
analyzep.add_argument('-d', dest='db', required=True,
help='the database file')
analyzep.add_argument('-p', dest='prefix', required=False,
default='',
help='prefix for the output files')
......@@ -72,15 +69,21 @@ def parseargs():
help='a list of query names, or nothing for "all"; '
'you can select a group of queries by using the'
' first few characters followed by an asteriks '
'(req* vor example)')
'(\'req*\' for example)')
showqp = sp.add_parser('showqueries',
help='show the available queries')
showqp.add_argument('-a', dest='additionalqueries',
required=False,
help='a file containg addition queries '
'(see documentation)')
showqp.add_argument('-1', dest='oneq', required=False,
default=False, action='store_true',
help='print a concatenated list of queries, for easy '
'cut and paste')
dumpqp = sp.add_parser('dumpqueries',
help='dump the built-in queries to stdout')
result = mp.parse_args()
return result
......@@ -22,24 +22,36 @@
import sys
from time import time
from os.path import exists
from os.path import exists, join, dirname
from hcpreq import parseargs
from hcpreq.db import DB
from hcpreq.logs import Handler
def main():
opts = parseargs()
def opendb(db, addqueries):
    """
    Open and check the database, load the queries.

    :param db: the database file's name (or ':memory:' for a throw-away DB)
    :param addqueries: the name of a file w/ additional queries, or None
    :return: a ready-to-use database object
    """
    # use a distinct local name so the `db` parameter (the file name)
    # isn't shadowed by the DB object built from it
    database = DB(db)
    database.opendb()
    database.checkdb()
    database.loadqueries(aq=addqueries)
    return database
def main():
opts = parseargs()
# show the known queries
if opts.cmd == 'showqueries':
db = opendb(':memory:', opts.additionalqueries)
print('available queries:')
for q, txt in sorted(db.listqueries()):
if not opts.oneq:
......@@ -49,11 +61,24 @@ def main():
if opts.oneq:
print()
# dump the built-in queries
if opts.cmd == 'dumpqueries':
db = opendb(':memory:', None)
if getattr(sys, 'frozen', False):
_stdq = join(sys._MEIPASS, 'hcpreq/queries')
else:
_stdq = join(dirname(__file__), 'queries')
with open(_stdq, 'r') as qhdl:
for l in qhdl.readlines():
print(l, end='')
# load the database from an HCP log package
elif opts.cmd == 'load':
if not exists(opts.logpkg):
sys.exit('fatal: log package {} not existent'.format(opts.logpkg))
db.opendb()
db = opendb(opts.db, None)
start1 = time()
l = Handler(opts.logpkg)
infiles = l.unpack()
......@@ -77,7 +102,7 @@ def main():
# run queries against the database
elif opts.cmd == 'analyze':
db.opendb()
db = opendb(opts.db, opts.additionalqueries)
try:
_st = time()
db.mpanalyze(opts.prefix, queries=opts.queries, csvtype=opts.csv,
......
......@@ -200,13 +200,6 @@ class DB():
with ProcessPoolExecutor(max_workers=processes) as executor:
# create a list of all the queries to run
qlist = []
# for qs in sorted(self.queries.c.keys()):
# # filter out the unwanted queries
# if not qs == 'DEFAULT':
# if not queries or qs in queries:
# qlist.append(qs)
print('queries:', queries)
if queries:
for _q in queries:
......@@ -226,16 +219,13 @@ class DB():
if not qs == 'DEFAULT':
qlist.append(qs)
print('qlist:', qlist)
# submit the selected queries to the ProcessPoolExecutor
print('scheduling these queries for analytics using {} parallel '
'process(es):'
.format(processes or cpu_count()))
mps = {}
for q in qlist:
print('\t{:30} - {}'.format(q, self.queries.c.get(q, 'comment')))
print('\t{:30}: {}'.format(q, self.queries.c.get(q, 'comment')))
mps[executor.submit(runquery, self.db, q,
self.queries.c.get(q, 'query'))] = q
......
......@@ -22,20 +22,12 @@
[count]
comment : count all records
comment : No. of records, overall
query : SELECT count(*) FROM logrecs
freeze pane : A5
[count_day]
comment : count-per-day analysis
query : SELECT printf("%%s/%%s", substr(timestampstr, 4, 3),
substr(timestampstr, 1, 2)) AS day,
count(*)
FROM logrecs GROUP BY substr(timestampstr, 0, 7)
freeze pane : A5
[clientip]
comment : per-clientIP analysis
comment : No. of records per client IP address
query : SELECT clientip, count(*),
min(size), avg(size), max(size),
min(latency), avg(latency),
......@@ -44,7 +36,7 @@ query : SELECT clientip, count(*),
freeze pane : C5
[clientip_httpcode]
comment : httpcode-per-clientIP analysis
comment : No. of records per http code per client IP address
query : SELECT clientip, httpcode, count(*),
min(size), avg(size), max(size),
min(latency), avg(latency),
......@@ -53,7 +45,7 @@ query : SELECT clientip, httpcode, count(*),
freeze pane : D5
[clientip_request_httpcode]
comment : httpcode-per-request-per-clientIP analysis
comment : No. of records per http code per request per client IP address
query : SELECT clientip, request, httpcode, count(*),
min(size), avg(size), max(size),
min(latency), avg(latency),
......@@ -62,7 +54,7 @@ query : SELECT clientip, request, httpcode, count(*),
freeze pane : E5
[req]
comment : per-request analysis
comment : No. of records per request
query : SELECT request, count(*),
min(size), avg(size), max(size),
min(latency), avg(latency),
......@@ -71,7 +63,7 @@ query : SELECT request, count(*),
freeze pane : C5
[req_httpcode]
comment : httpcode-per-request analysis
comment : No. of records per http code per request
query : SELECT request, httpcode, count(*),
min(size), avg(size), max(size),
min(latency), avg(latency),
......@@ -80,7 +72,7 @@ query : SELECT request, httpcode, count(*),
freeze pane : C5
[req_httpcode_node]
comment : node-per-httpcode-per-request analysis
comment : No. of records per node per http code per request
query : SELECT request, httpcode, node, count(*),
min(size), avg(size), max(size),
min(latency), avg(latency),
......@@ -89,15 +81,16 @@ query : SELECT request, httpcode, node, count(*),
freeze pane : E5
[node]
comment : per-node analysis
comment : No. of records per node
query : SELECT node, count(*),
min(size), avg(size), max(size),
min(latency), avg(latency),
max(latency)
FROM logrecs GROUP BY node
freeze pane : C5
[node_req]
comment : node-per-request analysis
comment : No. of records per request per node
query : SELECT node, request, count(*),
min(size), avg(size), max(size),
min(latency), avg(latency),
......@@ -106,7 +99,7 @@ query : SELECT node, request, count(*),
freeze pane : D5
[node_req_httpcode]
comment : node-per-request-per-httpcode analysis
comment : No. of records per http code per request per node
query : SELECT node, request, httpcode, count(*),
min(size), avg(size), max(size),
min(latency), avg(latency),
......@@ -114,46 +107,82 @@ query : SELECT node, request, httpcode, count(*),
FROM logrecs GROUP BY node, request, httpcode
freeze pane : E5
[day]
comment : No. of records per day
query : SELECT printf("%%s/%%s", substr(timestampstr, 4, 3),
substr(timestampstr, 1, 2)) AS day,
count(*),
min(size), avg(size), max(size),
min(latency), avg(latency),
max(latency)
FROM logrecs GROUP BY day
freeze pane : C5
[day_req]
comment : request-per-day analysis
comment : No. of records per request per day
query : SELECT printf("%%s/%%s", substr(timestampstr, 4, 3),
substr(timestampstr, 1, 2)) AS day,
request, count(*),
min(size), avg(size), max(size),
min(latency), avg(latency),
max(latency)
FROM logrecs GROUP BY substr(timestampstr, 0, 7), request
FROM logrecs GROUP BY day, request
freeze pane : D5
[day_hour]
comment : No. of records per hour per day
query : SELECT printf("%%s/%%s", substr(timestampstr, 4, 3),
substr(timestampstr, 1, 2)) AS day,
printf("%%s", substr(timestampstr, 13, 2)) AS hour,
count(*),
min(size), avg(size), max(size),
min(latency), avg(latency),
max(latency)
FROM logrecs GROUP BY day, hour
freeze pane : D5
[day_hour_req]
comment : No. of records per request per hour per day
query : SELECT printf("%%s/%%s", substr(timestampstr, 4, 3),
substr(timestampstr, 1, 2)) AS day,
printf("%%s", substr(timestampstr, 13, 2)) AS hour,
request, count(*),
min(size), avg(size), max(size),
min(latency), avg(latency),
max(latency)
FROM logrecs GROUP BY day, hour, request
freeze pane : E5
[day_req_httpcode]
comment : httpcode-per-request-per-day analysis
comment : No. of records per http code per request per day
query : SELECT printf("%%s/%%s", substr(timestampstr, 4, 3),
substr(timestampstr, 1, 2)) AS day,
request, httpcode, count(*),
min(size), avg(size), max(size),
min(latency), avg(latency),
max(latency)
FROM logrecs GROUP BY substr(timestampstr, 0, 7), request, httpcode
FROM logrecs GROUP BY day, request, httpcode
freeze pane : E5
[size_biggest_500]
comment : the 500 recs with the biggest object size
query : SELECT request, httpcode, node, latency, size, clientip, user,
[500_largest]
comment : The records with the 500 largest requests
query : SELECT request, httpcode, node, latency, size,
size/(latency/1000)/1024 as 'KB/sec', clientip, user,
timestamp, timestampstr, path, namespace
FROM (SELECT * FROM logrecs ORDER BY size DESC LIMIT 500)
ORDER BY request, httpcode, node
freeze pane : C5
freeze pane : D5
[latency_worst_100]
comment : the 100 recs with the worst latency
[500_worst_latency]
comment : The records with the 500 worst latencies
query : SELECT request, httpcode, latency, size, clientip, user,
timestamp, timestampstr, path, namespace
FROM (SELECT * FROM logrecs ORDER BY latency DESC LIMIT 100)
FROM (SELECT * FROM logrecs ORDER BY latency DESC LIMIT 500)
ORDER BY request, httpcode
freeze pane : C5
[percentile_req]
comment : per-request analysis, including percentiles for size and latency
comment : No. of records per request analysis, including percentiles for size and latency
query : SELECT request, count(*),
min(size), avg(size), max(size),
percentile(size, 10) as 'pctl-10 (size)',
......@@ -185,17 +214,17 @@ query : SELECT request, count(*),
FROM logrecs GROUP BY request
freeze pane : C5
[throughput_highest_500]
comment : the 500 requests with the highest throughput (KB/sec) for objects >= 1 Byte
[500_highest_throughput]
comment : The 500 records with the highest throughput (KB/sec) for objects >= 1 Byte
query : SELECT * from
(select request, node, clientip, httpcode,
size/(latency/1000)/1024 as 'KB/sec', size,
latency from logrecs where size >= 1)
order by MB_per_sec desc limit 500;
order by 'KB/sec' desc limit 500;
freeze pane : E5
[percentile_throughput_kb]
comment : per-request analysis, percentiles on throughput (KB/sec) for objects >= 10MB
comment : No. of records per request, with percentiles on throughput (KB/sec) for objects >= 10MB
query : SELECT request,
count(*), min(size), avg(size), max(size),
percentile(size/(latency/1000)/1024, 10) as 'pctl-10 (KB/sec)',
......
# The MIT License (MIT)
#
# Copyright (c) 2017 Thorsten Simons (sw@snomis.de)
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
from collections import OrderedDict
......@@ -22,6 +22,7 @@
import sys
import csv
import time
import xlsxwriter
......@@ -36,7 +37,7 @@ class Csv(object):
"""
self.prefix = prefix
def newsheet(self, name, fieldnames):
def newsheet(self, name, fieldnames, **kwargs):
"""
Create a new CSV file.
......@@ -83,6 +84,7 @@ class Xlsx(Csv):
:param prefix: the prefix per generated filename
"""
super().__init__(prefix)
self.content = {}
self.wb = xlsxwriter.Workbook('{}-analyzed.xlsx'.format(prefix))
self.wb.set_properties({'title': 'HCP Request Analytics',
......@@ -94,6 +96,10 @@ class Xlsx(Csv):
'readthedocs.io',
})
self.bold = self.wb.add_format({'bold': True})
self.linkback = self.wb.add_format({'bold': True,
'font_size': 14,
'color': 'blue',
'bg_color': 'yellow'})
self.title0 = self.wb.add_format({'bold': False,
'font_size': 14,
'bg_color': 'yellow'})
......@@ -103,6 +109,10 @@ class Xlsx(Csv):
'bottom': 5})
self.num = self.wb.add_format({'num_format': '#,##0'})
# create the Content sheet
self.contentws = self.wb.add_worksheet(name='CONTENT')
def newsheet(self, name, fieldnames, comment=''):
"""
Create a new worksheet
......@@ -111,13 +121,18 @@ class Xlsx(Csv):
:param fieldnames: a list of field names
:param comment: a comment to be added
"""
self.content[name] = comment
self.fieldnames = fieldnames
self.colw = [len(w) for w in fieldnames]
self.ws = self.wb.add_worksheet(name=name)
# write the comment into the header
# write the comment into the header, plus a link to the CONTENT sheet
self.ws.set_row(0, 20, self.title0)
self.ws.merge_range(0, 0, 0, 9, comment)
self.ws.merge_range(0, 10, 0, 12, '', self.title0)
self.ws.write_url(0, 10, 'internal:CONTENT!B2', self.linkback,
'<<< back to CONTENT <<<')
# insert a spacer row
self.ws.set_row(1, 8, self.title0)
# write the field names
......@@ -135,6 +150,12 @@ class Xlsx(Csv):
"""
vals = [row[x] for x in self.fieldnames]
# save the max. length per row to be able to set column width later
# when we close this sheet
for x in range(0, len(self.fieldnames)):
if len(str(vals[x])) > self.colw[x]:
self.colw[x] = len(str(vals[x]))
self.ws.write_row(self.row, 0, vals, self.num)
self.row += 1
......@@ -144,12 +165,66 @@ class Xlsx(Csv):
:param fp: the cell where to split/freeze the pane
"""
# set column width
for x in range(0, len(self.fieldnames)):
self.ws.set_column(x, x, self.colw[x])
if fp:
self.ws.freeze_panes(fp)
return
def close(self):
    """
    Create a Content sheet and close the workbook.

    Builds an index sheet listing every generated worksheet with a
    hyperlink to it, then makes that sheet the one shown on open.
    """
    row = 3
    col = 1
    # track the widest query name and comment to size the columns later
    w_q = w_c = 0
    title = self.wb.add_format({'bold': True,
                                'align': 'center',
                                'font_size': 14,
                                'bg_color': 'yellow'})
    left = self.wb.add_format({'bold': True,
                               'align': 'right',
                               'font_size': 12})
    link = self.wb.add_format({'bold': True,
                               'align': 'left',
                               'font_size': 12,
                               'color': 'blue'})
    footer = self.wb.add_format({'bold': False,
                                 'align': 'center',
                                 'font_size': 10,
                                 'bg_color': 'yellow'})

    # headline
    self.contentws.merge_range(1, 1, 1, 3, 'Content', title)

    # one row per worksheet: query name, spacer, linked comment
    for q in sorted(self.content.keys()):
        self.contentws.write(row, col, q, left)
        self.contentws.write_url(row, col+2,
                                 'internal:{}!A1'.format(q),
                                 link, self.content[q])
        w_q = len(q) if len(q) > w_q else w_q
        w_c = len(self.content[q]) if len(self.content[q]) > w_c else w_c
        row += 1

    # set column width for...
    # ...the query name (was sized from the comment width (w_c) before,
    #    leaving w_q unused - use the query-name width instead)
    self.contentws.set_column(col, col, w_q*.6)
    # ...spacer
    self.contentws.set_column(col+1, col+1, 2)
    # ...the comment, linking to the respective sheet
    self.contentws.set_column(col+2, col+2, w_c*.9)

    # footer
    self.contentws.merge_range(row+2, 1, row+2, 3,
                               'created {}'.format(time.asctime()),
                               footer)

    # make this the visible sheet on workbook open
    self.contentws.set_first_sheet()
    self.contentws.activate()

    self.wb.close()
......@@ -27,8 +27,8 @@ class Gvars:
"""
# version control
s_version = "1.2.2"
s_builddate = '2017-09-26'
s_version = "1.3.0"
s_builddate = '2017-10-03'
s_build = "{}/Sm".format(s_builddate)
s_minPython = "3.4.3"
s_description = "hcprequestanalytics"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment