Commit cf1001fb authored by David Spencer

work in progress

parent 4b1c5dfb
......@@ -4,13 +4,15 @@ Periodically create a set of web pages reporting on SlackBuilds.org
## External requirements
python3
python-requests
python-chardet
idna
python-certifi
Jinja2
MarkupSafe
python3
python-requests
python-chardet
idna
python-certifi
Jinja2
MarkupSafe
libgit2
pygit2
## Running
......
......@@ -12,24 +12,30 @@ import subprocess
import globals
#-----------------------------------------------------------------------
#-------------------------------------------------------------------------------
def getnewest(repopath,itempath):
    """
    Return git log information about the newest commit for 'itempath'.

    'repopath' is used as the working directory for git, and 'itempath' is
    the path (relative to 'repopath') whose history is queried.
    Returns the output of 'git log' decoded as UTF-8, in the format
    "%ai %h %s" (author date, abbreviated hash, subject).
    Raises subprocess.CalledProcessError if git exits with a non-zero status.
    (this is painfully slow)
    """
    # The arguments are passed as a list, not as a shell command string, so
    # that a path containing spaces or shell metacharacters reaches git
    # unchanged. The "--" stops git reading 'itempath' as a revision or option.
    glog = subprocess.run(
        ["git", "log", "--pretty=format:%ai %h %s", "-n", "1", "--", itempath],
        cwd=repopath,
        check=True,
        stdout=subprocess.PIPE
    )
    # Sketch of a possible pygit2 replacement (work in progress, not enabled):
    # from pygit2 import Repository, GIT_SORT_TOPOLOGICAL, GIT_SORT_REVERSE
    # repo = Repository(globals.sbrepo+"/.git")
    # for commit in repo.walk(repo.head.target, GIT_SORT_TOPOLOGICAL):
    #     break
    # return (commit.hash, commit.authordate, commit.message)
    return glog.stdout.decode("utf-8")
#-----------------------------------------------------------------------
#-------------------------------------------------------------------------------
def clone_sbo():
"""
......@@ -44,7 +50,7 @@ def clone_sbo():
stderr=subprocess.DEVNULL
)
#-----------------------------------------------------------------------
#-------------------------------------------------------------------------------
def pull_sbo():
"""
......@@ -59,7 +65,7 @@ def pull_sbo():
stderr=subprocess.DEVNULL
)
#-----------------------------------------------------------------------
#-------------------------------------------------------------------------------
def megalog(SEP):
"""
......
......@@ -13,7 +13,31 @@ import datetime
import configparser
import logging
#-----------------------------------------------------------------------
import gitfuncs
#-------------------------------------------------------------------------------
# For details of these global structures, see the relevant modules. They
# contain data correlated from multiple sources, so for example, to get info
# about the maintainers from Repology, we need to have a list of maintainers,
# and that list can only come from the cloned SlackBuilds repo. And that is
# why these structures are globals...
SBinfo = {}
SBcatindex = {}
SBmntindex = {}
Maintainerinfo = {}
Maintainerindex = {}
Problems = {}
#-------------------------------------------------------------------------------
# Create a timestamp for this run
updateref = datetime.datetime.now(datetime.timezone.utc)
#-------------------------------------------------------------------------------
# Create the configuration variables
def configulator(inidict, varname, defaultvalue=""):
"""
......@@ -26,14 +50,6 @@ def configulator(inidict, varname, defaultvalue=""):
value = defaultvalue
return value
#-----------------------------------------------------------------------
# Create a timestamp for this run
updateref = datetime.datetime.now(datetime.timezone.utc)
#-----------------------------------------------------------------------
# Create the configuration variables
config = configparser.ConfigParser()
config.read("sbodash.ini")
......@@ -86,13 +102,20 @@ os.makedirs(logsdir,exist_ok=True)
if loglevel not in ( "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL" ):
sys.exit("Not a valid log level: {:s}".format(loglevel))
#-----------------------------------------------------------------------
#-------------------------------------------------------------------------------
# Get our version identifier
ident = "sbodash {:s}".format(gitfuncs.getnewest(dashdir,".").split(" ")[3])
#-------------------------------------------------------------------------------
# Set up logging, and log some debug info
logging.basicConfig( filename=os.path.join(logsdir,"sbodash_{:s}.log".format(updateref.strftime("%Y-%m-%d"))),
format="%(asctime)s %(funcName)s %(message)s",
level=getattr(logging,loglevel) )
level=getattr(logging,loglevel)
)
logging.debug(ident)
logging.debug("configuration:")
logging.debug(" {:s} = {:s}".format("updateref",updateref.strftime("%Y-%m-%d %T")))
logging.debug(" {:s} = {:s}".format("dashdir", dashdir))
......@@ -105,4 +128,4 @@ logging.debug(" {:s} = {:d}".format("keepstats",keepstats))
logging.debug(" {:s} = {:s}".format("logsdir", logsdir))
logging.debug(" {:s} = {:s}".format("loglevel", loglevel))
#-----------------------------------------------------------------------
#-------------------------------------------------------------------------------
This diff is collapsed.
......@@ -17,144 +17,146 @@ import datetime
import requests
import globals
import utils
import stats
import slackbuilds
Problems = []
import utils
#-----------------------------------------------------------------------
#-------------------------------------------------------------------------------
# Problems[prgnam] = [ { "type": "problem type",
#                        "desc": "description text",
#                        "source": "source",
#                        "since": "datestring",
#                        "status": "new/fixed" }, ... ]
#-------------------------------------------------------------------------------
def fetch_repology_problems():
    """
    Retrieves repology problems for the 'slackbuilds' repo.
    Returns the fetched data (the decoded JSON body of the response).
    Raises requests.exceptions.RequestException if the request times out
    or the server answers with an HTTP error status.
    """
    repo = "slackbuilds"
    repologyAPI = "https://repology.org/api/v1/repository/{:s}/problems".format(repo)
    # The API spec doesn't say whether the 'problems' result set is chunked.
    # In practice it seems to be truncated at 500 problems, so let's not go there.

    # requests has no default timeout, so an unresponsive server would
    # otherwise hang the whole run.
    response = requests.get(repologyAPI, timeout=60)
    # Fail here on an HTTP error instead of trying to decode an error page.
    response.raise_for_status()
    return response.json()
#-------------------------------------------------------------------------------
def process_repology_problems(rp_data, data_updated=None):
    """
    Reads the raw Repology problems data 'rp_data'.
    Returns a dict suitable for merging into the 'Problems' global:
      { prgnam: [ { "type": ..., "desc": ..., "source": "repology",
                    "since": "YYYY-MM-DD", "status": "new" }, ... ] }

    'data_updated' is the date to record as "since" for every problem (any
    object with strftime); if omitted, globals.updateref is used.
    Problems whose name is not a key of globals.SBinfo are logged and skipped.
    """
    logging.debug("started")

    if data_updated is None:
        data_updated = globals.updateref
    since = data_updated.strftime("%Y-%m-%d")

    # All Repology problems currently start with "Homepage link" and
    # a URL, but we'll classify any that don't as "other". To make the
    # included URL clickable we need to remove the surrounding quotes.
    hstr = "Homepage link \""

    rp_problems = {}
    probcount = 0

    for p in rp_data:

        prgnam = p["name"]
        if prgnam not in globals.SBinfo:
            # NOTE(review): assumes a problem for an unknown prgnam cannot be
            # reported usefully -- confirm this is the wanted behaviour.
            logging.warning("{:s} is not in the slackbuilds repo; problem ignored".format(prgnam))
            continue

        if p["problem"].startswith(hstr):
            problemtype = "homepage"
            description = p["problem"][len(hstr):].replace("\" "," ",1)
        else:
            problemtype = "other"
            description = p["problem"]

        # One prgnam can have several problems, so each value is a list.
        rp_problems.setdefault(prgnam, []).append({ "type": problemtype,
                                                    "desc": description,
                                                    "source": "repology",
                                                    "since": since,
                                                    "status": "new" })
        probcount += 1

    logging.debug("finished -- {:d} repology problems".format(probcount))
    return rp_problems
#-----------------------------------------------------------------------
#-------------------------------------------------------------------------------
def merge_problems(problems, target=None):
    """
    Merge 'problems' into 'Problems'.

    'problems' is a dict { prgnam: [problem, ...] }. Each problem that is not
    already listed for its prgnam is appended to that prgnam's list.
    'target' is the dict to merge into; if omitted, globals.Problems is used.
    Returns nothing: 'target' is modified in place.
    """
    if target is None:
        target = globals.Problems

    for prgnam, problist in problems.items():
        # setdefault creates a new list for an unseen prgnam, so the
        # caller's lists are never shared with the merged structure.
        merged = target.setdefault(prgnam, [])
        for p in problist:
            if p not in merged:
                merged.append(p)
#-----------------------------------------------------------------------
#-------------------------------------------------------------------------------
def load_repology_problems():
    """
    Load repology problems into 'Problems'.

    Obtains the raw data through utils.load, converts it with
    process_repology_problems, merges the result with merge_problems and
    records the problem counts in the statistics.
    """
    logging.info("started")

    # NOTE(review): utils.load is not visible here; presumably it returns the
    # stored data, or calls fetch_repology_problems when the stored data is
    # older than globals.upd_repgy days -- confirm.
    rp_data = utils.load( "repology_problems.p",
                          globals.upd_repgy,
                          fetch_repology_problems
                        )
    if rp_data is None:
        # NOTE(review): assumes utils.load still returns None on failure.
        logging.error("Error: failed to load repology problems")
        return

    rp_problems = process_repology_problems(rp_data)
    # Count the problems themselves, not the prgnams that have problems.
    # NOTE(review): assumes each value of rp_problems is a list -- confirm.
    probcount = sum(len(problist) for problist in rp_problems.values())
    stats.setStats("problems.repology.count",probcount)

    merge_problems(rp_problems)
    stats.addStats("problems.count",probcount)

    logging.info("finished -- {:d} repology problems".format(probcount))
#-------------------------------------------------------------------------------
def load_download_problems():
    """
    Placeholder for loading download problems; currently does nothing.
    """
    pass
#-----------------------------------------------------------------------
#-------------------------------------------------------------------------------
def load_security_problems():
    """
    Placeholder for loading security problems; currently does nothing.
    """
    pass
#-----------------------------------------------------------------------
#-------------------------------------------------------------------------------
def load_build_problems():
    """
    Placeholder for loading build problems; currently does nothing.
    """
    pass
#-----------------------------------------------------------------------
#-------------------------------------------------------------------------------
def load_problems():
    """
    Call each of the problem "load" functions in turn.
    """
    loaders = [
        load_repology_problems,
        load_download_problems,
        load_security_problems,
        load_build_problems,
    ]
    for loader in loaders:
        loader()
#-------------------------------------------------------------------------------
def render_problems_report():
    """
    Render the problems report page.

    Passes globals.Problems as the template data for "problems.html" in the
    "reports" subdirectory.
    """
    logging.info("started")
    utils.renderer( page_subdir="reports",
                    page_name="problems.html",
                    page_title="SBo Problems Report",
                    TemplateData=globals.Problems )
    logging.info("finished")
#-----------------------------------------------------------------------
#-------------------------------------------------------------------------------
#!/usr/bin/python3 -B
#-----------------------------------------------------------------------
#-------------------------------------------------------------------------------
# SBo Dashboard
# sbodash
# Top level command
......@@ -8,34 +8,31 @@
# David Spencer 2018
# See LICENCE for copyright information
# See README.md for external requirements
#-----------------------------------------------------------------------
#-------------------------------------------------------------------------------
import logging
import globals
import stats
#-----------------------------------------------------------------------
#-------------------------------------------------------------------------------
logging.info("RUN STARTED")

from slackbuilds import load_slackbuilds, render_slackbuilds_detail
from maintainers import load_maintainers, render_maintainers_report, render_maintainers_detail
from problems import load_problems, render_problems_report
from support import render_support, render_indexes

# Load all the data before rendering anything: the global structures
# contain data correlated from multiple sources.
load_slackbuilds()
load_maintainers()
load_problems()

# Render the detail pages and reports, then the supporting pages.
render_slackbuilds_detail()
render_maintainers_report()
render_maintainers_detail()
render_problems_report()
render_support()
render_indexes()
......@@ -43,4 +40,4 @@ stats.saveStatsHistory()
logging.info("RUN FINISHED")
#-----------------------------------------------------------------------
#-------------------------------------------------------------------------------
;-----------------------------------------------------------------------
;-------------------------------------------------------------------------------
; SBo Dashboard
; sbodash.ini -- config file
;
......@@ -15,7 +15,7 @@
; for example
; export SBDIR=/store/slackbuilds
;
;-----------------------------------------------------------------------
;-------------------------------------------------------------------------------
[Locations]
......@@ -38,7 +38,7 @@ sitedir = site
; The rendered site is served at this URL
siteurl = http://localhost/sbodash
;-----------------------------------------------------------------------
;-------------------------------------------------------------------------------
[Logging]
......@@ -48,7 +48,7 @@ logsdir = logs
; Standard Python log level: DEBUG INFO WARNING ERROR CRITICAL
loglevel = INFO
;-----------------------------------------------------------------------
;-------------------------------------------------------------------------------
[Updates]
......@@ -58,11 +58,11 @@ upd_sbo = 7
; minimum interval for fetching maintainers and problems from repology (days)
upd_repgy = 7
;-----------------------------------------------------------------------
;-------------------------------------------------------------------------------
[Statistics]
; Statistics retention period (days)
keepstats = 366
;-----------------------------------------------------------------------
;-------------------------------------------------------------------------------
"""
SBo Dashboard
slackbuilds.py
Functions and dict for accessing a local SlackBuilds.org clone
Functions and dicts for a local SlackBuilds.org clone
David Spencer 2018
See LICENCE for copyright information
......@@ -14,22 +14,41 @@ import subprocess
import logging
import datetime
import gitfuncs
import globals
import stats
import utils
import gitfuncs
SBinfo = {}
SBindex = []
#-----------------------------------------------------------------------
def process_slackbuilds(sbinfofile,sbindexfile,updatefile):
#-------------------------------------------------------------------------------
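# lookup prgnam, get a dict of info about that SlackBuild
# SBinfo[prgnam] = { "catnam": catnam, "version": version, ..., "gitnewest": gitnewest }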
#
# lookup catnam, get a list of prgnams
# SBcatindex[catnam] = [ prgnam1, prgnam2... ]
#
# lookup maintainer email, get list of prgnams, names, etc
# SBmntindex[rawemail] = { "email": email,
# "redact": redact,
# "mntnamlist": [mntnam1, mntnam2...],
# "prgnamlist": [prgnam1, prgnam2...] }
#-------------------------------------------------------------------------------
def fetch_slackbuilds():
"""
Read data from SlackBuilds info files and git into SBinfo and SBindex
and save them into the persistent data files
Create or update a clone of the SBo repo.
Returns a dictionary of information about each prgnam in the repo,
suitable for loading directly into the SBinfo global.
"""
global SBinfo, SBindex
logging.info("started")
sbdata = {}
if os.path.isfile(os.path.join(globals.sbdir,"ChangeLog.txt")):
logging.info("pulling from SBo repo")
gitfuncs.pull_sbo()
else:
logging.info("cloning from SBo repo")
gitfuncs.clone_sbo()
logging.debug("reading .info files")
# These regexes are used >7000 times, so we might as well compile them.
......@@ -39,8 +58,6 @@ def process_slackbuilds(sbinfofile,sbindexfile,updatefile):
re_email = re.compile(r"EMAIL=\".*\"")
infocount = 0
catnamlist = []
for infofile in sorted(glob.glob(globals.sbdir+"/*/*/*.info")):
infocount += 1
logging.debug(infofile)
......@@ -70,9 +87,9 @@ def process_slackbuilds(sbinfofile,sbindexfile,updatefile):
logging.warn("{:s} has missing or broken or duplicated entries".format(infofile))
continue
gitnewest = gitfuncs.getnewest(catnam,prgnam)
gitnewest = gitfuncs.getnewest(sbodir,catnam+"/"+prgnam)
SBinfo.update({ prgnam: { "catnam": catnam,
sbdata.update({ prgnam: { "catnam": catnam,
"version": version,
"homepage": homepage,
"maintainer": maintainer,
......@@ -80,62 +97,86 @@ def process_slackbuilds(sbinfofile,sbindexfile,updatefile):
"gitnewest": gitnewest
} })
if catnam not in catnamlist:
catnamlist.append(catnam)
SBindex.append({"catnam":catnam, "prgnamlist":[prgnam]})
logging.info("finished -- {:d} info files".format(infocount))
return sbdata
#-------------------------------------------------------------------------------
def process_slackbuilds(sbdata):
    """
    Populates the SBinfo and associated globals from the supplied data structure.

    'sbdata' is a dict keyed by prgnam; each value is a dict from which the
    "catnam", "maintainer" and "email" entries are read here.
    Sets globals.SBinfo, globals.SBprgindex, globals.SBcatindex and
    globals.SBmntindex.
    Returns nothing.
    """
    logging.debug("started")

    # (1) create SBinfo
    globals.SBinfo = sbdata
    # well, that was easy :p

    # (2) create SBprgindex and SBcatindex
    # 'sbdata' is keyed by prgnam, so a duplicated prgnam cannot be detected
    # here; it has already collapsed into a single entry.
    prgindex = {}
    catindex = {}
    for prgnam, prginfo in sbdata.items():
        catnam = prginfo["catnam"]
        prgindex[prgnam] = catnam
        catindex.setdefault(catnam, []).append(prgnam)
    globals.SBprgindex = prgindex
    globals.SBcatindex = catindex

    # (3) create SBmntindex
    mntindex = {}
    for prgnam, prginfo in sbdata.items():
        rawemail = prginfo["email"]
        mntnam = prginfo["maintainer"]
        entry = mntindex.get(rawemail)
        if entry is None:
            email = utils.unspamtrap(rawemail)
            # 'redact' is true when unspamtrap changed the address.
            mntindex[rawemail] = { "email": email,
                                   "redact": email != rawemail,
                                   "mntnamlist": [mntnam],
                                   "prgnamlist": [prgnam] }
        else:
            entry["prgnamlist"].append(prgnam)
            if mntnam not in entry["mntnamlist"]:
                entry["mntnamlist"].append(mntnam)
    globals.SBmntindex = mntindex

    logging.debug("finished -- {:d} slackbuilds".format(len(sbdata)))
#-------------------------------------------------------------------------------
def load_slackbuilds():
    """
    Load the slackbuilds data.

    Obtains the data through utils.load, populates the globals with
    process_slackbuilds and records the number of slackbuilds in the
    statistics.
    """
    logging.info("started")

    # NOTE(review): utils.load is not visible here; presumably it returns the
    # stored data, or calls fetch_slackbuilds when the stored data is older
    # than globals.upd_sbo days -- confirm.
    slackbuilds_data = utils.load( "slackbuilds_data.p",
                                   globals.upd_sbo,
                                   fetch_slackbuilds
                                 )
    if slackbuilds_data is None:
        # NOTE(review): assumes utils.load still returns None on failure.
        logging.error("Error: failed to load slackbuilds data")
        return

    process_slackbuilds(slackbuilds_data)
    stats.setStats("slackbuilds.count",len(slackbuilds_data))

    logging.info("finished")
#-----------------------------------------------------------------------
#-------------------------------------------------------------------------------
def render_slackbuild_detail():
def render_slackbuilds_detail():
"""
Render all the slackbuild detail pages
"""
global SBinfo, SBindex
logging.info("started")
for prgnam, prginfo in SBinfo.items():
for prgnam, prginfo in globals.SBinfo.items():
catnam = prginfo["catnam"]
logging.debug(catnam+"/"+prgnam)
gitnewest = prginfo["gitnewest"]
......@@ -156,9 +197,10 @@ def render_slackbuild_detail():
utils.renderer( page_subdir="slackbuilds/"+catnam,
page_name=prgnam,
page_title=catnam+"/"+prgnam,
template="slackbuild_detail",
TemplateData=SlackbuildDetail )
template="slackbuilds_detail",
TemplateData=SlackbuildDetail
)
logging.info("finished")
#-----------------------------------------------------------------------
#-------------------------------------------------------------------------------
body { font-family: Sans-Serif; font-size: 75%; text-align: left; }
body { font-family: Sans-Serif; font-size: 75%; text-align: left; }
nav img { width: 1.2em; height: 1.2em; margin-right: 0.5em; vertical-align: text-bottom }
footer { font-style: italic; font-size: 0.8em; margin-top: 3rem; }
table { border: 1px solid; border-collapse: collapse }
th, td { padding: 0.5rem; border: 1px solid }
.m-inactive { background-color: #ffdddd }
.m-normal { background-color: #ffffff }
.m-recent { background-color: #ddffdd }
.row-inactive { background-color: #ffdddd }
.row-normal { background-color: #ffffff }
.row-recent { background-color: #ddffdd }
.row-new { background-color: #ffffdd }
.row-fixed { background-color: #ddffdd }
.m { padding:0.5rem; border:1px solid }
.l { text-align: center; padding: 0.5rem; border: 1px solid }
.s { text-align: center; padding: 0.5rem; border: 1px solid }
.n { text-align: center; padding: 0.5rem; border: 1px solid }
.np { text-align: right; padding: 0.5rem; border: 1px solid }
.o { text-align: center; padding: 0.5rem; border: 1px solid }
.op { text-align: right; padding: 0.5rem; border: 1px solid }
.col-text { padding:0.5rem; border:1px solid }
.col-date { text-align: center; padding: 0.5rem; border: 1px solid }
.col-count { text-align: center; padding: 0.5rem; border: 1px solid }
.col-percent { text-align: center; padding: 0.5rem; border: 1px solid }
div.cols { column-width: 10rem }
.cols p { margin: 0 0 0.5em 0; padding: 0 0 0 0 }
#maintainers div.multicol { column-width: 20rem }
#slackbuilds div.multicol { column-width: 10rem }
.multicol p { margin: 0 0 0.5em 0; padding: 0 0 0 0 }