Commit 28b18724 authored by David Spencer's avatar David Spencer

work in progress

parent 4b1c5dfb
......@@ -4,13 +4,15 @@ Periodically create a set of web pages reporting on SlackBuilds.org
## External requirements
python3
python-requests
python-chardet
idna
python-certifi
Jinja2
MarkupSafe
python3
python-requests
python-chardet
idna
python-certifi
Jinja2
MarkupSafe
libgit2
pygit2
## Running
......
......@@ -14,15 +14,15 @@ import globals
#-----------------------------------------------------------------------
def getnewest(catnam,prgnam):
def getnewest(repopath,itempath):
"""
Return git log information for the newest commit in catnam/prgnam
Return git log information about the newest commit for 'itempath'
(this is painfully slow)
"""
glog = subprocess.run(
"git log --pretty=\"format:%ai %h %s\" -n 1 {:s}".format(catnam+"/"+prgnam),
"git log --pretty=\"format:%ai %h %s\" -n 1 {:s}".format(itempath),
shell=True,
cwd=globals.sbdir,
cwd=repopath,
check=True,
stdout=subprocess.PIPE
)
......
......@@ -13,6 +13,8 @@ import datetime
import configparser
import logging
import gitfuncs
#-----------------------------------------------------------------------
def configulator(inidict, varname, defaultvalue=""):
......@@ -86,13 +88,20 @@ os.makedirs(logsdir,exist_ok=True)
if loglevel not in ( "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL" ):
sys.exit("Not a valid log level: {:s}".format(loglevel))
#-----------------------------------------------------------------------
# Get our version identifier
ident = "sbodash {:s}".format(gitfuncs.getnewest(dashdir,".").split(" ")[3])
#-----------------------------------------------------------------------
# Set up logging, and log some debug info
logging.basicConfig( filename=os.path.join(logsdir,"sbodash_{:s}.log".format(updateref.strftime("%Y-%m-%d"))),
format="%(asctime)s %(funcName)s %(message)s",
level=getattr(logging,loglevel) )
level=getattr(logging,loglevel)
)
logging.debug(ident)
logging.debug("configuration:")
logging.debug(" {:s} = {:s}".format("updateref",updateref.strftime("%Y-%m-%d %T")))
logging.debug(" {:s} = {:s}".format("dashdir", dashdir))
......
......@@ -27,6 +27,26 @@ Maintainerindex = []
#-----------------------------------------------------------------------
def setlatest(mntnam, cdate, commit):
    """
    Record commit details into Maintainerinfo[mntnam].

    Stores {"date": cdate, "commit": commit} under the "latest" key of
    Maintainerinfo[mntnam] when that maintainer has no "latest" entry yet,
    or when the stored "date" is older than 'cdate'.  Otherwise the
    existing entry is left untouched.

    Raises KeyError if 'mntnam' is not already a key of Maintainerinfo.
    """
    # No 'global' statement is needed: the dict is mutated in place and
    # the module-level name is never rebound here.
    entry = Maintainerinfo[mntnam]
    if "latest" not in entry or entry["latest"]["date"] < cdate:
        entry["latest"] = {"date": cdate, "commit": commit}
#-----------------------------------------------------------------------
#-----------------------------------------------------------------------
#-----------------------------------------------------------------------
def fetch_maintainers():
"""
Retrieves repology maintainers for the 'slackbuilds' repo.
Returns the fetched data.
"""
def get_repology_by_maintainer(email,sess):
"""
Get relevant details of packages maintained by 'email' from Repology API
......@@ -62,25 +82,12 @@ def get_repology_by_maintainer(email,sess):
return (total,newest,outdated)
def get_repology_all_maintainers(sess):
"""
"""
#-----------------------------------------------------------------------
def setlatest(mntnam,cdate,commit):
def process_maintainers(maintainerdata):
"""
Record commit details into Maintainerinfo[mntnam]
Reads the Repology maintainer data into the Maintainerinfo dict.
"""
global Maintainerinfo, Maintainerindex
if "latest" not in Maintainerinfo[mntnam].keys() or Maintainerinfo[mntnam]["latest"]["date"] < cdate:
Maintainerinfo[mntnam]["latest"] = {"date":cdate, "commit":commit}
#-----------------------------------------------------------------------
def process_maintainers():
"""
......@@ -238,46 +245,30 @@ def process_maintainers():
logging.info("finished -- ${:d} maintainers".format(mntnum))
#-----------------------------------------------------------------------
#-----------------------------------------------------------------------
#-----------------------------------------------------------------------
def load_maintainers():
    """
    Load maintainers into 'Maintainerinfo'.

    Obtains the maintainer data through utils.load(), handing it the
    storage file name "maintainers_data.p", globals.upd_repgy and the
    fetch_maintainers function.  The Maintainerinfo global is then rebound
    to the result of process_maintainers(), and the number of maintainers
    is recorded in the "maintainers.count" statistic.

    NOTE(review): presumably utils.load() only calls fetch_maintainers when
    the stored copy is older than globals.upd_repgy days -- confirm against
    utils.load().
    NOTE(review): Maintainerindex is not assigned in this function;
    presumably process_maintainers() fills it in -- confirm.
    """
    global Maintainerinfo, Maintainerindex
    logging.info("started")

    maintainers_data = utils.load(
        "maintainers_data.p",
        globals.upd_repgy,
        fetch_maintainers
    )

    # Pass on the data that was just loaded.  The name previously used here
    # ('repology_problem_data') is not defined in this function, so the
    # call raised NameError.
    Maintainerinfo = process_maintainers(maintainers_data)

    mntcount = len(Maintainerinfo)
    stats.setStats("maintainers.count", mntcount)
    logging.info("finished -- {:d} maintainers".format(mntcount))
#-----------------------------------------------------------------------
def render_maintainers():
def render_maintainers_report():
"""
Render the maintainers report page
"""
......@@ -293,22 +284,32 @@ def render_maintainers():
#-----------------------------------------------------------------------
def render_maintainer_detail():
def render_maintainers_detail():
"""
Render all the maintainer detail pages
"""
global Maintainerinfo, Maintainerindex
logging.info("started")
for mntnam in SBinfo.Maintainerinfo.items():
for mntnam, mntinfo in SBinfo.Maintainerinfo.items():
email_list = []
newest_commit = []
slackbuild_list = []
problem_list = []
MaintainerDetail = [] #### temporary
MaintainerDetail = { "email_list": email_list,
"newest_commit": newest_commit,
"slackbuild_list": slackbuild_list,
"problem_list": problem_list
}
utils.renderer( page_subdir="maintainers",
page_name=email, #### needs to be Unix-safe
page_name=email,
page_title=mntnam,
template="maintainer_detail",
TemplateData=MaintainerDetail )
template="maintainers_detail",
TemplateData=MaintainerDetail
)
logging.info("finished")
......
......@@ -27,23 +27,38 @@ Problems = []
def catnamprgnam(p):
    """
    Function for sorting a dict by keys 'catnam' and 'prgnam'
    (this is used specifically for sorting 'problems with a small p')

    Returns the sort key "<catnam>/<prgnam>" built from p["catnam"] and
    p["prgnam"]; both values must be strings.
    Raises KeyError if either key is missing from 'p'.
    """
    return p["catnam"] + "/" + p["prgnam"]
#-----------------------------------------------------------------------
def process_repology_problems(rplist,data_updated):
def fetch_repology_problems():
    """
    Retrieves repology problems for the 'slackbuilds' repo.
    Returns the fetched data (the decoded JSON body of the response).

    Raises a requests exception if the request times out, fails, or the
    server answers with an HTTP error status, so that an error response is
    never handed back to the caller as if it were problem data.
    """
    repo = "slackbuilds"
    repologyAPI = "https://repology.org/api/v1/repository/{:s}/problems".format(repo)
    # The API spec doesn't say whether the 'problems' result set is chunked.
    # In practice it seems to be truncated at 500 problems, so let's not go there.
    # Without a timeout, requests waits indefinitely on a stalled connection.
    response = requests.get(repologyAPI, timeout=60)
    response.raise_for_status()
    return response.json()
#-----------------------------------------------------------------------
def process_repology_problems(rp_data):
"""
Reads the raw Repology problems data 'rp_data'.
Returns a list of dicts suitable for merging into the 'Problems' global list:
{ "catnam":"SBo category", "prgnam":"SBo prgnam", "maintainer":"email",
"problem":"problem type", "description":"description text",
"source":"repology" "since":"datestring" }
"source":"repology", "since":"datestring" }
"""
problems = []
for p in rplist:
for p in rp_data:
prgnam = p["name"]
catnam = slackbuilds.SBinfo[prgnam]["catnam"]
......@@ -75,55 +90,23 @@ def process_repology_problems(rplist,data_updated):
#-----------------------------------------------------------------------
def get_repology_problems():
"""
Retrieve repology problems from remote server
"""
repologyAPI = "https://repology.org/api/v1/repository/{:s}/problems".format(repo)
# The API spec doesn't say whether the 'problems' result set is chunked.
# In practice it seems to be truncated at 500 problems, so let's not go there.
repology_problems = requests.get(repologyAPI).json()
#-----------------------------------------------------------------------
def load_repology_problems():
"""
Load repology problems into 'Problems'
Load repology problems into 'Problems'.
"""
global Problems
logging.info("started")
repo = "slackbuilds"
datafile = "repology_problems.p"
updatefile = "repology_problems_updated.p"
prev_repology_updated = utils.load(updatefile)
if prev_repology_updated is None or prev_repology_updated + datetime.timedelta(days=globals.upd_repgy) < globals.updateref:
logging.info("update needed; retrieving from Repology")
logging.debug("retrieved {:d} problems from repology".format(len(repology_problems)))
utils.save(repology_problems,datafile)
data_updated = globals.updateref
utils.save(data_updated,updatefile)
else:
logging.debug("update not needed")
repology_problems=utils.load(datafile)
if repology_problems is None:
logging.error("Error: failed to load old repology problems")
return
logging.debug("loaded {:d} problems from storage".format(len(repology_problems)))
data_updated = prev_repology_updated
if len(repology_problems) == 0:
return
repology_problem_data = utils.load( "repology_problems.p",
globals.upd_repgy,
fetch_repology_problems
)
Problems = process_repology_problems(repology_problems,data_updated)
probcount = len(Problems)
Problems = process_repology_problems(repology_problem_data)
probcount = len(repology_problem_data)
stats.setStats("problems.repology.count",probcount)
stats.addStats("problems.count",probcount)
logging.info("finished -- {:d} problems".format(probcount))
#-----------------------------------------------------------------------
......@@ -143,9 +126,22 @@ def load_build_problems():
#-----------------------------------------------------------------------
def render_problems():
def load_problems():
    """
    Run all the "load" functions, one after another, in this order:
    repology, download, security, build.
    """
    loaders = (
        load_repology_problems,
        load_download_problems,
        load_security_problems,
        load_build_problems,
    )
    for loader in loaders:
        loader()
#-----------------------------------------------------------------------
def render_problems_report():
"""
Render the problems report page
Render the problems report page.
"""
global Problems
logging.info("started")
......
......@@ -19,23 +19,20 @@ import stats
logging.info("RUN STARTED")
from slackbuilds import load_slackbuilds, render_slackbuild_detail
load_slackbuilds()
render_slackbuild_detail()
from slackbuilds import load_slackbuilds, render_slackbuilds_detail
from maintainers import load_maintainers, render_maintainers_report, render_maintainers_detail
from problems import load_problems, render_problems
from support import render_support, render_indexes
from maintainers import load_maintainers, render_maintainers, render_maintainer_detail
load_slackbuilds()
load_maintainers()
render_maintainers()
render_maintainer_detail()
load_problems()
from problems import load_repology_problems, load_download_problems, load_security_problems, load_build_problems, render_problems
load_repology_problems()
load_download_problems()
load_security_problems()
load_build_problems()
render_problems()
render_slackbuilds_detail()
render_maintainers_report()
render_maintainers_detail()
render_problems_report()
from support import render_support, render_indexes
render_support()
render_indexes()
......
......@@ -20,16 +20,28 @@ import utils
import gitfuncs
SBinfo = {}
SBindex = []
# SBcatindex is like a secondary index on SBinfo for catnam :)
SBcatindex = []
#-----------------------------------------------------------------------
def process_slackbuilds(sbinfofile,sbindexfile,updatefile):
def fetch_slackbuilds():
"""
Read data from SlackBuilds info files and git into SBinfo and SBindex
and save them into the persistent data files
Create or update a clone of the SBo repo.
Returns a dictionary of information about each prgnam in the repo,
suitable for loading directly into the SBinfo global.
"""
global SBinfo, SBindex
sbdata = {}
if os.path.isfile(os.path.join(globals.sbdir,"ChangeLog.txt")):
logging.info("pulling from SBo repo")
gitfuncs.pull_sbo()
else:
logging.info("cloning from SBo repo")
gitfuncs.clone_sbo()
logging.debug("reading .info files")
# These regexes are used >7000 times, so we might as well compile them.
......@@ -39,8 +51,6 @@ def process_slackbuilds(sbinfofile,sbindexfile,updatefile):
re_email = re.compile(r"EMAIL=\".*\"")
infocount = 0
catnamlist = []
for infofile in sorted(glob.glob(globals.sbdir+"/*/*/*.info")):
infocount += 1
logging.debug(infofile)
......@@ -70,9 +80,9 @@ def process_slackbuilds(sbinfofile,sbindexfile,updatefile):
logging.warn("{:s} has missing or broken or duplicated entries".format(infofile))
continue
gitnewest = gitfuncs.getnewest(catnam,prgnam)
gitnewest = gitfuncs.getnewest(sbodir,catnam+"/"+prgnam)
SBinfo.update({ prgnam: { "catnam": catnam,
sbdata.update({ prgnam: { "catnam": catnam,
"version": version,
"homepage": homepage,
"maintainer": maintainer,
......@@ -80,59 +90,68 @@ def process_slackbuilds(sbinfofile,sbindexfile,updatefile):
"gitnewest": gitnewest
} })
logging.info("finished -- {:d} info files".format(infocount))
return sbdata
#-----------------------------------------------------------------------
def process_slackbuilds(sbdata):
    """
    Populates the SBinfo and SBcatindex globals from the supplied data structure.
    Returns nothing.

    'sbdata' is a dict mapping each prgnam to a dict that has at least a
    "catnam" key.  SBinfo is rebound to 'sbdata' itself (no copy is made).
    SBcatindex becomes a list of {"catnam": ..., "prgnamlist": [...]}
    entries, one per category, in order of first appearance in 'sbdata'.

    The "slackbuilds.count" statistic is not set here; load_slackbuilds()
    records it after calling this function.
    """
    global SBinfo, SBcatindex
    logging.debug("started")

    SBinfo = sbdata
    SBcatindex = []

    # Look each entry up by catnam instead of appending to SBcatindex[-1],
    # so a category whose prgnams are not contiguous in 'sbdata' still ends
    # up with all of its prgnams in a single entry.
    entries = {}
    for prgnam, prginfo in sbdata.items():
        catnam = prginfo["catnam"]
        entry = entries.get(catnam)
        if entry is None:
            entry = {"catnam": catnam, "prgnamlist": []}
            entries[catnam] = entry
            SBcatindex.append(entry)
        entry["prgnamlist"].append(prgnam)

    # Duplicate prgnams cannot be detected at this point: 'sbdata' is keyed
    # by prgnam, so any duplicates were already collapsed when it was built.
    logging.debug("finished -- {:d} slackbuilds".format(len(SBinfo)))
#-----------------------------------------------------------------------
def load_slackbuilds():
"""
Update the local slackbuilds git repo from remote, and load the data
Load the slackbuilds data into 'SBinfo' and 'SBcatindex'.
"""
global SBinfo, SBindex
global SBinfo, SBcatindex
logging.info("started")
sbinfofile = "slackbuilds_SBinfo.p"
sbindexfile = "slackbuilds_SBindex.p"
updatefile = "slackbuilds_updated.p"
slackbuilds_data = utils.load( "slackbuilds_data.p",
globals.upd_sbo,
fetch_slackbuilds
)
if not os.path.isfile(os.path.join(globals.sbdir,"ChangeLog.txt")):
logging.info("cloning from SBo repo")
gitfuncs.clone_sbo()
process_slackbuilds(sbinfofile,sbindexfile,updatefile)
else:
prev_updated_sbo = utils.load(updatefile)
if prev_updated_sbo is None or prev_updated_sbo + datetime.timedelta(days=globals.upd_sbo) < globals.updateref:
logging.info("update needed; pulling from SBo repo")
gitfuncs.pull_sbo()
process_slackbuilds(sbinfofile,sbindexfile,updatefile)
else:
SBinfo=utils.load(sbinfofile)
SBindex=utils.load(sbindexfile)
logging.debug("update not needed")
process_slackbuilds(slackbuilds_data)
stats.setStats("slackbuilds.count",len(slackbuilds_data))
logging.info("finished")
#-----------------------------------------------------------------------
def render_slackbuild_detail():
def render_slackbuilds_detail():
"""
Render all the slackbuild detail pages
"""
global SBinfo, SBindex
global SBinfo, SBcatindex
logging.info("started")
for prgnam, prginfo in SBinfo.items():
......@@ -156,8 +175,9 @@ def render_slackbuild_detail():
utils.renderer( page_subdir="slackbuilds/"+catnam,
page_name=prgnam,
page_title=catnam+"/"+prgnam,
template="slackbuild_detail",
TemplateData=SlackbuildDetail )
template="slackbuilds_detail",
TemplateData=SlackbuildDetail
)
logging.info("finished")
......
body { font-family: Sans-Serif; font-size: 75%; text-align: left; }
body { font-family: Sans-Serif; font-size: 75%; text-align: left; }
nav img { width: 1.2em; height: 1.2em; margin-right: 0.5em; vertical-align: text-bottom }
footer { font-style: italic; font-size: 0.8em; margin-top: 3rem; }
table { border: 1px solid; border-collapse: collapse }
th, td { padding: 0.5rem; border: 1px solid }
.m-inactive { background-color: #ffdddd }
.m-normal { background-color: #ffffff }
.m-recent { background-color: #ddffdd }
.row-inactive { background-color: #ffdddd }
.row-normal { background-color: #ffffff }
.row-recent { background-color: #ddffdd }
.row-new { background-color: #ffffdd }
.row-fixed { background-color: #ddffdd }
.m { padding:0.5rem; border:1px solid }
.l { text-align: center; padding: 0.5rem; border: 1px solid }
.s { text-align: center; padding: 0.5rem; border: 1px solid }
.n { text-align: center; padding: 0.5rem; border: 1px solid }
.np { text-align: right; padding: 0.5rem; border: 1px solid }
.o { text-align: center; padding: 0.5rem; border: 1px solid }
.op { text-align: right; padding: 0.5rem; border: 1px solid }
.col-text { padding:0.5rem; border:1px solid }
.col-date { text-align: center; padding: 0.5rem; border: 1px solid }
.col-count { text-align: center; padding: 0.5rem; border: 1px solid }
.col-percent { text-align: center; padding: 0.5rem; border: 1px solid }
div.cols { column-width: 10rem }
.cols p { margin: 0 0 0.5em 0; padding: 0 0 0 0 }
#maintainers div.multicol { column-width: 20rem }
#slackbuilds div.multicol { column-width: 10rem }
.multicol p { margin: 0 0 0.5em 0; padding: 0 0 0 0 }
nav img { width: 1.2em; height: 1.2em; margin-right: 0.5em; vertical-align: text-bottom }
footer { font-style: italic; font-size: 0.8em; margin-top: 3rem; }
/* popup stuff */
.t { height: 320px; overflow-y:auto }
......
......@@ -35,7 +35,7 @@ def render_indexes():
utils.renderer( page_subdir="slackbuilds",
page_title="SBo SlackBuilds Index",
template="slackbuilds_index",
TemplateData=slackbuilds.SBindex
TemplateData=slackbuilds.SBcatindex
)
# build logs -- for now, just create an empty directory
......
<footer>
<p>Page updated: {{ page_datetime }}</p>
<p>
Page updated: {{ page_datetime }}
<br>{{ ident }}
</p>
</footer>
......@@ -9,28 +9,36 @@
<h1>{{ page_title }}</h1>
<p>Site updated: {{ page_datetime }} </p>
<section id="statistics">
<h3>Statistics</h3>
<ul>
<li>Site updated: {{ page_datetime }}</li>
<li>SlackBuilds: {{ Stats["slackbuilds.count"] }} ({{ Stats["slackbuilds.uptodate"] }} up-to-date, {{ Stats["slackbuilds.outofdate"] }} out-of-date)</li>
<li>Maintainers: {{ Stats["maintainers.count"] }} ({{ Stats["maintainers.recent"] }} recent, {{ Stats["maintainers.inactive"] }} inactive)</li>
<li>Problems: {{ Stats["problems.count"] }}</li>
</ul>
</section>
<section id="reports">
<h3>Reports</h3>
<ul>
<li><a href="{{ siteurl }}/reports/maintainers.html">Maintainers report</a></li>
<li><a href="{{ siteurl }}/reports/problems.html">Problems report</a></li>
</ul>
</section>
<section id="browse">
<h3>Browse</h3>
<ul>
<li><a href="{{ siteurl }}/maintainers/">Maintainer details</a></li>
<li><a href="{{ siteurl }}/slackbuilds/">SlackBuild details</a></li>
<li><a href="{{ siteurl }}/buildlogs/">Build logs</a></li>
</ul>
</section>
<footer>
<p>{{ ident }}</p>
</footer>
</body>
</html>
<!DOCTYPE html>
<html>
{% include 'head.jinja2' %}
<body>
{% include 'nav.jinja2' %}
<h1>{{ page_title }}</h1>
<p>Email <i>not yet implemented</i></p>
<p>Last commit <i>not yet implemented</i></p>
<p><a href="https://repology.org/metapackages/?maintainer={{ TemplateData['email'] }}" target="_blank">Repology</a></p>
<h2>SlackBuilds</h2>
<p><i>not yet implemented</i></p>
<h2>Problems</h2>
<p><i>not yet implemented</i></p>
{% include 'footer.jinja2' %}
</body>
</html>
......@@ -9,7 +9,8 @@
<h1>{{ page_title }}</h1>
<section>
<section id="statistics">
<h2>Statistics</h2>
<p>SlackBuilds
<br>&nbsp;&nbsp;&nbsp;total: {{ "{:d}".format(Stats["slackbuilds.count"]) }}
<br>&nbsp;&nbsp;&nbsp;newest: {{ "{:d} ({:.1f}%)".format(Stats["slackbuilds.uptodate"],Stats["slackbuilds.uptodate"]*100/Stats["slackbuilds.count"]) }}
......@@ -23,32 +24,32 @@
</p>
</section>
<section>
<section id="report">
<table class="sortable">
<thead>
<tr>
<th class="m"><a href="#!">Maintainer</a></th>
<th class="l"><a href="#!">Latest</a></th>
<th class="s"><a href="#!">SlackBuilds</a></th>
<th class="n"><a href="#!">Newest</a></th>
<th class="np"><a href="#!">%</a></th>
<th class="o"><a href="#!">Out of date</a></th>
<th class="op"><a href="#!">%</a></th>
<th class="col-text"><a href="#!">Maintainer</a></th>
<th class="col-date"><a href="#!">Latest</a></th>
<th class="col-count"><a href="#!">SlackBuilds</a></th>
<th class="col-count"><a href="#!">Newest</a></th>
<th class="col-percent"><a href="#!">%</a></th>
<th class="col-count"><a href="#!">Out of date</a></th>
<th class="col-percent"><a href="#!">%</a></th>
</tr>
</thead>
<tbody>
{% for mntnam, mntdeets in TemplateData|dictsort(true) %}
<tr id="m{{ mntdeets['mntnum'] }}" class="{{ 'm-'+mntdeets['status'] }}">
<td class="m">{{ mntdeets["tdmaintainer"] }}</td>
<td class="l">{{ mntdeets["tdlatest"] }}</td>
<td class="s"><a href="#sb{{ mntdeets["mntnum"] }}">{{ mntdeets["tdslackbuilds"] }}</a></td>
<td class="n">{{ mntdeets["tdnewest"] }}</td>