Commit 945e39c6 authored by David Spencer's avatar David Spencer

changelog-parser: Added.

parent 382e33e2
SlackBuilds.org related scripts
SlackBuilds.org related stuff
* maintainer-status.py -- Using public info, list all the maintainers
* maintainer-status -- Using public info, list all the maintainers
of SlackBuilds.org with links to their Repology pages, last commit
date, etc.
* review -- Review, approve and push SlackBuilds.org submissions
* changelog-parser -- Parse a Slackware-style ChangeLog.txt and output it
as JSON
parser.out
parsetab.py
__pycache__
#!/usr/bin/python3
"""
Parse Slackware (and friends) ChangeLog.txt.
Requires: python3, and ply (built with python3 support)
This proof-of-concept parses into a Python structure and then
writes it as JSON, which should end up looking like this:
[
{
date: "string",
motd: "string",
itemlist: [
{ itemname:"string", description:"string", securityfix:BOOL },
{ itemname:"string", description:"string", securityfix:BOOL },
...
]
},
{
date: "string",
motd: "string",
itemlist: [
{ itemname:"string", description:"string", securityfix:BOOL },
{ itemname:"string", description:"string", securityfix:BOOL },
...
]
},
...
]
"""
#-----------------------------------------------------------------------
import sys
import json
from ply import *
#-----------------------------------------------------------------------
# Extra lexer states used by the t_header_* and t_itemdesc_* rules below;
# both are declared 'exclusive'.
states = (
    ("header", "exclusive"),
    ("itemdesc", "exclusive"),
)

# Token names made known to the lexer and parser.  The SEPARATOR and BLANK
# rules in this file return nothing, so those two never reach the grammar.
tokens = (
    "SEPARATOR",
    "BLANK",
    "DATE",
    "MOTDCHUNK",
    "ITEMNAME",
    "DESCRIPTIONCHUNK",
    "SECURITYFIX",
)
# INITIAL state
# INITIAL state: swallow a '+----...----+' separator line together with any
# newlines that follow it.  No token is emitted.
def t_INITIAL_SEPARATOR(t):
    r'\+--------------------------\+\n+'
    # The rule ends in '\n+', so one match may cover several line ends;
    # count every one of them instead of assuming exactly one.
    t.lexer.lineno += t.value.count("\n")
# INITIAL state: a line made only of spaces.  It is counted for line
# numbering and then discarded (nothing is returned, so no token results).
def t_INITIAL_BLANK(t):
    r'\ *\n'
    lexer = t.lexer
    lexer.lineno = lexer.lineno + 1
    return None
# INITIAL state: the rule matches any whole line; it is emitted as a DATE
# token with trailing whitespace removed, and the lexer moves on to the
# 'header' state.
def t_INITIAL_DATE(t):
    r'.*\n'
    lexer = t.lexer
    lexer.lineno += 1
    stripped = t.value.rstrip()
    t.value = stripped
    lexer.begin('header')
    return t
# header state
# header state: an item name is a run of characters up to a colon.  Seeing
# one ends the header text and switches the lexer to the 'itemdesc' state.
def t_header_ITEMNAME(t):
    r'[^ :\n]+: +'
    # '\n' is excluded from the name so that a blank line, or a one-word
    # line, directly before an item cannot be absorbed into the item name
    # (which would also leave lineno uncounted for that newline).
    # NOTE(review): PLY compiles rules with re.VERBOSE by default, so the
    # unescaped space in ': +' is presumably ignored -- confirm.
    t.value = t.value.rstrip(": ")
    t.lexer.begin('itemdesc')
    return t
# header state: a '+----...----+' separator line ends the current entry.
# The lexer returns to the INITIAL state and no token is emitted.
def t_header_SEPARATOR(t):
    r'\+--------------------------\+\n+'
    # The rule ends in '\n+', so one match may cover several line ends;
    # count every one of them instead of assuming exactly one.
    t.lexer.lineno += t.value.count("\n")
    t.lexer.begin('INITIAL')
# header state: the rule matches any whole line; it is emitted unchanged
# (newline included) as one MOTDCHUNK token.
def t_header_MOTDCHUNK(t):
    r'.*\n'
    lexer = t.lexer
    lexer.lineno = lexer.lineno + 1
    return t
# itemdesc state
# itemdesc state: a line consisting of a 'Security fix' marker wrapped in
# '(', '*', ')' and space characters.  Emitted unchanged as SECURITYFIX.
def t_itemdesc_SECURITYFIX(t):
    r'[(* ]+[Ss]ecurity\ [Ff]ix[)* ]+\n'
    lexer = t.lexer
    lexer.lineno = lexer.lineno + 1
    return t
# itemdesc state: a '+----...----+' separator line ends the current entry.
# The lexer returns to the INITIAL state and no token is emitted.
def t_itemdesc_SEPARATOR(t):
    r'\+--------------------------\+\n+'
    # The rule ends in '\n+', so one match may cover several line ends;
    # count every one of them instead of assuming exactly one.
    t.lexer.lineno += t.value.count("\n")
    t.lexer.begin('INITIAL')
# itemdesc state: the start of the next item -- a run of characters up to a
# colon.  The trailing colon/space characters are stripped from the value.
def t_itemdesc_ITEMNAME(t):
    r'[^ :\n]+: +'
    # '\n' is excluded from the name so that a blank line, or a line with no
    # spaces, directly before an item cannot be absorbed into the item name
    # (which would also leave lineno uncounted for that newline).
    t.value = t.value.rstrip(": ")
    # Kept from the original: (re-)selects the 'itemdesc' state.
    t.lexer.begin('itemdesc')
    return t
# itemdesc state: the rule matches any whole line; it becomes one
# DESCRIPTIONCHUNK with up to two leading spaces of indentation removed.
def t_itemdesc_DESCRIPTIONCHUNK(t):
    r'.*\n'
    # Test the two-space prefix first.  Checking a single space in both
    # branches made the second branch unreachable and cut the first real
    # character off lines indented by only one space.
    if t.value.startswith("  "):
        t.value = t.value[2:]
    elif t.value.startswith(" "):
        t.value = t.value[1:]
    t.lexer.lineno += 1
    return t
# others
# Any state: called when no rule matches at the current position.  Reports
# the offending character and skips past it so lexing can continue.
def t_ANY_error(t):
    # Standard output carries the JSON result of this script, so the
    # diagnostic is written to standard error to keep that output parseable.
    print("We are lost at '%s'" % t.value[0], file=sys.stderr)
    t.lexer.skip(1)
# Any state: end-of-input hook.  It supplies no further input and simply
# hands back None.
def t_ANY_eof(t):
    nothing_more = None
    return nothing_more
#-----------------------------------------------------------------------
# Build the lexer.  lex.lex() is called without arguments, so it presumably
# collects `states`, `tokens` and the t_* rules from this module -- see the
# PLY documentation.
lexer = lex.lex()
#-----------------------------------------------------------------------
def pdbg(str, p):
    # Debug helper for grammar actions: would print the label `str` and then
    # every element of `p` after the first.  Tracing is hard-wired off, so a
    # call currently prints nothing.
    # NOTE(review): the loop is assumed to belong to the disabled branch --
    # confirm against the upstream file.
    tracing_enabled = False
    if not tracing_enabled:
        return
    print(str)
    for symbol in p[1:]:
        print(" ", symbol)
def p_entrylist(p):
    """
    entrylist : entry entrylist
              | entry
    """
    # Builds the list of entries.  For a single entry a fresh one-element
    # list is created; otherwise the entry is appended to the list already
    # built for the entries that follow it (that list is reused in place).
    # NOTE(review): appending means the result is in reverse input order --
    # confirm this is intended.
    symbol_count = len(p)
    if symbol_count == 2:
        p[0] = [p[1]]
    elif symbol_count == 3:
        following = p[2]
        following.append(p[1])
        p[0] = following
def p_entry(p):
    """
    entry : header itemlist
          | header
    """
    # An entry is the header dict itself (not a copy) with an "itemlist" key
    # added: the parsed item list when there is one, otherwise an empty list.
    symbol_count = len(p)
    if symbol_count not in (2, 3):
        return
    entry = p[1]
    p[0] = entry
    entry["itemlist"] = p[2] if symbol_count == 3 else []
def p_header(p):
    """
    header : DATE motd
           | DATE
    """
    # A header is a new dict holding the date and the motd text; the motd is
    # the empty string when the entry has none.
    symbol_count = len(p)
    if symbol_count == 3:
        motd_text = p[2]
    elif symbol_count == 2:
        motd_text = ""
    else:
        return
    p[0] = {"date": p[1], "motd": motd_text}
def p_motd(p):
    """
    motd : MOTDCHUNK motd
         | MOTDCHUNK
    """
    # Joins consecutive MOTDCHUNK values, in input order, into one value.
    symbol_count = len(p)
    if symbol_count == 2:
        p[0] = p[1]
    elif symbol_count == 3:
        first_chunk, remainder = p[1], p[2]
        p[0] = first_chunk + remainder
def p_itemlist(p):
    """
    itemlist : item itemlist
             | item
    """
    # Builds the list of items.  For a single item a fresh one-element list
    # is created; otherwise the item is appended to the list already built
    # for the items that follow it (that list is reused in place).
    # NOTE(review): appending means the result is in reverse input order --
    # confirm this is intended.
    symbol_count = len(p)
    if symbol_count == 2:
        p[0] = [p[1]]
    elif symbol_count == 3:
        following = p[2]
        following.append(p[1])
        p[0] = following
def p_item(p):
    """
    item : ITEMNAME descseq SECURITYFIX descseq
         | ITEMNAME descseq SECURITYFIX
         | ITEMNAME descseq
    """
    # An item is a dict of its name, its description text and a flag that is
    # True when a SECURITYFIX token was present.  Description text found
    # after the marker is appended to the text before it; the marker line
    # itself is not kept.
    symbol_count = len(p)
    text = p[2]
    if symbol_count == 5:
        text = text + p[4]
    flagged = symbol_count == 5 or symbol_count == 4
    p[0] = {"itemname": p[1], "description": text, "securityfix": flagged}
def p_descseq(p):
    """
    descseq : DESCRIPTIONCHUNK descseq
            | DESCRIPTIONCHUNK
    """
    # Joins consecutive DESCRIPTIONCHUNK values, in input order, into one
    # value.
    symbol_count = len(p)
    if symbol_count == 2:
        p[0] = p[1]
    elif symbol_count == 3:
        first_chunk, remainder = p[1], p[2]
        p[0] = first_chunk + remainder
def p_error(p):
    # Parser error hook: reports a syntax error followed by the value it was
    # called with.  Standard output carries the JSON result of this script,
    # so both lines go to standard error to keep that output parseable.
    print("Syntax error in input!", file=sys.stderr)
    print(p, file=sys.stderr)
#-----------------------------------------------------------------------
# Build the parser.  yacc.yacc() is called without arguments, so it
# presumably collects the p_* grammar rules from this module -- see the PLY
# documentation.
parser = yacc.yacc()
# Read all of standard input in one go and parse it into the structure
# assembled by the p_* actions above.
changelog = parser.parse(sys.stdin.read())
#-----------------------------------------------------------------------
# Write the parsed structure to standard output as JSON, with sorted keys
# and a four-space indent.
print(json.dumps(changelog, sort_keys=True, indent=4))
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment