Commit 75070d60 authored by Mantas Zimnickas's avatar Mantas Zimnickas

Change inventory table format

After working with inventory tables for some time, I found many issues and
decided to change the inventory table format in order to make it easier to
work with.

New inventory table format looks like this:

    d | r | b | m | property   | source     | type    | ref         | level | access | title | description
    datasets/test/data         |            |         |             |       |        |       |
      | sqlite                 |            |         |             |       |        |       |
      |   |                    |            |         |             |       |        |       |
      |   |   | city           | CITY       |         | id          |       |        |       |
      |   |   |   | id         | ID         | integer |             | 4     |        |       |
      |   |   |   | country_id | COUNTRY_ID | ref     | country[id] | 4     |        |       |
      |   |   |   | name       | NAME       | string  |             | 3     |        |       |
      |   |                    |            |         |             |       |        |       |
      |   |   | country        | COUNTRY    |         | id          |       |        |       |
      |   |   |   | id         | ID         | integer |             | 4     |        |       |
      |   |   |   | area       | AREA       | integer |             | 3     |        |       |
      |   |   |   | name       | NAME       | string  |             | 3     |        |       |

Issue: #838
parent 82784610
Pipeline #122513680 failed with stage
in 2 minutes and 43 seconds
......@@ -28,31 +28,49 @@ def inspect(engine, schema=None):
for table in insp.get_table_names(schema):
pkey = insp.get_pk_constraint(table, schema)
if pkey:
yield (
table,
pkey['constrained_columns'],
'pk',
'',
)
fkeys = insp.get_foreign_keys(table, schema)
refs = {}
for fkey in fkeys:
for col, ref in zip(fkey['constrained_columns'], fkey['referred_columns']):
assert col not in refs, (table, col, refs)
refs[col] = {
'schema': fkey['referred_schema'],
'table': fkey['referred_table'],
'column': ref,
}
for column in insp.get_columns(table, schema):
yield (
table,
column['name'],
detect_type(column['type']),
refs.get(column['name'], ''),
)
pkeys = pkey['constrained_columns']
else:
pkeys = []
model = {
'model': table.lower(),
'source': table,
'ref': ', '.join([pk.lower() for pk in pkeys]),
}
props = inspect_table(insp, table, schema, pkeys)
yield model, props
def inspect_table(insp, table, schema, pkeys):
    """Yield one inventory property row (a dict) per column of ``table``.

    Args:
        insp: Object providing ``get_foreign_keys(table, schema)`` and
            ``get_columns(table, schema)``.
        table: Name of the table to inspect, as known to the database.
        schema: Schema name passed through to ``insp``; may be falsy.
        pkeys: Collection of primary key column names of ``table``.

    Yields:
        Dicts with the keys ``property`` (lower-cased column name),
        ``source`` (column name as reflected), ``type`` (``'ref'`` for
        foreign key columns, otherwise ``detect_type(column['type'])``),
        ``ref`` (``table[column]`` or ``schema.table[column]`` target for
        foreign key columns, otherwise ``''``) and ``level`` (4 for primary
        or foreign key columns, 3 for everything else).

    Raises:
        ValueError: If a column takes part in more than one foreign key, so
            a single ``ref`` value cannot describe it.  Because this is a
            generator, the error surfaces on first iteration.
    """
    # Map each foreign key column to its textual reference target.
    refs = {}
    for fkey in insp.get_foreign_keys(table, schema):
        refschema = fkey['referred_schema']
        reftable = fkey['referred_table'].lower()
        pairs = zip(fkey['constrained_columns'], fkey['referred_columns'])
        for col, refcol in pairs:
            if col in refs:
                # An explicit exception (rather than ``assert``) keeps this
                # check alive when Python runs with -O.
                raise ValueError(
                    f"Column {col!r} of table {table!r} is used in more than "
                    f"one foreign key, already refers to {refs[col]!r}."
                )
            refcol = refcol.lower()
            # Qualify the target with its schema only when it differs from
            # the schema being inspected.
            if schema and refschema and refschema != schema:
                refs[col] = f'{refschema}.{reftable}[{refcol}]'
            else:
                refs[col] = f'{reftable}[{refcol}]'

    for column in insp.get_columns(table, schema):
        name = column['name']
        is_ref = name in refs
        yield {
            'property': name.lower(),
            'source': name,
            'type': 'ref' if is_ref else detect_type(column['type']),
            'ref': refs.get(name, ''),
            'level': 4 if is_ref or name in pkeys else 3,
        }
def detect_type(ctype):
......@@ -65,61 +83,26 @@ def detect_type(ctype):
))
def writecsv(f, cols, params=None):
params = {
'dataset': '',
'resource': '',
'origin': '',
'model': '{table}',
**(params or {}),
}
writer = csv.writer(f)
writer.writerow([
def writecsv(f, models, dataset='', resource=''):
writer = csv.DictWriter(f, [
'dataset',
'resource',
'origin',
'base',
'model',
'property',
'source',
'type',
'ref',
'const',
'level',
'access',
'title',
'description',
'table',
'column',
'ref.table',
'ref.column',
])
for table, column, ctype, ref in cols:
kwargs = {
'table': table.lower(),
}
if ctype == 'pk':
dtype = 'pk'
prop = '_id'
column = ','.join(column)
else:
prop = column.lower()
if ref:
dtype = 'ref'
else:
dtype = ctype
writer.writerow([
params['dataset'].format(**kwargs),
params['resource'].format(**kwargs),
params['origin'].format(**kwargs),
params['model'].format(**kwargs),
prop,
dtype,
params['model'].format(**{**kwargs, 'table': ref['table'].lower()}) if ref else '',
'', # const
'', # title
'', # description
table,
column,
ref['table'] if ref else '',
ref['column'] if ref else '',
])
writer.writeheader()
writer.writerow({'dataset': dataset})
writer.writerow({'resource': resource})
for model, props in models:
writer.writerow({'base': ''})
writer.writerow(model)
for prop in props:
writer.writerow(prop)
......@@ -12,24 +12,16 @@ from lodam.services.sqlschema import inspect, writecsv
@click.argument('dsn')
@click.option('--schema', help="database schema name")
@click.option('-o', '--output', help="output file")
@click.option('--dataset', default='sql', help="dataset name tamplate")
@click.option('--resource', default='sql', help="resource name template")
@click.option('--origin', default='sql', help="origin name template")
@click.option('--model', default='{table}', help="model name template")
def main(dsn, schema, output, dataset, resource, origin, model):
@click.option('--dataset', default='datasets/example/data', help="dataset name")
@click.option('--resource', default='sql', help="resource name")
def main(dsn, schema, output, dataset, resource):
engine = sa.create_engine(dsn)
cols = inspect(engine, schema)
params = {
'dataset': dataset,
'resource': resource,
'origin': origin,
'model': model,
}
models = inspect(engine, schema)
if output:
with open(output, 'w') as f:
writecsv(f, cols, params)
writecsv(f, models, dataset=dataset, resource=resource)
else:
writecsv(sys.stdout, cols, params)
writecsv(sys.stdout, models, dataset=dataset, resource=resource)
if __name__ == "__main__":
......
from typing import Iterable, Dict
import io
import csv
import sqlalchemy as sa
from lodam.services.sqlschema import inspect, writecsv
from lodam.services import sqlschema
def pretty(rows: Iterable[Dict]):
    """Render inventory table rows as an aligned, pipe-separated text table.

    The first four columns (dataset, resource, base, model) together with
    ``property`` form a hierarchy: a row shows only the first non-empty of
    these values, indented by one single-character column per hierarchy
    level and stretched over the remaining hierarchy columns.  All other
    columns are padded to their widest value.

    Args:
        rows: Row mappings keyed by column name.  Missing keys and ``None``
            values are rendered as empty cells; other values are converted
            with ``str()``.

    Returns:
        The table as text.  Every line is indented by four spaces and
        right-stripped, and the text ends with a newline followed by the
        four-space indent.
    """
    cols = [
        'dataset',
        'resource',
        'base',
        'model',
        'property',
        'source',
        'type',
        'ref',
        'level',
        'access',
        'title',
        'description',
    ]
    hpos = cols.index('property')
    hsize = 1  # hierarchical column size
    bsize = 3  # border size

    def cell(row, col):
        # Single place for cell access, so the sizing pass and the
        # rendering pass always agree on the text of a cell.
        value = row.get(col)
        return '' if value is None else str(value)

    # Hierarchy columns are one character wide, the rest start out as wide
    # as their header.
    sizes = {col: 1 for col in cols[:hpos]}
    sizes.update({col: len(col) for col in cols[hpos:]})

    rows = list(rows)
    for row in rows:
        for i, col in enumerate(cols):
            width = len(cell(row, col))
            if i < hpos:
                # A hierarchy value spans from its own column up to and
                # including 'property'; widen 'property' if it does not fit.
                size = (hsize + bsize) * (hpos - i) + sizes['property']
                if size < width:
                    sizes['property'] += width - size
            elif sizes[col] < width:
                sizes[col] = width

    # Header line, hierarchy column names are cut to a single character.
    lines = [[col[:sizes[col]].ljust(sizes[col]) for col in cols]]

    # `depth` is deliberately carried over between rows: the indentation of
    # an empty separator row depends on the row preceding it.
    depth = 0
    for row in rows:
        for i, col in enumerate(cols[:hpos + 1]):
            val = cell(row, col)
            if val:
                depth = i
                break
        else:
            # No hierarchy value at all, this is an empty separator row.
            val = ''
            if depth < hpos:
                depth += 1
            else:
                depth = 2

        line = [' ' * hsize] * depth
        size = (hsize + bsize) * (hpos - depth) + sizes['property']
        line.append(val.ljust(size))
        line.extend(cell(row, col).ljust(sizes[col]) for col in cols[hpos + 1:])
        lines.append(line)

    indent = '    '
    text = '\n'.join(indent + ' | '.join(line).rstrip() for line in lines)
    return text + '\n' + indent
def test_inspect():
def inspect(sql, dataset='datasets/test/data', resource='sqlite'):
engine = sa.create_engine('sqlite://')
engine.execute('CREATE TABLE t(foo INTEGER NOT NULL, bar INTEGER NOT NULL, PRIMARY KEY(foo, bar));')
cols = inspect(engine)
engine.connect().connection.cursor().executescript(sql)
models = sqlschema.inspect(engine)
f = io.StringIO()
writecsv(f, cols)
sqlschema.writecsv(f, models, dataset=dataset, resource=resource)
f.seek(0)
assert list(csv.DictReader(f)) == [
{
'dataset': '',
'resource': '',
'origin': '',
'model': 't',
'property': '_id',
'type': 'pk',
'ref': '',
'const': '',
'title': '',
'description': '',
'table': 't',
'column': 'foo,bar',
'ref.table': '',
'ref.column': '',
},
{
'dataset': '',
'resource': '',
'origin': '',
'model': 't',
'property': 'foo',
'type': 'integer',
'ref': '',
'const': '',
'title': '',
'description': '',
'table': 't',
'column': 'foo',
'ref.table': '',
'ref.column': '',
},
{
'dataset': '',
'resource': '',
'origin': '',
'model': 't',
'property': 'bar',
'type': 'integer',
'ref': '',
'const': '',
'title': '',
'description': '',
'table': 't',
'column': 'bar',
'ref.table': '',
'ref.column': '',
},
]
return list(csv.DictReader(f))
def test_no_pk():
table = inspect('''\
CREATE TABLE BAZ(
foo INTEGER NOT NULL,
bar TEXT NOT NULL
);
''')
assert pretty(table) == '''\
d | r | b | m | property | source | type | ref | level | access | title | description
datasets/test/data | | | | | | |
| sqlite | | | | | | |
| | | | | | | | |
| | | baz | BAZ | | | | | |
| | | | foo | foo | integer | | 3 | | |
| | | | bar | bar | string | | 3 | | |
'''
def test_pk():
table = inspect('''\
CREATE TABLE BAZ(
ID INTEGER NOT NULL,
PRIMARY KEY(ID)
);
''')
assert pretty(table) == '''\
d | r | b | m | property | source | type | ref | level | access | title | description
datasets/test/data | | | | | | |
| sqlite | | | | | | |
| | | | | | | | |
| | | baz | BAZ | | id | | | |
| | | | id | ID | integer | | 4 | | |
'''
def test_two_pkeys():
table = inspect('''\
CREATE TABLE BAZ(
foo INTEGER NOT NULL,
bar INTEGER NOT NULL,
PRIMARY KEY(foo, bar)
);
''')
assert pretty(table) == '''\
d | r | b | m | property | source | type | ref | level | access | title | description
datasets/test/data | | | | | | |
| sqlite | | | | | | |
| | | | | | | | |
| | | baz | BAZ | | foo, bar | | | |
| | | | foo | foo | integer | | 4 | | |
| | | | bar | bar | integer | | 4 | | |
'''
def test_fkeys():
table = inspect('''\
CREATE TABLE COUNTRY(
ID INTEGER NOT NULL PRIMARY KEY,
AREA INTEGER NULL,
NAME TEXT NOT NULL
);
CREATE TABLE CITY(
ID INTEGER NOT NULL PRIMARY KEY,
COUNTRY_ID INTEGER,
NAME TEXT NOT NULL,
FOREIGN KEY (COUNTRY_ID) REFERENCES COUNTRY (ID)
);
''')
assert pretty(table) == '''\
d | r | b | m | property | source | type | ref | level | access | title | description
datasets/test/data | | | | | | |
| sqlite | | | | | | |
| | | | | | | | |
| | | city | CITY | | id | | | |
| | | | id | ID | integer | | 4 | | |
| | | | country_id | COUNTRY_ID | ref | country[id] | 4 | | |
| | | | name | NAME | string | | 3 | | |
| | | | | | | | |
| | | country | COUNTRY | | id | | | |
| | | | id | ID | integer | | 4 | | |
| | | | area | AREA | integer | | 3 | | |
| | | | name | NAME | string | | 3 | | |
'''
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment