Commit 28dcd2fe authored by Helder Guerreiro

Fix the scraper

The parliament website changed its HTML slightly.
parent 847fa25d
......@@ -173,8 +173,13 @@ class ParlamentoIndex:
'24_968bfc19cf59_ctl00_pnlResults'}).find(
'div',
{'class': 'row margin_h0 margin-Top-15'})
for date, number, mtype, _ in chunks(
table.find_all('div', recursive=False)[:-1], 4):
for line in table.findAll(
'div', {'class': 'row margin_h0 margin-Top-15'}):
cells = line.findAll('div')
date = cells[0]
number = cells[2]
mtype = cells[-1]
try:
schedule_url = number.a['href']
except KeyError:
......@@ -185,7 +190,7 @@ class ParlamentoIndex:
date.a.renderContents(), '%Y-%m-%d'),
'attendance_bid': int(date.a['href'].split('=')[1]),
'number': int(number.a.renderContents()),
'type': mtype.find_all('div')[-1].renderContents(),
'type': mtype.renderContents(),
'schedule_url': schedule_url
}
......@@ -281,12 +286,17 @@ def attendance_read(meeting):
'div',
{'id': 'ctl00_ctl52_g_6319d967_bcb6_4ba9'
'_b9fc_c9bb325b19f1_ctl00_pnlDetalhe'})
for mp, party, status, reason, _ in chunks(
table.find_all('div', recursive=False)[2:], 5):
for line in table.findAll(
'div', {'class': 'row margin_h0 margin-Top-15'}):
cells = line.findAll('div')
mp = cells[0]
party = cells[1]
status = cells[2]
reason = cells[3]
yield {
'name': mp.a.renderContents(),
'mp_bid': int(mp.a['href'].split('=')[1]),
'party': party.span.renderContents(),
'status': status.span.renderContents(),
'reason': reason.span.renderContents(),
}
'party': party.select("span:nth-of-type(2)")[0].renderContents(),
'status': status.select("span:nth-of-type(2)")[0].renderContents(),
'reason': reason.select("span:nth-of-type(2)")[0].renderContents(),
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment