...
 
Commits (8)
Romain Bignon <romain@symlink.me> <romain@peerfuse.org>
Romain Bignon <romain@symlink.me> <romain@budget-insight.com>
Romain Bignon <romain@symlink.me> <romain@weboob.org>
Juke <juke@free.fr> <juke@jhebert.priv.staff.proxad.net>
Juke <juke@free.fr> Julien Hebert <juke@free.fr>
Juke <juke@free.fr> <juke@budget-insight.com>
Clément Schreiner <clement@mux.me> <clemux@clemux.info>
Noé Rubinstein <noe.rubinstein@gmail.com>
Johann Broudin <johann.broudin@6-8.fr> <Johann.Broudin@6-8.fr>
Florent Fourcot <weboob@flo.fourcot.fr> Florent <weboob@flo.fourcot.fr>
Johann Broudin <johann.broudin@6-8.fr> <Johann.Broudin@hsc.fr>
Florent Fourcot <weboob@flo.fourcot.fr> <weboob@flo.fourcot.fr>
Florent Fourcot <weboob@flo.fourcot.fr> <florent.fourcot@resel.fr>
Florent Fourcot <weboob@flo.fourcot.fr> <ffourcot@fourcot.fr>
Florent Fourcot <weboob@flo.fourcot.fr> ffourcot <ffourcot@fourcot.fr>
Florent Fourcot <weboob@flo.fourcot.fr> Florent Fourcot <florent.fourcot@wifirst.fr>
Florent Fourcot <weboob@flo.fourcot.fr> <florent.fourcot@wifirst.fr>
Nicolas Duhamel <nicolas@jombi.fr> <nicolas@macmini.local>
Gabriel Kerneis <gabriel@kerneis.info> <kerneis@pps.jussieu.fr>
Christophe Lampin <weboob@lampin.net> Kitof <git@kitof.net>
Ahmed Boussadia <ahmed.boussadia@gmail.com> Ahmed Bousadia <ahmed.boussadia@gmail.com>
Simon Murail <simon.murail@budget-insight.com> smurail <simon.murail@budget-insight.com>
Christophe Lampin <weboob@lampin.net> <git@kitof.net>
Ahmed Boussadia <ahmed.boussadia@gmail.com>
Simon Murail <simon.murail@budget-insight.com>
Simon Murail <simon.murail@budget-insight.com> <simurail@yahoo.fr>
Vincent Paredes <vparedes@budget-insight.com> <vincentparedes@gmail.com>
Jean-Philippe Dutrève <jdutreve@winancial.com>
Jean-Philippe Dutrève <jdutreve@winancial.com> <jdutreve@gmail.com>
Jean-Philippe Dutrève <jdutreve@winancial.com> Jean-Philippe Dutreve <jdutreve@winancial.com>
Baptiste Delpey <bdelpey@budget-insight.com> <bdelpey@budget-insight.fr> bdelpey <bdelpey@budget-insight.fr> <baptiste@uhu.home>
Baptiste Delpey <bdelpey@budget-insight.com> <bdelpey@budget-insight.fr>
Baptiste Delpey <bdelpey@budget-insight.com> <b.delpey@gmail.com>
Baptiste Delpey <bdelpey@budget-insight.com> <b.delpey@hotmail.fr>
Baptiste Delpey <bdelpey@budget-insight.com> <baptiste@uhu.home>
Samuel Loury <konubinixweb@gmail.com> <samuel.loury@cosmo-platform.org>
Vincent Ardisson <vardisson@budget-insight.com> <dev@indigo.re>
Benjamin CARTON <carton_ben@yahoo.fr> Bezleputh <carton_ben@yahoo.fr>
Phyks (Lucas Verney) <phyks@phyks.me> Phyks <phyks+weboob@phyks.me>
Victor Kannemacher <vkannemacher@budget-insight.com> <vkannemacher.budgetinsight@gmail.com>
Victor Kannemacher <vkannemacher@budget-insight.com> <vkannema@student.42.fr>
Vincent Ardisson <vardisson@budget-insight.com> <va@budget-insight.com>
Benjamin CARTON <carton_ben@yahoo.fr>
Phyks (Lucas Verney) <phyks@phyks.me> <phyks+weboob@phyks.me>
Victor Kannemacher <victor.kannemacher@budget-insight.com> <vkannemacher.budgetinsight@gmail.com>
Victor Kannemacher <victor.kannemacher@budget-insight.com> <vkannema@student.42.fr>
Victor Kannemacher <victor.kannemacher@budget-insight.com> <vkannemacher@budget-insight.com>
Étienne Lacheré <etienne.lachere@budget-insight.com>
Étienne Lacheré <etienne.lachere@budget-insight.com> <elacher@budget-insight.com>
Étienne Lacheré <etienne.lachere@budget-insight.com> <etiennelachere93@gmail.com>
Tony Malto <tmalto@budget-insight.com> <tmalto.bi@gmail.com>
Barthélemy Gouby <bgouby@budget-insight.com> <barth@double-decimetre.lan.budget-insight.com>
Barthélemy Gouby <bgouby@budget-insight.com> <barthelemy.gouby@gmail.com>
Sébastien JEAN <sebj42@gmail.com>
Sébastien JEAN <sebj42@gmail.com> <sebastien.jean@geneanet.org>
Célande Adrien <celande.adrien@budget-insight.com>
Florian Duguet <florian.duguet@budget-insight.com> <florian@banksup.fr>
Maxime Pommier <maxime.pommier@budget-insight.com>
Lowik BOURGEOISAT <lowik.bourgeoisat@geneanet.org>
Simon Lipp <sloonz@gmail.com> <laiquo@hwold.net>
ZeHiro <mail+weboob@abossy.fr> <mail-github@abossy.fr>
Julien Veyssier <eneiluj@posteo.net> <eneiluj@gmx.fr>
Julien Veyssier <eneiluj@posteo.net> <julien.veyssier@aiur.fr>
Juliette Fourcot <juliette-dev@fourcot.fr> <juliette.stehle@gmail.com>
Adrien CLERC <bugs-weboob@antipoul.fr> <bugs-weboob@antipoul.fr>
Vincent Texier <vit@free.fr> <vit@ubuntu-vm1.(none)>
Thomas Lecavelier <thomas-weboob@lecavelier.name> <tl@pierlis.com>
Tony Malto <tony.malto@budget-insight.com> <tmalto@budget-insight.com>
Pierre-Louis Bonicoli <pierre-louis.bonicoli@gmx.fr> <pierre-louis@libregerbil.fr>
Pierre Mazière <pierre.maziere@gmx.com> <pierre.maziere@gmail.com>
Raphaël Rigo <devel-weboob@syscall.eu> <devel-git@syscall.eu>
Benjamin Drieu <benjamin@drieu.org> <bdrieu@april.org>
Benjamin Tampigny <btampigny@budget-insight.com>
Caram Dache <caramdache@gmail.com>
Damien Cassou <damien@cassou.me> <damien.cassou@gmail.com>
David Kremer <dkremer@budget-insight.com> <courrier@david-kremer.fr>
Frédéric Lépy <frederic.lepy@budget-insight.com>
Guillaume Lundy <guillaume.lundy@laposte.net>
Guillaume Seznec <guillaume.seznec@gmail.com> <guillaume.seznec@ouifm.fr>
James GALT <james.galt.bi@gmail.com> <jamesgalt.bi@gmail.com>
James GALT <james.galt.bi@gmail.com> <james.galt.bi@gmail.com.com>
Jean Walrave <jean.walrave@budget-insight.com> <jean.walrave@epitech.eu>
Jean Walrave <jean.walrave@budget-insight.com> <jean.walrave@gmail.com>
Jerome Berthier <jerome.berthier@budget-insight.com> jerome <jerome@linxo.com>
Matthieu Weber <mweber+weboob@free.fr> <matthieu+weboob@weber.fi.eu.org>
Matthieu Weber <mweber+weboob@free.fr> <mweber@free.fr>
Théo Dorée <theo.doree@budget-insight.com> <tdoree@budget-insight.com>
......@@ -59,9 +59,10 @@ class Transaction(FrenchTransaction):
(re.compile(r'^(?P<category>COTISATION TRIMESTRIELLE).*'), FrenchTransaction.TYPE_BANK),
(re.compile(r'^REMISE COMMERCIALE.*'), FrenchTransaction.TYPE_BANK),
(re.compile(r'^(?P<category>.*UTILISATION DU DECOUVERT$)'), FrenchTransaction.TYPE_BANK),
(re.compile(r'^(?P<category>FRAIS (TRIMESTRIELS) DE TENUE DE COMPTE.*)'), FrenchTransaction.TYPE_BANK),
(re.compile(r'^(?P<category>FRAIS (TRIMESTRIELS )?DE TENUE DE COMPTE).*'), FrenchTransaction.TYPE_BANK),
(re.compile(r'^(?P<category>FRAIS IRREGULARITES ET INCIDENTS).*'), FrenchTransaction.TYPE_BANK),
(re.compile(r'^(?P<category>COMMISSION PAIEMENT PAR CARTE)'), FrenchTransaction.TYPE_BANK),
(re.compile(r'^(?P<category>CREDIT CARTE BANCAIRE) (?P<text>.*) (?P<dd>\d{2})\.(?P<mm>\d{2})\.(?P<yy>\d{2,4}) .*'), FrenchTransaction.TYPE_CARD),
]
......
......@@ -37,7 +37,7 @@ class IpapiModule(Module, CapGeolocIp):
BROWSER = Browser
def get_location(self, ipaddr):
res = self.browser.location('http://ip-api.com/json/%s' % ipaddr.encode('utf-8'))
res = self.browser.location(u'http://ip-api.com/json/%s' % ipaddr)
jres = json.loads(res.text)
if "status" in jres and jres["status"] == "fail":
......
......@@ -78,7 +78,7 @@ class Recipe(object):
# workaround, as it's also a mako directive
coding='# -*- coding: utf-8 -*-',
login=self.login,
**kwargs)
**kwargs).strip() + u'\n'
def generate(self):
raise NotImplementedError()
......@@ -3,6 +3,6 @@ set -e
. "$(dirname $0)/common.sh"
$PYTHON "$(dirname $0)/stale_pyc.py"
[ $VER -eq 2 ] && $PYTHON "$(dirname $0)/stale_pyc.py"
exec $PYTHON "$(dirname $0)/local_install.py" "$@"
......@@ -3,6 +3,6 @@ set -e
. "$(dirname $0)/common.sh"
$PYTHON "$(dirname $0)/stale_pyc.py"
[ $VER -eq 2 ] && $PYTHON "$(dirname $0)/stale_pyc.py"
exec $PYTHON "$(dirname $0)/local_run.py" "$@"
......@@ -88,6 +88,7 @@ imgur
indeed
infomaniak
ing
ipapi
ipinfodb
jcvelaux
jirafeau
......@@ -119,6 +120,7 @@ n26
nalo
nectarine
nef
newsfeed
nova
oney
onlinenet
......
......@@ -102,7 +102,7 @@ else
XUNIT_ARGS=""
fi
${PYTHON} "$(dirname $0)/stale_pyc.py"
[ $VER -eq 2 ] && $PYTHON "$(dirname $0)/stale_pyc.py"
echo "file://${WEBOOB_MODULES}" > "${WEBOOB_TMPDIR}/sources.list"
......
......@@ -151,7 +151,7 @@ class Page(object):
self.params = params
# Setup encoding and build document
self.forced_encoding = encoding or self.ENCODING
self.forced_encoding = self.normalize_encoding(encoding or self.ENCODING)
if self.forced_encoding:
self.response.encoding = self.forced_encoding
self.doc = self.build_doc(self.data)
......@@ -169,7 +169,7 @@ class Page(object):
@property
def encoding(self):
return self.response.encoding
return self.normalize_encoding(self.response.encoding)
@encoding.setter
def encoding(self, value):
......@@ -223,6 +223,14 @@ class Page(object):
"""
return None
def normalize_encoding(self, encoding):
    """
    Make sure we can easily compare encodings by formatting them the same way.

    :param encoding: encoding name as text or bytes; may be None or empty
    :return: the lowercased text form of the name; a falsy value (None or
             an empty string) is returned without being lowercased
    """
    if isinstance(encoding, bytes):
        # Byte names may come straight from raw page data and are not
        # guaranteed to be valid UTF-8: substitute undecodable bytes
        # instead of raising UnicodeDecodeError.
        encoding = encoding.decode('utf-8', 'replace')
    return encoding.lower() if encoding else encoding
def absurl(self, url):
"""
Get an absolute URL from a partial URL, relative to the Page URL
......@@ -518,7 +526,7 @@ class XMLPage(Page):
import re
m = re.search(b'<\?xml version="1.0" encoding="(.*)"\?>', self.data)
if m:
return m.group(1)
return self.normalize_encoding(m.group(1))
def build_doc(self, content):
import lxml.etree as etree
......@@ -664,10 +672,10 @@ class HTMLPage(Page):
Method to build the lxml document from response and given encoding.
"""
encoding = self.encoding
if encoding == 'latin-1':
encoding = 'latin1'
if encoding == u'latin-1':
encoding = u'latin1'
if encoding:
encoding = encoding.replace('ISO8859_', 'ISO8859-')
encoding = encoding.replace(u'iso8859_', u'iso8859-')
import lxml.html as html
parser = html.HTMLParser(encoding=encoding)
return html.parse(BytesIO(content), parser)
......@@ -681,18 +689,18 @@ class HTMLPage(Page):
# meta http-equiv=content-type content=...
_, params = parse_header(content)
if 'charset' in params:
encoding = params['charset'].strip("'\"")
encoding = self.normalize_encoding(params['charset'].strip("'\""))
for charset in self.doc.xpath('//head/meta[@charset]/@charset'):
# meta charset=...
encoding = charset.lower()
encoding = self.normalize_encoding(charset)
if encoding == 'iso-8859-1' or not encoding:
encoding = 'windows-1252'
if encoding == u'iso-8859-1' or not encoding:
encoding = u'windows-1252'
try:
codecs.lookup(encoding)
except LookupError:
encoding = 'windows-1252'
encoding = u'windows-1252'
return encoding
......
......@@ -79,7 +79,7 @@ class Firefox(Profile):
'Accept-Language': 'en-US,en;q=0.5',
'Accept-Encoding': 'gzip, deflate',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:60.0) Gecko/20100101 Firefox/60.0',
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:68.0) Gecko/20100101 Firefox/68.0',
'Upgrade-Insecure-Requests': '1',
'DNT': '1'}
if 'br' in ENCODINGS:
......
......@@ -81,7 +81,7 @@ class Javascript(object):
};
navigator = {
userAgent: "Mozilla/5.0 (X11; Linux x86_64; rv:60.0) Gecko/20100101 Firefox/60.0",
userAgent: "Mozilla/5.0 (X11; Linux x86_64; rv:68.0) Gecko/20100101 Firefox/68.0",
appName: "Netscape"
};
"""
......