#!/usr/bin/env python

# Copyright(C) 2017  Vincent A
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.

from __future__ import print_function

import ast
import fnmatch
import os
import traceback

import lxml.etree
from weboob.browser.filters import standard


class Error(SyntaxError):
    """XPath problem located at a given file and line.

    The exception text is ``<file>:<line>: <message>``; the location is
    also kept on the ``file`` and ``line`` attributes.
    """

    def __init__(self, file, line, message):
        location = (file, line, message)
        text = '%s:%s: %s' % location
        super(Error, self).__init__(text)
        # keep the location available separately from the formatted text
        self.line = line
        self.file = file


def do_visits(*funcs):
    """Build a ``visit_*`` method out of several per-node checks.

    The returned function takes ``(self, node)``, calls every function of
    *funcs* in order with those two arguments, then continues the traversal
    by calling ``self.generic_visit(node)``.
    """
    def visit(self, node):
        for check in funcs:
            check(self, node)
        # descend into children only after all checks ran on this node
        self.generic_visit(node)

    return visit


class Visitor(ast.NodeVisitor):
    """AST visitor that validates XPath string literals found in a module.

    Three kinds of literals are checked:

    * ``item_xpath = '...'`` assignments located directly inside a class
      deriving from ``ListElement`` or ``TableElement``;
    * the first argument of any ``<expr>.xpath('...')`` call;
    * the first argument of a call to one of the names in ``self.filters``.

    An invalid expression raises :class:`Error`, which aborts the visit.
    """

    def __init__(self, file, *args, **kwargs):
        """Prepare a visitor for one source file.

        :param file: file name used as prefix in errors and warnings
        :param warnings: (keyword) when true, also print heuristic warnings
        """
        self.warnings = kwargs.pop('warnings', False)
        super(Visitor, self).__init__(*args, **kwargs)
        self.file = file

        # Names of the filter classes whose first argument is an XPath:
        # every class of the ``standard`` module deriving from CleanText,
        # plus a few names listed explicitly.
        self.filters = []
        self.filters.extend(
            f for f in dir(standard)
            if isinstance(getattr(standard, f), type)
            and issubclass(getattr(standard, f), standard.CleanText)
        )
        self.filters.extend(['Regexp', 'XPath', 'Attr', 'Link'])

        # Stack of the *Element base-class names of the classes being visited
        self.element_context = []

    @staticmethod
    def _str_literal(node):
        """Return the text of *node* if it is a string literal, else None.

        String literals are ``ast.Constant`` nodes since Python 3.8; the
        ``ast.Str`` class and the ``.s`` attribute are deprecated aliases
        that newer interpreters drop, so they are not relied upon here.
        """
        constant_cls = getattr(ast, 'Constant', None)
        if constant_cls is not None and isinstance(node, constant_cls):
            value = node.value
            # bytes, numbers, None... are constants too but not XPath text
            return value if isinstance(value, str) else None
        # Older parsers still build ``Str`` nodes. The class is matched by
        # name so the deprecated ``ast.Str`` attribute is never looked up.
        if type(node).__name__ == 'Str':
            return node.s
        return None

    def check_xpath(self, s, lineno):
        """Compile the XPath *s* and report problems.

        :param s: XPath expression text
        :param lineno: line number used in the error or warning message
        :raises Error: if lxml rejects the expression syntax
        """
        try:
            lxml.etree.XPath(s)
        except lxml.etree.XPathSyntaxError as exc:
            raise Error(self.file, lineno, exc)

        if not self.warnings:
            return
        # Expressions starting with "." (ignoring opening parentheses) are
        # never warned about; the warning needs two nesting levels.
        if s.lstrip('(').startswith('.') or len(self.element_context) < 2:
            return
        if self.element_context[-1] == 'ItemElement' and self.element_context[-2] in ('TableElement', 'ListElement'):
            print('%s:%s: probable missing "." at start of XPath' % (self.file, lineno))

    def _item_xpath(self, node):
        """Check ``item_xpath = '<literal>'`` inside a list/table element."""
        try:
            target, = node.targets
        except ValueError:
            # chained assignment (a = b = ...): not the pattern looked for
            return
        if not isinstance(target, ast.Name) or target.id != 'item_xpath':
            return
        if not self.element_context:
            return
        if self.element_context[-1] not in ('TableElement', 'ListElement'):
            return

        xpath = self._str_literal(node.value)
        if xpath is None:
            return
        self.check_xpath(xpath, node.lineno)

    visit_Assign = do_visits(_item_xpath)

    def _xpath_call(self, node):
        """Check the first argument of ``<expr>.xpath('<literal>', ...)``."""
        if not isinstance(node.func, ast.Attribute):
            return
        if node.func.attr != 'xpath':
            return
        if not node.args:
            return

        xpath = self._str_literal(node.args[0])
        if xpath is None:
            return
        self.check_xpath(xpath, node.lineno)

    def _filter_call(self, node):
        """Check the first argument of ``Filter('<literal>', ...)`` calls.

        Only calls whose function is a bare name listed in ``self.filters``
        are considered.
        """
        if not isinstance(node.func, ast.Name):
            return
        if node.func.id not in self.filters:
            return
        if not node.args:
            return

        xpath = self._str_literal(node.args[0])
        if xpath is None:
            return
        self.check_xpath(xpath, node.lineno)

    visit_Call = do_visits(_xpath_call, _filter_call)

    def visit_ClassDef(self, node):
        """Track the element kind of *node* while its body is visited.

        The first base written as a bare ``ListElement``, ``ItemElement`` or
        ``TableElement`` name is pushed on ``self.element_context`` for the
        duration of the visit of the class.
        """
        has_element = False

        for basenode in node.bases:
            if isinstance(basenode, ast.Name) and basenode.id in ('ListElement', 'ItemElement', 'TableElement'):
                self.element_context.append(basenode.id)
                has_element = True
                break

        self.generic_visit(node)

        if has_element:
            self.element_context.pop()


def search_py(root):
    """Yield the path of every ``*.py`` file found under *root*.

    Paths are produced in a deterministic order: directories are walked in
    sorted order and, inside each directory, file names are sorted too.

    :param root: directory to walk recursively
    :return: generator of paths, each one joined onto its directory path
    """
    for path, dirs, files in os.walk(root):
        # sorting in place steers the order in which os.walk descends
        dirs.sort()
        # the listing order of files is filesystem-dependent: sort it as well
        for f in sorted(fnmatch.filter(files, '*.py')):
            yield os.path.join(path, f)


if __name__ == '__main__':
    # Command-line entry point: parse every Python file found under the
    # modules directory and run the XPath checks on it.
    import argparse

    parser = argparse.ArgumentParser(description="Check XPath definitions")
    parser.add_argument('-w', '--warnings', action='store_true')
    args = parser.parse_args()

    # WEBOOB_MODULES overrides the default "../modules" next to this script
    modpath = os.getenv('WEBOOB_MODULES', os.path.normpath(os.path.dirname(__file__) + '/../modules'))
    for fn in search_py(modpath):
        # Read raw bytes so that ast.parse applies the encoding declared by
        # the file itself instead of the locale default.
        with open(fn, 'rb') as fd:
            source = fd.read()

        try:
            node = ast.parse(source, fn)
        except SyntaxError:
            print('In file', fn)
            traceback.print_exc()
            # There is no tree for this file: skip it rather than visiting
            # a stale (or undefined) node.
            continue

        try:
            Visitor(fn, warnings=args.warnings).visit(node)
        except SyntaxError as exc:
            # Error derives from SyntaxError; its text is "file:line: message"
            print(exc)