boobot.py 14.6 KB
Newer Older
Romain Bignon's avatar
Romain Bignon committed
1
2
3
4
5
#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Copyright(C) 2012  Romain Bignon
#
6
# This file is part of woob.
Romain Bignon's avatar
Romain Bignon committed
7
#
8
# woob is free software: you can redistribute it and/or modify
9
# it under the terms of the GNU Lesser General Public License as published by
Romain Bignon's avatar
Romain Bignon committed
10
11
12
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
13
# woob is distributed in the hope that it will be useful,
Romain Bignon's avatar
Romain Bignon committed
14
15
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
# GNU Lesser General Public License for more details.
Romain Bignon's avatar
Romain Bignon committed
17
#
18
# You should have received a copy of the GNU Lesser General Public License
19
# along with woob. If not, see <http://www.gnu.org/licenses/>.
Romain Bignon's avatar
Romain Bignon committed
20

21
from __future__ import print_function
Romain Bignon's avatar
Romain Bignon committed
22

Laurent Bachelier's avatar
Laurent Bachelier committed
23
import itertools
24
import logging
25
import os
Laurent Bachelier's avatar
Laurent Bachelier committed
26
import re
Romain Bignon's avatar
Romain Bignon committed
27
import sys
28
import urllib
Laurent Bachelier's avatar
Laurent Bachelier committed
29
30
31
32
33
34
35
import urlparse
from datetime import datetime, timedelta
from math import log
from random import choice, randint
from threading import Event, Thread

from dateutil.parser import parse as parse_date
36
from irc.bot import SingleServerIRCBot
Romain Bignon's avatar
Romain Bignon committed
37

38
39
40
41
42
43
44
45
46
47
from woob.browser import Browser
from woob.browser.exceptions import HTTPNotFound
from woob.browser.pages import HTMLPage
from woob.core import Woob
from woob.exceptions import BrowserHTTPError, BrowserUnavailable
from woob.tools.application.base import ApplicationStorage
from woob.tools.misc import get_backtrace, to_unicode
from woob.tools.storage import StandardStorage

# Runtime configuration, read once at import time from BOOBOT_* environment
# variables, with defaults used when a variable is unset.

# Channels to join (comma separated). Surrounding blanks and empty entries
# are dropped; if nothing usable remains, fall back to the default channel so
# that there is always a first ("main") channel.
IRC_CHANNELS = [
    name.strip()
    for name in os.getenv('BOOBOT_CHANNELS', '#woob').split(',')
    if name.strip()
] or ['#woob']
IRC_NICKNAME = os.getenv('BOOBOT_NICKNAME', 'boobot')
IRC_SERVER = os.getenv('BOOBOT_SERVER', 'dickson.freenode.net')
# Regular expressions (comma separated) searched in the source of incoming
# messages; a match means the message is ignored. Empty entries are skipped:
# an empty pattern matches every source and would silence the whole bot.
IRC_IGNORE = [
    re.compile(pattern)
    for pattern in os.getenv('BOOBOT_IGNORE', '!~?irker@').split(',')
    if pattern
]
STORAGE_FILE = os.getenv('BOOBOT_STORAGE', 'boobot.storage')
Romain Bignon's avatar
Romain Bignon committed
52

53

54
def fixurl(url):
    """Return *url* re-assembled from individually encoded components.

    The URL is converted with ``to_unicode``, the ``/#!/`` marker is replaced
    by ``/`` and the result is split. User and password are UTF-8 encoded and
    percent-quoted, the host is IDNA encoded, and every other component is
    UTF-8 encoded. Doubled slashes in the path are collapsed.
    """
    parts = urlparse.urlsplit(to_unicode(url).replace('/#!/', '/'))

    # netloc layout: [user[:password]@]host[:port]
    credentials, at_sign, location = parts.netloc.rpartition('@')
    username, cred_sep, password = credentials.partition(':')
    hostname, port_sep, port_number = location.partition(':')

    netloc = ''.join((
        urllib.quote(username.encode('utf8')),
        cred_sep.encode('utf8'),
        urllib.quote(password.encode('utf8')),
        at_sign.encode('utf8'),
        hostname.encode('idna'),
        port_sep.encode('utf8'),
        port_number.encode('utf8'),
    ))

    segments = [segment.encode('utf8') for segment in parts.path.split('/')]
    # a doubled slash is valid, but it is most likely an error
    path = '/'.join(segments).replace('//', '/')

    return urlparse.urlunsplit((
        parts.scheme.encode('utf8'),
        netloc,
        path,
        parts.query.encode('utf8'),
        parts.fragment.encode('utf8'),
    ))


Romain Bignon's avatar
Romain Bignon committed
88
89
class BoobotBrowser(Browser):
    """Browser used to build a short description of a URL."""

    TIMEOUT = 3.0

    def urlinfo(self, url, maxback=2):
        """Fetch *url* and describe what it points to.

        A HEAD request is tried first. When the server answers 501 or 405 the
        URL is fetched with a regular request instead. On a "not found"
        answer, up to *maxback* trailing non-alphanumeric characters are
        removed from the URL (one per retry) before giving up.

        :param url: URL to inspect
        :param maxback: how many trailing characters may be stripped on 404
        :returns: tuple ``(content_type, human_readable_size, title)``; size
                  and title are None when they are not known
        :raises HTTPNotFound: if the URL cannot be found
        :raises BrowserHTTPError: on any HTTP error other than 501/405
        """
        if urlparse.urlsplit(url).netloc == 'mobile.twitter.com':
            url = url.replace('mobile.twitter.com', 'twitter.com', 1)
        try:
            r = self.open(url, method='HEAD')
            body = False
        except HTTPNotFound:
            # The URL may have been pasted with trailing punctuation.
            if maxback and not url[-1].isalnum():
                return self.urlinfo(url[:-1], maxback-1)
            raise
        except BrowserHTTPError as e:
            if e.response.status_code not in (501, 405):
                raise
            # HEAD is refused by this server: do a plain request instead.
            r = self.open(url)
            body = True

        content_type = r.headers.get('Content-Type')
        try:
            size = int(r.headers.get('Content-Length'))
        except (TypeError, ValueError):
            # header missing (None) or not a number
            hsize = None
        else:
            hsize = self.human_size(size)

        if content_type:
            is_html = 'html' in content_type.lower()
        else:
            # No content type: guess from the extension at the end of the
            # path. search() is required here, match() only matches at the
            # beginning of the string.
            path = urlparse.urlsplit(url).path
            is_html = re.search(r'\.x?html?$', path) is not None

        title = None
        if is_html:
            if not body:
                r = self.open(url)
            # update size as we might not have it from headers
            hsize = self.human_size(len(r.content))

            page = HTMLPage(self, r)

            # the last <title> element found wins
            for element in page.doc.xpath('//head/title'):
                title = ' '.join(to_unicode(element.text_content()).split())
            if urlparse.urlsplit(url).netloc.endswith('twitter.com'):
                # on a tweet page, prefer the text of the tweet itself
                for element in page.doc.getroot().cssselect('.permalink-tweet .tweet-text'):
                    text = to_unicode(element.text_content()).strip()
                    title = ' '.join(text.splitlines())

        return content_type, hsize, title

    def human_size(self, size):
        """Format a number of bytes with a binary unit, e.g. ``1.5 KiB``.

        :param size: number of bytes (None, zero or a negative value gives
                     ``'0 B'``)
        :returns: formatted string
        """
        if not size or size < 0:
            return '0 B'

        units = ('B', 'KiB', 'MiB', 'GiB',
                 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB')
        # Successive divisions by 1024 are exact on floats, unlike a
        # floating-point logarithm, and the index cannot leave the tuple.
        value = float(size)
        index = 0
        while value >= 1024 and index < len(units) - 1:
            value /= 1024
            index += 1
        return "%.1f %s" % (value, units[index])


Romain Bignon's avatar
Romain Bignon committed
144
145
146
147
148
149
150
class Task(object):
    """Record holding a message, the date attached to it and a channel.

    ``channel`` is optional and defaults to None.
    """

    def __init__(self, datetime, message, channel=None):
        self.datetime, self.message, self.channel = datetime, message, channel


Romain Bignon's avatar
Romain Bignon committed
151
class MyThread(Thread):
    """Background thread running the woob scheduler on behalf of the bot.

    It creates the Woob instance, hands it to the bot and, once the bot has
    joined its channels, runs the periodic checks (scheduled tasks, DLFP
    board and messages, twitter search).
    """

    # Keywords looked for (case-insensitively) in monitored messages.
    KEYWORDS = (
        'woob', 'weboob',
        'budget insight', 'budget-insight', 'budgetinsight', 'budgea',
    )

    def __init__(self, bot):
        """Create the Woob instance, load its backends and attach it to *bot*."""
        Thread.__init__(self)
        # Go through the Thread.daemon property instead of shadowing it with
        # a class attribute, so the thread's internal state is consistent.
        self.daemon = True
        self.woob = Woob(storage=StandardStorage(STORAGE_FILE))
        self.woob.load_backends()
        self.bot = bot
        self.bot.set_woob(self.woob)

    def run(self):
        """Wait until every channel is joined, then run the woob loop."""
        for ev in self.bot.joined.values():
            ev.wait()

        # NOTE(review): intervals are presumably seconds -- confirm against
        # the woob scheduler documentation.
        self.woob.repeat(5, self.check_tasks)
        self.woob.repeat(300, self.check_board)
        self.woob.repeat(600, self.check_dlfp)
        self.woob.repeat(600, self.check_twitter)

        self.woob.loop()

    def find_keywords(self, text):
        """Return the first keyword contained in *text*, or None.

        The comparison is done on the lower-cased text. An empty or missing
        text contains no keyword.
        """
        if not text:
            return None
        lowered = text.lower()
        for word in self.KEYWORDS:
            if word in lowered:
                return word
        return None

    def check_twitter(self):
        """Announce unseen tweets returned by the 'woob' twitter search."""
        nb_tweets = 10

        for backend in self.woob.iter_backends(module='twitter'):
            # The first results are collected before any of them is
            # processed, as the loop body changes the backend state.
            threads = list(itertools.islice(
                backend.iter_resources(None, ['search', 'woob']), 0, nb_tweets))
            for thread in threads:
                if not backend.storage.get('lastpurge'):
                    backend.storage.set('lastpurge', datetime.now() - timedelta(days=60))
                    backend.storage.save()

                if thread.id in backend.storage.get('seen', default={}):
                    continue
                if not thread.date > backend.storage.get('lastpurge'):
                    continue

                # thread ids look like "<author>#<status id>"
                parts = thread.id.split('#')
                author, status_id = parts[0], parts[1]
                url = 'https://twitter.com/%s/status/%s' % (author, status_id)
                for msg in self.bot.on_url(url):
                    self.bot.send_message('%s: %s' % (author, url))
                    self.bot.send_message(msg)

                backend.set_message_read(backend.fill_thread(thread, ['root']).root)

    def check_dlfp(self):
        """Announce unread DLFP messages mentioning a keyword, then mark them read."""
        for msg in self.woob.do('iter_unread_messages', backends=['dlfp']):
            word = self.find_keywords(msg.content)
            if word is not None:
                start = msg.signature.find('https://linuxfr')
                # find() returns -1 when there is no link; slicing from -1
                # would keep the last character of the signature only.
                url = msg.signature[start:] if start >= 0 else ''
                self.bot.send_message('[DLFP] %s talks about %s: %s' % (
                    msg.sender, word, url))
            self.woob[msg.backend].set_message_read(msg)

    def check_board(self):
        """Relay new DLFP board messages mentioning a keyword."""
        def iter_messages(backend):
            return backend.browser.iter_new_board_messages()

        for msg in self.woob.do(iter_messages, backends=['dlfp']):
            word = self.find_keywords(msg.message)
            if word is not None and msg.login != 'moules':
                # The keyword was found case-insensitively, so highlight it
                # the same way, keeping the original casing (\002 = bold).
                message = re.sub(
                    re.escape(word),
                    lambda match: '\002%s\002' % match.group(0),
                    msg.message,
                    flags=re.IGNORECASE)
                self.bot.send_message('[DLFP] <%s> %s' % (msg.login, message))

    def check_tasks(self):
        """Send the message of every task whose date is in the past."""
        # iterate over a copy as due tasks are removed from the queue
        for task in list(self.bot.tasks_queue):
            # Build "now" with the same awareness as the task date: comparing
            # an aware datetime with a naive one raises TypeError.
            now = datetime.now(task.datetime.tzinfo)
            if task.datetime < now:
                self.bot.send_message(task.message, task.channel)
                self.bot.tasks_queue.remove(task)

    def stop(self):
        """Ask the woob loop to stop and release woob resources."""
        self.woob.want_stop()
        self.woob.deinit()
Romain Bignon's avatar
Romain Bignon committed
230

231

232
233
class Boobot(SingleServerIRCBot):
    """IRC bot describing URLs and woob object ids, with a few % commands.

    Commands are methods named ``cmd_<name>`` called with
    ``(nick, channel, text)`` when a message starts with ``%<name>``.
    Object descriptions are produced by ``obj_info_<capability>`` methods.
    """

    def __init__(self, channels, nickname, server, port=6667):
        """Set up the connection handlers and the per-channel join events.

        :param channels: channels to join; the first one is the main channel
        :param nickname: used both as nickname and real name
        :param server: IRC server host
        :param port: IRC server port
        """
        SingleServerIRCBot.__init__(self, [(server, port)], nickname, nickname)
        # NOTE(review): the pubmsg handler is not registered explicitly;
        # presumably the base class dispatches it by method name -- confirm.
        # self.connection.add_global_handler('pubmsg', self.on_pubmsg)
        self.connection.add_global_handler('join', self.on_join)
        self.connection.add_global_handler('welcome', self.on_welcome)
        self.connection.buffer_class.errors = 'replace'

        self.mainchannel = channels[0]
        # one event per channel, set once the channel has been joined
        self.joined = {channel: Event() for channel in channels}
        self.woob = None
        self.storage = None

        # Task objects waiting to be sent
        self.tasks_queue = []

    def set_woob(self, woob):
        """Attach the Woob instance and load the bot's storage section."""
        self.woob = woob
        self.storage = ApplicationStorage('boobot', woob.storage)
        self.storage.load({})

    def on_welcome(self, c, event):
        """Join every configured channel."""
        for channel in self.joined:
            c.join(channel)

    def on_join(self, c, event):
        """Flag the joined channel as ready."""
        # irclib 5.0 compatibility
        if callable(event.target):
            channel = event.target()
        else:
            channel = event.target
        # A join on a channel that was not configured must not raise.
        joined = self.joined.get(channel)
        if joined is not None:
            joined.set()

    def send_message(self, msg, channel=None):
        """Send *msg* line by line to *channel* (main channel by default).

        Each line is limited to 450 bytes of UTF-8.
        """
        target = to_unicode(channel or self.mainchannel)
        for line in msg.splitlines():
            # The cut is done on bytes and may fall in the middle of a
            # multi-byte character: drop the incomplete trailing bytes
            # instead of failing to decode.
            text = to_unicode(line).encode('utf-8')[:450].decode('utf-8', 'ignore')
            self.connection.privmsg(target, text)

    def on_pubmsg(self, c, event):
        """Handle a channel message: object ids, URLs, then % commands."""
        # irclib 5.0 compatibility
        if callable(event.arguments):
            text = ' '.join(event.arguments())
            channel = event.target()
            nick = event.source()
        else:
            text = ' '.join(event.arguments)
            channel = event.target
            nick = event.source

        if any(ignore.search(nick) for ignore in IRC_IGNORE):
            return

        for m in re.findall(r'([\w\d_\-]+@\w+)', text):
            for msg in self.on_boobid(m):
                self.send_message(msg, channel)
        for m in re.findall(u'(https?://[^\\s\xa0+]+)', text):
            for msg in self.on_url(m):
                self.send_message(msg, channel)

        m = re.match(r'^%(?P<cmd>\w+)(?P<args>.*)$', text)
        if m:
            handler = getattr(self, 'cmd_%s' % m.group('cmd'), None)
            if handler is not None:
                handler(nick, channel, m.group('args').strip())

    def cmd_at(self, nick, channel, text):
        """%at DATE MESSAGE: queue MESSAGE to be sent on this channel at DATE."""
        try:
            when, message = text.split(' ', 1)
        except ValueError:
            self.send_message('Syntax: %at [YYYY-MM-DDT]HH:MM[:SS] message', channel)
            return

        try:
            when = parse_date(when)
        except (ValueError, OverflowError):
            # answer on the channel the command came from
            self.send_message('Unable to read date %r' % when, channel)
            return

        self.tasks_queue.append(Task(when, message, channel))

    def cmd_addquote(self, nick, channel, text):
        """%addquote TEXT: store TEXT as a new quote of this channel."""
        quotes = self.storage.get(channel, 'quotes', default=[])
        quotes.append({'author': nick, 'timestamp': datetime.now(), 'text': text})
        self.storage.set(channel, 'quotes', quotes)
        self.storage.save()
        self.send_message('Quote #%s added' % (len(quotes) - 1), channel)

    def cmd_delquote(self, nick, channel, text):
        """%delquote N: remove quote number N of this channel."""
        quotes = self.storage.get(channel, 'quotes', default=[])

        try:
            n = int(text)
            quotes.pop(n)
        except (ValueError, IndexError):
            # not a number, or no quote with this number
            self.send_message("Quote #%s not found gros" % text, channel)
            return

        self.storage.set(channel, 'quotes', quotes)
        self.storage.save()
        self.send_message('Quote #%s removed' % n, channel)

    def cmd_searchquote(self, nick, channel, text):
        """%searchquote REGEXP: send a random quote matching REGEXP."""
        try:
            pattern = re.compile(to_unicode(text), re.IGNORECASE | re.UNICODE)
        except Exception as e:
            # the pattern comes from a user: report the problem on the channel
            self.send_message(str(e), channel)
            return

        matches = [
            quote
            for quote in self.storage.get(channel, 'quotes', default=[])
            if pattern.search(to_unicode(quote['text']))
        ]
        if not matches:
            self.send_message('No match', channel)
            return

        self.send_message('%s' % choice(matches)['text'], channel)

    def cmd_getquote(self, nick, channel, text):
        """%getquote [N]: send quote number N, or a random one without N."""
        quotes = self.storage.get(channel, 'quotes', default=[])
        if len(quotes) == 0:
            return

        try:
            n = int(text)
        except ValueError:
            n = randint(0, len(quotes)-1)

        try:
            quote = quotes[n]
        except IndexError:
            self.send_message('Unable to find quote #%s' % n, channel)
        else:
            self.send_message('[%s] %s' % (n, quote['text']), channel)

    def on_boobid(self, boobid):
        """Yield description lines for an ``id@backend`` object reference.

        The first capability of the backend for which an ``obj_info_*``
        method exists is used; errors are reported as a message.
        """
        _id, backend_name = boobid.split('@', 1)
        if backend_name not in self.woob.backend_instances:
            return

        backend = self.woob.backend_instances[backend_name]
        for cap in backend.iter_caps():
            # capability class names carry a 3-letter prefix, e.g. CapVideo
            func = 'obj_info_%s' % cap.__name__[3:].lower()
            if not hasattr(self, func):
                continue
            try:
                for msg in getattr(self, func)(backend, _id):
                    yield msg
            except Exception as e:
                print(get_backtrace())
                yield u'Oops: [%s] %s' % (type(e).__name__, e)
            break

    def on_url(self, url):
        """Yield a one-line description of *url* (title, or type and size)."""
        url = fixurl(url)
        try:
            content_type, hsize, title = BoobotBrowser().urlinfo(url)
            if title:
                yield u'URL: %s' % title
            elif hsize:
                yield u'URL (file): %s, %s' % (content_type, hsize)
            else:
                yield u'URL (file): %s' % content_type
        except BrowserUnavailable as e:
            yield u'URL (error): %s' % e
        except Exception as e:
            print(get_backtrace())
            yield u'Oops: [%s] %s' % (type(e).__name__, e)

    def obj_info_video(self, backend, id):
        """Yield the description of video *id* from *backend*, if found."""
        v = backend.get_video(id)
        if v:
            yield u'Video: %s (%s)' % (v.title, v.duration)

    def obj_info_housing(self, backend, id):
        """Yield the description of housing *id* from *backend*, if found."""
        h = backend.get_housing(id)
        if h:
            yield u'Housing: %s (%sm² / %s%s)' % (h.title, h.area, h.cost, h.currency)
Romain Bignon's avatar
Romain Bignon committed
407

408

Romain Bignon's avatar
Romain Bignon committed
409
def main():
    """Start the bot and its background thread, and run until interrupted.

    Returns None, so the process exits with status 0 after a normal stop.
    """
    logging.basicConfig(level=logging.DEBUG)
    bot = Boobot(IRC_CHANNELS, IRC_NICKNAME, IRC_SERVER)

    thread = MyThread(bot)
    thread.start()

    try:
        bot.start()
    except KeyboardInterrupt:
        print("Stopped.")
    finally:
        # Stop and deinit woob whatever ended the bot, not only on Ctrl-C.
        thread.stop()

Laurent Bachelier's avatar
Laurent Bachelier committed
423

Romain Bignon's avatar
Romain Bignon committed
424
425
# Script entry point: run the bot and use the return value of main() as the
# exit status of the process.
if __name__ == "__main__":
    sys.exit(main())