hyperkitty_import aborts when message_from_bytes fails (1.3.3)
Instead of aborting, as shown in the traceback below, it should report the Message-Id of the offending mail and skip it. UnicodeError is not the only kind of exception that can be raised here: the traceback below ends in an IndexError.
A patch along these lines should be enough, don't you think?
diff --git a/hyperkitty/management/commands/hyperkitty_import.py b/hyperkitty/management/commands/hyperkitty_import.py
index ff00392..29d42d2 100644
--- a/hyperkitty/management/commands/hyperkitty_import.py
+++ b/hyperkitty/management/commands/hyperkitty_import.py
@@ -152,11 +152,16 @@ class DbImporter(object):
for msg in mbox:
# FIXME: this converts mailbox.mboxMessage to
# email.message.EmailMessage
msg_raw = msg.as_bytes(unixfrom=False)
unixfrom = msg.get_from()
try:
message = message_from_bytes(msg_raw, policy=policy.default)
- except UnicodeError as e:
+ except Exception as e:
self.stderr.write('Failed to convert {} to '
'email.message.Message\n {}'.format(
unquote(msg["Message-Id"]), e))
Traceback (most recent call last):
File "manage.py", line 10, in <module>
execute_from_command_line(sys.argv)
File "/home/debian/venv-hyperkitty/lib/python3.7/site-packages/django/core/management/__init__.py", line 401, in execute_from_command_line
utility.execute()
File "/home/debian/venv-hyperkitty/lib/python3.7/site-packages/django/core/management/__init__.py", line 395, in execute
self.fetch_command(subcommand).run_from_argv(self.argv)
File "/home/debian/venv-hyperkitty/lib/python3.7/site-packages/django/core/management/base.py", line 328, in run_from_argv
self.execute(*args, **cmd_options)
File "/home/debian/venv-hyperkitty/lib/python3.7/site-packages/django/core/management/base.py", line 369, in execute
output = self.handle(*args, **options)
File "/home/debian/hyperkitty/hyperkitty/management/commands/hyperkitty_import.py", line 333, in handle
importer.from_mbox(mbfile)
File "/home/debian/hyperkitty/hyperkitty/management/commands/hyperkitty_import.py", line 163, in from_mbox
message = message_from_bytes(msg_raw, policy=policy.default)
File "/usr/lib/python3.7/email/__init__.py", line 46, in message_from_bytes
return BytesParser(*args, **kws).parsebytes(s)
File "/usr/lib/python3.7/email/parser.py", line 124, in parsebytes
return self.parser.parsestr(text, headersonly)
File "/usr/lib/python3.7/email/parser.py", line 68, in parsestr
return self.parse(StringIO(text), headersonly=headersonly)
File "/usr/lib/python3.7/email/parser.py", line 57, in parse
feedparser.feed(data)
File "/usr/lib/python3.7/email/feedparser.py", line 176, in feed
self._call_parse()
File "/usr/lib/python3.7/email/feedparser.py", line 180, in _call_parse
self._parse()
File "/usr/lib/python3.7/email/feedparser.py", line 385, in _parsegen
for retval in self._parsegen():
File "/usr/lib/python3.7/email/feedparser.py", line 256, in _parsegen
if self._cur.get_content_type() == 'message/delivery-status':
File "/usr/lib/python3.7/email/message.py", line 578, in get_content_type
value = self.get('content-type', missing)
File "/usr/lib/python3.7/email/message.py", line 471, in get
return self.policy.header_fetch_parse(k, v)
File "/usr/lib/python3.7/email/policy.py", line 162, in header_fetch_parse
return self.header_factory(name, value)
File "/usr/lib/python3.7/email/headerregistry.py", line 589, in __call__
return self[name](name, value)
File "/usr/lib/python3.7/email/headerregistry.py", line 197, in __new__
cls.parse(value, kwds)
File "/usr/lib/python3.7/email/headerregistry.py", line 446, in parse
kwds['parse_tree'] = parse_tree = cls.value_parser(value)
File "/usr/lib/python3.7/email/_header_value_parser.py", line 2504, in parse_content_type_header
ctype.append(parse_mime_parameters(value[1:]))
File "/usr/lib/python3.7/email/_header_value_parser.py", line 2413, in parse_mime_parameters
token, value = get_invalid_parameter(value)
File "/usr/lib/python3.7/email/_header_value_parser.py", line 2063, in get_invalid_parameter
token, value = get_phrase(value)
File "/usr/lib/python3.7/email/_header_value_parser.py", line 1377, in get_phrase
token, value = get_word(value)
File "/usr/lib/python3.7/email/_header_value_parser.py", line 1340, in get_word
token, value = get_quoted_string(value)
File "/usr/lib/python3.7/email/_header_value_parser.py", line 1241, in get_quoted_string
token, value = get_bare_quoted_string(value)
File "/usr/lib/python3.7/email/_header_value_parser.py", line 1170, in get_bare_quoted_string
if value[0] == '"':
IndexError: string index out of range
Edited by Loïc Dachary