Commit 724b7cee authored by Barry Warsaw's avatar Barry Warsaw

Mailing lists can now have their own header matching rules, although

site-defined rules still take precedence.  Importing a Mailman 2.1 list with
header matching rules defined will create them in Mailman 3, albeit with a few
unsupported corner cases.  Definition of new header matching rules is not yet
exposed through the REST API.  Given by Aurélien Bompard.

Code cleaning pass by Barry Warsaw.

Closes !42
parents 49d17bc0 5104e712
Pipeline #196012 failed with stage
......@@ -37,22 +37,29 @@ log = logging.getLogger('mailman.error')
def make_link(header, pattern):
def make_link(header, pattern, chain=None):
"""Create a Link object.
The link action is always to defer, since at the end of all the header
checks, we'll jump to the chain defined in the configuration file, should
any of them have matched.
The link action is to defer by default, since at the end of all the
header checks, we'll jump to the chain defined in the configuration
file, should any of them have matched. However, it is possible to
create a link which jumps to a specific chain.
:param header: The email header name to check, e.g. X-Spam.
:type header: string
:param pattern: A regular expression for matching the header value.
:type pattern: string
:param chain: When given, this is the chain to jump to if the
pattern matches the header.
:type chain: string
:return: The link representing this rule check.
:rtype: `ILink`
"""
rule = HeaderMatchRule(header, pattern)
return Link(rule, LinkAction.defer)
if chain is None:
return Link(rule)
chain = config.chains[chain]
return Link(rule, LinkAction.jump, chain)
......@@ -132,17 +139,18 @@ class HeaderMatchChain(Chain):
parts = line.split(':', 1)
if len(parts) != 2:
log.error('Configuration error: [antispam]header_checks '
'contains bogus line: {0}'.format(line))
'contains bogus line: {}'.format(line))
continue
yield make_link(parts[0], parts[1].lstrip())
# Then return all the list-specific header matches.
# Python 3.3: Use 'yield from'
for entry in mlist.header_matches:
yield make_link(*entry)
# Then return all the explicitly added links.
for link in self._extended_links:
yield link
# Finally, if any of the above rules matched, jump to the chain
# defined in the configuration file.
yield Link(config.rules['any'], LinkAction.jump,
yield from self._extended_links
# If any of the above rules matched, they will have deferred their
# action until now, so jump to the chain defined in the configuration
# file. For security considerations, this takes precedence over
# list-specific matches.
yield Link(config.rules['any'],
LinkAction.jump,
config.chains[config.antispam.jump_chain])
# Then return all the list-specific header matches.
for entry in mlist.header_matches:
yield make_link(entry.header, entry.pattern, entry.chain)
......@@ -25,12 +25,16 @@ __all__ = [
import unittest
from mailman.app.lifecycle import create_list
from mailman.chains.headers import HeaderMatchRule
from mailman.chains.headers import HeaderMatchRule, make_link
from mailman.config import config
from mailman.core.chains import process
from mailman.email.message import Message
from mailman.interfaces.chain import LinkAction
from mailman.interfaces.chain import LinkAction, HoldEvent
from mailman.interfaces.mailinglist import IHeaderMatchSet
from mailman.testing.helpers import (
LogFileMark, configuration, event_subscribers,
specialized_message_from_string as mfs)
from mailman.testing.layers import ConfigLayer
from mailman.testing.helpers import LogFileMark, configuration
......@@ -42,6 +46,24 @@ class TestHeaderChain(unittest.TestCase):
def setUp(self):
self._mlist = create_list('[email protected]')
def test_make_link(self):
# Test that make_link() with no given chain creates a Link with a
# deferred link action.
link = make_link('Subject', '[tT]esting')
self.assertEqual(link.rule.header, 'Subject')
self.assertEqual(link.rule.pattern, '[tT]esting')
self.assertEqual(link.action, LinkAction.defer)
self.assertIsNone(link.chain)
def test_make_link_with_chain(self):
# Test that make_link() with a given chain creates a Link with a jump
# action to the chain.
link = make_link('Subject', '[tT]esting', 'accept')
self.assertEqual(link.rule.header, 'Subject')
self.assertEqual(link.rule.pattern, '[tT]esting')
self.assertEqual(link.action, LinkAction.jump)
self.assertEqual(link.chain, config.chains['accept'])
@configuration('antispam', header_checks="""
Foo: a+
Bar: bb?
......@@ -119,3 +141,66 @@ class TestHeaderChain(unittest.TestCase):
HeaderMatchRule, 'x-spam-score', '.*')
finally:
config.rules = saved_rules
def test_list_rule(self):
# Test that the header-match chain has the header checks from the
# mailing-list configuration.
chain = config.chains['header-match']
header_matches = IHeaderMatchSet(self._mlist)
header_matches.add('Foo', 'a+')
links = [link for link in chain.get_links(self._mlist, Message(), {})
if link.rule.name != 'any']
self.assertEqual(len(links), 1)
self.assertEqual(links[0].action, LinkAction.defer)
self.assertEqual(links[0].rule.header, 'foo')
self.assertEqual(links[0].rule.pattern, 'a+')
def test_list_complex_rule(self):
# Test that the mailing-list header-match complex rules are read
# properly.
chain = config.chains['header-match']
header_matches = IHeaderMatchSet(self._mlist)
header_matches.add('Foo', 'a+', 'reject')
header_matches.add('Bar', 'b+', 'discard')
header_matches.add('Baz', 'z+', 'accept')
links = [link for link in chain.get_links(self._mlist, Message(), {})
if link.rule.name != 'any']
self.assertEqual(len(links), 3)
self.assertEqual([
(link.rule.header, link.rule.pattern, link.action, link.chain.name)
for link in links
],
[('foo', 'a+', LinkAction.jump, 'reject'),
('bar', 'b+', LinkAction.jump, 'discard'),
('baz', 'z+', LinkAction.jump, 'accept'),
])
@configuration('antispam', header_checks="""
Foo: foo
""", jump_chain='hold')
def test_priority_site_over_list(self):
# Test that the site-wide checks take precedence over the list-specific
# checks.
msg = mfs("""\
From: [email protected]
To: [email protected]
Subject: A message
Message-ID: <ant>
Foo: foo
MIME-Version: 1.0
A message body.
""")
msgdata = {}
header_matches = IHeaderMatchSet(self._mlist)
header_matches.add('Foo', 'foo', 'accept')
# This event subscriber records the event that occurs when the message
# is processed by the owner chain.
events = []
with event_subscribers(events.append):
process(self._mlist, msg, msgdata, start_chain='header-match')
self.assertEqual(len(events), 1)
event = events[0]
# Site-wide wants to hold the message, the list wants to accept it.
self.assertTrue(isinstance(event, HoldEvent))
self.assertEqual(event.chain, config.chains['hold'])
......@@ -34,6 +34,12 @@
factory="mailman.model.mailinglist.ListArchiverSet"
/>
<adapter
for="mailman.interfaces.mailinglist.IMailingList"
provides="mailman.interfaces.mailinglist.IHeaderMatchSet"
factory="mailman.model.mailinglist.HeaderMatchSet"
/>
<adapter
for="mailman.interfaces.mailinglist.IMailingList"
provides="mailman.interfaces.requests.IListRequests"
......
"""header_matches
Revision ID: 42756496720
Revises: 2bb9b382198
Create Date: 2015-09-11 10:11:38.310315
"""
# revision identifiers, used by Alembic.
revision = '42756496720'
down_revision = '2bb9b382198'
import sqlalchemy as sa
from alembic import op
from mailman.database.helpers import is_sqlite, exists_in_db
def upgrade():
# Create the new table
header_match_table = op.create_table(
'headermatch',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('mailing_list_id', sa.Integer(), nullable=True),
sa.Column('header', sa.Unicode(), nullable=False),
sa.Column('pattern', sa.Unicode(), nullable=False),
sa.Column('chain', sa.Unicode(), nullable=True),
sa.ForeignKeyConstraint(['mailing_list_id'], ['mailinglist.id'], ),
sa.PrimaryKeyConstraint('id')
)
# Now migrate the data. It can't be offline because we need to read the
# pickles.
connection = op.get_bind()
# Don't import the table definition from the models, it may break this
# migration when the model is updated in the future (see the Alembic doc).
mlist_table = sa.sql.table(
'mailinglist',
sa.sql.column('id', sa.Integer),
sa.sql.column('header_matches', sa.PickleType)
)
for mlist_id, old_matches in connection.execute(mlist_table.select()):
for old_match in old_matches:
connection.execute(header_match_table.insert().values(
mailing_list_id=mlist_id,
header=old_match[0],
pattern=old_match[1],
chain=None
))
# Now that data is migrated, drop the old column (except on SQLite which
# does not support this)
if not is_sqlite(connection):
op.drop_column('mailinglist', 'header_matches')
def downgrade():
if not exists_in_db(op.get_bind(), 'mailinglist', 'header_matches'):
# SQLite will not have deleted the former column, since it does not
# support column deletion.
op.add_column(
'mailinglist',
sa.Column('header_matches', sa.PickleType, nullable=True))
# Now migrate the data. It can't be offline because we need to read the
# pickles.
connection = op.get_bind()
# Don't import the table definition from the models, it may break this
# migration when the model is updated in the future (see the Alembic doc).
mlist_table = sa.sql.table(
'mailinglist',
sa.sql.column('id', sa.Integer),
sa.sql.column('header_matches', sa.PickleType)
)
header_match_table = sa.sql.table(
'headermatch',
sa.sql.column('mailing_list_id', sa.Integer),
sa.sql.column('header', sa.Unicode),
sa.sql.column('pattern', sa.Unicode),
)
for mlist_id, header, pattern in connection.execute(
header_match_table.select()).fetchall():
mlist = connection.execute(mlist_table.select().where(
mlist_table.c.id == mlist_id)).fetchone()
header_matches = mlist['header_matches']
if not header_matches:
header_matches = []
header_matches.append((header, pattern))
connection.execute(mlist_table.update().where(
mlist_table.c.id == mlist_id).values(
header_matches=header_matches))
op.drop_table('headermatch')
......@@ -28,6 +28,7 @@ import sqlalchemy as sa
from mailman.config import config
from mailman.database.alembic import alembic_cfg
from mailman.database.helpers import exists_in_db
from mailman.database.model import Model
from mailman.testing.layers import ConfigLayer
......@@ -41,17 +42,61 @@ class TestMigrations(unittest.TestCase):
def tearDown(self):
# Drop and restore a virgin database.
config.db.store.rollback()
md = sa.MetaData(bind=config.db.engine)
md.reflect()
# We have circular dependencies between user and address, thus we can't
# use drop_all() without getting a warning. Setting use_alter to True
# on the foreign keys helps SQLAlchemy mark those loops as known.
for tablename in ('user', 'address'):
if tablename not in md.tables:
continue
for fk in md.tables[tablename].foreign_keys:
fk.constraint.use_alter = True
md.drop_all()
Model.metadata.create_all(config.db.engine)
def test_all_migrations(self):
script_dir = alembic.script.ScriptDirectory.from_config(alembic_cfg)
revisions = [sc.revision for sc in
script_dir.walk_revisions('base', 'heads')]
revisions = [sc.revision for sc in script_dir.walk_revisions()]
for revision in revisions:
alembic.command.downgrade(alembic_cfg, revision)
revisions.reverse()
for revision in revisions:
alembic.command.upgrade(alembic_cfg, revision)
def test_42756496720_header_matches(self):
test_header_matches = [
('test-header-1', 'test-pattern-1'),
('test-header-2', 'test-pattern-2'),
('test-header-3', 'test-pattern-3'),
]
mlist_table = sa.sql.table(
'mailinglist',
sa.sql.column('id', sa.Integer),
sa.sql.column('header_matches', sa.PickleType)
)
header_match_table = sa.sql.table(
'headermatch',
sa.sql.column('mailing_list_id', sa.Integer),
sa.sql.column('header', sa.Unicode),
sa.sql.column('pattern', sa.Unicode),
)
# Downgrading.
config.db.store.execute(mlist_table.insert().values(id=1))
config.db.store.execute(header_match_table.insert().values(
[{'mailing_list_id': 1, 'header': hm[0], 'pattern': hm[1]}
for hm in test_header_matches]))
config.db.store.commit()
alembic.command.downgrade(alembic_cfg, '2bb9b382198')
results = config.db.store.execute(
mlist_table.select()).fetchall()
self.assertEqual(results[0].header_matches, test_header_matches)
self.assertFalse(exists_in_db(config.db.engine, 'headermatch'))
config.db.store.commit()
# Upgrading.
alembic.command.upgrade(alembic_cfg, '42756496720')
results = config.db.store.execute(
header_match_table.select()).fetchall()
self.assertEqual(results,
[(1, hm[0], hm[1]) for hm in test_header_matches])
......@@ -43,6 +43,11 @@ Bugs
Configuration
-------------
* Mailing lists can now have their own header matching rules, although
site-defined rules still take precedence. Importing a Mailman 2.1 list
with header matching rules defined will create them in Mailman 3, albeit
with a few unsupported corner cases. Definition of new header matching
rules is not yet exposed through the REST API. Given by Aurélien Bompard.
* The default languages from Mailman 2.1 have been ported over. Given by
Aurélien Bompard.
......
......@@ -20,6 +20,7 @@
__all__ = [
'IAcceptableAlias',
'IAcceptableAliasSet',
'IHeaderMatch',
'IListArchiver',
'IListArchiverSet',
'IMailingList',
......@@ -839,3 +840,61 @@ class IListArchiverSet(Interface):
:return: the matching `IListArchiver` or None if the named archiver
does not exist.
"""
class IHeaderMatch(Interface):
"""A mailing list-specific message header matching rule."""
mailing_list = Attribute(
"""The mailing list for the header match.""")
header = Attribute(
"""The email header that will be checked.""")
pattern = Attribute(
"""The regular expression to match.""")
chain = Attribute(
"""The chain to jump to on a match.
If it is None, the `[antispam]jump_chain` action in the configuration
file is used.
""")
class IHeaderMatchSet(Interface):
"""The set of header matching rules for a mailing list."""
def clear():
"""Clear the set of header matching rules."""
def add(header, pattern, chain=None):
"""Add the given header matching rule to this mailing list's set.
:param header: The email header to filter on. It will be converted to
lower case for consistency.
:type header: string
:param pattern: The regular expression to use.
:type pattern: string
:param chain: The chain to jump to, or None to use the site-wide
configuration. Defaults to None.
:type chain: string or None
:raises ValueError: if the header/pattern pair already exists for this
mailing list.
"""
def remove(header, pattern):
"""Remove the given header matching rule from this mailing list's set.
:param header: The email header part of the rule to be removed.
:type header: string
:param pattern: The regular expression part of the rule to be removed.
:type pattern: string
"""
def __iter__():
"""An iterator over all the IHeaderMatches defined in this set.
:return: iterator over `IHeaderMatch`.
"""
......@@ -37,8 +37,9 @@ from mailman.interfaces.digests import DigestFrequency
from mailman.interfaces.domain import IDomainManager
from mailman.interfaces.languages import ILanguageManager
from mailman.interfaces.mailinglist import (
IAcceptableAlias, IAcceptableAliasSet, IListArchiver, IListArchiverSet,
IMailingList, Personalization, ReplyToMunging, SubscriptionPolicy)
IAcceptableAlias, IAcceptableAliasSet, IHeaderMatch, IHeaderMatchSet,
IListArchiver, IListArchiverSet, IMailingList, Personalization,
ReplyToMunging, SubscriptionPolicy)
from mailman.interfaces.member import (
AlreadySubscribedError, MemberRole, MissingPreferredAddressError,
SubscriptionEvent)
......@@ -57,6 +58,7 @@ from sqlalchemy import (
LargeBinary, PickleType, Unicode)
from sqlalchemy.event import listen
from sqlalchemy.orm import relationship
from sqlalchemy.orm.exc import NoResultFound
from urllib.parse import urljoin
from zope.component import getUtility
from zope.event import notify
......@@ -149,7 +151,6 @@ class MailingList(Model):
gateway_to_mail = Column(Boolean)
gateway_to_news = Column(Boolean)
goodbye_message_uri = Column(Unicode)
header_matches = Column(PickleType)
header_uri = Column(Unicode)
hold_these_nonmembers = Column(PickleType)
info = Column(Unicode)
......@@ -621,3 +622,70 @@ class ListArchiverSet:
return store.query(ListArchiver).filter(
ListArchiver.mailing_list == self._mailing_list,
ListArchiver.name == archiver_name).first()
@implementer(IHeaderMatch)
class HeaderMatch(Model):
"""See `IHeaderMatch`."""
__tablename__ = 'headermatch'
id = Column(Integer, primary_key=True)
mailing_list_id = Column(Integer, ForeignKey('mailinglist.id'))
mailing_list = relationship('MailingList', backref='header_matches')
header = Column(Unicode)
pattern = Column(Unicode)
chain = Column(Unicode, nullable=True)
@implementer(IHeaderMatchSet)
class HeaderMatchSet:
"""See `IHeaderMatchSet`."""
def __init__(self, mailing_list):
self._mailing_list = mailing_list
@dbconnection
def clear(self, store):
"""See `IHeaderMatchSet`."""
store.query(HeaderMatch).filter(
HeaderMatch.mailing_list == self._mailing_list).delete()
@dbconnection
def add(self, store, header, pattern, chain=None):
header = header.lower()
existing = store.query(HeaderMatch).filter(
HeaderMatch.mailing_list == self._mailing_list,
HeaderMatch.header == header,
HeaderMatch.pattern == pattern).count()
if existing > 0:
raise ValueError('Pattern already exists')
header_match = HeaderMatch(
mailing_list=self._mailing_list,
header=header, pattern=pattern, chain=chain)
store.add(header_match)
@dbconnection
def remove(self, store, header, pattern):
header = header.lower()
# Don't just filter and use delete(), or the MailingList.header_matches
# collection will not be updated:
# http://docs.sqlalchemy.org/en/rel_1_0/orm/collections.html#dynamic-relationship-loaders
try:
existing = store.query(HeaderMatch).filter(
HeaderMatch.mailing_list == self._mailing_list,
HeaderMatch.header == header,
HeaderMatch.pattern == pattern).one()
except NoResultFound:
raise ValueError('Pattern does not exist')
else:
self._mailing_list.header_matches.remove(existing)
@dbconnection
def __iter__(self, store):
yield from store.query(HeaderMatch).filter(
HeaderMatch.mailing_list == self._mailing_list)
......@@ -32,7 +32,7 @@ from mailman.config import config
from mailman.database.transaction import transaction
from mailman.interfaces.listmanager import IListManager
from mailman.interfaces.mailinglist import (
IAcceptableAliasSet, IListArchiverSet)
IAcceptableAliasSet, IHeaderMatchSet, IListArchiverSet)
from mailman.interfaces.member import (
AlreadySubscribedError, MemberRole, MissingPreferredAddressError)
from mailman.interfaces.usermanager import IUserManager
......@@ -163,3 +163,58 @@ class TestAcceptableAliases(unittest.TestCase):
self.assertEqual(['[email protected]'], list(alias_set.aliases))
getUtility(IListManager).delete(self._mlist)
self.assertEqual(len(list(alias_set.aliases)), 0)
class TestHeaderMatch(unittest.TestCase):
layer = ConfigLayer
def setUp(self):
self._mlist = create_list('[email protected]')
def test_lowercase_header(self):
header_matches = IHeaderMatchSet(self._mlist)
header_matches.add('Header', 'pattern')
self.assertEqual(len(self._mlist.header_matches), 1)
self.assertEqual(self._mlist.header_matches[0].header, 'header')
def test_chain_defaults_to_none(self):
header_matches = IHeaderMatchSet(self._mlist)
header_matches.add('header', 'pattern')
self.assertEqual(len(self._mlist.header_matches), 1)
self.assertEqual(self._mlist.header_matches[0].chain, None)
def test_duplicate(self):
header_matches = IHeaderMatchSet(self._mlist)
header_matches.add('Header', 'pattern')
self.assertRaises(
ValueError, header_matches.add, 'Header', 'pattern')
self.assertEqual(len(self._mlist.header_matches), 1)
def test_remove_non_existent(self):
header_matches = IHeaderMatchSet(self._mlist)
self.assertRaises(
ValueError, header_matches.remove, 'header', 'pattern')
def test_add_remove(self):
header_matches = IHeaderMatchSet(self._mlist)
header_matches.add('header', 'pattern')
self.assertEqual(len(self._mlist.header_matches), 1)
header_matches.remove('header', 'pattern')
self.assertEqual(len(self._mlist.header_matches), 0)
def test_iterator(self):
header_matches = IHeaderMatchSet(self._mlist)
header_matches.add('Header', 'pattern')
header_matches.add('Subject', 'patt.*')
header_matches.add('From', '.*@example.com', 'discard')
header_matches.add('From', '.*@example.org', 'accept')
matches = sorted((match.header, match.pattern, match.chain)
for match in IHeaderMatchSet(self._mlist))
self.assertEqual(
matches,
[('from', '.*@example.com', 'discard'),
('from', '.*@example.org', 'accept'),
('header', 'pattern', None),
('subject', 'patt.*', None),
])
......@@ -119,13 +119,21 @@ List-specific header matching
=============================
Each mailing list can also be configured with a set of header matching regular
expression rules. These are used to impose list-specific header filtering
with the same semantics as the global `[antispam]` section.
expression rules. These can be used to impose list-specific header filtering
with the same semantics as the global ``[antispam]`` section, or to have a
different action.
To follow the global antispam action, the header match rule must not specify a
``chain`` to jump to. If the default antispam action is changed in the
configuration file and Mailman is restarted, those rules will get the new jump
action.
The list administrator wants to match not on four stars, but on three plus
signs, but only for the current mailing list.
>>> mlist.header_matches = [('x-spam-score', '[+]{3,}')]
>>> from mailman.interfaces.mailinglist import IHeaderMatchSet
>>> header_matches = IHeaderMatchSet(mlist)
>>> header_matches.add('x-spam-score', '[+]{3,}')
A message with a spam score of two pluses does not match.
......@@ -139,8 +147,8 @@ A message with a spam score of two pluses does not match.
x-spam-score: [+]{3,}
But a message with a spam score of three pluses does match. Because a message
with the previous Message-Id is already in the moderation queue, we need to
give this message a new Message-Id.
with the previous ``Message-Id`` is already in the moderation queue, we need
to give this message a new ``Message-Id``.
>>> msgdata = {}
>>> del msg['x-spam-score']
......@@ -165,3 +173,25 @@ As does a message with a spam score of four pluses.
Rule hits:
x-spam-score: [+]{3,}
No rules missed
Now, the list administrator wants to match on three plus signs, but wants
those emails to be discarded instead of held.
>>> header_matches.remove('x-spam-score', '[+]{3,}')
>>> header_matches.add('x-spam-score', '[+]{3,}', 'discard')
A message with a spam score of three pluses will still match, and the message
will be discarded.