From 5ad87d6c0256b162efdf3623d475b9aea2227225 Mon Sep 17 00:00:00 2001 From: David Baer Date: Fri, 22 May 2020 21:59:41 -0400 Subject: [PATCH] Parse gz reports --- dmarcreceiver/commands.py | 79 +++++++++++++++++++++++++-------------- dmarcreceiver/model.py | 13 +++++-- setup.py | 2 +- 3 files changed, 61 insertions(+), 33 deletions(-) diff --git a/dmarcreceiver/commands.py b/dmarcreceiver/commands.py index 86ec7b4..4aee49d 100644 --- a/dmarcreceiver/commands.py +++ b/dmarcreceiver/commands.py @@ -4,9 +4,12 @@ import zipfile from datetime import datetime from email import message_from_file from tempfile import TemporaryDirectory -from lxml.etree import parse as parse_xml -from dmarcreceiver.model import DBSession, Report, ReportError, ReportRecord, OverrideReason, DKIMResult, SPFResult, metadata, init_model +from lxml.etree import parse as parse_xml, tounicode as xml_tree_to_unicode +from dmarcreceiver.model import DBSession, Report, ReportXML, \ + ReportError, ReportRecord, OverrideReason, DKIMResult, SPFResult, metadata, init_model import transaction +import gzip +from io import BytesIO from .util import sendmail, install_exception_handler from .config import config @@ -73,6 +76,8 @@ def parse_report(f): scoop_elements(spf, spf_node, 'domain', 'result') record.spf_results.append(spf) report.records.append(record) + xml = xml_tree_to_unicode(tree) + report.original = ReportXML(xml=xml) DBSession.add(report) transaction.commit() @@ -81,32 +86,7 @@ def read_config_if_present(args): if os.access(args.config_file, os.R_OK): config.read_file(open(args.config_file, 'rt')) -def receive_report(args): - read_config_if_present(args) - init_model() - - # read email message from stdin - msg = message_from_file(sys.stdin) - - # if not running on a tty, install email-based exception handler - if not sys.stderr.isatty(): - install_exception_handler(msg) - - # check for zip file - content_type = msg['content-type'] - if content_type.find(';') != -1: - content_type = content_type.split(';',1)[0] - - if content_type != 'application/zip': - # not a zip file - bounce to postmaster - bounce_address = config.get('bounce_address') - if args.bounce_address: - bounce_address = args.bounce_address - if bounce_address: - msg['To'] = bounce_address - sendmail(msg) - return - +def extract_zip(msg): with TemporaryDirectory( prefix=os.path.splitext(os.path.basename(sys.argv[0]))[0] + '-' ) as tempdir: @@ -121,7 +101,48 @@ def receive_report(args): report_fn = os.path.join(tempdir, os.path.basename(namelist[0])) z.extract(namelist[0], path=tempdir) with open(report_fn, 'rb') as f: - parse_report(f) + data = f.read() + return data + +def extract_gzip(msg): + return gzip.decompress(msg.get_payload(decode=True)) + +def receive_report(args): + read_config_if_present(args) + init_model() + + # read email message from stdin + msg = message_from_file(sys.stdin) + + # if not running on a tty, install email-based exception handler + if not sys.stderr.isatty(): + install_exception_handler(msg) + + xml_content = None + for part in msg.walk(): + # check for zip file + content_type = part['content-type'] + if content_type.find(';') != -1: + content_type = content_type.split(';',1)[0] + + if content_type == 'application/zip': + xml_content = extract_zip(part) + break + elif content_type == 'application/gzip': + xml_content = extract_gzip(part) + break + + if xml_content is None: + # not a zip file - bounce to postmaster + bounce_address = config.get('bounce_address') + if args.bounce_address: + bounce_address = args.bounce_address + if bounce_address: + msg['To'] = bounce_address + sendmail(msg) + return + + parse_report(BytesIO(xml_content)) def init(args): read_config_if_present(args) diff --git a/dmarcreceiver/model.py b/dmarcreceiver/model.py index 132ea44..cd4c1eb 100644 --- a/dmarcreceiver/model.py +++ b/dmarcreceiver/model.py @@ -1,6 +1,7 @@ import zope.sqlalchemy as zsqla -from sqlalchemy import Column, Integer, String, Unicode, Enum, CheckConstraint, ForeignKey, DateTime, create_engine -from sqlalchemy.orm import scoped_session, sessionmaker, relationship +from sqlalchemy import Column, Integer, String, Unicode, \ + UnicodeText, Enum, CheckConstraint, ForeignKey, DateTime, create_engine +from sqlalchemy.orm import scoped_session, sessionmaker, relationship, backref import sqlalchemy.types as satypes import sqlalchemy.dialects.postgresql as dpg from sqlalchemy.ext.declarative import declarative_base @@ -60,9 +61,15 @@ class Report(DeclarativeBase): adkim = Column(Alignment, nullable=False) aspf = Column(Alignment, nullable=False) p = Column(Disposition, nullable=False) - sp = Column(Disposition, nullable=False) + sp = Column(Disposition) pct = Column(Integer, CheckConstraint('pct >= 0 AND pct <= 100'), nullable=False) +class ReportXML(DeclarativeBase): + __tablename__ = 'report_xml' + report_id = Column(Integer, ForeignKey(Report.id, onupdate='CASCADE', ondelete='CASCADE'), nullable=False, primary_key=True) + xml = Column(UnicodeText, nullable=False) + report = relationship(Report, backref=backref('original', uselist=False), uselist=False) + class ReportError(DeclarativeBase): __tablename__ = 'report_errors' id = Column(Integer, primary_key=True) diff --git a/setup.py b/setup.py index eac0c5d..fe9034c 100644 --- a/setup.py +++ b/setup.py @@ -16,7 +16,7 @@ install_requires=[ setup( name='DMARCReceiver', - version='1.1', + version='1.2', description='Receive DMARC reports', author='David Baer', author_email='david@amyanddavid.net',