Parse gz reports

This commit is contained in:
David Baer
2020-05-22 21:59:41 -04:00
parent 9491d5d25d
commit 5ad87d6c02
3 changed files with 61 additions and 33 deletions

View File

@@ -4,9 +4,12 @@ import zipfile
from datetime import datetime
from email import message_from_file
from tempfile import TemporaryDirectory
from lxml.etree import parse as parse_xml
from dmarcreceiver.model import DBSession, Report, ReportError, ReportRecord, OverrideReason, DKIMResult, SPFResult, metadata, init_model
from lxml.etree import parse as parse_xml, tounicode as xml_tree_to_unicode
from dmarcreceiver.model import DBSession, Report, ReportXML, \
ReportError, ReportRecord, OverrideReason, DKIMResult, SPFResult, metadata, init_model
import transaction
import gzip
from io import BytesIO
from .util import sendmail, install_exception_handler
from .config import config
@@ -73,6 +76,8 @@ def parse_report(f):
scoop_elements(spf, spf_node, 'domain', 'result')
record.spf_results.append(spf)
report.records.append(record)
xml = xml_tree_to_unicode(tree)
report.original = ReportXML(xml=xml)
DBSession.add(report)
transaction.commit()
@@ -81,32 +86,7 @@ def read_config_if_present(args):
if os.access(args.config_file, os.R_OK):
config.read_file(open(args.config_file, 'rt'))
def receive_report(args):
read_config_if_present(args)
init_model()
# read email message from stdin
msg = message_from_file(sys.stdin)
# if not running on a tty, install email-based exception handler
if not sys.stderr.isatty():
install_exception_handler(msg)
# check for zip file
content_type = msg['content-type']
if content_type.find(';') != -1:
content_type = content_type.split(';',1)[0]
if content_type != 'application/zip':
# not a zip file - bounce to postmaster
bounce_address = config.get('bounce_address')
if args.bounce_address:
bounce_address = args.bounce_address
if bounce_address:
msg['To'] = bounce_address
sendmail(msg)
return
def extract_zip(msg):
with TemporaryDirectory(
prefix=os.path.splitext(os.path.basename(sys.argv[0]))[0] + '-'
) as tempdir:
@@ -121,7 +101,48 @@ def receive_report(args):
report_fn = os.path.join(tempdir, os.path.basename(namelist[0]))
z.extract(namelist[0], path=tempdir)
with open(report_fn, 'rb') as f:
parse_report(f)
data = f.read()
return data
def extract_gzip(msg):
    """Return the decompressed bytes of a gzip-compressed message part.

    ``msg`` is an email message part whose payload is a gzip stream.  The
    payload is first decoded according to the part's
    Content-Transfer-Encoding and then gunzipped.
    """
    compressed = msg.get_payload(decode=True)
    return gzip.decompress(compressed)
def receive_report(args):
    """Read a report email from stdin and store the report it carries.

    The message is walked part by part; the first part whose content type
    is ``application/zip`` or ``application/gzip`` is unpacked and handed to
    ``parse_report``.  If no such part exists, the message is forwarded to
    the bounce address (``args.bounce_address`` takes precedence over the
    ``bounce_address`` config value) when one is set, and nothing is parsed.

    ``args`` must provide ``config_file`` (read by
    ``read_config_if_present``) and ``bounce_address``.
    """
    read_config_if_present(args)
    init_model()

    # read email message from stdin
    msg = message_from_file(sys.stdin)

    # if not running on a tty, install email-based exception handler
    if not sys.stderr.isatty():
        install_exception_handler(msg)

    extractors = {
        'application/zip': extract_zip,
        'application/gzip': extract_gzip,
    }

    xml_content = None
    for part in msg.walk():
        # get_content_type() strips parameters, lowercases the value and
        # falls back to a default when the header is missing, so a part
        # without a Content-Type header no longer raises AttributeError.
        extractor = extractors.get(part.get_content_type())
        if extractor is not None:
            xml_content = extractor(part)
            break

    if xml_content is None:
        # no zip or gzip attachment - bounce to postmaster
        bounce_address = config.get('bounce_address')
        if args.bounce_address:
            bounce_address = args.bounce_address
        if bounce_address:
            # Assigning a header appends rather than replaces, so drop the
            # original recipient first to avoid two To: headers.
            del msg['To']
            msg['To'] = bounce_address
            sendmail(msg)
        return

    parse_report(BytesIO(xml_content))
def init(args):
read_config_if_present(args)

View File

@@ -1,6 +1,7 @@
import zope.sqlalchemy as zsqla
from sqlalchemy import Column, Integer, String, Unicode, Enum, CheckConstraint, ForeignKey, DateTime, create_engine
from sqlalchemy.orm import scoped_session, sessionmaker, relationship
from sqlalchemy import Column, Integer, String, Unicode, \
UnicodeText, Enum, CheckConstraint, ForeignKey, DateTime, create_engine
from sqlalchemy.orm import scoped_session, sessionmaker, relationship, backref
import sqlalchemy.types as satypes
import sqlalchemy.dialects.postgresql as dpg
from sqlalchemy.ext.declarative import declarative_base
@@ -60,9 +61,15 @@ class Report(DeclarativeBase):
adkim = Column(Alignment, nullable=False)
aspf = Column(Alignment, nullable=False)
p = Column(Disposition, nullable=False)
sp = Column(Disposition, nullable=False)
sp = Column(Disposition)
pct = Column(Integer, CheckConstraint('pct >= 0 AND pct <= 100'), nullable=False)
class ReportXML(DeclarativeBase):
    """Original XML text of a report, stored one-to-one with ``Report``."""

    __tablename__ = 'report_xml'

    # The owning report's id doubles as this table's primary key, so each
    # report has at most one stored XML document.
    report_id = Column(
        Integer,
        ForeignKey(Report.id, onupdate='CASCADE', ondelete='CASCADE'),
        nullable=False,
        primary_key=True,
    )
    # Serialized XML document text.
    xml = Column(UnicodeText, nullable=False)

    # Scalar on both sides: ``ReportXML.report`` and ``Report.original``.
    report = relationship(
        Report,
        backref=backref('original', uselist=False),
        uselist=False,
    )
class ReportError(DeclarativeBase):
__tablename__ = 'report_errors'
id = Column(Integer, primary_key=True)

View File

@@ -16,7 +16,7 @@ install_requires=[
setup(
name='DMARCReceiver',
version='1.1',
version='1.2',
description='Receive DMARC reports',
author='David Baer',
author_email='david@amyanddavid.net',