path: root/get_zammad_ham_spam.py
#!/usr/bin/env python3
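"""Feed mails from closed Zammad tickets to rspamd for spam/ham learning.

Looks up tickets closed within the last NEW_MAIL_TIME_PERIOD days in the
Zammad database and pipes the original mail of each ticket to rspamc:
tickets tagged with tag_item_id 1 (assumed to be the spam tag in this
Zammad instance) are learned as spam, untagged tickets as ham. Mails that
were not delivered to one of VALID_DESTINATIONS are skipped as a safety
check. Presumably meant to be run periodically, e.g. from cron.
"""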

# apt install python3-psycopg2

import subprocess
import datetime
import psycopg2

# DB settings
DB_HOST = "DBHOST"
DB_USER = "rspamd"
DB_PASS = "DBPASSWORD"
DB_NAME = "zammad"

# optional log file for spam subjects; set to a file path to enable
SUBJECT_LOG = None

# consider tickets closed in the last X days for learning
NEW_MAIL_TIME_PERIOD = 7

VALID_DESTINATIONS = [
    b'ticketsystem@hostname'
]
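# only mails carrying a Delivered-To header for one of the addresses above are
# learned (see the safety check in the main loop); this guards against feeding
# rspamd mails that were not actually addressed to the ticket system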


def send_to_rspamd(ticketid, title, spamflag, maildata):
    # send the mail to rspamc to process as ham/spam
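    # rspamc's learn_spam / learn_ham subcommands read the raw message from
    # stdin, which is why the mail data is passed via communicate() below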
    with subprocess.Popen(['/usr/bin/rspamc', f"learn_{spamflag}"],
                          stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                          stderr=subprocess.PIPE) as rspamd:
        stdout, stderr = rspamd.communicate(input=maildata)
        if len(stdout) > 0:
            print(f"Ticket {ticketid}: {spamflag}\nSubject: {title}\nstdout: {stdout.decode('utf-8')}")
        if len(stderr) > 0:
            print(f"Ticket {ticketid}: {spamflag}\nSubject: {title}\nstderr: {stderr.decode('utf-8')}")


# Feed mails from already processed (closed) Zammad tickets to rspamc for ham/spam learning
if __name__ == '__main__':
    conn = psycopg2.connect(f"dbname={DB_NAME} host={DB_HOST} user={DB_USER} password={DB_PASS}")
    cur = conn.cursor()
    # starting date for learning: NEW_MAIL_TIME_PERIOD days ago
    last_week = (datetime.datetime.now() - datetime.timedelta(days=NEW_MAIL_TIME_PERIOD)).date()

    # fetch all tickets closed within the learning period (state_id 4 = closed) and the id of each ticket's first article
    cur.execute("SELECT tickets.id AS ticket, tickets.title AS title, tags.tag_item_id AS flag, MIN(ticket_articles.id) AS article \
            FROM tickets LEFT JOIN tags ON tags.o_id=tickets.id \
            LEFT JOIN ticket_articles ON tickets.id=ticket_articles.ticket_id \
            WHERE tickets.state_id=4 \
            AND (tags.tag_item_id=1 OR tags.tag_item_id IS NULL) \
            AND tickets.close_at>=%s \
            GROUP BY tickets.id, tags.tag_item_id", (last_week.strftime('%Y-%m-%d'),))
    tickets = cur.fetchall()

    for ticketrow in tickets:
        ticket, title, flag, article = ticketrow
        # fetch the associated mail file from the db; there should be exactly one store entry with store_object_id=2 (Ticket::Article::Mail)
        cur.execute("SELECT stores.filename AS filename, store_provider_dbs.data AS data \
                FROM stores LEFT JOIN store_provider_dbs ON store_provider_dbs.id=store_file_id \
                WHERE stores.store_object_id=2 \
                AND stores.o_id=%s", (article,))
        mail = cur.fetchall()
        if len(mail) != 1:
            print(f"ERROR: did find more than one mail in ticket {ticket}")
            continue

        filename, data = mail[0]
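        # psycopg2 returns bytea columns as a memoryview (Python 3), so convert
        # to bytes for the substring check and the rspamc stdin pipe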
        mailbytes = data.tobytes()

        # safety check
        has_been_delivered_to_me = False
        for dest in VALID_DESTINATIONS:
            if b'Delivered-To: ' + dest in mailbytes:
                has_been_delivered_to_me = True
                break

        if not has_been_delivered_to_me:
            print(f"ERROR: mail for ticket {ticket} does not contain Delivered-To header field")
        elif flag == 1:
            send_to_rspamd(ticket, title, 'spam', mailbytes)
            if SUBJECT_LOG:
                with open(SUBJECT_LOG, 'a') as subject_log:
                    subject_log.write(f"{title}\n")
        elif flag is None:
            send_to_rspamd(ticket, title, 'ham', mailbytes)
        else:
            print(f"ERROR: mail flag in ticket {ticket} undefined: {flag}")

    cur.close()
    conn.close()