summaryrefslogtreecommitdiffstats
path: root/meta/lib/patchtest/mbox.py
blob: 1d95819b7aee5daa879eac0710827b45b2acfec3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
#! /usr/bin/env python3

# mbox.py
#
# Read a series' mbox file and get information about the patches
# contained
#
# Copyright (C) 2024 BayLibre SAS
#
# SPDX-License-Identifier: GPL-2.0-only
#

import email
import re

# From: https://stackoverflow.com/questions/59681461/read-a-big-mbox-file-with-python
class MboxReader:
    """Read the messages of an mbox file one at a time.

    The file is opened in binary mode and split on lines that start with
    ``From ``; the lines between two such separators are parsed into one
    message with ``email.message_from_bytes``.

    Use it as a context manager so the file handle is closed on exit, and
    iterate over it to obtain the messages::

        with MboxReader(path) as mbox:
            for message in mbox:
                ...
    """

    def __init__(self, filepath):
        """Open *filepath* and consume its leading ``From `` separator line.

        Raises:
            AssertionError: if the first line does not start with ``From ``.
        """
        self.handle = open(filepath, 'rb')
        # Read the separator outside of any assert statement: the read has a
        # side effect (it consumes the first line), which must still happen
        # when Python runs with -O and assert statements are stripped.
        first_line = self.handle.readline()
        if not first_line.startswith(b'From '):
            # Do not leak the handle when the file is rejected.
            self.handle.close()
            # AssertionError is kept so existing callers see the same
            # exception type as before.
            raise AssertionError(
                "%s does not start with an mbox 'From ' line" % filepath)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, exc_traceback):
        self.handle.close()

    def __iter__(self):
        """Return an iterator over the remaining messages in the file."""
        return iter(self.__next__())

    def __next__(self):
        """Return a generator yielding each message read from the handle.

        Note that this is a generator function, so calling it returns the
        generator rather than a single message; ``__iter__`` relies on that.
        """
        lines = []
        for line in self.handle:
            if line.startswith(b'From '):
                # A separator ends the message accumulated so far.
                yield email.message_from_bytes(b''.join(lines))
                lines = []
            else:
                lines.append(line)
        # End of file terminates the last message.
        yield email.message_from_bytes(b''.join(lines))

class Patch:
    """A single patch email, split into its headers, message and diff.

    Attributes:
        author: value of the ``From`` header (None when absent).
        to: value of the ``To`` header (None when absent).
        cc: value of the ``Cc`` header (None when absent).
        subject: value of the ``Subject`` header (None when absent).
        split_body: the payload split at the first ``---`` occurrence.
        commit_message: the payload text before the first ``---``.
        diff: the payload text after the first ``---``, or an empty string
            when the payload contains no ``---`` at all.
    """

    def __init__(self, data):
        """Build the patch from *data*, a parsed email message object."""
        self.author = data['From']
        self.to = data['To']
        self.cc = data['Cc']
        self.subject = data['Subject']
        self.split_body = re.split('---', data.get_payload(), maxsplit=1)
        self.commit_message = self.split_body[0]
        # A message without any '---' (for instance one that carries no diff)
        # yields a single-element split; report an empty diff instead of
        # failing with IndexError.
        self.diff = self.split_body[1] if len(self.split_body) > 1 else ''

class PatchSeries:
    """The patches contained in one mbox file, plus their target branch.

    Attributes:
        patches: list of Patch objects, one per message, in file order.
        patch_count: number of entries in ``patches``.
        path: the mbox path exactly as passed to the constructor.
        branch: branch name parsed from the first patch's subject, or
            "master" when no branch tag is found.
    """

    def __init__(self, filepath):
        """Load every message of the mbox at *filepath* as a Patch.

        Raises:
            AssertionError: if no patch could be read from the file.
        """
        with MboxReader(filepath) as mbox:
            self.patches = [Patch(message) for message in mbox]

        # Raised explicitly (rather than via an assert statement) so the
        # check is still performed when Python runs with -O; the exception
        # type is kept for existing callers.
        if not self.patches:
            raise AssertionError("no patches found in %s" % filepath)

        self.patch_count = len(self.patches)
        self.path = filepath
        self.branch = self.get_branch()

    def get_branch(self):
        """Return the branch named in the first patch's subject.

        The bracketed part of the subject is stripped of its outer brackets
        and split on commas; the first entry accepted by ``valid_branch`` is
        the branch. When the subject is missing, has no bracketed tag, or
        none of the entries is a valid branch name, the patch is assumed to
        target "master".
        """
        # There should be at least one patch in the series and it should
        # include the branch name in the subject, so parse that. A message
        # without a Subject header has subject None; treat it as untagged.
        subject = self.patches[0].subject or ""
        match = re.search(r"(\[.*\])", subject, re.DOTALL)
        if match is None:
            return "master"

        prefix = match.group(1).strip('[]')
        for candidate in (entry.strip() for entry in prefix.split(',')):
            if PatchSeries.valid_branch(candidate):
                # Several tags such as "[branch][PATCH]" are captured as one
                # string, so keep only the text up to the first closing
                # bracket.
                return candidate.split(']')[0]

        return "master"

    @staticmethod
    def valid_branch(branch):
        """ Check if branch is valid name """
        lbranch = branch.lower()

        # Tags describing the mail itself (patch markers, revision numbers
        # such as "v2", or series positions such as "1/3") are not branches.
        if lbranch.startswith(('patch', 'rfc', 'resend')):
            return False
        if re.match(r'v\d+', lbranch) or re.match(r'\d+/\d+', lbranch):
            return False
        return True