1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
|
#! /usr/bin/env python3
# series.py
#
# Read a series' mbox file and get information about the patches
# contained
#
# Copyright (C) 2024 BayLibre SAS
#
# SPDX-License-Identifier: GPL-2.0-only
#
import email
import re
# From: https://stackoverflow.com/questions/59681461/read-a-big-mbox-file-with-python
class MboxReader:
    """Stream the messages of an mbox file one at a time.

    The file is opened in binary mode and split on lines that start with
    ``b'From '``. The lines between two such separators are joined and
    parsed with ``email.message_from_bytes``. No un-escaping of body lines
    is performed.

    Intended to be used as a context manager so that the underlying file
    handle is closed on exit.
    """

    def __init__(self, filepath):
        """Open *filepath* and consume its leading ``From `` separator line.

        Raises:
            OSError: if the file cannot be opened.
            AssertionError: if the first line does not start with ``From ``.
        """
        self.handle = open(filepath, 'rb')
        # Deliberately not an ``assert`` statement: under ``python -O`` the
        # whole assert (including the readline() side effect) would be
        # stripped, leaving the separator line unread. AssertionError is
        # still raised so existing callers see the same exception type.
        if not self.handle.readline().startswith(b'From '):
            # Do not leak the handle when the file is rejected.
            self.handle.close()
            raise AssertionError(
                f"{filepath!r} does not start with an mbox 'From ' line")

    def __enter__(self):
        """Return the reader itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, exc_traceback):
        """Close the underlying file handle."""
        self.handle.close()

    def __iter__(self):
        """Return an iterator over the remaining messages in the file."""
        return self._iter_messages()

    def __next__(self):
        """Return a generator over all remaining messages.

        Kept for backward compatibility: this has always returned a
        generator over the remaining messages rather than a single
        message. Prefer iterating over the reader directly.
        """
        return self._iter_messages()

    def _iter_messages(self):
        """Yield one ``email.message.Message`` per mbox entry."""
        pending = []
        for line in self.handle:
            if line.startswith(b'From '):
                # A separator line terminates the message collected so far.
                yield email.message_from_bytes(b''.join(pending))
                pending = []
            else:
                pending.append(line)
        # End of file terminates the final message.
        yield email.message_from_bytes(b''.join(pending))
class Patch:
    """A single patch email, split into its headers, message and diff.

    Attributes:
        author: value of the ``From`` header (``None`` if absent).
        to: value of the ``To`` header (``None`` if absent).
        cc: value of the ``Cc`` header (``None`` if absent).
        subject: value of the ``Subject`` header (``None`` if absent).
        split_body: the payload split at the first ``---``; a list of one
            element when no separator is present, otherwise two.
        commit_message: the payload text before the first ``---``.
        diff: the payload text after the first ``---``, or an empty string
            when the payload contains no separator.
    """

    def __init__(self, data):
        """Build a patch from *data*, a parsed email message object."""
        self.author = data['From']
        self.to = data['To']
        self.cc = data['Cc']
        self.subject = data['Subject']
        self.split_body = re.split('---', data.get_payload(), maxsplit=1)
        self.commit_message = self.split_body[0]
        # A message without a '---' separator (for example a cover letter)
        # has no diff part; fall back to an empty diff rather than raising
        # IndexError.
        self.diff = self.split_body[1] if len(self.split_body) > 1 else ""
class PatchSeries:
    """The patches contained in one mbox file, plus the branch they target.

    Attributes:
        patches: list of ``Patch`` objects, one per message in the mbox.
        patch_count: number of entries in ``patches``.
        path: the mbox file path the series was read from.
        branch: target branch parsed from the first patch's subject
            (see ``get_branch``).
    """

    def __init__(self, filepath):
        """Read every message in the mbox at *filepath*.

        Raises:
            AssertionError: if no patches were read from the file.
        """
        with MboxReader(filepath) as mbox:
            self.patches = [Patch(message) for message in mbox]

        # Explicit raise rather than ``assert`` so the check survives
        # ``python -O``; the exception type is unchanged for callers.
        if not self.patches:
            raise AssertionError(f"no patches found in {filepath!r}")
        self.patch_count = len(self.patches)
        self.path = filepath
        self.branch = self.get_branch()

    def get_branch(self):
        """Return the target branch named in the first patch's subject.

        The bracketed part of the subject is split on commas, and the first
        entry accepted by ``valid_branch`` is returned (truncated at the
        first ``]``). When the subject is missing, has no bracketed part,
        or contains no valid entry, ``"master"`` is returned.
        """
        # There should be at least one patch in the series and it should
        # include the branch name in the subject, so parse that. A message
        # without a Subject header yields None; treat it as an empty subject.
        subject = self.patches[0].subject or ""
        match = re.search(r"(\[.*\])", subject, re.DOTALL)
        if not match:
            # No tag in the subject line: assume the patch targets master.
            return "master"

        tags = [tag.strip() for tag in match.group(1).strip('[]').split(',')]
        for tag in tags:
            if PatchSeries.valid_branch(tag):
                # Return the branch name excluding any brackets.
                return tag.split(']')[0]
        return "master"

    @staticmethod
    def valid_branch(branch):
        """Return True if *branch* does not look like a non-branch tag.

        Names starting (case-insensitively) with ``patch``, ``rfc`` or
        ``resend``, a version such as ``v2``, or a counter such as ``1/3``
        are rejected.
        """
        lbranch = branch.lower()
        if lbranch.startswith(('patch', 'rfc', 'resend')):
            return False
        if re.search(r'^v\d+', lbranch) or re.search(r'^\d+/\d+', lbranch):
            return False
        return True
|