scripts/lib/resulttool/resultutils.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274

# resulttool - common library/utility functions
#
# Copyright (c) 2019, Intel Corporation.
# Copyright (c) 2019, Linux Foundation
#
# SPDX-License-Identifier: GPL-2.0-only
#

import os
import base64
import zlib
import json
import scriptpath
import copy
import urllib.request
import posixpath
import logging
scriptpath.add_oe_lib_path()

logger = logging.getLogger('resulttool')

flatten_map = {
    "oeselftest": [],
    "runtime": [],
    "sdk": [],
    "sdkext": [],
    "manual": []
}
regression_map = {
    "oeselftest": ['TEST_TYPE', 'MACHINE'],
    "runtime": ['TESTSERIES', 'TEST_TYPE', 'IMAGE_BASENAME', 'MACHINE', 'IMAGE_PKGTYPE', 'DISTRO'],
    "sdk": ['TESTSERIES', 'TEST_TYPE', 'IMAGE_BASENAME', 'MACHINE', 'SDKMACHINE'],
    "sdkext": ['TESTSERIES', 'TEST_TYPE', 'IMAGE_BASENAME', 'MACHINE', 'SDKMACHINE'],
    "manual": ['TEST_TYPE', 'TEST_MODULE', 'IMAGE_BASENAME', 'MACHINE']
}
store_map = {
    "oeselftest": ['TEST_TYPE', 'TESTSERIES', 'MACHINE'],
    "runtime": ['TEST_TYPE', 'DISTRO', 'MACHINE', 'IMAGE_BASENAME'],
    "sdk": ['TEST_TYPE', 'MACHINE', 'SDKMACHINE', 'IMAGE_BASENAME'],
    "sdkext": ['TEST_TYPE', 'MACHINE', 'SDKMACHINE', 'IMAGE_BASENAME'],
    "manual": ['TEST_TYPE', 'TEST_MODULE', 'MACHINE', 'IMAGE_BASENAME']
}

rawlog_sections = {
    "ptestresult.rawlogs": "ptest",
    "ltpresult.rawlogs": "ltp",
    "ltpposixresult.rawlogs": "ltpposix"
}

def is_url(p):
    """
    Helper for determining if the given path is a URL
    """
    return p.startswith('http://') or p.startswith('https://')

extra_configvars = {'TESTSERIES': ''}

#
# Load the json file and append the results data into the provided results dict
#
def append_resultsdata(results, f, configmap=store_map, configvars=extra_configvars):
    if type(f) is str:
        if is_url(f):
            with urllib.request.urlopen(f) as response:
                data = json.loads(response.read().decode('utf-8'))
            url = urllib.parse.urlparse(f)
            testseries = posixpath.basename(posixpath.dirname(url.path))
        else:
            with open(f, "r") as filedata:
                try:
                    data = json.load(filedata)
                except json.decoder.JSONDecodeError:
                    print("Cannot decode {}. Possible corruption. Skipping.".format(f))
                    data = ""
            testseries = os.path.basename(os.path.dirname(f))
    else:
        data = f
    for res in data:
        if "configuration" not in data[res] or "result" not in data[res]:
            raise ValueError("Test results data without configuration or result section?")
        for config in configvars:
            if config == "TESTSERIES" and "TESTSERIES" not in data[res]["configuration"]:
                data[res]["configuration"]["TESTSERIES"] = testseries
                continue
            if config not in data[res]["configuration"]:
                data[res]["configuration"][config] = configvars[config]
        testtype = data[res]["configuration"].get("TEST_TYPE")
        if testtype not in configmap:
            raise ValueError("Unknown test type %s" % testtype)
        testpath = "/".join(data[res]["configuration"].get(i) for i in configmap[testtype])
        if testpath not in results:
            results[testpath] = {}
        results[testpath][res] = data[res]

#
# Walk a directory and find/load results data
# or load directly from a file
#
def load_resultsdata(source, configmap=store_map, configvars=extra_configvars):
    results = {}
    if is_url(source) or os.path.isfile(source):
        append_resultsdata(results, source, configmap, configvars)
        return results
    for root, dirs, files in os.walk(source):
        for name in files:
            f = os.path.join(root, name)
            if name == "testresults.json":
                append_resultsdata(results, f, configmap, configvars)
    return results

def filter_resultsdata(results, resultid):
    newresults = {}
    for r in results:
        for i in results[r]:
            if i == resultsid:
                 newresults[r] = {}
                 newresults[r][i] = results[r][i]
    return newresults

def strip_logs(results):
    newresults = copy.deepcopy(results)
    for res in newresults:
        if 'result' not in newresults[res]:
            continue
        for logtype in rawlog_sections:
            if logtype in newresults[res]['result']:
                del newresults[res]['result'][logtype]
        if 'ptestresult.sections' in newresults[res]['result']:
            for i in newresults[res]['result']['ptestresult.sections']:
                if 'log' in newresults[res]['result']['ptestresult.sections'][i]:
                    del newresults[res]['result']['ptestresult.sections'][i]['log']
    return newresults

# For timing numbers, crazy amounts of precision don't make sense and just confuse
# the logs. For numbers over 1, trim to 3 decimal places, for numbers less than 1,
# trim to 4 significant digits
def trim_durations(results):
    for res in results:
        if 'result' not in results[res]:
            continue
        for entry in results[res]['result']:
            if 'duration' in results[res]['result'][entry]:
                duration = results[res]['result'][entry]['duration']
                if duration > 1:
                    results[res]['result'][entry]['duration'] = float("%.3f" % duration)
                elif duration < 1:
                    results[res]['result'][entry]['duration'] = float("%.4g" % duration)
    return results

def handle_cleanups(results):
    # Remove pointless path duplication from old format reproducibility results
    for res2 in results:
        try:
            section = results[res2]['result']['reproducible']['files']
            for pkgtype in section:
                for filelist in section[pkgtype].copy():
                    if section[pkgtype][filelist] and type(section[pkgtype][filelist][0]) == dict:
                        newlist = []
                        for entry in section[pkgtype][filelist]:
                            newlist.append(entry["reference"].split("/./")[1])
                        section[pkgtype][filelist] = newlist

        except KeyError:
            pass
    # Remove pointless duplicate rawlogs data
    try:
        del results[res2]['result']['reproducible.rawlogs']
    except KeyError:
        pass

def decode_log(logdata):
    if isinstance(logdata, str):
        return logdata
    elif isinstance(logdata, dict):
        if "compressed" in logdata:
            data = logdata.get("compressed")
            data = base64.b64decode(data.encode("utf-8"))
            data = zlib.decompress(data)
            return data.decode("utf-8", errors='ignore')
    return None

def generic_get_log(sectionname, results, section):
    if sectionname not in results:
        return None
    if section not in results[sectionname]:
        return None

    ptest = results[sectionname][section]
    if 'log' not in ptest:
        return None
    return decode_log(ptest['log'])

def ptestresult_get_log(results, section):
    return generic_get_log('ptestresult.sections', results, section)

def generic_get_rawlogs(sectname, results):
    if sectname not in results:
        return None
    if 'log' not in results[sectname]:
        return None
    return decode_log(results[sectname]['log'])

def save_resultsdata(results, destdir, fn="testresults.json", ptestjson=False, ptestlogs=False):
    for res in results:
        if res:
            dst = destdir + "/" + res + "/" + fn
        else:
            dst = destdir + "/" + fn
        os.makedirs(os.path.dirname(dst), exist_ok=True)
        resultsout = results[res]
        if not ptestjson:
            resultsout = strip_logs(results[res])
        trim_durations(resultsout)
        handle_cleanups(resultsout)
        with open(dst, 'w') as f:
            f.write(json.dumps(resultsout, sort_keys=True, indent=1))
        for res2 in results[res]:
            if ptestlogs and 'result' in results[res][res2]:
                seriesresults = results[res][res2]['result']
                for logtype in rawlog_sections:
                    logdata = generic_get_rawlogs(logtype, seriesresults)
                    if logdata is not None:
                        logger.info("Extracting " + rawlog_sections[logtype] + "-raw.log")
                        with open(dst.replace(fn, rawlog_sections[logtype] + "-raw.log"), "w+") as f:
                            f.write(logdata)
                if 'ptestresult.sections' in seriesresults:
                    for i in seriesresults['ptestresult.sections']:
                        sectionlog = ptestresult_get_log(seriesresults, i)
                        if sectionlog is not None:
                            with open(dst.replace(fn, "ptest-%s.log" % i), "w+") as f:
                                f.write(sectionlog)

def git_get_result(repo, tags, configmap=store_map):
    git_objs = []
    for tag in tags:
        files = repo.run_cmd(['ls-tree', "--name-only", "-r", tag]).splitlines()
        git_objs.extend([tag + ':' + f for f in files if f.endswith("testresults.json")])

    def parse_json_stream(data):
        """Parse multiple concatenated JSON objects"""
        objs = []
        json_d = ""
        for line in data.splitlines():
            if line == '}{':
                json_d += '}'
                objs.append(json.loads(json_d))
                json_d = '{'
            else:
                json_d += line
        objs.append(json.loads(json_d))
        return objs

    # Optimize by reading all data with one git command
    results = {}
    for obj in parse_json_stream(repo.run_cmd(['show'] + git_objs + ['--'])):
        append_resultsdata(results, obj, configmap=configmap)

    return results

def test_run_results(results):
    """
    Convenient generator function that iterates over all test runs that have a
    result section.

    Generates a tuple of:
        (result json file path, test run name, test run (dict), test run "results" (dict))
    for each test run that has a "result" section
    """
    for path in results:
        for run_name, test_run in results[path].items():
            if not 'result' in test_run:
                continue
            yield path, run_name, test_run, test_run['result']