diff options
| -rw-r--r-- | bitbake/lib/bb/codeparser.py | 273 | ||||
| -rw-r--r-- | bitbake/lib/bb/data_smart.py | 6 |
2 files changed, 276 insertions, 3 deletions
diff --git a/bitbake/lib/bb/codeparser.py b/bitbake/lib/bb/codeparser.py new file mode 100644 index 0000000000..88a26c82a7 --- /dev/null +++ b/bitbake/lib/bb/codeparser.py | |||
| @@ -0,0 +1,273 @@ | |||
| 1 | from pysh import pyshyacc, pyshlex | ||
| 2 | from itertools import chain | ||
| 3 | from bb import msg, utils | ||
| 4 | import ast | ||
| 5 | import codegen | ||
| 6 | |||
def check_indent(codestr):
    """Make an indented code fragment compilable as a top-level block.

    If `codestr` begins with a space or a tab, it is prefixed with an
    ``if 1:`` line so that the existing indentation becomes the body of
    that statement.  Anything else, including the empty string, is returned
    unchanged.
    """

    # Compare by value, not identity: 'is' on strings only works when the
    # interpreter happens to share the objects.  startswith() also copes
    # with an empty string, where indexing [0] would raise IndexError.
    if codestr.startswith((" ", "\t")):
        return "if 1:\n" + codestr

    return codestr
| 14 | |||
# Results of previous parse_python() calls, keyed by the code that was parsed.
# Values are PythonParser objects whose 'references' and 'execs' hold the result.
pythonparsecache = {}

try:
    _string_types = basestring  # Python 2: covers both str and unicode
except NameError:
    _string_types = str  # Python 3 has no basestring


class PythonParser():
    """Collect the metadata variables and functions used by python code.

    After parse_python(), 'references' holds the names passed as string
    literals to the getVar-style and exec_func/exec_task APIs, and 'execs'
    holds the names of every other function the code calls.
    """

    class ValueVisitor():
        """Visitor to traverse a python abstract syntax tree and obtain
        the variables referenced via bitbake metadata APIs, and the external
        functions called.
        """

        getvars = ("d.getVar", "bb.data.getVar", "data.getVar")
        expands = ("d.expand", "bb.data.expand", "data.expand")
        execs = ("bb.build.exec_func", "bb.build.exec_task")

        @classmethod
        def _compare_name(cls, strparts, node):
            """Given a sequence of strings representing a python name,
            where the first component is the actual name and the following
            elements are its qualifiers from innermost to outermost,
            determine if the supplied node matches.
            """

            if not strparts:
                return True

            current, rest = strparts[0], strparts[1:]
            if isinstance(node, ast.Attribute):
                if current == node.attr:
                    return cls._compare_name(rest, node.value)
            elif isinstance(node, ast.Name):
                if current == node.id:
                    return True
            return False

        @classmethod
        def compare_name(cls, value, node):
            """Convenience function for the _compare_name method, which
            can accept a string (which is split by '.' for you), or an
            iterable of strings, in which case it checks to see if any of
            them match, similar to isinstance.
            """

            if isinstance(value, _string_types):
                return cls._compare_name(tuple(reversed(value.split("."))),
                                         node)
            else:
                return any(cls.compare_name(item, node) for item in value)

        @staticmethod
        def _literal_string(node):
            """Return the text of a string literal node, or None for any
            other kind of node.
            """

            # Matched by class name so that ast.Str is never looked up:
            # older interpreters parse string literals to Str nodes, newer
            # ones to Constant nodes and deprecate (later remove) ast.Str.
            kind = node.__class__.__name__
            if kind == "Str":
                return node.s
            if kind == "Constant" and isinstance(node.value, _string_types):
                return node.value
            return None

        def __init__(self, value):
            self.var_references = set()
            self.var_execs = set()
            self.direct_func_calls = set()
            self.var_expands = set()
            self.value = value

        @classmethod
        def warn(cls, func, arg):
            """Warn about calls of bitbake APIs which pass a non-literal
            argument for the variable name, as we're not able to track such
            a reference.
            """

            try:
                funcstr = codegen.to_source(func)
                argstr = codegen.to_source(arg)
            except TypeError:
                msg.debug(2, None, "Failed to convert function and argument to source form")
            else:
                msg.debug(1, None, "Warning: in call to '%s', argument '%s' is not a literal" %
                                     (funcstr, argstr))

        def _record_literal(self, node, target):
            """Add the first positional argument of the call 'node' to the
            set 'target' if it is a string literal, otherwise warn.
            """

            if not node.args:
                # The name was passed by keyword or left out altogether, so
                # there is no positional argument to inspect.
                return

            literal = self._literal_string(node.args[0])
            if literal is None:
                self.warn(node.func, node.args[0])
            else:
                target.add(literal)

        def visit_Call(self, node):
            """Classify one Call node and record what it refers to.

            getVar-style calls feed var_references, expand calls feed
            var_expands, exec_func/exec_task calls feed var_execs, and any
            other call has its (possibly dotted) name added to
            direct_func_calls.
            """

            func = node.func
            if self.compare_name(self.getvars, func):
                self._record_literal(node, self.var_references)
            elif self.compare_name(self.expands, func):
                if not node.args:
                    return
                arg = node.args[0]
                literal = self._literal_string(arg)
                if literal is not None:
                    # add(), not update(): update() with a string would
                    # insert each of its characters separately.
                    self.var_expands.add(literal)
                elif isinstance(arg, ast.Call) and \
                     self.compare_name(self.getvars, arg.func):
                    # expand(getVar(...)): nothing to record here, the inner
                    # getVar call is a Call node of its own.
                    pass
                else:
                    self.warn(func, arg)
            elif self.compare_name(self.execs, func):
                self._record_literal(node, self.var_execs)
            elif isinstance(func, ast.Name):
                self.direct_func_calls.add(func.id)
            elif isinstance(func, ast.Attribute):
                # We must have a qualified name.  Therefore we need
                # to walk the chain of 'Attribute' nodes to determine
                # the qualification.
                attr_node = func.value
                identifier = func.attr
                while isinstance(attr_node, ast.Attribute):
                    identifier = attr_node.attr + "." + identifier
                    attr_node = attr_node.value
                if isinstance(attr_node, ast.Name):
                    identifier = attr_node.id + "." + identifier
                self.direct_func_calls.add(identifier)

    def __init__(self):
        self.execs = set()
        self.references = set()

    def parse_python(self, node):
        """Parse the python code in 'node' and fill in 'references' and
        'execs'.

        'node' is converted with str() before compiling and is also used as
        the key into pythonparsecache, so it must be hashable.  Returns
        None; results are read from the attributes.
        """

        cached = pythonparsecache.get(node)
        if cached is not None:
            # Hand out copies so that callers modifying their results
            # cannot alter what later cache hits will see.
            self.references = set(cached.references)
            self.execs = set(cached.execs)
            return

        code = compile(check_indent(str(node)), "<string>", "exec",
                       ast.PyCF_ONLY_AST)

        visitor = self.ValueVisitor(code)
        for n in ast.walk(code):
            if isinstance(n, ast.Call):
                visitor.visit_Call(n)

        found = visitor.var_references | visitor.var_execs
        self.references.update(found)
        self.execs = visitor.direct_func_calls

        # Cache a separate snapshot rather than this parser itself, so that
        # further use of the parser does not change the cached result.
        snapshot = PythonParser()
        snapshot.references = set(found)
        snapshot.execs = set(self.execs)
        pythonparsecache[node] = snapshot
| 146 | |||
| 147 | |||
# Results of previous parse_shell() calls, keyed by the shell code parsed.
# Values are ShellParser objects whose 'execs' holds the result.
shellparsecache = {}


class ShellParser():
    """Work out which commands a piece of shell code executes.

    'funcdefs' collects the names of shell functions defined by the code,
    'allexecs' every command name seen, and 'execs' the commands in
    'allexecs' that are not in 'funcdefs'.
    """

    def __init__(self):
        self.funcdefs = set()
        self.allexecs = set()
        self.execs = set()

    def parse_shell(self, value):
        """Parse the supplied shell code in a string, returning the external
        commands it executes.

        Results are remembered in shellparsecache, keyed by 'value'.  The
        set returned is also stored in self.execs.
        """

        cached = shellparsecache.get(value)
        if cached is not None:
            # Copy, so the caller cannot alter what later hits will see.
            self.execs = set(cached.execs)
            return self.execs

        self._parse_shell(value)

        # Cache a separate snapshot rather than this parser itself, so that
        # further use of the parser does not change the cached result.
        snapshot = ShellParser()
        snapshot.execs = set(self.execs)
        shellparsecache[value] = snapshot

        return self.execs

    def _parse_shell(self, value):
        """Parse 'value' without consulting or updating the cache.

        Used directly for commands nested inside other shell code ($(),
        backticks, eval), whose commands must always be merged into this
        parser's 'allexecs'.
        """

        try:
            tokens, _ = pyshyacc.parse(value, eof=True, debug=False)
        except pyshlex.NeedMore:
            # NOTE(review): assumes pysh.sherrors is where pysh defines
            # ShellSyntaxError; confirm against the pysh package.
            from pysh.sherrors import ShellSyntaxError
            raise ShellSyntaxError("Unexpected EOF")

        for token in tokens:
            self.process_tokens(token)
        self.execs = set(cmd for cmd in self.allexecs if cmd not in self.funcdefs)

        return self.execs

    def process_tokens(self, tokens):
        """Process a supplied portion of the syntax tree as returned by
        pyshyacc.parse.

        Raises NotImplementedError for a token type without a handler.
        """

        # Every handler returns a pair: (nested tokens to process, words to
        # process).  Either element may be None.

        def function_definition(value):
            self.funcdefs.add(value.name)
            return [value.body], None

        def case_clause(value):
            # Element 0 of each item in the case is the list of patterns, and
            # Element 1 of each item in the case is the list of commands to be
            # executed when that pattern matches.
            words = chain(*[item[0] for item in value.items])
            cmds = chain(*[item[1] for item in value.items])
            return cmds, words

        def if_clause(value):
            main = chain(value.cond, value.if_cmds)
            rest = value.else_cmds
            if isinstance(rest, tuple) and rest[0] == "elif":
                return chain(main, if_clause(rest[1]))
            else:
                return chain(main, rest)

        def simple_command(value):
            return None, chain(value.words, (assign[1] for assign in value.assigns))

        token_handlers = {
            "and_or": lambda x: ((x.left, x.right), None),
            "async": lambda x: ([x], None),
            "brace_group": lambda x: (x.cmds, None),
            "for_clause": lambda x: (x.cmds, x.items),
            "function_definition": function_definition,
            "if_clause": lambda x: (if_clause(x), None),
            "pipeline": lambda x: (x.commands, None),
            "redirect_list": lambda x: ([x.cmd], None),
            "subshell": lambda x: (x.cmds, None),
            "while_clause": lambda x: (chain(x.condition, x.cmds), None),
            "until_clause": lambda x: (chain(x.condition, x.cmds), None),
            "simple_command": simple_command,
            "case_clause": case_clause,
        }

        for token in tokens:
            name, value = token
            try:
                handler = token_handlers[name]
            except KeyError:
                raise NotImplementedError("Unsupported token type " + name)

            more_tokens, words = handler(value)

            if more_tokens:
                self.process_tokens(more_tokens)

            if words:
                self.process_words(words)

    def process_words(self, words):
        """Process a set of 'words' in pyshyacc parlance, which includes
        extraction of executed commands from $() blocks, as well as grabbing
        the command name argument.
        """

        words = list(words)
        # Iterate over a copy: entries may be removed from 'words' below.
        for word in list(words):
            wtree = pyshlex.make_wordtree(word[1])
            for part in wtree:
                if not isinstance(part, list):
                    continue

                if part[0] in ('`', '$('):
                    command = pyshlex.wordtree_as_string(part[1:-1])
                    self._parse_shell(command)

                    # A command name produced by a substitution is dropped
                    # from the words examined below.
                    if word[0] in ("cmd_name", "cmd_word"):
                        if word in words:
                            words.remove(word)

        usetoken = False
        for word in words:
            if word[0] in ("cmd_name", "cmd_word") or \
               (usetoken and word[0] == "TOKEN"):
                if "=" in word[1]:
                    # Words containing '=' are skipped; a following TOKEN
                    # may then be taken as the command.
                    usetoken = True
                    continue

                cmd = word[1]
                if cmd.startswith("$"):
                    msg.debug(1, None, "Warning: execution of non-literal command '%s'" % cmd)
                elif cmd == "eval":
                    command = " ".join(text for _, text in words[1:])
                    self._parse_shell(command)
                else:
                    self.allexecs.add(cmd)
                break
| 273 | |||
diff --git a/bitbake/lib/bb/data_smart.py b/bitbake/lib/bb/data_smart.py index 1ed04d50c3..b9d9476fd8 100644 --- a/bitbake/lib/bb/data_smart.py +++ b/bitbake/lib/bb/data_smart.py | |||
| @@ -46,7 +46,7 @@ class VariableParse: | |||
| 46 | self.value = val | 46 | self.value = val |
| 47 | 47 | ||
| 48 | self.references = set() | 48 | self.references = set() |
| 49 | self.funcrefs = set() | 49 | self.execs = set() |
| 50 | 50 | ||
| 51 | def var_sub(self, match): | 51 | def var_sub(self, match): |
| 52 | key = match.group()[2:-1] | 52 | key = match.group()[2:-1] |
| @@ -64,10 +64,10 @@ class VariableParse: | |||
| 64 | code = match.group()[3:-1] | 64 | code = match.group()[3:-1] |
| 65 | codeobj = compile(code.strip(), self.varname or "<expansion>", "eval") | 65 | codeobj = compile(code.strip(), self.varname or "<expansion>", "eval") |
| 66 | 66 | ||
| 67 | parser = bb.rptest.PythonParser() | 67 | parser = bb.codeparser.PythonParser() |
| 68 | parser.parse_python(code) | 68 | parser.parse_python(code) |
| 69 | self.references |= parser.references | 69 | self.references |= parser.references |
| 70 | self.funcrefs |= parser.execs | 70 | self.execs |= parser.execs |
| 71 | 71 | ||
| 72 | value = utils.better_eval(codeobj, {"d": self.d}) | 72 | value = utils.better_eval(codeobj, {"d": self.d}) |
| 73 | return str(value) | 73 | return str(value) |
