diff options
| author | Richard Purdie <richard.purdie@linuxfoundation.org> | 2012-03-11 14:30:31 +0000 |
|---|---|---|
| committer | Richard Purdie <richard.purdie@linuxfoundation.org> | 2012-03-12 15:52:33 +0000 |
| commit | c27b7aab3c37f182bf9acc5d459185f32fb195d0 (patch) | |
| tree | be1e1144c068a9d109eedbc8f113187c1d14438a /bitbake/lib/bb/codeparser.py | |
| parent | 7bf0a790b23833a1b426d2349885459112fb5d7c (diff) | |
| download | poky-c27b7aab3c37f182bf9acc5d459185f32fb195d0.tar.gz | |
codeparser: Call intern over the set contents for better cache performance
See the comment in the code in the commit for more information.
(Bitbake rev: 2d56dc7b1f0d186e14c4c8a949b280b6b3fc31de)
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
Diffstat (limited to 'bitbake/lib/bb/codeparser.py')
| -rw-r--r-- | bitbake/lib/bb/codeparser.py | 21 |
1 files changed, 21 insertions, 0 deletions
diff --git a/bitbake/lib/bb/codeparser.py b/bitbake/lib/bb/codeparser.py index 04a34f944a..af2e19411c 100644 --- a/bitbake/lib/bb/codeparser.py +++ b/bitbake/lib/bb/codeparser.py | |||
| @@ -98,6 +98,12 @@ def parser_cache_save(d): | |||
| 98 | bb.utils.unlockfile(lf) | 98 | bb.utils.unlockfile(lf) |
| 99 | bb.utils.unlockfile(glf) | 99 | bb.utils.unlockfile(glf) |
| 100 | 100 | ||
| 101 | def internSet(items): | ||
| 102 | new = set() | ||
| 103 | for i in items: | ||
| 104 | new.add(intern(i)) | ||
| 105 | return new | ||
| 106 | |||
| 101 | def parser_cache_savemerge(d): | 107 | def parser_cache_savemerge(d): |
| 102 | cachefile = parser_cachefile(d) | 108 | cachefile = parser_cachefile(d) |
| 103 | if not cachefile: | 109 | if not cachefile: |
| @@ -133,6 +139,21 @@ def parser_cache_savemerge(d): | |||
| 133 | data[1][h] = extradata[1][h] | 139 | data[1][h] = extradata[1][h] |
| 134 | os.unlink(f) | 140 | os.unlink(f) |
| 135 | 141 | ||
| 142 | # When the dicts are originally created, python calls intern() on the set keys | ||
| 143 | # which significantly improves memory usage. Sadly the pickle/unpickle process | ||
| 144 | # doesn't call intern() on the keys and results in the same strings being duplicated | ||
| 145 | # in memory. This also means pickle will save the same string multiple times in | ||
| 146 | # the cache file. By interning the data here, the cache file shrinks dramatically | ||
| 147 | # meaning faster load times and the reloaded cache files also consume much less | ||
| 148 | # memory. This is worth any performance hit from these loops and the use of the | ||
| 149 | # intern() data storage. | ||
| 150 | # Python 3.x may behave better in this area | ||
| 151 | for h in data[0]: | ||
| 152 | data[0][h]["refs"] = internSet(data[0][h]["refs"]) | ||
| 153 | data[0][h]["execs"] = internSet(data[0][h]["execs"]) | ||
| 154 | for h in data[1]: | ||
| 155 | data[1][h]["execs"] = internSet(data[1][h]["execs"]) | ||
| 156 | |||
| 136 | p = pickle.Pickler(file(cachefile, "wb"), -1) | 157 | p = pickle.Pickler(file(cachefile, "wb"), -1) |
| 137 | p.dump([data, PARSERCACHE_VERSION]) | 158 | p.dump([data, PARSERCACHE_VERSION]) |
| 138 | 159 | ||
