Diffstat (limited to 'import_zip.py')
-rw-r--r--   import_zip.py   345
1 files changed, 345 insertions, 0 deletions
diff --git a/import_zip.py b/import_zip.py
new file mode 100644
index 00000000..08aff326
--- /dev/null
+++ b/import_zip.py
@@ -0,0 +1,345 @@
#
# Copyright (C) 2008 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import stat
import struct
import zlib
import cStringIO

from import_ext import ImportExternal
from error import ImportError

class ImportZip(ImportExternal):
  """Streams a zip file from the network directly into a Project's
     Git repository.
  """
  @classmethod
  def CanAccept(cls, url):
    """Can this importer read and unpack the data stored at url?
    """
    if url.endswith('.zip') or url.endswith('.jar'):
      return True
    return False
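
  # Example (illustrative note, not part of the original change): CanAccept
  # keys purely off the URL's file extension; the URLs below are made up.
  #
  #   ImportZip.CanAccept('http://example.com/src-1.0.zip')  -> True
  #   ImportZip.CanAccept('http://example.com/lib.jar')      -> True
  #   ImportZip.CanAccept('http://example.com/src.tar.gz')   -> False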

  def _UnpackFiles(self):
    url_fd, url = self._OpenUrl()
    try:
      if not self.__class__.CanAccept(url):
        raise ImportError('non-zip file extension: %s' % url)

      zip = _ZipFile(url_fd)
      for entry in zip.FileRecords():
        data = zip.Open(entry).read()
        sz = len(data)

        if data and _SafeCRLF(data):
          data = data.replace('\r\n', '\n')
          sz = len(data)

        fd = cStringIO.StringIO(data)
        self._UnpackOneFile(entry.mode, sz, entry.name, fd)
        zip.Close(entry)

      for entry in zip.CentralDirectory():
        self._SetFileMode(entry.name, entry.mode)

      zip.CheckTail()
    finally:
      url_fd.close()


def _SafeCRLF(data):
  """Is it reasonably safe to perform a CRLF->LF conversion?

     If the stream contains a NUL byte it is likely binary,
     and thus a CRLF->LF conversion may damage the stream.

     If the only NUL is in the last position of the stream,
     but the stream otherwise permits a CRLF<->LF conversion,
     we do the CRLF conversion anyway.  At least one source ZIP
     file has this structure in its source code.

     If every occurrence of a CR and LF is paired up as a
     CRLF pair then the conversion is safely bi-directional:
     s/\r\n/\n/g and s/\n/\r\n/g can convert between them.
  """
  nul = data.find('\0')
  if 0 <= nul and nul < (len(data) - 1):
    return False

  n_lf = 0
  last = 0
  while True:
    lf = data.find('\n', last)
    if lf < 0:
      break
    if lf == 0 or data[lf - 1] != '\r':
      return False
    last = lf + 1
    n_lf += 1
  return n_lf > 0
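
# Example (illustrative, not part of the original change; assumes the
# definition above): _SafeCRLF only reports True when every LF in the
# stream is part of a CRLF pair, so the conversion is reversible:
#
#   _SafeCRLF('a\r\nb\r\n')    -> True   (every LF is preceded by CR)
#   _SafeCRLF('a\nb\r\n')      -> False  (bare LF present)
#   _SafeCRLF('a\x00b\r\n')    -> False  (NUL before the final byte)
#   _SafeCRLF('abc')           -> False  (no LF at all, nothing to convert)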

class _ZipFile(object):
  """Streaming iterator to parse a zip file on the fly.
  """
  def __init__(self, fd):
    self._fd = _UngetStream(fd)

  def FileRecords(self):
    return _FileIter(self._fd)

  def CentralDirectory(self):
    return _CentIter(self._fd)

  def CheckTail(self):
    type_buf = self._fd.read(4)
    type = struct.unpack('<I', type_buf)[0]
    if type != 0x06054b50: # end of central directory
      raise ImportError('zip record %x unsupported' % type)

  def Open(self, entry):
    if entry.is_compressed:
      return _InflateStream(self._fd)
    else:
      if entry.has_trailer:
        raise ImportError('unable to extract streamed zip')
      return _FixedLengthStream(self._fd, entry.uncompressed_size)

  def Close(self, entry):
    if entry.has_trailer:
      type = struct.unpack('<I', self._fd.read(4))[0]
      if type == 0x08074b50:
        # Not a formal type marker, but commonly seen in zips
        # as the data descriptor signature.
        #
        struct.unpack('<3I', self._fd.read(12))
      else:
        # No signature for the data descriptor, so read the
        # remaining fields out of the stream
        #
        self._fd.read(8)
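
# Note (added for clarity, not part of the original change): the magic
# numbers checked throughout this file are the standard zip record
# signatures read as little-endian 32 bit integers, i.e. the bytes 'PK'
# followed by a two byte type code:
#
#   0x04034b50  'PK\x03\x04'  local file header
#   0x02014b50  'PK\x01\x02'  central directory file header
#   0x06054b50  'PK\x05\x06'  end of central directory record
#   0x08074b50  'PK\x07\x08'  data descriptor (when bit 3 of the flags is set)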


class _FileIter(object):
  def __init__(self, fd):
    self._fd = fd

  def __iter__(self):
    return self

  def next(self):
    fd = self._fd

    type_buf = fd.read(4)
    type = struct.unpack('<I', type_buf)[0]

    if type != 0x04034b50: # local file header
      fd.unread(type_buf)
      raise StopIteration()

    rec = _FileHeader(fd.read(26))
    rec.name = fd.read(rec.name_len)
    fd.read(rec.extra_len)

    if rec.name.endswith('/'):
      rec.name = rec.name[:-1]
      rec.mode = stat.S_IFDIR | 0777
    return rec


class _FileHeader(object):
  """Information about a single file in the archive.
      0  version needed to extract    2 bytes
      1  general purpose bit flag     2 bytes
      2  compression method           2 bytes
      3  last mod file time           2 bytes
      4  last mod file date           2 bytes
      5  crc-32                       4 bytes
      6  compressed size              4 bytes
      7  uncompressed size            4 bytes
      8  file name length             2 bytes
      9  extra field length           2 bytes
  """
  def __init__(self, raw_bin):
    rec = struct.unpack('<5H3I2H', raw_bin)

    if rec[2] == 8:
      self.is_compressed = True
    elif rec[2] == 0:
      self.is_compressed = False
    else:
      raise ImportError('unrecognized compression format')

    if rec[1] & (1 << 3):
      self.has_trailer = True
    else:
      self.has_trailer = False

    self.compressed_size = rec[6]
    self.uncompressed_size = rec[7]
    self.name_len = rec[8]
    self.extra_len = rec[9]
    self.mode = stat.S_IFREG | 0644
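
  # Example (illustrative, not part of the original change): building a
  # minimal header with struct.pack shows how the '<5H3I2H' format string
  # lines up with the fields documented above.  The values are made up.
  #
  #   raw = struct.pack('<5H3I2H',
  #                     20,      # version needed to extract
  #                     0,       # general purpose bit flag
  #                     0,       # compression method (0 = stored)
  #                     0, 0,    # last mod file time / date
  #                     0,       # crc-32
  #                     5, 5,    # compressed / uncompressed size
  #                     3, 0)    # file name length / extra field length
  #   hdr = _FileHeader(raw)
  #   assert not hdr.is_compressed
  #   assert not hdr.has_trailer
  #   assert hdr.uncompressed_size == 5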


class _CentIter(object):
  def __init__(self, fd):
    self._fd = fd

  def __iter__(self):
    return self

  def next(self):
    fd = self._fd

    type_buf = fd.read(4)
    type = struct.unpack('<I', type_buf)[0]

    if type != 0x02014b50: # central directory
      fd.unread(type_buf)
      raise StopIteration()

    rec = _CentHeader(fd.read(42))
    rec.name = fd.read(rec.name_len)
    fd.read(rec.extra_len)
    fd.read(rec.comment_len)

    if rec.name.endswith('/'):
      rec.name = rec.name[:-1]
      rec.mode = stat.S_IFDIR | 0777
    return rec


class _CentHeader(object):
  """Information about a single file in the archive.
      0  version made by                  2 bytes
      1  version needed to extract        2 bytes
      2  general purpose bit flag         2 bytes
      3  compression method               2 bytes
      4  last mod file time               2 bytes
      5  last mod file date               2 bytes
      6  crc-32                           4 bytes
      7  compressed size                  4 bytes
      8  uncompressed size                4 bytes
      9  file name length                 2 bytes
     10  extra field length               2 bytes
     11  file comment length              2 bytes
     12  disk number start                2 bytes
     13  internal file attributes         2 bytes
     14  external file attributes         4 bytes
     15  relative offset of local header  4 bytes
  """
  def __init__(self, raw_bin):
    rec = struct.unpack('<6H3I5H2I', raw_bin)
    self.name_len = rec[9]
    self.extra_len = rec[10]
    self.comment_len = rec[11]

    if (rec[0] & 0xff00) == 0x0300: # UNIX
      self.mode = rec[14] >> 16
    else:
      self.mode = stat.S_IFREG | 0644
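
  # Example (illustrative, not part of the original change): a record
  # whose "version made by" upper byte is 3 (UNIX) carries the POSIX
  # file mode in the high 16 bits of the external file attributes.  The
  # packed values below are made up.
  #
  #   raw = struct.pack('<6H3I5H2I',
  #                     0x0314, 20, 0, 0, 0, 0,        # made by UNIX, stored
  #                     0, 5, 5,                       # crc-32, sizes
  #                     3, 0, 0, 0, 0,                 # name/extra/comment/disk/attrs
  #                     (stat.S_IFREG | 0755) << 16,   # external file attributes
  #                     0)                             # local header offset
  #   assert _CentHeader(raw).mode == (stat.S_IFREG | 0755)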


class _UngetStream(object):
  """File like object to read and rewind a stream.
  """
  def __init__(self, fd):
    self._fd = fd
    self._buf = None

  def read(self, size = -1):
    r = []
    try:
      if size >= 0:
        self._ReadChunk(r, size)
      else:
        while True:
          self._ReadChunk(r, 2048)
    except EOFError:
      pass

    if len(r) == 1:
      return r[0]
    return ''.join(r)

  def unread(self, buf):
    b = self._buf
    if b is None or len(b) == 0:
      self._buf = buf
    else:
      self._buf = buf + b

  def _ReadChunk(self, r, size):
    b = self._buf
    try:
      while size > 0:
        if b is None or len(b) == 0:
          b = self._Inflate(self._fd.read(2048))
          if not b:
            raise EOFError()
          continue

        use = min(size, len(b))
        r.append(b[:use])
        b = b[use:]
        size -= use
    finally:
      self._buf = b

  def _Inflate(self, b):
    return b
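
  # Example (illustrative, not part of the original change): unread()
  # pushes bytes back onto the stream so a later read() sees them again;
  # the record iterators above use this to "peek" at the next four byte
  # signature without consuming it.
  #
  #   s = _UngetStream(cStringIO.StringIO('PK\x01\x02rest'))
  #   sig = s.read(4)          # 'PK\x01\x02'
  #   s.unread(sig)            # push the signature back
  #   assert s.read(4) == sig
  #   assert s.read() == 'rest'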


class _FixedLengthStream(_UngetStream):
  """File like object to read a fixed length stream.
  """
  def __init__(self, fd, have):
    _UngetStream.__init__(self, fd)
    self._have = have

  def _Inflate(self, b):
    n = self._have
    if n == 0:
      self._fd.unread(b)
      return None

    if len(b) > n:
      self._fd.unread(b[n:])
      b = b[:n]
    self._have -= len(b)
    return b
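
  # Example (illustrative, not part of the original change): the stream
  # yields exactly 'have' bytes and pushes anything it over-read back
  # onto the underlying _UngetStream, leaving it positioned at the next
  # record.
  #
  #   u = _UngetStream(cStringIO.StringIO('hello world'))
  #   s = _FixedLengthStream(u, 5)
  #   assert s.read() == 'hello'
  #   assert u.read() == ' world'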


class _InflateStream(_UngetStream):
  """Inflates the stream as it reads input.
  """
  def __init__(self, fd):
    _UngetStream.__init__(self, fd)
    self._z = zlib.decompressobj(-zlib.MAX_WBITS)

  def _Inflate(self, b):
    z = self._z
    if not z:
      self._fd.unread(b)
      return None

    b = z.decompress(b)
    if z.unconsumed_tail != '':
      self._fd.unread(z.unconsumed_tail)
    elif z.unused_data != '':
      self._fd.unread(z.unused_data)
      self._z = None
    return b
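
# Example (illustrative sketch only, not part of the original change):
# the pieces above mirror the flow of ImportZip._UnpackFiles and can be
# exercised against a local archive for a quick smoke test.  'test.zip'
# is a hypothetical path; any read-only file object works since the
# parser never seeks, it only calls read().
#
#   fd = open('test.zip', 'rb')
#   zf = _ZipFile(fd)
#   for entry in zf.FileRecords():
#     data = zf.Open(entry).read()
#     print entry.name, len(data)
#     zf.Close(entry)
#   for entry in zf.CentralDirectory():
#     print entry.name, oct(entry.mode)
#   zf.CheckTail()
#   fd.close()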