Diffstat (limited to 'import_zip.py')
-rw-r--r--   import_zip.py   345
1 files changed, 345 insertions, 0 deletions
diff --git a/import_zip.py b/import_zip.py
new file mode 100644
index 00000000..08aff326
--- /dev/null
+++ b/import_zip.py
@@ -0,0 +1,345 @@
#
# Copyright (C) 2008 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import stat
import struct
import zlib
import cStringIO

from import_ext import ImportExternal
from error import ImportError

class ImportZip(ImportExternal):
  """Streams a zip file from the network directly into a Project's
     Git repository.
  """
  @classmethod
  def CanAccept(cls, url):
    """Can this importer read and unpack the data stored at url?
    """
    if url.endswith('.zip') or url.endswith('.jar'):
      return True
    return False
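
  # Example (illustrative note, not part of the original change): CanAccept
  # keys purely off the URL's file extension; the URLs below are made up.
  #
  #   ImportZip.CanAccept('http://example.com/src-1.0.zip')  -> True
  #   ImportZip.CanAccept('http://example.com/lib.jar')      -> True
  #   ImportZip.CanAccept('http://example.com/src.tar.gz')   -> False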

  def _UnpackFiles(self):
    url_fd, url = self._OpenUrl()
    try:
      if not self.__class__.CanAccept(url):
        raise ImportError('non-zip file extension: %s' % url)

      zip = _ZipFile(url_fd)
      for entry in zip.FileRecords():
        data = zip.Open(entry).read()
        sz = len(data)

        if data and _SafeCRLF(data):
          data = data.replace('\r\n', '\n')
          sz = len(data)

        fd = cStringIO.StringIO(data)
        self._UnpackOneFile(entry.mode, sz, entry.name, fd)
        zip.Close(entry)

      for entry in zip.CentralDirectory():
        self._SetFileMode(entry.name, entry.mode)

      zip.CheckTail()
    finally:
      url_fd.close()


def _SafeCRLF(data):
  """Is it reasonably safe to perform a CRLF->LF conversion?

     If the stream contains a NUL byte it is likely binary,
     and thus a CRLF->LF conversion may damage the stream.

     If the only NUL is in the last position of the stream,
     but the stream otherwise permits a CRLF<->LF conversion,
     we do the CRLF conversion anyway.  At least one source ZIP
     file has this structure in its source code.

     If every occurrence of a CR and LF is paired up as a
     CRLF pair then the conversion is safely bi-directional:
     s/\r\n/\n/g and s/\n/\r\n/g can convert between them.
  """
  nul = data.find('\0')
  if 0 <= nul and nul < (len(data) - 1):
    return False

  n_lf = 0
  last = 0
  while True:
    lf = data.find('\n', last)
    if lf < 0:
      break
    if lf == 0 or data[lf - 1] != '\r':
      return False
    last = lf + 1
    n_lf += 1
  return n_lf > 0
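
# Example (illustrative, not part of the original change; assumes the
# definition above): _SafeCRLF only reports True when every LF in the
# stream is part of a CRLF pair, so the conversion is reversible:
#
#   _SafeCRLF('a\r\nb\r\n')    -> True   (every LF is preceded by CR)
#   _SafeCRLF('a\nb\r\n')      -> False  (bare LF present)
#   _SafeCRLF('a\x00b\r\n')    -> False  (NUL before the final byte)
#   _SafeCRLF('abc')           -> False  (no LF at all, nothing to convert)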

class _ZipFile(object):
  """Streaming iterator to parse a zip file on the fly.
  """
  def __init__(self, fd):
    self._fd = _UngetStream(fd)

  def FileRecords(self):
    return _FileIter(self._fd)

  def CentralDirectory(self):
    return _CentIter(self._fd)

  def CheckTail(self):
    type_buf = self._fd.read(4)
    type = struct.unpack('<I', type_buf)[0]
    if type != 0x06054b50: # end of central directory
      raise ImportError('zip record %x unsupported' % type)

  def Open(self, entry):
    if entry.is_compressed:
      return _InflateStream(self._fd)
    else:
      if entry.has_trailer:
        raise ImportError('unable to extract streamed zip')
      return _FixedLengthStream(self._fd, entry.uncompressed_size)

  def Close(self, entry):
    if entry.has_trailer:
      type = struct.unpack('<I', self._fd.read(4))[0]
      if type == 0x08074b50:
        # Not a formal type marker, but commonly seen in zips
        # as the data descriptor signature.
        #
        struct.unpack('<3I', self._fd.read(12))
      else:
        # No signature for the data descriptor, so read the
        # remaining fields out of the stream
        #
        self._fd.read(8)
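
# Note (added for clarity, not part of the original change): the magic
# numbers checked throughout this file are the standard zip record
# signatures read as little-endian 32 bit integers, i.e. the bytes 'PK'
# followed by a two byte type code:
#
#   0x04034b50  'PK\x03\x04'  local file header
#   0x02014b50  'PK\x01\x02'  central directory file header
#   0x06054b50  'PK\x05\x06'  end of central directory record
#   0x08074b50  'PK\x07\x08'  data descriptor (when bit 3 of the flags is set)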


class _FileIter(object):
  def __init__(self, fd):
    self._fd = fd

  def __iter__(self):
    return self

  def next(self):
    fd = self._fd

    type_buf = fd.read(4)
    type = struct.unpack('<I', type_buf)[0]

    if type != 0x04034b50: # local file header
      fd.unread(type_buf)
      raise StopIteration()

    rec = _FileHeader(fd.read(26))
    rec.name = fd.read(rec.name_len)
    fd.read(rec.extra_len)

    if rec.name.endswith('/'):
      rec.name = rec.name[:-1]
      rec.mode = stat.S_IFDIR | 0777
    return rec


class _FileHeader(object):
  """Information about a single file in the archive.
      0  version needed to extract    2 bytes
      1  general purpose bit flag     2 bytes
      2  compression method           2 bytes
      3  last mod file time           2 bytes
      4  last mod file date           2 bytes
      5  crc-32                       4 bytes
      6  compressed size              4 bytes
      7  uncompressed size            4 bytes
      8  file name length             2 bytes
      9  extra field length           2 bytes
  """
  def __init__(self, raw_bin):
    rec = struct.unpack('<5H3I2H', raw_bin)

    if rec[2] == 8:
      self.is_compressed = True
    elif rec[2] == 0:
      self.is_compressed = False
    else:
      raise ImportError('unrecognized compression format')

    if rec[1] & (1 << 3):
      self.has_trailer = True
    else:
      self.has_trailer = False

    self.compressed_size = rec[6]
    self.uncompressed_size = rec[7]
    self.name_len = rec[8]
    self.extra_len = rec[9]
    self.mode = stat.S_IFREG | 0644
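
  # Example (illustrative, not part of the original change): building a
  # minimal header with struct.pack shows how the '<5H3I2H' format string
  # lines up with the fields documented above.  The values are made up.
  #
  #   raw = struct.pack('<5H3I2H',
  #                     20,      # version needed to extract
  #                     0,       # general purpose bit flag
  #                     0,       # compression method (0 = stored)
  #                     0, 0,    # last mod file time / date
  #                     0,       # crc-32
  #                     5, 5,    # compressed / uncompressed size
  #                     3, 0)    # file name length / extra field length
  #   hdr = _FileHeader(raw)
  #   assert not hdr.is_compressed
  #   assert not hdr.has_trailer
  #   assert hdr.uncompressed_size == 5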


class _CentIter(object):
  def __init__(self, fd):
    self._fd = fd

  def __iter__(self):
    return self

  def next(self):
    fd = self._fd

    type_buf = fd.read(4)
    type = struct.unpack('<I', type_buf)[0]

    if type != 0x02014b50: # central directory
      fd.unread(type_buf)
      raise StopIteration()

    rec = _CentHeader(fd.read(42))
    rec.name = fd.read(rec.name_len)
    fd.read(rec.extra_len)
    fd.read(rec.comment_len)

    if rec.name.endswith('/'):
      rec.name = rec.name[:-1]
      rec.mode = stat.S_IFDIR | 0777
    return rec


class _CentHeader(object):
  """Information about a single file in the archive.
      0  version made by                  2 bytes
      1  version needed to extract        2 bytes
      2  general purpose bit flag         2 bytes
      3  compression method               2 bytes
      4  last mod file time               2 bytes
      5  last mod file date               2 bytes
      6  crc-32                           4 bytes
      7  compressed size                  4 bytes
      8  uncompressed size                4 bytes
      9  file name length                 2 bytes
     10  extra field length               2 bytes
     11  file comment length              2 bytes
     12  disk number start                2 bytes
     13  internal file attributes         2 bytes
     14  external file attributes         4 bytes
     15  relative offset of local header  4 bytes
  """
  def __init__(self, raw_bin):
    rec = struct.unpack('<6H3I5H2I', raw_bin)
    self.name_len = rec[9]
    self.extra_len = rec[10]
    self.comment_len = rec[11]

    if (rec[0] & 0xff00) == 0x0300: # UNIX
      self.mode = rec[14] >> 16
    else:
      self.mode = stat.S_IFREG | 0644
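
  # Example (illustrative, not part of the original change): a record
  # whose "version made by" upper byte is 3 (UNIX) carries the POSIX
  # file mode in the high 16 bits of the external file attributes.  The
  # packed values below are made up.
  #
  #   raw = struct.pack('<6H3I5H2I',
  #                     0x0314, 20, 0, 0, 0, 0,        # made by UNIX, stored
  #                     0, 5, 5,                       # crc-32, sizes
  #                     3, 0, 0, 0, 0,                 # name/extra/comment/disk/attrs
  #                     (stat.S_IFREG | 0755) << 16,   # external file attributes
  #                     0)                             # local header offset
  #   assert _CentHeader(raw).mode == (stat.S_IFREG | 0755)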


class _UngetStream(object):
  """File like object to read and rewind a stream.
  """
  def __init__(self, fd):
    self._fd = fd
    self._buf = None

  def read(self, size = -1):
    r = []
    try:
      if size >= 0:
        self._ReadChunk(r, size)
      else:
        while True:
          self._ReadChunk(r, 2048)
    except EOFError:
      pass

    if len(r) == 1:
      return r[0]
    return ''.join(r)

  def unread(self, buf):
    b = self._buf
    if b is None or len(b) == 0:
      self._buf = buf
    else:
      self._buf = buf + b

  def _ReadChunk(self, r, size):
    b = self._buf
    try:
      while size > 0:
        if b is None or len(b) == 0:
          b = self._Inflate(self._fd.read(2048))
          if not b:
            raise EOFError()
          continue

        use = min(size, len(b))
        r.append(b[:use])
        b = b[use:]
        size -= use
    finally:
      self._buf = b

  def _Inflate(self, b):
    return b
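
  # Example (illustrative, not part of the original change): unread()
  # pushes bytes back onto the stream so a later read() sees them again;
  # the record iterators above use this to "peek" at the next four byte
  # signature without consuming it.
  #
  #   s = _UngetStream(cStringIO.StringIO('PK\x01\x02rest'))
  #   sig = s.read(4)          # 'PK\x01\x02'
  #   s.unread(sig)            # push the signature back
  #   assert s.read(4) == sig
  #   assert s.read() == 'rest'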


class _FixedLengthStream(_UngetStream):
  """File like object to read a fixed length stream.
  """
  def __init__(self, fd, have):
    _UngetStream.__init__(self, fd)
    self._have = have

  def _Inflate(self, b):
    n = self._have
    if n == 0:
      self._fd.unread(b)
      return None

    if len(b) > n:
      self._fd.unread(b[n:])
      b = b[:n]
    self._have -= len(b)
    return b
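
  # Example (illustrative, not part of the original change): the stream
  # yields exactly 'have' bytes and pushes anything it over-read back
  # onto the underlying _UngetStream, leaving it positioned at the next
  # record.
  #
  #   u = _UngetStream(cStringIO.StringIO('hello world'))
  #   s = _FixedLengthStream(u, 5)
  #   assert s.read() == 'hello'
  #   assert u.read() == ' world'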


class _InflateStream(_UngetStream):
  """Inflates the stream as it reads input.
  """
  def __init__(self, fd):
    _UngetStream.__init__(self, fd)
    self._z = zlib.decompressobj(-zlib.MAX_WBITS)

  def _Inflate(self, b):
    z = self._z
    if not z:
      self._fd.unread(b)
      return None

    b = z.decompress(b)
    if z.unconsumed_tail != '':
      self._fd.unread(z.unconsumed_tail)
    elif z.unused_data != '':
      self._fd.unread(z.unused_data)
      self._z = None
    return b
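
# Example (illustrative sketch only, not part of the original change):
# the pieces above mirror the flow of ImportZip._UnpackFiles and can be
# exercised against a local archive for a quick smoke test.  'test.zip'
# is a hypothetical path; any read-only file object works since the
# parser never seeks, it only calls read().
#
#   fd = open('test.zip', 'rb')
#   zf = _ZipFile(fd)
#   for entry in zf.FileRecords():
#     data = zf.Open(entry).read()
#     print entry.name, len(data)
#     zf.Close(entry)
#   for entry in zf.CentralDirectory():
#     print entry.name, oct(entry.mode)
#   zf.CheckTail()
#   fd.close()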