# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import os
import struct
import zlib
from collections
import OrderedDict
from io
import BytesIO, UnsupportedOperation
from zipfile
import ZIP_DEFLATED, ZIP_STORED
import six
import mozpack.path
as mozpath
from mozbuild.util
import ensure_bytes
# Compression method identifiers for jar members, aliased from zipfile.
JAR_STORED = ZIP_STORED
JAR_DEFLATED = ZIP_DEFLATED
# Window size exponent; this file passes it negated as zlib's wbits argument
# both when decompressing members and when creating compression objects.
MAX_WBITS = 15
class JarReaderError(Exception):
    """Raised when a Jar archive cannot be read or parsed."""
class JarWriterError(Exception):
    """Raised when a Jar archive cannot be written."""
class JarStruct(object):
    """
    Base class describing a raw ZIP archive record.

    Every record starts with a 32-bit unsigned magic number, given by the
    subclass MAGIC attribute, followed by the fields listed, in order, in the
    subclass STRUCT attribute.

    STRUCT is an OrderedDict mapping each field name to a type. The type is
    either 'uint32', 'uint16', or the name of another field. In the last
    case, the field is a string buffer whose length is stored in the field
    that is named. For example,

    .. code-block:: python

        STRUCT = OrderedDict([
            ('version', 'uint32'),
            ('filename_size', 'uint16'),
            ('filename', 'filename_size')
        ])

    describes a 32-bit unsigned integer 'version', then a 16-bit unsigned
    integer 'filename_size', then a 'filename' buffer that is filename_size
    bytes long.

    Fields that only carry the size of another field are not kept on the
    instance. With the example above, only obj['version'] and obj['filename']
    exist, and filename_size is len(obj['filename']).

    Instances are created either from existing data (deserialization) or
    with empty fields.
    """

    # Maps a type name to its struct format character and its byte width.
    TYPE_MAPPING = {"uint32": (b"I", 4), "uint16": (b"H", 2)}

    def __init__(self, data=None):
        """
        Build an instance from the given data, or an instance with empty
        fields when data is omitted (or empty).
        """
        assert self.MAGIC and isinstance(self.STRUCT, OrderedDict)
        # Names of the fields that hold the length of another field.
        self.size_fields = {
            kind
            for kind in six.itervalues(self.STRUCT)
            if kind not in JarStruct.TYPE_MAPPING
        }
        self._values = {}
        if not data:
            self._init_empty()
        else:
            self._init_data(data)

    def _init_data(self, data):
        """
        Populate the instance by decoding data according to self.STRUCT.
        The self.MAGIC signature must be found at data[:4], otherwise a
        JarReaderError is raised.
        """
        assert data is not None
        magic, cursor = JarStruct.get_data("uint32", data)
        self.signature = magic
        if magic != self.MAGIC:
            raise JarReaderError("Bad magic")
        # Values of the size fields are tracked here instead of being stored
        # on the instance.
        lengths = dict.fromkeys(self.size_fields, 0)
        for field, kind in six.iteritems(self.STRUCT):
            if kind in JarStruct.TYPE_MAPPING:
                value, consumed = JarStruct.get_data(kind, data[cursor:])
            else:
                consumed = lengths[kind]
                value = data[cursor : cursor + consumed]
                if isinstance(value, memoryview):
                    value = value.tobytes()
            if field in lengths:
                lengths[field] = value
            else:
                self._values[field] = value
            cursor += consumed

    def _init_empty(self):
        """
        Populate the instance with empty values: 0 for integer fields and an
        empty string for buffers. Size fields are not stored.
        """
        self.signature = self.MAGIC
        for field, kind in six.iteritems(self.STRUCT):
            if field not in self.size_fields:
                if kind in JarStruct.TYPE_MAPPING:
                    self._values[field] = 0
                else:
                    self._values[field] = ""

    @staticmethod
    def get_data(type, data):
        """
        Decode one little-endian integer of the given type (a key of
        JarStruct.TYPE_MAPPING) from the start of the given data. Return a
        (value, size) pair where size is the number of bytes of that type.
        """
        assert type in JarStruct.TYPE_MAPPING
        assert data is not None
        code, width = JarStruct.TYPE_MAPPING[type]
        raw = data[:width]
        if isinstance(raw, memoryview):
            raw = raw.tobytes()
        (value,) = struct.unpack(b"<" + code, raw)
        return value, width

    def serialize(self):
        """
        Return the bytes of the record: the signature followed by every
        field of self.STRUCT, in order.
        """
        # Maps each size field to the name of the buffer it measures.
        measured = {
            kind: field
            for field, kind in six.iteritems(self.STRUCT)
            if kind not in JarStruct.TYPE_MAPPING
        }
        chunks = [struct.pack(b"<I", self.signature)]
        for field, kind in six.iteritems(self.STRUCT):
            if kind not in JarStruct.TYPE_MAPPING:
                chunks.append(ensure_bytes(self[field]))
                continue
            code = JarStruct.TYPE_MAPPING[kind][0]
            if field in measured:
                # Size fields are derived from the current buffer length.
                number = len(self[measured[field]])
            else:
                number = self[field]
            chunks.append(struct.pack(b"<" + code, number))
        return b"".join(chunks)

    @property
    def size(self):
        """
        Size in bytes of the record, signature included, given the current
        length of all variable length fields.
        """
        total = JarStruct.TYPE_MAPPING["uint32"][1]
        for field, kind in six.iteritems(self.STRUCT):
            fixed = JarStruct.TYPE_MAPPING.get(kind)
            if fixed is None:
                total += len(self[field])
            else:
                total += fixed[1]
        return total

    def __getitem__(self, key):
        """Return the value stored for the given field name."""
        return self._values[key]

    def __setitem__(self, key, value):
        """
        Store a value for the given field name. Raises KeyError for a name
        that is not in STRUCT and AttributeError for a size field.
        """
        if key not in self.STRUCT:
            raise KeyError(key)
        if key in self.size_fields:
            raise AttributeError("can't set attribute")
        self._values[key] = value

    def __contains__(self, key):
        """Return whether a value is stored for the given field name."""
        return key in self._values

    def __iter__(self):
        """Iterate over the stored (name, value) pairs."""
        return six.iteritems(self._values)

    def __repr__(self):
        pairs = " ".join("%s=%s" % (name, value) for name, value in self)
        return "<%s %s>" % (self.__class__.__name__, pairs)
class JarCdirEnd(JarStruct):
    """
    Layout of the End of central directory record.
    """

    MAGIC = 0x06054B50
    STRUCT = OrderedDict(
        (
            ("disk_num", "uint16"),
            ("cdir_disk", "uint16"),
            ("disk_entries", "uint16"),
            ("cdir_entries", "uint16"),
            ("cdir_size", "uint32"),
            ("cdir_offset", "uint32"),
            ("comment_size", "uint16"),
            # Variable length buffer, comment_size bytes long.
            ("comment", "comment_size"),
        )
    )
# Size in bytes of an End of central directory record built with empty
# fields, i.e. with an empty comment.
CDIR_END_SIZE = JarCdirEnd().size
class JarCdirEntry(JarStruct):
    """
    Layout of a Central directory file header.
    """

    MAGIC = 0x02014B50
    STRUCT = OrderedDict(
        (
            ("creator_version", "uint16"),
            ("min_version", "uint16"),
            ("general_flag", "uint16"),
            ("compression", "uint16"),
            ("lastmod_time", "uint16"),
            ("lastmod_date", "uint16"),
            ("crc32", "uint32"),
            ("compressed_size", "uint32"),
            ("uncompressed_size", "uint32"),
            ("filename_size", "uint16"),
            ("extrafield_size", "uint16"),
            ("filecomment_size", "uint16"),
            ("disknum", "uint16"),
            ("internal_attr", "uint16"),
            ("external_attr", "uint32"),
            ("offset", "uint32"),
            # Variable length buffers, each sized by the matching *_size
            # field above.
            ("filename", "filename_size"),
            ("extrafield", "extrafield_size"),
            ("filecomment", "filecomment_size"),
        )
    )
class JarLocalFileHeader(JarStruct):
    """
    Layout of a Local file header.
    """

    MAGIC = 0x04034B50
    STRUCT = OrderedDict(
        (
            ("min_version", "uint16"),
            ("general_flag", "uint16"),
            ("compression", "uint16"),
            ("lastmod_time", "uint16"),
            ("lastmod_date", "uint16"),
            ("crc32", "uint32"),
            ("compressed_size", "uint32"),
            ("uncompressed_size", "uint32"),
            ("filename_size", "uint16"),
            ("extra_field_size", "uint16"),
            # Variable length buffers, each sized by the matching *_size
            # field above.
            ("filename", "filename_size"),
            ("extra_field", "extra_field_size"),
        )
    )
class JarFileReader(object):
    """
    File-like object handed out by JarReader to access one member of a Jar
    archive.
    """

    def __init__(self, header, data):
        """
        Create a JarFileReader. header is the local file header of the
        member in the jar archive, and data is a buffer that starts with the
        member data.
        """
        method = header["compression"]
        assert method in [JAR_DEFLATED, JAR_STORED]
        self._data = data
        # Mirror a few local file header fields as attributes.
        self.compressed_size = header["compressed_size"]
        self.uncompressed_size = header["uncompressed_size"]
        self.crc32 = header["crc32"]
        self.filename = six.ensure_text(header["filename"])
        self.compressed = method != JAR_STORED
        self.compress = method

    def readable(self):
        """Always True."""
        return True

    def read(self, length=-1):
        """
        Read up to length bytes of uncompressed data (everything left when
        length is -1).
        """
        stream = self.uncompressed_data
        return stream.read(length)

    def readinto(self, b):
        """
        Fill the pre-allocated, writable bytes-like object `b` with
        uncompressed data and return the number of bytes read.
        """
        stream = self.uncompressed_data
        return stream.readinto(b)

    def readlines(self):
        """
        Return the remaining uncompressed data as a list of lines, line
        endings included.
        """
        remaining = self.read()
        return remaining.splitlines(True)

    def __iter__(self):
        """
        Iterate over lines, to support "for line in fileobj" constructs.
        """
        return iter(self.readlines())

    def seek(self, pos, whence=os.SEEK_SET):
        """
        Move the current position in the uncompressed data; subsequent reads
        start from there.
        """
        stream = self.uncompressed_data
        return stream.seek(pos, whence)

    def close(self):
        """
        Close the uncompressed data buffer.
        """
        self.uncompressed_data.close()

    @property
    def closed(self):
        """Whether the uncompressed data buffer is closed."""
        return self.uncompressed_data.closed

    @property
    def compressed_data(self):
        """
        The raw member data as stored in the archive: the first
        compressed_size bytes of the buffer.
        """
        end = self.compressed_size
        return self._data[:end]

    @property
    def uncompressed_data(self):
        """
        A BytesIO over the uncompressed member data. It is created on first
        access and reused afterwards. Raises JarReaderError when the size of
        the data does not match the header.
        """
        cached = getattr(self, "_uncompressed_data", None)
        if cached is not None:
            return cached
        raw = self.compressed_data
        if self.compress == JAR_DEFLATED:
            # Negative wbits is passed because the member has no zlib header.
            raw = zlib.decompress(raw.tobytes(), -MAX_WBITS)
        elif self.compress == JAR_STORED:
            raw = raw.tobytes()
        else:
            assert False  # Can't be another value per __init__
        if len(raw) != self.uncompressed_size:
            raise JarReaderError("Corrupted file? %s" % self.filename)
        self._uncompressed_data = BytesIO(raw)
        return self._uncompressed_data
class JarReader(object):
    """
    Class with methods to read Jar files. Can open standard jar files as well
    as Mozilla jar files (see further details in the JarWriter documentation).
    """

    def __init__(self, file=None, fileobj=None, data=None):
        """
        Open a Jar archive. The content is read from the given file-like
        object if one is given, else from the given file name, else the
        given data buffer is used as is.

        Raises JarReaderError when no End of central directory record can be
        found, including when the content is too short to hold one.
        """
        if fileobj:
            data = fileobj.read()
        elif file:
            # Close the file as soon as its content is in memory.
            with open(file, "rb") as handle:
                data = handle.read()
        self._data = memoryview(data)
        size = len(self._data)
        # Content shorter than the smallest possible End of Central Directory
        # Record cannot be an archive. Without this check the backwards scan
        # below would have no valid starting point.
        if size < CDIR_END_SIZE:
            raise JarReaderError("Not a jar?")
        # The End of Central Directory Record has a variable size because of
        # comments it may contain, so scan for it from the end of the file.
        for start in six.moves.xrange(size - CDIR_END_SIZE, -1, -1):
            signature = JarStruct.get_data("uint32", self._data[start:])[0]
            if signature == JarCdirEnd.MAGIC:
                break
        else:
            raise JarReaderError("Not a jar?")
        self._cdir_end = JarCdirEnd(self._data[start:])

    def close(self):
        """
        Free some resources associated with the Jar.
        """
        del self._data

    @property
    def compression(self):
        """
        Return the highest compression method value found among the entries,
        or JAR_STORED when there are no entries.
        """
        entries = self.entries
        if not entries:
            return JAR_STORED
        return max(f["compression"] for f in six.itervalues(entries))

    @property
    def entries(self):
        """
        Return an ordered dict of central directory entries, indexed by
        filename, in the order they appear in the Jar archive central
        directory. Directory entries are skipped. The result is computed
        once and reused.
        """
        if hasattr(self, "_entries"):
            return self._entries
        preload = 0
        if self.is_optimized:
            preload = JarStruct.get_data("uint32", self._data)[0]
        entries = OrderedDict()
        offset = self._cdir_end["cdir_offset"]
        for _ in six.moves.xrange(self._cdir_end["cdir_entries"]):
            entry = JarCdirEntry(self._data[offset:])
            offset += entry.size
            # Creator host system. 0 is MSDOS, 3 is Unix
            host = entry["creator_version"] >> 8
            # External attributes values depend on host above. On Unix the
            # higher bits are the stat.st_mode value. On MSDOS, the lower bits
            # are the FAT attributes.
            xattr = entry["external_attr"]
            # Skip directories
            if (host == 0 and xattr & 0x10) or (
                host == 3 and xattr & (0o040000 << 16)
            ):
                continue
            entries[six.ensure_text(entry["filename"])] = entry
            if entry["offset"] < preload:
                self._last_preloaded = six.ensure_text(entry["filename"])
        self._entries = entries
        return entries

    @property
    def is_optimized(self):
        """
        Return whether the jar archive is optimized.
        """
        # In optimized jars, the central directory is at the beginning of the
        # file, after a single 32-bits value, which is the length of data
        # preloaded.
        return self._cdir_end["cdir_offset"] == JarStruct.TYPE_MAPPING["uint32"][1]

    @property
    def last_preloaded(self):
        """
        Return the name of the last file that is set to be preloaded, or
        None when there is none. See JarWriter documentation for more
        details on preloading.
        """
        if hasattr(self, "_last_preloaded"):
            return self._last_preloaded
        self._last_preloaded = None
        # Reading the entries sets _last_preloaded as a side effect.
        self.entries
        return self._last_preloaded

    def _getreader(self, entry):
        """
        Helper to create a JarFileReader corresponding to the given central
        directory entry. Raises JarReaderError when a field present in both
        the central directory entry and the local file header differs.
        """
        header = JarLocalFileHeader(self._data[entry["offset"] :])
        for key, value in entry:
            if key in header and header[key] != value:
                raise JarReaderError(
                    "Central directory and file header "
                    + "mismatch. Corrupted archive?"
                )
        return JarFileReader(header, self._data[entry["offset"] + header.size :])

    def __iter__(self):
        """
        Iterate over all files in the Jar archive, in the form of
        JarFileReaders.
            for file in jarReader:
                ...
        """
        for entry in six.itervalues(self.entries):
            yield self._getreader(entry)

    def __getitem__(self, name):
        """
        Get a JarFileReader for the given file name.
        """
        return self._getreader(self.entries[name])

    def __contains__(self, name):
        """
        Return whether the given file name appears in the Jar archive.
        """
        return name in self.entries
class JarWriter(object):
    """
    Class with methods to write Jar files. Can write more-or-less standard jar
    archives as well as jar archives optimized for Gecko. See the
    documentation of the finish() member function for a description of both
    layouts.
    """

    def __init__(self, file=None, fileobj=None, compress=True, compress_level=9):
        """
        Start a Jar archive in the given file-like object if one is given,
        else in the file of the given name, which is opened for writing.

        The compress option sets the default way of storing data in the
        archive (True stands for JAR_DEFLATED). ``compress_level`` is the
        zlib compression level handed to each Deflater; it defaults to 9.
        """
        self._data = fileobj if fileobj else open(file, "wb")
        self._compress = JAR_DEFLATED if compress is True else compress
        self._compress_level = compress_level
        # Maps a member name to an (entry, data) pair, in archive order.
        self._contents = OrderedDict()
        self._last_preloaded = None

    def __enter__(self):
        """
        Context manager __enter__ method for JarWriter.
        """
        return self

    def __exit__(self, type, value, tb):
        """
        Context manager __exit__ method for JarWriter. Finishes the archive.
        """
        self.finish()

    def finish(self):
        """
        Write out and close the Jar archive.

        Standard jar archives are laid out like the following:
            - Local file header 1
            - File data 1
            - Local file header 2
            - File data 2
            - (...)
            - Central directory entry pointing at Local file header 1
            - Central directory entry pointing at Local file header 2
            - (...)
            - End of central directory, pointing at first central directory
              entry.

        Jar archives optimized for Gecko are laid out like the following:
            - 32-bits unsigned integer giving the amount of data to preload.
            - Central directory entry pointing at Local file header 1
            - Central directory entry pointing at Local file header 2
            - (...)
            - End of central directory, pointing at first central directory
              entry.
            - Local file header 1
            - File data 1
            - Local file header 2
            - File data 2
            - (...)
            - End of central directory, pointing at first central directory
              entry.

        The End of central directory is duplicated to accommodate some Zip
        reading tools that want an end of central directory structure to
        follow the central directory entries.
        """
        write = self._data.write
        position = 0
        local_headers = {}
        preload_size = 0
        # Build a local file header for each central directory entry and
        # compute where each member lands.
        for entry, content in six.itervalues(self._contents):
            header = JarLocalFileHeader()
            for field in entry.STRUCT:
                if field in header:
                    header[field] = entry[field]
            entry["offset"] = position
            position += len(content) + header.size
            if six.ensure_text(entry["filename"]) == self._last_preloaded:
                preload_size = position
            local_headers[entry] = header

        # Prepare the end of central directory.
        end = JarCdirEnd()
        count = len(self._contents)
        end["disk_entries"] = count
        end["cdir_entries"] = count
        end["cdir_size"] = sum(entry.size for entry, _ in self._contents.values())

        # Optimized archives begin with the preloaded size and the central
        # directory entries, followed by a first end of central directory.
        if preload_size:
            end["cdir_offset"] = 4
            position = end["cdir_size"] + end["cdir_offset"] + end.size
            preload_size += position
            write(struct.pack("<I", preload_size))
            for entry, _ in six.itervalues(self._contents):
                entry["offset"] += position
                write(entry.serialize())
            write(end.serialize())

        # Local file headers, each followed by its member data.
        for entry, content in six.itervalues(self._contents):
            write(local_headers[entry].serialize())
            if isinstance(content, memoryview):
                content = content.tobytes()
            write(content)

        # Non optimized archives get their central directory entries here.
        if not preload_size:
            end["cdir_offset"] = position
            for entry, _ in six.itervalues(self._contents):
                write(entry.serialize())

        # The end of central directory closes both layouts.
        write(end.serialize())
        self._data.close()

    def add(self, name, data, compress=None, mode=None, skip_duplicates=False):
        """
        Add a new member to the jar archive, with the given name and the
        given data.

        The compress option indicates how the given data should be compressed
        (one of JAR_STORED or JAR_DEFLATED), or compressed according to the
        default defined when creating the JarWriter (None). True and False
        are allowed values for backwards compatibility, mapping,
        respectively, to JAR_DEFLATED and JAR_STORED.
        When the data should be compressed, it is only really compressed if
        the compressed size is smaller than the uncompressed size.

        The mode option gives the unix permissions that should be stored for
        the jar entry, which defaults to 0o100644 (regular file, u+rw, g+r,
        o+r) if not specified.

        If a member with the same name was already added, a JarWriterError is
        raised unless skip_duplicates is True.

        The given data may be a buffer, a file-like instance, a Deflater or a
        JarFileReader instance. The latter two allow to avoid uncompressing
        data to recompress it.
        """
        name = mozpath.normsep(six.ensure_text(name))
        if not skip_duplicates and name in self._contents:
            raise JarWriterError("File %s already in JarWriter" % name)

        method = self._compress if compress is None else compress
        if method is True:
            method = JAR_DEFLATED
        elif method is False:
            method = JAR_STORED

        if isinstance(data, (JarFileReader, Deflater)) and data.compress == method:
            # Already in the requested form: reuse it as is.
            deflater = data
        else:
            deflater = Deflater(method, compress_level=self._compress_level)
            if isinstance(data, (six.binary_type, six.string_types)):
                payload = data
            elif hasattr(data, "read"):
                # Rewind when the object supports it.
                try:
                    data.seek(0)
                except (UnsupportedOperation, AttributeError):
                    pass
                payload = data.read()
            else:
                raise JarWriterError("Don't know how to handle %s" % type(data))
            deflater.write(payload)

        if mode is None:
            # If no mode is given, default to u+rw, g+r, o+r.
            mode = 0o000644
        if not mode & 0o777000:
            # If no file type is given, default to regular file.
            mode |= 0o100000

        # Fill a central directory entry for this new member.
        entry = JarCdirEntry()
        # Creator host system (upper byte of creator_version) is set to 3
        # (Unix) so mode is honored.
        entry["creator_version"] = 20 | (3 << 8)
        entry["external_attr"] = (mode & 0xFFFF) << 16
        if deflater.compressed:
            # Version 2.0 supports deflated streams; flag 2 is max compression.
            entry["min_version"] = 20
            entry["general_flag"] = 2
            entry["compression"] = deflater.compress
        else:
            # Version 1.0 for stored streams.
            entry["min_version"] = 10
            entry["general_flag"] = 0
            entry["compression"] = JAR_STORED
        # January 1st, 2010. See bug 592369.
        entry["lastmod_date"] = ((2010 - 1980) << 9) | (1 << 5) | 1
        entry["lastmod_time"] = 0
        entry["crc32"] = deflater.crc32
        entry["compressed_size"] = deflater.compressed_size
        entry["uncompressed_size"] = deflater.uncompressed_size
        entry["filename"] = six.ensure_binary(name)
        self._contents[name] = entry, deflater.compressed_data

    def preload(self, files):
        """
        Set which members of the jar archive should be preloaded when opening
        the archive in Gecko. Members are reordered to follow the given list,
        with all other members kept after them. Names that are not in the
        archive are ignored.
        """
        reordered = OrderedDict()
        for wanted in files:
            if wanted in self._contents:
                reordered[wanted] = self._contents[wanted]
                self._last_preloaded = wanted
        for existing in self._contents:
            if existing not in reordered:
                reordered[existing] = self._contents[existing]
        self._contents = reordered
class Deflater(object):
    """
    File-like interface to zlib compression. The compressed form is only
    used when it is smaller than the uncompressed data.
    """

    def __init__(self, compress=True, compress_level=9):
        """
        Create a Deflater. compress is JAR_DEFLATED or JAR_STORED (True and
        False are accepted as aliases), and compress_level is the zlib
        compression level.
        """
        self._data = BytesIO()
        if compress is True:
            compress = JAR_DEFLATED
        elif compress is False:
            compress = JAR_STORED
        self.compress = compress
        if compress != JAR_DEFLATED:
            assert compress == JAR_STORED
            self._deflater = None
        else:
            self._deflater = zlib.compressobj(
                compress_level, zlib.DEFLATED, -MAX_WBITS
            )
            self._deflated = BytesIO()
        self.crc32 = 0

    def write(self, data):
        """
        Append a buffer to the Deflater. Raises JarWriterError when the
        compressed stream was already flushed.
        """
        if isinstance(data, memoryview):
            data = data.tobytes()
        chunk = six.ensure_binary(data)
        self._data.write(chunk)
        if self.compress:
            if not self._deflater:
                raise JarWriterError("Can't write after flush")
            self._deflated.write(self._deflater.compress(chunk))
        self.crc32 = zlib.crc32(chunk, self.crc32) & 0xFFFFFFFF

    def close(self):
        """
        Close the Deflater buffers.
        """
        self._data.close()
        if self.compress:
            self._deflated.close()

    def _flush(self):
        """
        Flush the underlying zlib compression object, once.
        """
        if not (self.compress and self._deflater):
            return
        self._deflated.write(self._deflater.flush())
        self._deflater = None

    @property
    def compressed(self):
        """
        Whether the compressed form is smaller than the uncompressed data.
        """
        return self._compressed_size < self.uncompressed_size

    @property
    def _compressed_size(self):
        """
        The real compressed size of the data written so far, whether or not
        it beats the uncompressed size. When the Deflater is set not to
        compress, this is the uncompressed size.
        """
        if not self.compress:
            return self.uncompressed_size
        self._flush()
        return self._deflated.tell()

    @property
    def compressed_size(self):
        """
        The size of the data as it should be stored: the real compressed
        size when that is smaller than the uncompressed size, else the
        uncompressed size.
        """
        return self._compressed_size if self.compressed else self.uncompressed_size

    @property
    def uncompressed_size(self):
        """
        The size of the data written to the Deflater.
        """
        return self._data.tell()

    @property
    def compressed_data(self):
        """
        The data as it should be stored: the compressed bytes when they are
        smaller than the uncompressed data, else the uncompressed bytes.
        """
        source = self._deflated if self.compressed else self._data
        return source.getvalue()
class JarLog(dict):
    """
    Helper to read the file Gecko generates when setting MOZ_JAR_LOG_FILE.
    The jar log is then available as a dict with the jar path as key, and
    the corresponding access log as a list value. Only the first access to
    a given member of a jar is stored.
    """

    def __init__(self, file=None, fileobj=None):
        """
        Load the log from the given file-like object (any iterable of lines)
        if one is given, else from the file of the given name.

        Each line holds a jar path and a member path separated by
        whitespace. Lines that do not hold both, such as blank lines, are
        skipped.
        """
        opened = None
        if not fileobj:
            opened = fileobj = open(file, "r")
        try:
            for line in fileobj:
                fields = line.strip().split(None, 1)
                # Unpacking directly would raise ValueError on blank or
                # single-token lines.
                if len(fields) != 2:
                    continue
                jar, path = fields
                entry = self.setdefault(jar, [])
                if path not in entry:
                    entry.append(path)
        finally:
            # Only close what was opened here; a caller-provided object is
            # left open.
            if opened is not None:
                opened.close()