Removing unwanted files from source.
parent
2c33ed73de
commit
5e9fcf9f63
Binary file not shown.
@ -1,23 +0,0 @@
|
|||||||
import iso
|
|
||||||
import source
|
|
||||||
|
|
||||||
|
|
||||||
def parse(path_or_url, cache_content=False, min_fetch=16):
    """
    Returns an :class:`ISO` object for the given filesystem path or URL.

    cache_content:
        Whether to store sectors backing file content in the sector cache. If true, this will
        cause memory usage to grow to the size of the ISO as more file content get accessed.
        Even if false (default), an individual Record object will cache its own file content
        for the lifetime of the Record, once accessed.

    min_fetch:
        The smallest number of sectors to fetch in a single operation, to speed up sequential
        accesses, e.g. for directory traversal. Defaults to 16 sectors, or 32 KiB.
    """
    # Pick the source implementation from the shape of the argument.
    if path_or_url.startswith("http"):
        source_cls = source.HTTPSource
    else:
        source_cls = source.FileSource
    src = source_cls(path_or_url, cache_content=cache_content, min_fetch=min_fetch)
    return iso.ISO(src)
|
|
@ -1,56 +0,0 @@
|
|||||||
class ISO(object):
    """
    An ISO 9660 image backed by a Source, exposing its volume descriptors,
    path table, and directory records.
    """

    def __init__(self, source):
        self._source = source

        # Volume descriptors start at sector 16 and run until the terminator.
        self.volume_descriptors = {}
        sector_num = 16
        while True:
            self._source.seek(sector_num)
            descriptor = self._source.unpack_volume_descriptor()
            self.volume_descriptors[descriptor.name] = descriptor
            if descriptor.name == "terminator":
                break
            sector_num += 1

        # Unpack the path table from the location the primary VD advertises.
        primary = self.volume_descriptors['primary']
        self._source.seek(primary.path_table_l_loc, primary.path_table_size)
        self.path_table = self._source.unpack_path_table()

        # Save a reference to the root record.
        self.root = primary.root_record

    def record(self, *path):
        """
        Retrieves a record for the given path.
        """
        parts = [component.upper() for component in path]
        found = None
        pivot = len(parts)

        # Resolve as much of the path as possible via the path table.
        while pivot > 0:
            try:
                found = self.path_table.record(*parts[:pivot])
                break
            except KeyError:
                pivot -= 1

        if found is None:
            found = self.root

        # Resolve the remainder of the path by walking record children.
        for part in parts[pivot:]:
            for child in found.children_unsafe:
                if child.name == part:
                    found = child
                    break
            else:
                raise KeyError(part)

        return found
|
|
@ -1,31 +0,0 @@
|
|||||||
import record
|
|
||||||
|
|
||||||
|
|
||||||
class PathTable(object):
    """
    The ISO 9660 path table: maps tuples of directory names to the sector
    location of the corresponding directory record.
    """

    def __init__(self, source):
        self._source = source
        self.paths = {}

        # Full path (list of names) for each table index, used to resolve parents.
        resolved = []

        while len(source) > 0:
            name_len = source.unpack('B')
            _ = source.unpack('B')  # extended attribute record length (unused)
            location = source.unpack('<I')
            parent = source.unpack('<H') - 1  # table indices are 1-based
            name = source.unpack_string(name_len)
            _ = source.unpack_raw(name_len % 2)  # pad byte after odd-length names

            # The first entry is the root; later entries extend their parent's path.
            full_path = list(resolved[parent]) if resolved else []
            if name != "\x00":
                full_path.append(name)

            resolved.append(full_path)
            self.paths[tuple(full_path)] = location

    def record(self, *path):
        """Seek to the directory record for *path* and unpack it."""
        location = self.paths[path]
        self._source.seek(location)
        return self._source.unpack_record()
|
|
@ -1,70 +0,0 @@
|
|||||||
class Record(object):
    """
    A single ISO 9660 directory record, describing either a file or a
    directory. Unpacks its fields from *source* on construction.
    """

    def __init__(self, source, length):
        self._source = source
        self._content = None  # lazily-populated cache for file content
        end_cursor = source.cursor + length  # position just past this record

        # Field order below follows the on-disc record layout; do not reorder.
        _ = source.unpack('B')  # TODO: extended attributes length
        self.location = source.unpack_both('I')
        self.length = source.unpack_both('I')
        self.datetime = source.unpack_dir_datetime()
        flag_bits = source.unpack('B')
        self.is_hidden = flag_bits & 1
        self.is_directory = flag_bits & 2
        # TODO: other flags
        _ = source.unpack('B')  # TODO: interleave unit size
        _ = source.unpack('B')  # TODO: interleave gap size
        _ = source.unpack_both('h')  # TODO: volume sequence
        name_len = source.unpack('B')
        # Names carry a ";version" suffix; keep only the name proper.
        self.name = source.unpack_string(name_len).split(';')[0]
        if self.name == "\x00":
            self.name = ""

        # TODO: extended attributes
        source.unpack_raw(end_cursor - source.cursor)  # skip the remainder

    def __repr__(self):
        kind = "directory" if self.is_directory else "file"
        return "<Record (%s) name=%r>" % (kind, self.name)

    @property
    def children_unsafe(self):
        """
        Assuming this is a directory record, this generator yields a record for each child. Use
        with caution: at each iteration, the generator assumes that the source cursor has not moved
        since the previous child was yielded. For safer behaviour, use :func:`children`.
        """
        assert self.is_directory
        self._source.seek(self.location, self.length)
        self._source.unpack_record()  # skip the "current directory" entry
        self._source.unpack_record()  # skip the "parent directory" entry
        while len(self._source) > 0:
            child = self._source.unpack_record()

            # A zero length byte means padding to the sector boundary follows.
            if child is None:
                self._source.unpack_boundary()
                continue

            yield child

    @property
    def children(self):
        """
        Assuming this is a directory record, this property contains records for its children.
        """
        return [child for child in self.children_unsafe]

    @property
    def content(self):
        """
        Assuming this is a file record, this property contains the file's contents
        """
        assert not self.is_directory
        if self._content is not None:
            return self._content
        self._source.seek(self.location, self.length, is_content=True)
        self._content = self._source.unpack_all()
        return self._content
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1,165 +0,0 @@
|
|||||||
import datetime
|
|
||||||
import struct
|
|
||||||
import urllib
|
|
||||||
|
|
||||||
import path_table
|
|
||||||
import record
|
|
||||||
import volume_descriptors
|
|
||||||
|
|
||||||
|
|
||||||
SECTOR_LENGTH = 2048
|
|
||||||
|
|
||||||
|
|
||||||
class SourceError(Exception):
    """Raised when the backing data cannot be unpacked as expected."""
|
|
||||||
|
|
||||||
|
|
||||||
class Source(object):
    """
    Abstract sector-oriented reader over an ISO 9660 image.

    A call to :func:`seek` loads a window of sectors into an internal buffer;
    the ``unpack_*`` methods then consume that buffer sequentially via
    ``cursor``. Previously fetched sectors are kept in ``_sectors`` so repeat
    seeks avoid re-fetching. Subclasses implement :func:`_fetch` to supply the
    raw sector data.

    NOTE(review): this class relies on Python 2 ``str`` byte-string semantics
    (e.g. ``self._buff = ""``, iterating bytes in ``unpack_dir_datetime``,
    ``xrange`` in ``seek``).
    """

    def __init__(self, cache_content=False, min_fetch=16):
        # Buffer holding the bytes of the most recent seek window.
        self._buff = None
        # Cache of fetched sectors, keyed by absolute sector number.
        self._sectors = {}
        # Index of the next unread byte within _buff.
        self.cursor = None
        # Whether sectors backing file content are also cached (can grow large).
        self.cache_content = cache_content
        # Smallest number of sectors to fetch per operation (read-ahead).
        self.min_fetch = min_fetch

    def __len__(self):
        # Number of unconsumed bytes remaining in the current buffer.
        return len(self._buff) - self.cursor

    def unpack_raw(self, l):
        """Consume and return the next *l* bytes; raises SourceError on under-run."""
        if l > len(self):
            raise SourceError("Source buffer under-run")
        data = self._buff[self.cursor:self.cursor + l]
        self.cursor += l
        return data

    def unpack_all(self):
        """Consume and return everything left in the buffer."""
        return self.unpack_raw(len(self))

    def unpack_boundary(self):
        """Consume up to the next sector boundary and return the skipped bytes."""
        return self.unpack_raw(SECTOR_LENGTH - (self.cursor % SECTOR_LENGTH))

    def unpack_both(self, st):
        """
        Unpack an ISO 9660 both-endian field: the value is stored twice, once
        little-endian and once big-endian; the halves must agree.
        """
        a = self.unpack('<'+st)
        b = self.unpack('>'+st)
        if a != b:
            raise SourceError("Both-endian value mismatch")
        return a

    def unpack_string(self, l):
        """Unpack an *l*-byte field, dropping trailing space padding."""
        return self.unpack_raw(l).rstrip(' ')

    def unpack(self, st):
        """
        Unpack a single struct format from the buffer. Defaults to
        little-endian when *st* carries no byte-order prefix.
        """
        if st[0] not in '<>':
            st = '<' + st
        d = struct.unpack(st, self.unpack_raw(struct.calcsize(st)))
        # Single-item formats unwrap to the value itself, not a 1-tuple.
        if len(st) == 2:
            return d[0]
        else:
            return d

    def unpack_vd_datetime(self):
        # 17-byte volume-descriptor timestamp, returned raw for now.
        return self.unpack_raw(17)  # TODO

    def unpack_dir_datetime(self):
        """Unpack a 7-byte directory-record timestamp into a readable string."""
        epoch = datetime.datetime(1970, 1, 1)
        date = self.unpack_raw(7)
        # First six bytes are unsigned: years-since-1900, month, day, hour,
        # minute, second. NOTE(review): iterating a byte string this way
        # assumes Python 2 str semantics.
        t = [struct.unpack('<B', i)[0] for i in date[:-1]]
        # Final byte is signed: offset from GMT in 15-minute intervals.
        t.append(struct.unpack('<b', date[-1])[0])
        t[0] += 1900
        t_offset = t.pop(-1) * 15 * 60.  # Offset from GMT in 15min intervals, converted to secs
        t_timestamp = (datetime.datetime(*t) - epoch).total_seconds() - t_offset
        t_datetime = datetime.datetime.fromtimestamp(t_timestamp)
        t_readable = t_datetime.strftime('%Y-%m-%d %H:%M:%S')
        return t_readable

    def unpack_volume_descriptor(self):
        """
        Unpack one volume descriptor header and dispatch on its type byte to
        the matching VolumeDescriptor subclass.
        """
        ty = self.unpack('B')
        identifier = self.unpack_string(5)
        version = self.unpack('B')

        if identifier != "CD001":
            raise SourceError("Wrong volume descriptor identifier")
        if version != 1:
            raise SourceError("Wrong volume descriptor version")

        if ty == 0:
            vd = volume_descriptors.BootVD(self)
        elif ty == 1:
            vd = volume_descriptors.PrimaryVD(self)
        elif ty == 2:
            vd = volume_descriptors.SupplementaryVD(self)
        elif ty == 3:
            vd = volume_descriptors.PartitionVD(self)
        elif ty == 255:
            vd = volume_descriptors.TerminatorVD(self)
        else:
            raise SourceError("Unknown volume descriptor type: %d" % ty)
        return vd

    def unpack_path_table(self):
        """Unpack a PathTable from the remainder of the current buffer."""
        return path_table.PathTable(self)

    def unpack_record(self):
        """
        Unpack one directory record. Returns None when the length byte is
        zero, which marks padding before a sector boundary.
        """
        length = self.unpack('B')
        if length == 0:
            return None
        # The length byte itself is included in the on-disc record length.
        return record.Record(self, length-1)

    def seek(self, start_sector, length=SECTOR_LENGTH, is_content=False):
        """
        Load *length* bytes starting at *start_sector* into the buffer and
        reset the cursor. Satisfies as much as possible from the sector cache,
        fetching contiguous runs of missing sectors in as few _fetch calls as
        possible, with read-ahead up to min_fetch sectors when caching.
        """
        self.cursor = 0
        self._buff = ""
        # File content sectors are only cached when explicitly enabled.
        do_caching = (not is_content or self.cache_content)
        n_sectors = 1 + (length - 1) // SECTOR_LENGTH
        # Read ahead only when the extra sectors can be kept in the cache.
        fetch_sectors = max(self.min_fetch, n_sectors) if do_caching else n_sectors
        need_start = None

        def fetch_needed(need_count):
            # Fetch the pending run of uncached sectors starting at need_start.
            data = self._fetch(need_start, need_count)
            self._buff += data
            if do_caching:
                for sector_idx in xrange(need_count):
                    self._sectors[need_start + sector_idx] = data[sector_idx*SECTOR_LENGTH:(sector_idx+1)*SECTOR_LENGTH]

        for sector in xrange(start_sector, start_sector + fetch_sectors):
            if sector in self._sectors:
                # A cached sector ends any pending run of missing sectors.
                if need_start is not None:
                    fetch_needed(sector - need_start)
                    need_start = None
                # If we've gotten past the sectors we actually need, don't continue to fetch
                if sector >= start_sector + n_sectors:
                    break
                self._buff += self._sectors[sector]
            elif need_start is None:
                need_start = sector

        # Flush any run of missing sectors that reached the window's end.
        if need_start is not None:
            fetch_needed(start_sector + fetch_sectors - need_start)

        # Trim read-ahead overshoot so the buffer is exactly *length* bytes.
        self._buff = self._buff[:length]

    def _fetch(self, sector, count=1):
        """Return *count* sectors of raw data starting at *sector*; subclass hook."""
        raise NotImplementedError
|
|
||||||
|
|
||||||
|
|
||||||
class FileSource(Source):
    """Source backed by a local file, opened in binary mode."""

    def __init__(self, path, **kwargs):
        super(FileSource, self).__init__(**kwargs)
        self._file = open(path, 'rb')

    def _fetch(self, sector, count=1):
        # Sectors are fixed-size, so the byte offset is a simple multiple.
        offset = sector * SECTOR_LENGTH
        self._file.seek(offset)
        return self._file.read(count * SECTOR_LENGTH)
|
|
||||||
|
|
||||||
|
|
||||||
class HTTPSource(Source):
    """Source backed by a remote URL, fetched with HTTP Range requests."""

    def __init__(self, url, **kwargs):
        super(HTTPSource, self).__init__(**kwargs)
        self._url = url

    def _fetch(self, sector, count=1):
        first_byte = SECTOR_LENGTH * sector
        last_byte = SECTOR_LENGTH * (sector + count) - 1
        opener = urllib.FancyURLopener()
        # 206 Partial Content is the expected response; don't treat it as an error.
        opener.http_error_206 = lambda *a, **k: None
        opener.addheader("Range", "bytes=%d-%d" % (first_byte, last_byte))
        return opener.open(self._url).read()
|
|
@ -1,59 +0,0 @@
|
|||||||
class VolumeDescriptor(object):
    """Base class for ISO 9660 volume descriptors."""

    # Human-readable descriptor kind; overridden by each subclass.
    name = None

    def __init__(self, source):
        # The base descriptor carries no payload; subclasses unpack their fields.
        pass

    def __repr__(self):
        return "<VolumeDescriptor name=%r>" % (self.name,)
|
|
||||||
|
|
||||||
|
|
||||||
class BootVD(VolumeDescriptor):
    """Boot record volume descriptor (type 0); its fields are not parsed."""
    name = "boot"
|
|
||||||
|
|
||||||
|
|
||||||
class PrimaryVD(VolumeDescriptor):
    """
    Primary volume descriptor (type 1). Unpacks the volume's identifying
    fields, path table locations, and root directory record, in on-disc order.
    """
    name = "primary"

    def __init__(self, source):
        super(PrimaryVD, self).__init__(source)

        # Field order below follows the on-disc layout; do not reorder.
        _ = source.unpack_raw(1)  # unused
        self.system_identifier = source.unpack_string(32)
        self.volume_identifier = source.unpack_string(32)
        _ = source.unpack_raw(8)  # unused
        self.volume_space_size = source.unpack_both('i')
        _ = source.unpack_raw(32)  # unused
        self.volume_set_size = source.unpack_both('h')
        self.volume_seq_num = source.unpack_both('h')
        self.logical_block_size = source.unpack_both('h')
        self.path_table_size = source.unpack_both('i')
        # Path table locations: little-endian (L) and big-endian (M) copies,
        # each with an optional secondary location.
        self.path_table_l_loc = source.unpack('<i')
        self.path_table_opt_l_loc = source.unpack('<i')
        self.path_table_m_loc = source.unpack('>i')
        self.path_table_opt_m_loc = source.unpack('>i')
        # Directory record for the root directory, embedded in the descriptor.
        self.root_record = source.unpack_record()
        self.volume_set_identifier = source.unpack_string(128)
        self.publisher_identifier = source.unpack_string(128)
        self.data_preparer_identifier = source.unpack_string(128)
        self.application_identifier = source.unpack_string(128)
        self.copyright_file_identifier = source.unpack_string(38)
        self.abstract_file_identifier = source.unpack_string(36)
        self.bibliographic_file_identifier = source.unpack_string(37)
        # 17-byte volume timestamps (returned raw by unpack_vd_datetime).
        self.volume_datetime_created = source.unpack_vd_datetime()
        self.volume_datetime_modified = source.unpack_vd_datetime()
        self.volume_datetime_expires = source.unpack_vd_datetime()
        self.volume_datetime_effective = source.unpack_vd_datetime()
        self.file_structure_version = source.unpack('B')
|
|
||||||
|
|
||||||
|
|
||||||
class SupplementaryVD(VolumeDescriptor):
    """Supplementary volume descriptor (type 2); its fields are not parsed."""
    name = "supplementary"
|
|
||||||
|
|
||||||
|
|
||||||
class PartitionVD(VolumeDescriptor):
    """Volume partition descriptor (type 3); its fields are not parsed."""
    name = "partition"
|
|
||||||
|
|
||||||
|
|
||||||
class TerminatorVD(VolumeDescriptor):
    """Volume descriptor set terminator (type 255); ends descriptor scanning."""
    name = "terminator"
|
|
Loading…
Reference in New Issue