Removing unwanted files from source.

pull/74/head
mbusb 8 years ago
parent 2c33ed73de
commit 5e9fcf9f63

Binary file not shown.

@ -141,6 +141,7 @@ def detect_iso_from_file_list(iso_link):
if os.path.exists(iso_link):
iso9660fs = ISO9660(iso_link)
iso_file_list = iso9660fs.readDir("/")
print(iso_file_list)
if any("sources" in s.lower() for s in iso_file_list):
return "Windows"
elif any("config.isoclient" in s.lower() for s in iso_file_list):

@ -1,23 +0,0 @@
import iso
import source
def parse(path_or_url, cache_content=False, min_fetch=16):
    """
    Returns an :class:`ISO` object for the given filesystem path or URL.

    path_or_url:
        Where to read the ISO from. A string starting with ``http://`` or ``https://`` is read
        through an HTTP source; anything else is opened as a local file.

    cache_content:
        Whether to store sectors backing file content in the sector cache. If true, this will
        cause memory usage to grow to the size of the ISO as more file content get accessed.
        Even if false (default), an individual Record object will cache its own file content
        for the lifetime of the Record, once accessed.

    min_fetch:
        The smallest number of sectors to fetch in a single operation, to speed up sequential
        accesses, e.g. for directory traversal. Defaults to 16 sectors, or 32 KiB.
    """
    source_kwargs = dict(cache_content=cache_content, min_fetch=min_fetch)

    # Match the complete scheme prefix: a bare "http" test also catches local paths such as
    # "http_mirror/disc.iso" and would wrongly hand them to the HTTP source.
    if path_or_url.startswith(("http://", "https://")):
        src = source.HTTPSource(path_or_url, **source_kwargs)
    else:
        src = source.FileSource(path_or_url, **source_kwargs)

    return iso.ISO(src)

@ -1,56 +0,0 @@
class ISO(object):
    """
    Entry point for reading an ISO image through a source object. Holds the image's volume
    descriptors (keyed by descriptor name), its path table and its root directory record.
    """

    def __init__(self, source):
        self._source = source

        # Unpack volume descriptors: one per sector from sector 16 onwards, stopping once the
        # terminator descriptor has been stored
        self.volume_descriptors = {}
        sector = 16
        terminated = False
        while not terminated:
            self._source.seek(sector)
            sector += 1
            descriptor = self._source.unpack_volume_descriptor()
            self.volume_descriptors[descriptor.name] = descriptor
            terminated = descriptor.name == "terminator"

        primary = self.volume_descriptors['primary']

        # Unpack the path table from the location and size given by the primary descriptor
        self._source.seek(primary.path_table_l_loc, primary.path_table_size)
        self.path_table = self._source.unpack_path_table()

        # Save a reference to the root record
        self.root = primary.root_record

    def record(self, *path):
        """
        Retrieves a record for the given path.

        Path components are upper-cased before matching. Raises KeyError (carrying the
        offending component) when a component cannot be found.
        """
        parts = [part.upper() for part in path]

        # Find the longest leading portion of the path that the path table can resolve
        found = None
        pivot = len(parts)
        while pivot > 0:
            try:
                found = self.path_table.record(*parts[:pivot])
                break
            except KeyError:
                pivot -= 1

        # Nothing resolved through the path table: start walking from the root record
        current = self.root if found is None else found

        # Resolve the remainder of the path by walking record children
        for part in parts[pivot:]:
            match = next(
                (child for child in current.children_unsafe if child.name == part),
                None)
            if match is None:
                raise KeyError(part)
            current = match

        return current

@ -1,31 +0,0 @@
import record
class PathTable(object):
    """
    Lookup table built from a source positioned on a path table. ``paths`` maps each directory
    path (a tuple of names) to the location stored for it in the table.
    """

    def __init__(self, source):
        self._source = source
        self.paths = {}

        # Paths in the order they were read, so later entries can look up their parent by index
        parsed = []
        while len(source) > 0:
            name_length = source.unpack('B')
            source.unpack('B')  # read and discarded
            location = source.unpack('<I')
            parent_idx = source.unpack('<H') - 1  # stored index is 1-based
            name = source.unpack_string(name_length)
            source.unpack_raw(name_length % 2)  # skip one extra byte after odd-length names

            # Every entry but the first starts from a copy of its parent's path
            path = list(parsed[parent_idx]) if parsed else []
            if name != "\x00":
                path.append(name)

            parsed.append(path)
            self.paths[tuple(path)] = location

    def record(self, *path):
        """
        Seeks the source to the location stored for the given path and unpacks the record
        found there. Raises KeyError if the path is not in the table.
        """
        self._source.seek(self.paths[path])
        return self._source.unpack_record()

@ -1,70 +0,0 @@
class Record(object):
    """
    A single file or directory record unpacked from a source.
    """

    def __init__(self, source, length):
        """
        Unpacks the record fields from the source's current position. ``length`` is the number
        of bytes belonging to this record, counted from the current cursor; whatever is left
        unparsed after the name is skipped.
        """
        self._source = source
        self._content = None
        end_of_record = source.cursor + length

        source.unpack('B')  # TODO: extended attributes length
        self.location = source.unpack_both('I')
        self.length = source.unpack_both('I')
        self.datetime = source.unpack_dir_datetime()

        flags = source.unpack('B')
        self.is_hidden = flags & 1
        self.is_directory = flags & 2
        # TODO: other flags

        source.unpack('B')  # TODO: interleave unit size
        source.unpack('B')  # TODO: interleave gap size
        source.unpack_both('h')  # TODO: volume sequence

        name_length = source.unpack('B')
        # Keep only the part of the name before the first ';'
        raw_name = source.unpack_string(name_length).split(';')[0]
        self.name = "" if raw_name == "\x00" else raw_name

        # TODO: extended attributes
        source.unpack_raw(end_of_record - source.cursor)

    def __repr__(self):
        kind = "directory" if self.is_directory else "file"
        return "<Record (%s) name=%r>" % (kind, self.name)

    @property
    def children_unsafe(self):
        """
        Generator over the child records of this directory record. It reads straight from the
        shared source, so each step relies on the source cursor being exactly where the
        previous step left it. Use :func:`children` when that cannot be guaranteed.
        """
        assert self.is_directory
        self._source.seek(self.location, self.length)
        self._source.unpack_record()  # current directory
        self._source.unpack_record()  # parent directory
        while len(self._source) > 0:
            child = self._source.unpack_record()
            if child is not None:
                yield child
            else:
                # No record here: skip ahead to the next sector boundary
                self._source.unpack_boundary()

    @property
    def children(self):
        """
        List of the child records of this directory record.
        """
        return [child for child in self.children_unsafe]

    @property
    def content(self):
        """
        Contents of this file record. Read from the source on first access and kept on the
        record afterwards.
        """
        assert not self.is_directory
        if self._content is not None:
            return self._content
        self._source.seek(self.location, self.length, is_content=True)
        self._content = self._source.unpack_all()
        return self._content

@ -1,165 +0,0 @@
import datetime
import struct
import urllib
import path_table
import record
import volume_descriptors
# Size of one sector in bytes; sources fetch, cache and address data in units of this size.
SECTOR_LENGTH = 2048
class SourceError(Exception):
    """
    Raised when a source runs out of buffered data or unpacks data that fails validation.
    """
class Source(object):
    """
    Base class for ISO data sources. :func:`seek` loads sectors into an internal buffer (using
    a per-sector cache and the subclass-provided :func:`_fetch`), and the ``unpack_*`` methods
    consume that buffer from a moving cursor.
    """

    def __init__(self, cache_content=False, min_fetch=16):
        self.cache_content = cache_content
        self.min_fetch = min_fetch
        self.cursor = None
        self._buff = None
        self._sectors = {}

    def __len__(self):
        # Number of buffered bytes that have not been consumed yet
        return len(self._buff) - self.cursor

    def unpack_raw(self, l):
        """
        Consumes and returns the next ``l`` bytes of the buffer. Raises SourceError if fewer
        than ``l`` bytes remain.
        """
        if l > len(self):
            raise SourceError("Source buffer under-run")
        start = self.cursor
        self.cursor = start + l
        return self._buff[start:self.cursor]

    def unpack_all(self):
        """
        Consumes and returns everything left in the buffer.
        """
        return self.unpack_raw(len(self))

    def unpack_boundary(self):
        """
        Consumes the bytes between the cursor and the next multiple of the sector length.
        """
        offset_in_sector = self.cursor % SECTOR_LENGTH
        return self.unpack_raw(SECTOR_LENGTH - offset_in_sector)

    def unpack_both(self, st):
        """
        Unpacks a value stored twice in a row, little-endian then big-endian, and returns it.
        Raises SourceError if the two copies disagree.
        """
        little = self.unpack('<' + st)
        big = self.unpack('>' + st)
        if little == big:
            return little
        raise SourceError("Both-endian value mismatch")

    def unpack_string(self, l):
        """
        Consumes ``l`` bytes and returns them with trailing spaces removed.
        """
        raw = self.unpack_raw(l)
        return raw.rstrip(' ')

    def unpack(self, st):
        """
        Unpacks the struct format ``st`` from the buffer, defaulting to little-endian when the
        format carries no '<' or '>' prefix. A format of two characters (prefix included)
        yields the bare value; anything longer yields the tuple from ``struct.unpack``.
        """
        fmt = st if st[0] in '<>' else '<' + st
        values = struct.unpack(fmt, self.unpack_raw(struct.calcsize(fmt)))
        return values[0] if len(fmt) == 2 else values

    def unpack_vd_datetime(self):
        return self.unpack_raw(17)  # TODO

    def unpack_dir_datetime(self):
        """
        Consumes a 7-byte date/time (six unsigned bytes followed by one signed GMT offset) and
        returns it formatted as 'YYYY-MM-DD HH:MM:SS'.
        """
        epoch = datetime.datetime(1970, 1, 1)
        raw = self.unpack_raw(7)
        year, month, day, hour, minute, second, gmt_offset = struct.unpack('<6Bb', raw)
        recorded = datetime.datetime(year + 1900, month, day, hour, minute, second)
        # Offset from GMT in 15min intervals, converted to secs
        offset_seconds = gmt_offset * 15 * 60.
        timestamp = (recorded - epoch).total_seconds() - offset_seconds
        return datetime.datetime.fromtimestamp(timestamp).strftime('%Y-%m-%d %H:%M:%S')

    def unpack_volume_descriptor(self):
        """
        Unpacks a volume descriptor header (type, identifier, version), validates it and
        returns the descriptor object matching the type. Raises SourceError on a bad
        identifier, a bad version or an unknown type.
        """
        ty = self.unpack('B')
        identifier = self.unpack_string(5)
        version = self.unpack('B')

        if identifier != "CD001":
            raise SourceError("Wrong volume descriptor identifier")
        if version != 1:
            raise SourceError("Wrong volume descriptor version")

        vd_classes = {
            0: volume_descriptors.BootVD,
            1: volume_descriptors.PrimaryVD,
            2: volume_descriptors.SupplementaryVD,
            3: volume_descriptors.PartitionVD,
            255: volume_descriptors.TerminatorVD,
        }
        vd_class = vd_classes.get(ty)
        if vd_class is None:
            raise SourceError("Unknown volume descriptor type: %d" % ty)
        return vd_class(self)

    def unpack_path_table(self):
        return path_table.PathTable(self)

    def unpack_record(self):
        """
        Reads a length byte and unpacks the record that follows it. Returns None when the
        length byte is zero.
        """
        length = self.unpack('B')
        # The length byte itself has already been consumed, hence the - 1
        return record.Record(self, length - 1) if length else None

    def seek(self, start_sector, length=SECTOR_LENGTH, is_content=False):
        """
        Replaces the buffer with ``length`` bytes starting at ``start_sector`` and resets the
        cursor to zero. Cached sectors are reused and missing runs of sectors are fetched in
        one call each. Caching applies to every non-content read, and to content reads only
        when ``cache_content`` is set; when it applies, at least ``min_fetch`` sectors are
        considered so that neighbouring sectors get cached too.
        """
        self.cursor = 0
        self._buff = ""
        do_caching = (not is_content or self.cache_content)
        n_sectors = 1 + (length - 1) // SECTOR_LENGTH
        fetch_sectors = max(self.min_fetch, n_sectors) if do_caching else n_sectors

        wanted_end = start_sector + n_sectors
        fetch_end = start_sector + fetch_sectors

        def fetch_run(first, count):
            # Fetch `count` consecutive sectors, append them to the buffer and, when caching
            # applies, remember each one individually
            data = self._fetch(first, count)
            self._buff += data
            if do_caching:
                for idx in xrange(count):
                    offset = idx * SECTOR_LENGTH
                    self._sectors[first + idx] = data[offset:offset + SECTOR_LENGTH]

        run_start = None  # first sector of the current run of uncached sectors, if any
        for sector in xrange(start_sector, fetch_end):
            if sector not in self._sectors:
                if run_start is None:
                    run_start = sector
                continue

            # A cached sector ends any pending run of missing sectors
            if run_start is not None:
                fetch_run(run_start, sector - run_start)
                run_start = None

            # If we've gotten past the sectors we actually need, don't continue to fetch
            if sector >= wanted_end:
                break

            self._buff += self._sectors[sector]

        if run_start is not None:
            fetch_run(run_start, fetch_end - run_start)

        # Drop anything loaded beyond the requested length
        self._buff = self._buff[:length]

    def _fetch(self, sector, count=1):
        raise NotImplementedError
class FileSource(Source):
    """
    Source that reads sectors from a file on the local filesystem.

    The file is opened in the constructor and stays open for later fetches. Call
    :func:`close` (or use the object as a context manager) to release it; previously there
    was no way to close the handle.
    """

    def __init__(self, path, **kwargs):
        super(FileSource, self).__init__(**kwargs)
        self._file = open(path, 'rb')

    def close(self):
        """
        Closes the underlying file. Fetching after this fails because the file is closed.
        """
        self._file.close()

    def __enter__(self):
        return self

    def __exit__(self, *exc_info):
        self.close()

    def _fetch(self, sector, count=1):
        """
        Returns the bytes of ``count`` sectors starting at ``sector``. The result is shorter
        if the file ends first.
        """
        self._file.seek(sector * SECTOR_LENGTH)
        return self._file.read(SECTOR_LENGTH * count)
class HTTPSource(Source):
    """
    Source that reads sectors from a URL using HTTP range requests.
    """

    def __init__(self, url, **kwargs):
        super(HTTPSource, self).__init__(**kwargs)
        self._url = url

    def _fetch(self, sector, count=1):
        """
        Requests the byte range covering ``count`` sectors starting at ``sector`` and returns
        the response body.
        """
        first_byte = SECTOR_LENGTH * sector
        last_byte = SECTOR_LENGTH * (sector + count) - 1

        opener = urllib.FancyURLopener()
        # Install a handler for HTTP 206 that returns None -- presumably so that the
        # "Partial Content" reply to the Range request falls through to the opener's default
        # handling instead of being treated as a failure.
        opener.http_error_206 = lambda *a, **k: None
        opener.addheader("Range", "bytes=%d-%d" % (first_byte, last_byte))

        response = opener.open(self._url)
        try:
            return response.read()
        finally:
            # Release the connection instead of leaving it for the garbage collector
            response.close()

@ -1,59 +0,0 @@
class VolumeDescriptor(object):
    """
    Base class for volume descriptors. Subclasses set ``name``, which identifies the kind of
    descriptor.
    """

    name = None

    def __init__(self, source):
        """
        The base implementation reads nothing from ``source``.
        """

    def __repr__(self):
        return "<VolumeDescriptor name=%r>" % self.name
class BootVD(VolumeDescriptor):
    """
    Volume descriptor created for descriptor type 0. Only its name is recorded; nothing
    further is read from the source.
    """

    name = "boot"
class PrimaryVD(VolumeDescriptor):
    """
    Primary volume descriptor. Its fields are unpacked from the source strictly in the order
    they are stored, so the statements in ``__init__`` must not be reordered.
    """

    name = "primary"

    def __init__(self, source):
        """
        Unpacks the descriptor body from ``source``, whose cursor must be positioned just
        after the descriptor header (type, identifier, version).
        """
        super(PrimaryVD, self).__init__(source)

        _ = source.unpack_raw(1)  # unused
        self.system_identifier = source.unpack_string(32)
        self.volume_identifier = source.unpack_string(32)
        _ = source.unpack_raw(8)  # unused
        self.volume_space_size = source.unpack_both('i')
        _ = source.unpack_raw(32)  # unused
        self.volume_set_size = source.unpack_both('h')
        self.volume_seq_num = source.unpack_both('h')
        self.logical_block_size = source.unpack_both('h')
        self.path_table_size = source.unpack_both('i')

        # Path table locations are stored once per byte order rather than as both-endian pairs
        self.path_table_l_loc = source.unpack('<i')
        self.path_table_opt_l_loc = source.unpack('<i')
        self.path_table_m_loc = source.unpack('>i')
        self.path_table_opt_m_loc = source.unpack('>i')

        self.root_record = source.unpack_record()

        self.volume_set_identifier = source.unpack_string(128)
        self.publisher_identifier = source.unpack_string(128)
        self.data_preparer_identifier = source.unpack_string(128)
        self.application_identifier = source.unpack_string(128)

        # Each of the three file identifier fields is 37 bytes wide (ECMA-119 8.4.20-8.4.22).
        # Reading them as 38/36/37 moved the first byte of the abstract identifier onto the
        # end of the copyright identifier.
        self.copyright_file_identifier = source.unpack_string(37)
        self.abstract_file_identifier = source.unpack_string(37)
        self.bibliographic_file_identifier = source.unpack_string(37)

        self.volume_datetime_created = source.unpack_vd_datetime()
        self.volume_datetime_modified = source.unpack_vd_datetime()
        self.volume_datetime_expires = source.unpack_vd_datetime()
        self.volume_datetime_effective = source.unpack_vd_datetime()
        self.file_structure_version = source.unpack('B')
class SupplementaryVD(VolumeDescriptor):
    """
    Volume descriptor created for descriptor type 2. Only its name is recorded; nothing
    further is read from the source.
    """

    name = "supplementary"
class PartitionVD(VolumeDescriptor):
    """
    Volume descriptor created for descriptor type 3. Only its name is recorded; nothing
    further is read from the source.
    """

    name = "partition"
class TerminatorVD(VolumeDescriptor):
    """
    Volume descriptor created for descriptor type 255. Its name, "terminator", is what ends
    the volume descriptor scan. Nothing further is read from the source.
    """

    name = "terminator"
Loading…
Cancel
Save