#!/usr/bin/python3 import sys import pprint if len(sys.argv) == 1 or (len(sys.argv) == 2 and sys.argv[1] == '-'): f = sys.stdin.buffer elif len(sys.argv) != 2 or sys.argv[1].startswith('-'): print("Usage: {} FILE -- dumps a bencoded file".format(sys.argv[0]), file=sys.stderr) sys.exit(1) else: f = open(sys.argv[1], 'rb') initial = f.peek(2) is_hex = False if initial.startswith(b'64') or initial.startswith(b'6c'): print("Input looks like hex bencoded data; parsing as hex input", file=sys.stderr) is_hex = True class HexPrinter(): def __init__(self, data): self.data = data def __repr__(self): return "hex({} bytes):'{}'".format(len(self.data), self.data.hex()) def next_byte(): if is_hex: pair = f.read(2) assert pair is not None and len(pair) == 2 b = int(pair, 16).to_bytes(1, 'big') else: b = f.read(1) assert b is not None and len(b) == 1 return b def parse_int(): s = b'' x = next_byte() while x in b"0123456789-": s += x x = next_byte() assert x == b'e' and len(s) > 0, "Invalid integer encoding" return int(s) def parse_string(s): x = next_byte() while x in b"0123456789": s += x x = next_byte() assert x == b':', "Invalid string encoding" s = int(s) if is_hex: data = bytes.fromhex(f.read(2*s).decode('ascii')) else: data = f.read(s) assert len(data) == s, "Truncated string data" # If the string is ascii then convert to string: if all(0x20 <= b <= 0x7e for b in data): return data.decode() # Otherwise display as hex: return HexPrinter(data) def parse_dict(): d = {} last_key = None while True: t = next_byte() if t == b'e': return d assert t in b"0123456789", "Invalid dict: dict keys must be strings" key = parse_string(t) raw_key = key.data if isinstance(key, HexPrinter) else key.encode() if last_key is not None and raw_key <= last_key: print("Warning: found out-of-order dict keys ({} after {})".format(raw_key, last_key), file=sys.stderr) last_key = raw_key t = next_byte() d[key] = parse_thing(t) def parse_list(): l = [] while True: t = next_byte() if t == b'e': return l l.append(parse_thing(t)) def parse_thing(t): if t == b'd': return parse_dict() if t == b'l': return parse_list() if t == b'i': return parse_int() if t in b"0123456789": return parse_string(t) assert False, "Parsing error: encountered invalid type '{}'".format(t) pprint.PrettyPrinter( indent=2 ).pprint(parse_thing(next_byte()))