Floris Bruynooghe
@flubdevork
Contents
(Image courtesy Ned Batchelder)
"""Docstring for example.py"""


def sum(a, b):  # NOTE: shadows the builtin sum() inside this module
    """Return a * 2 + b * 3"""
    a = a * 2  # rebinds the parameter name a
    c = b * 3  # c is a new local in addition to the two parameters
    return a + c


if __name__ == '__main__':
    print(sum(15, 4))
Happens implicitly
Explicit:
python3 -m py_compile example.py
Creates __pycache__/example.cpython-32.pyc (CPython 3.2)
See imp.cache_from_source() & Co
Benefits over 2.x .pyc files:
Reading .pyc Files
http://nedbatchelder.com/blog/200804/the_structure_of_pyc_files.html
4 bytes -> b'l\x0c\r\n'
CR LF to detect encoding corruption
struct.unpack('<H', b'l\x0c')[0] -> 3180
Python/import.c:
Python 3.2a2 3180 (add DELETE_DEREF)
imp.get_magic()
Unix timestamp, 4 bytes:
b'\xe4\xd7\xfdM'
Unsigned long, little endian
time.ctime(struct.unpack('<L', b'\xe4\xd7\xfdM')[0]):
'Sun Jun 19 12:05:08 2011'
Marshal module:
marshal.load(fp):
<code object <module> at 0x1a3bbe0, file "example.py", line 1>
# A CPython 3.2 .pyc file is read in three steps: the 4-byte magic
# number, the 4-byte little-endian timestamp, then the marshalled
# module code object.
with open('__pycache__/example.cpython-32.pyc', 'rb') as fp:
    magic = fp.read(4)
    assert magic == imp.get_magic()
    (timestamp,) = struct.unpack('<L', fp.read(4))
    code = marshal.load(fp)
    # Use code here...
Compile your own code object:
compile(source, filename, mode, flags=0, dont_inherit=False, optimize=-1)
source: open('example.py').read()
filename: 'example.py', '<string>'
mode: "exec", "eval" or "single"
"exec" for a module
"eval" for an expression
"single" for a single interactive statement (expression results are automatically printed to the terminal for you, etc.)
flags: __future__
dont_inherit: inherit flags from calling code?
optimize: optimization level, as with -O (1) and -OO (2)
co_filename = 'example.py'
co_firstlineno = 1
co_lnotab = b'\x06\x03\t\x07\x0c\x01' (Objects/lnotab_notes.txt)
co_flags = 0x40
co_name = '<module>'
co_names = ('__doc__', 'sum', '__name__', 'print')
co_stacksize = 4
co_code = b'd\x00\x00Z\x00\x00d\x01\x00\x84\x00\x00Z\x01\x00e\x02\x00d\x02\x00k\x02\x00r1\x00e\x03\x00e\x01\x00d\x03\x00d\x04\x00\x83\x02\x00\x83\x01\x00\x01n\x00\x00d\x05\x00S'
co_filename = 'example.py'
co_flags = 0x43
co_firstlineno = 4
co_lnotab = b'\x00\x02\n\x01\n\x01'
co_name = 'sum'
co_names = ()
co_consts = ('Return a * 2 + b * 3', 2, 3)
The first item would be None if there was no docstring.
co_argcount = 2
co_kwonlyargcount = 0
co_cellvars = ()
This is used when nested functions use variables from this function.
co_freevars = ()
Free variables: variables which need to come from an enclosing context.
co_nlocals = 3
co_varnames = ('a', 'b', 'c')
co_stacksize = 2
co_code = b'|\x00\x00d\x01\x00\x14}\x00\x00|\x01\x00d\x02\x00\x14}\x02\x00|\x00\x00|\x02\x00\x17S'
Bytecode
>>> dis.show_code(code_sum) Name: sum Filename: example.py Argument count: 2 Kw-only arguments: 0 Number of locals: 3 Stack size: 2 Flags: OPTIMIZED, NEWLOCALS, NOFREE Constants: 0: 'Return a * 2 + b * 3' 1: 2 2: 3 Variable names: 0: a 1: b 2: c
def sum(a, b):
    a = a * 2  # source line 6: bytecode offsets 0-7
    c = b * 3  # source line 7: bytecode offsets 10-17
    return a + c  # source line 8: bytecode offsets 20-27
>>> dis.dis(code_sum) 6 0 LOAD_FAST 0 (a) 3 LOAD_CONST 1 (2) 6 BINARY_MULTIPLY 7 STORE_FAST 0 (a) 7 10 LOAD_FAST 1 (b) 13 LOAD_CONST 2 (3) 16 BINARY_MULTIPLY 17 STORE_FAST 2 (c) 8 20 LOAD_FAST 0 (a) 23 LOAD_FAST 2 (c) 26 BINARY_ADD 27 RETURN_VALUE
| Opcode | Description |
|---|---|
| LOAD_FAST(var_num): | Pushes a reference to the local co_varnames[var_num] onto the stack. |
| LOAD_CONST(consti): | Pushes co_consts[consti] onto the stack. |
| BINARY_MULTIPLY: | Implements TOS = TOS1 * TOS. |
| STORE_FAST(var_num): | Stores TOS into the local co_varnames[var_num]. |
| BINARY_ADD: | Implements TOS = TOS1 + TOS. |
| RETURN_VALUE: | Returns with TOS to the caller of the function. |
Sorry, no pretty picture:
class Frame:
    """Execution frame for one code object in the toy VM.

    Holds the code object, the three namespaces used for name lookup,
    the index of the current instruction (f_lasti) and the value stack
    that the opcode methods push to and pop from.
    """

    def __init__(self, code, prev_frame=None, globals=None, locals=None,
                 builtins=builtins.__dict__):
        """Create a frame for ``code``.

        Args:
            code: the code object to execute.
            prev_frame: the calling frame, stored as ``f_back``.
            globals: global namespace; a new empty dict when None.
            locals: local namespace; a shallow copy of the globals
                when None.
            builtins: builtin namespace, by default the ``builtins``
                module dictionary.
        """
        self.f_back = prev_frame
        self.f_code = code
        self.f_lasti = None
        # Compare against None rather than truthiness: an empty dict
        # passed by the caller is a valid namespace and must be used
        # as-is so that the caller sees what gets stored into it.
        self.f_globals = globals if globals is not None else {}
        self.f_locals = (locals if locals is not None
                         else copy.copy(self.f_globals))
        self.f_builtins = builtins
        self.stack = []

    def exec(self, *args):
        pass
co_code = b'|\x00\x00d\x01\x00\x14}\x00\x00|\x01\x00d\x02\x00\x14}\x02\x00|\x00\x00|\x02\x00\x17S'
' '.join(format(i, 'd') for i in cobj_sum.co_code)
'124 0 0 100 1 0 20 125 0 0 124 1 0 100 2 0 20 125 2 0 124 0 0 124 2 0 23 83'
dis.opname[124] = 'LOAD_FAST'
dis.opmap['LOAD_FAST'] = 124
def exec(self, *args):
    """Run the frame's bytecode and return the value left by RETURN_VALUE.

    Each opcode is dispatched to the method of the same name on the
    frame. Opcodes at or above dis.HAVE_ARGUMENT carry a 16-bit
    little-endian argument in the two following bytes and occupy three
    bytes in total; all other opcodes occupy one byte.
    """
    bytecode = self.f_code.co_code
    self.f_lasti = 0
    while True:
        pos = self.f_lasti
        opcode = bytecode[pos]
        handler = getattr(self, dis.opname[opcode])
        if opcode < dis.HAVE_ARGUMENT:
            handler()
            self.f_lasti += 1
        else:
            operand = bytecode[pos + 1] | (bytecode[pos + 2] << 8)
            handler(operand)
            self.f_lasti += 3
        if opcode == dis.opmap['RETURN_VALUE']:
            return self.stack.pop()
Arguments passed in order of co_varnames
Need to be inserted into locals
def exec(self, *args): # XXX Doesn't handle default values or kwonly args assert len(args) == self.f_code.co_argcount assert self.f_code.co_kwonlyargcount == 0 for i, arg in enumerate(args): self.f_locals[self.f_code.co_varnames[i]] = arg code = self.f_code.co_code self.f_lasti = 0 while True: ...
Here implemented as Frame methods
Only 6 so far:
>>> dis.dis(code_sum) 6 0 LOAD_FAST 0 (a) 3 LOAD_CONST 1 (2) 6 BINARY_MULTIPLY 7 STORE_FAST 0 (a) 7 10 LOAD_FAST 1 (b) 13 LOAD_CONST 2 (3) 16 BINARY_MULTIPLY 17 STORE_FAST 2 (c) 8 20 LOAD_FAST 0 (a) 23 LOAD_FAST 2 (c) 26 BINARY_ADD 27 RETURN_VALUE
Move variables between the frame and the stack
def LOAD_FAST(self, var_num):
    """Push the local named co_varnames[var_num] onto the stack."""
    local_names = self.f_code.co_varnames
    value = self.f_locals[local_names[var_num]]
    self.stack.append(value)
def STORE_FAST(self, var_num):
    """Pop TOS and store it in the local named co_varnames[var_num]."""
    target = self.f_code.co_varnames[var_num]
    value = self.stack.pop()
    self.f_locals[target] = value
def LOAD_CONST(self, consti):
    """Push the constant co_consts[consti] onto the stack."""
    constant = self.f_code.co_consts[consti]
    self.stack.append(constant)
obj.__mul__(other)
Can return NotImplemented
def BINARY_MULTIPLY(self):
    """Implement TOS = TOS1 * TOS.

    Pops the right operand, then the left one. Tries
    ``left.__mul__(right)`` first and falls back to the reflected
    ``right.__rmul__(left)`` when that is missing or returns
    NotImplemented. The result is pushed onto the stack.

    Raises:
        TypeError: if neither operand implements the operation.
    """
    right = self.stack.pop()
    left = self.stack.pop()
    res = NotImplemented
    if hasattr(left, '__mul__'):
        res = left.__mul__(right)
    # The fallback has to be the reflected method: right.__mul__(left)
    # would evaluate "right * left", i.e. with the operands swapped.
    if res is NotImplemented and hasattr(right, '__rmul__'):
        res = right.__rmul__(left)
    if res is NotImplemented:
        raise TypeError(
            "unsupported operand type(s) for *: '{}' and '{}'".format(
                left.__class__.__name__, right.__class__.__name__))
    self.stack.append(res)
obj.__add__(other)
Can return NotImplemented
def BINARY_ADD(self):
    """Implement TOS = TOS1 + TOS.

    Pops the right operand, then the left one. Tries
    ``left.__add__(right)`` first and falls back to the reflected
    ``right.__radd__(left)`` when that is missing or returns
    NotImplemented. The result is pushed onto the stack.

    Raises:
        TypeError: if neither operand implements the operation.
    """
    right = self.stack.pop()
    left = self.stack.pop()
    res = NotImplemented
    if hasattr(left, '__add__'):
        res = left.__add__(right)
    # The fallback has to be the reflected method: right.__add__(left)
    # would evaluate "right + left", which reverses the operand order
    # and gives the wrong result for non-commutative additions.
    if res is NotImplemented and hasattr(right, '__radd__'):
        res = right.__radd__(left)
    if res is NotImplemented:
        raise TypeError(
            "unsupported operand type(s) for +: '{}' and '{}'".format(
                left.__class__.__name__, right.__class__.__name__))
    self.stack.append(res)
def RETURN_VALUE(self):
    """Do nothing; the eval loop pops and returns TOS for this opcode."""
    return None
def exec(self, *args): ... while True: ... if opcode == dis.opmap['RETURN_VALUE']: return self.stack.pop()
Can execute sum()!
>>> f_sum = Frame(code_sum) >>> f_sum.exec(15, 4) 42
How about executing the module?
>>> dis.dis(code_mod) 1 0 LOAD_CONST 0 ('Docstring for example.py') 3 STORE_NAME 0 (__doc__) 4 6 LOAD_CONST 1 (<code object sum at 0x12b3470, file "example.py", line 4>) 9 MAKE_FUNCTION 0 12 STORE_NAME 1 (sum) 11 15 LOAD_NAME 2 (__name__) 18 LOAD_CONST 2 ('__main__') 21 COMPARE_OP 2 (==) 24 POP_JUMP_IF_FALSE 49 12 27 LOAD_NAME 3 (print) 30 LOAD_NAME 1 (sum) 33 LOAD_CONST 3 (15) 36 LOAD_CONST 4 (4) 39 CALL_FUNCTION 2 42 CALL_FUNCTION 1 45 POP_TOP 46 JUMP_FORWARD 0 (to 49) >> 49 LOAD_CONST 5 (None) 52 RETURN_VALUE
12 27 LOAD_NAME 3 (print) 30 LOAD_NAME 1 (sum) 33 LOAD_CONST 3 (15) 36 LOAD_CONST 4 (4) 39 CALL_FUNCTION 2 42 CALL_FUNCTION 1 45 POP_TOP 46 JUMP_FORWARD 0 (to 49) >> 49 LOAD_CONST 5 (None) 52 RETURN_VALUE
def STORE_NAME(self, namei):
    """Pop TOS and bind it to co_names[namei] in the frame's locals."""
    target = self.f_code.co_names[namei]
    value = self.stack.pop()
    self.f_locals[target] = value
def LOAD_NAME(self, namei):
    """Push the value bound to co_names[namei] onto the stack.

    The name is looked up in the frame's locals, then its globals,
    then its builtins; the first namespace containing it wins.

    Raises:
        NameError: if the name is in none of the three namespaces.
    """
    name = self.f_code.co_names[namei]
    for scope_attr in ('f_locals', 'f_globals', 'f_builtins'):
        scope = getattr(self, scope_attr)
        if name in scope:
            self.stack.append(scope[name])
            return
    raise NameError("name '{}' is not defined".format(name))
def MAKE_FUNCTION(self, argc):
    """Pop a code object and push a function built from it.

    The function is created with the frame's globals; ``argc`` is
    not used.
    """
    # XXX Doesn't handle default argument values
    code_obj = self.stack.pop()
    self.stack.append(types.FunctionType(code_obj, self.f_globals))
Re-using CPython's function type
__doc__, __name__, __module__, __defaults__, __code__, __globals__, __dict__, __closure__, __annotations__, __kwdefaults__
def CALL_FUNCTION(self, argc):
    """Call a callable with positional arguments taken from the stack.

    The low byte of ``argc`` is the number of positional arguments and
    the bits above it the number of keyword arguments, which must be 0.
    The arguments sit above the callable on the stack. Objects with a
    ``__code__`` attribute run in a new Frame; anything else is called
    directly. The return value is pushed onto the stack.
    """
    # XXX Doesn't handle default values or kwonly args
    positional = argc & 0xff
    keyword = argc >> 8
    assert keyword == 0
    # Popping yields the arguments last-to-first, so flip them back.
    args = [self.stack.pop() for _ in range(positional)][::-1]
    func = self.stack.pop()
    if not hasattr(func, '__code__'):
        result = func.__call__(*args)  # assume builtin function
    else:
        callee = Frame(func.__code__, prev_frame=self,
                       globals=func.__globals__)
        result = callee.exec(*args)
    self.stack.append(result)
def POP_JUMP_IF_FALSE(self, target):
    """Pop TOS and jump to bytecode offset ``target`` if it is false.

    When TOS is true, f_lasti is left alone and execution continues
    with the next instruction.
    """
    if not self.stack.pop():
        # -3 because the eval loop adds 3 to f_lasti after every
        # opcode that takes an argument.
        self.f_lasti = target - 3
-3 as eval loop will add 3:
meth = getattr(self, dis.opname[opcode]) if opcode >= dis.HAVE_ARGUMENT: arg = co_code[self.f_lasti+1] + \ (co_code[self.f_lasti+2] << 8) meth(arg) self.f_lasti += 3
def POP_TOP(self):
    """Remove the top-of-stack item and discard it."""
    _discarded = self.stack.pop()
def JUMP_FORWARD(self, delta):
    """Advance f_lasti by ``delta`` bytes."""
    self.f_lasti = self.f_lasti + delta
if delta == 0: advance as normal
# Load the cached bytecode of example.py and run it on the toy VM.
with open(imp.cache_from_source('example.py'), 'rb') as fp:
    assert fp.read(4) == imp.get_magic()
    fp.seek(4, io.SEEK_CUR)  # skip the 4 bytes that follow the magic number
    code = marshal.load(fp)
# Run the code object that was just loaded; the name code_mod used
# here before is never assigned in this snippet.
f = Frame(code)
f.exec()
$ python3 vm.py 42
Abusing Bytecode
?
Thanks to my employer