Python Cookbook
Table of Contents
1 Data Structures and Algorithms
1.1 Quick Summary
# unpacking
data = ["ACME", 50, 91.1, (2012, 12, 21)]
name, *_, (year, mon, day) = data  # *_ swallows the fields we do not need

# fixed-length queue
from collections import deque

N = 3  # example size; pick whatever window you need
d = deque(maxlen=N)  # once full, appending drops the item at the other end

# heap related
import heapq

items = [1, 8, 2, 23, 7, -4, 18, 23, 42, 37, 2]
heapq.nlargest(N, items)  # faster than sorted(items, reverse=True)[:N] if N is small
heapq.nsmallest(N, items, key=None)
nums = [1, 8, 2, 23, 7, -4, 18, 23, 42, 37, 2]
heapq.heapify(nums)  # in place; nums[0] is now the smallest item

# agg by values
prices = {"AAPL": 612.78, "IBM": 205.55, "FB": 10.75}
min_price = min(zip(prices.values(), prices.keys()))  # zip yields (value, key) pairs

# key-views support set operations
d1 = {"x": 1, "y": 2}
d2 = {"y": 3, "z": 4}
d1.keys() - d2.keys()  # => {'x'}
d1.keys() & d2.keys()  # => {'y'}
d1.keys() | d2.keys()  # => {'x', 'y', 'z'}

# boolean selectors
from itertools import compress

alist = list(range(10))
list(compress(alist, [i % 2 == 0 for i in alist]))  # => [0, 2, 4, 6, 8]
1.1.1 others
- slice, itertools.islice
- collections.Counter
- from operator import itemgetter, attrgetter
- itertools.groupby
- collections.ChainMap
1.2 PriorityQueue Implementation
import heapq


class PriorityQueue:
    """Queue that always pops the item with the highest priority.

    Items that share a priority come out in the order they were pushed.
    """

    def __init__(self):
        self._queue = []
        # Insertion counter: orders items with the same priority level and
        # keeps heapq from ever having to compare the items themselves.
        self._index = 0

    def push(self, item, priority):
        """Add item with the given priority (larger values are popped first)."""
        # heapq is a min-heap, so the priority is negated.
        heapq.heappush(self._queue, (-priority, self._index, item))
        self._index += 1

    def pop(self):
        """Remove and return the highest-priority item.

        Raises:
            IndexError: if the queue is empty.
        """
        return heapq.heappop(self._queue)[-1]
2 String Manipulation
2.1 Quick Summary
import re
from fnmatch import fnmatch

# splitting on several delimiters at once
# (for simple cases, chained str.split calls are faster than a regex)
re.split(r"[./]", "btc.usd/coinbase")  # => ['btc', 'usd', 'coinbase']

# shell-like matching
fnmatch("foo.txt", "*.txt")  # => True

# searching & replacing
# str.find, re.findall, re.match
# str.replace, re.sub
# Case-Insensitive Option: flags=re.IGNORECASE

# For the Shortest Match
text = 'Computer says "no." Phone says "yes."'
str_pat = re.compile(r"\"(.*)\"")  # default: greedy regular expression
str_pat.findall(text)  # => ['no." Phone says "yes.']
str_pat = re.compile(r"\"(.*?)\"")  # nongreedy: produces the shortest match instead
str_pat.findall(text)  # => ['no.', 'yes.']

# Aligning
# str.ljust, str.rjust, str.center
# f"{text:<20}", f"{text:>20}", f"{text:^20}"
text = "Hello World"
ftext = f"{text:=>20}"  # => "=========Hello World"
ftext = f"{text:*^20}"  # => "****Hello World*****"

# Byte Strings
bstr = b"Hello World"
barray = bytearray(b"Hello World")
bstr.decode("ascii")  # => "Hello World"
2.1.1 others
- The textwrap module is a straightforward way to clean up text for printing: textwrap.fill(s, 70, initial_indent=" ")
2.2 Tokenizing Text
Tokenizing is often the first step for more advanced kinds of text parsing and handling. https://docs.python.org/3/library/re.html#writing-a-tokenizer
import re

text = "foo = 23 + 42 * 10"
# expected results
tokens = [
    ("NAME", "foo"),
    ("EQ", "="),
    ("NUM", "23"),
    ("PLUS", "+"),
    ("NUM", "42"),
    ("TIMES", "*"),
    ("NUM", "10"),
]

NAME = r"(?P<NAME>[a-zA-Z_][a-zA-Z_0-9]*)"
NUM = r"(?P<NUM>\d+)"
PLUS = r"(?P<PLUS>\+)"
TIMES = r"(?P<TIMES>\*)"
EQ = r"(?P<EQ>=)"
WS = r"(?P<WS>\s+)"
# the ?P<TOKENNAME> convention is used to assign a name to the pattern
master_pat = re.compile("|".join([NAME, NUM, PLUS, TIMES, EQ, WS]))

for mo in re.finditer(master_pat, "foo = 42"):
    print(mo.lastgroup, mo.group())  # NAME foo, WS  , EQ =, ...


def tokenize(text, pattern=master_pat):
    """Yield (token_name, lexeme) pairs found in text, skipping whitespace.

    Args:
        text: the string to scan.
        pattern: compiled regex whose alternatives are named groups; the
            name of the group that matched becomes the token name.
    """
    for mo in pattern.finditer(text):
        if mo.lastgroup != "WS":
            yield mo.lastgroup, mo.group()


assert list(tokenize(text)) == tokens
3 TODO Numbers
3.1 round
round(1.29, 1) # => 1.3 round(1245, -1) # => 1240 round(1275, -1) # => 1280
3.2 Decimal
from decimal import Decimal, localcontext a = Decimal('6.32') b = Decimal('2.41') with localcontext() as ctx: ctx.prec = 5 print(a/b) # 2.6224
3.3 Formatting
x = 1234.56789
format(x, '0.2f')    # => '1234.57' (rounded)
format(x, '>10.1f')  # => '    1234.6' (right-aligned in 10 columns)
format(x, '0,.1f')   # => '1,234.6'

x = 1234
bin(x)  # others: oct, hex
# => '0b10011010010'
format(x, 'b')  # others: o, x
# => '10011010010' (same digits, no '0b' prefix)
int('10011010010', 2)  # => 1234
3.4 Bin, Oct, Hex Int
x = -1234 format(x, 'b') #=> '-10011010010' format(x, 'x') #=> '-4d2' format(2**32 + x, 'b') #=> '11111111111111111111101100101110' format(2**32 + x, 'x') #=> 'fffffb2e' int('4d2', 16) #=> 1234 int('10011010010', 2) #=> 1234
3.5 Bytes2Int
data = b'\x00\x124V\x00x\x90\xab\x00\xcd\xef\x01\x00#\x004' x = int.from_bytes(data, 'little') # or 'big x = 94522842520747284487117727783387188 x.to_bytes(16, 'little')
Useful in cryptography or networking domains. See also:
- the struct module
- int.bit_length()
3.6 Complex Math
a = complex(2, 4) b = 3 - 5j a.conjugate() #=> (2-4j) abs(a) #=> 4.47213595499958 a * b #=> (26+2j) import cmath cmath.sin(a) #=> (24.83130584894638-11.356612711218174j) import numpy as np a = np.array([2 + 3j, 4 + 5j, 6 - 7j, 8 + 9j]) np.sin(a)
3.7 random
- random.choice
- random.sample
- random.shuffle
- random.randint
- random.random: 0 to 1
- random.getrandbits
3.7.1 seed
random.seed() # Seed based on system time or os.urandom() random.seed(12345) # Seed based on integer given random.seed(b'bytedata') # Seed based on byte data
3.7.2 distribution
- random.uniform
- random.gauss
3.8 math.f***
- math.fsum
- math.fmod
- math.fabs
4 Datetime
4.1 Finding Last Friday
from datetime import datetime

from dateutil.relativedelta import relativedelta
from dateutil.rrule import FR

d = datetime.now()
# FR(-1) asks relativedelta for the last Friday relative to d.
# NOTE(review): if d is itself a Friday this appears to return d unchanged — confirm against the dateutil docs.
print(d + relativedelta(weekday=FR(-1)))
4.2 Timezone
from datetime import datetime

import pytz

d = datetime.now()  # no timezone info
print(d)  # => 2018-12-21 17:14:01.258941

shanghai = pytz.timezone('Asia/Shanghai')
loc_d = shanghai.localize(d)  # Localize the date for Shanghai
print(loc_d)  # => 2018-12-21 17:14:01.258941+08:00

# Once the date has been localized, it can be converted to other time zones
utc_d = loc_d.astimezone(pytz.utc)
print(utc_d)  # => 2018-12-21 09:14:01.258941+00:00
- datetime.replace
- datetime.astimezone
5 Iterator
5.1 Manually Consuming an Iterator
iterable = iter(range(5))  # Invokes range.__iter__()
try:
    while True:
        line = next(iterable)  # Invokes iterable.__next__()
        print(line, end='')
except StopIteration:
    pass

# non exception version
iterable = iter(range(5))  # fresh iterator: the one above is already exhausted
while True:
    # The second argument is returned instead of raising StopIteration.
    # Only safe when None can never be a real item.
    line = next(iterable, None)
    if line is None:
        break
    print(line, end='')
- Python's iterator protocol requires __iter__() to return a special iterator object that implements a __next__() method to carry out the actual iteration.
5.2 Iterating Over Multi Sequences
a = [1, 2, 3] b = ['w', 'x', 'y', 'z'] for i in zip(a, b): print(i) #=> (1, 'w') (2, 'x') (3, 'y') from itertools import zip_longest for i in zip_longest(a, b): print(i) #=> (1, 'w') (2, 'x') (3, 'y') (None, 'z')
5.3 dropwhile
Drop all of the initial comment lines.
5.4 Permutation & Combination
combinations,permutations,combinations_with_replacement
5.5 itertools.chain
Concatenate two iterables(copy-free)
5.6 Data Processing Pipelines
5.7 Flattening a Nested Sequence
from collections.abc import Iterable  # collections.Iterable was removed in Python 3.10


def flatten(items, ignore_types=(str, bytes)):
    """Yield the leaf values of an arbitrarily nested iterable, depth first.

    Args:
        items: iterable whose elements may themselves be iterables.
        ignore_types: iterable types to treat as single values rather than
            descend into. str and bytes are excluded by default so text is
            not split into characters.

    Yields:
        Every non-iterable (or ignored) element, in left-to-right order.
    """
    for x in items:
        if isinstance(x, Iterable) and not isinstance(x, ignore_types):
            # Pass ignore_types down so nested levels honour the caller's choice.
            yield from flatten(x, ignore_types)
        else:
            yield x


items = ['Dave', 'Paula', ['Thomas', 'Lewis']]
for x in flatten(items):
    print(x)
5.8 Merge Two Sorted Iterables
import heapq a = [1, 4, 7, 10] b = [2, 5, 6, 11] for c in heapq.merge(a, b): print(c)
6 I/O
6.1 Encoding
with open('somefile.txt', 'rt', encoding='latin-1') as f: ...
latin-1 encoding is notable in that it will never produce a decoding error when reading text of a possibly unknown encoding.
# Replace bad chars with Unicode U+fffd replacement char open('sample.txt', 'rt', encoding='ascii', errors='replace') # Ignore bad chars entirely open('sample.txt', 'rt', encoding='ascii', errors='ignore')
6.2 readinto
import array a = array.array('i', [0, 0, 0, 0, 0, 0, 0, 0]) with open('data.bin', 'rb') as f: f.readinto(a)
readinto() fills the contents of an existing buffer
- One caution with using f.readinto() is that you must always make sure to check its return code, which is the number of bytes actually read.
6.3 io.StringIO, io.BytesIO
6.4 gzip.open, bz2.open
6.5 Iterating Over Fixed-Sized Records
from functools import partial RECORD_SIZE = 32 with open('somefile.data', 'rb') as f: records = iter(partial(f.read, RECORD_SIZE), b'') for r in records: ...
6.6 In-memory Modification
6.6.1 mmap
Use the mmap module to memory map files for random access to its contents or to make in-place modifications.
mmap can also be used to exchange data between interpreters.
6.6.2 memoryview
buf = bytearray(b'Hello World') m1 = memoryview(buf) m2 = m1[-5:] #m2=> <memory at 0x100681390> m2[:] = b'WORLD' #buf=> bytearray(b'Hello WORLD')
6.7 os.path
os.path.basename(path) os.path.dirname(path) os.path.expanduser(path) os.path.splitext(path) # Split the file extension os.path.exists(path) os.path.isfile(path) # isdir, islink os.path.realpath('/usr/local/bin/python3') # => '/usr/local/bin/python3.3' os.path.getsize() # getmtime os.listdir(dir)
- other module:
glob,fnmatchused for filename matching
6.8 Changing Encoding of a File
import io # decode a binary file with open('some_binary_file.bin', 'rb') as open_file: fs = io.TextIOWrapper(open_file, encoding='utf8') text = fs.read() # change encoding import sys sys.stdout.encoding #=> 'UTF-8' # use its detach() method to remove the existing text encoding layer before replacing it with a new one sys.stdout = io.TextIOWrapper(sys.stdout.detach(), encoding='latin-1') sys.stdout.encoding #=> 'latin-1'
- layers on I/O:
f = open('sample.txt', 'w') # a text-handling layer that encodes and decodes Unicode f # => <_io.TextIOWrapper name='sample.txt' mode='w' encoding='UTF-8'> # a buffered I/O layer that handles binary data f.buffer # => <_io.BufferedWriter name='sample.txt'> f.buffer.write(b'hello\n') # write bytes to a text file # io.FileIO is a raw file representing the low-level file descriptor in the operating system f.buffer.raw # => <_io.FileIO name='sample.txt' mode='wb'>
detach: disconnects the topmost layer of a file and returns the next lower layer.
6.9 File Descriptor
# Create a file object, but don't close underlying fd when done f = open(fd, 'wt', closefd=False) def echo_client(client_sock, addr): print('Got connection from', addr) # Make text-mode file wrappers for socket reading/writing, only works on Unix-based systems # Use the makefile() method of sockets instead to be cross platform client_in = open(client_sock.fileno(), 'rt', encoding='latin-1', closefd=False) client_out = open(client_sock.fileno(), 'wt', encoding='latin-1', closefd=False) # Echo lines back to the client using file I/O for line in client_in: client_out.write(line) client_out.flush() client_sock.close()
6.10 Temporary Files
from tempfile import TemporaryFile, NamedTemporaryFile, TemporaryDirectory with TemporaryFile('w+t', encoding='utf-8', errors='ignore') as f: f.write('Hello World\n') with NamedTemporaryFile( 'w+t', delete=False, prefix='mytemp', suffix='.txt', dir='/tmp') as f: print('filename is:', f.name) #=> /tmp/mytemp2tmz4nl5.txt with TemporaryDirectory() as dirname: print('dirname is:', dirname)
6.11 Serializing Python Objects
pickle is a Python-specific self-describing data encoding
6.11.1 Dealing with Multiple Objects
import pickle with open('somedata', 'wb') as fs: pickle.dump([1, 2, 3, 4], fs) pickle.dump('hello', fs) with open('somedata', 'rb') as fs: pickle.load(fs) # => [1, 2, 3, 4] pickle.load(fs) # => hello
6.11.2 Safety
pickle.load() should never be used on untrusted data
6.11.3 User-defined Classes
Certain kinds of objects can’t be pickled. These are typically objects that involve some sort of external system state, such as open files,
open network connections, threads, processes, stack frames, and so forth. User-defined classes can sometimes work around these limitations
by providing __getstate__() and __setstate__() methods
pickle.dump()will call__getstate__()to get an object that can be pickled
7 Encoding
7.1 csv
7.1.1 reader
from collections import namedtuple import re import csv with open('stock.csv') as f: f_csv = csv.reader(f) headings = next(f_csv) Row = namedtuple('Row', headings) for r in f_csv: row = Row(*r) # Process row
7.1.2 DictReader
import csv with open('stocks.csv') as f: f_csv = csv.DictReader(f) for row in f_csv: # process row ...
7.1.3 writer
writer.writerowandwriter.writerows
7.1.4 DictWriter
writer.writeheaderandwriter.writerows
7.2 json2object
- use
object_pairs_hookandobject_hookoptions
import json
from collections import OrderedDict

s = '{"name": "ACME", "shares": 50, "price": 490.1}'
data = json.loads(s, object_pairs_hook=OrderedDict)
# data => OrderedDict([('name', 'ACME'), ('shares', 50), ('price', 490.1)])


class JSONObject:
    """Expose the keys of a decoded JSON object as instance attributes."""

    def __init__(self, d):
        # The decoded dict becomes the instance namespace itself.
        self.__dict__ = d

    def __str__(self):
        return str(self.__dict__)


obj = json.loads(s, object_hook=JSONObject)
obj.name = 'def'
json.dumps(vars(obj))  # vars(obj) same as obj.__dict__
# or
json.dumps(obj, default=vars)  # use vars as a serializing function
7.3 xml
from io import StringIO
from xml.etree.ElementTree import fromstring, parse

xml_str = '<stocks><stock symbol="ACME">490.1</stock></stocks>'

# parse() expects a filename or a file object, not the XML text itself,
# so in-memory text has to be wrapped in a file-like object first.
doc = parse(StringIO(xml_str))  # => ElementTree
# fromstring() takes the text directly and returns the root Element.
root = fromstring(xml_str)
lxml- for huge xml:
Recipe 6.4 - more:
Recipe 6.3~6.7
7.4 hex encoding
binascii
import binascii s = b'hello' h = binascii.b2a_hex(s) # bytes2hexbytes b'68656c6c6f' b = binascii.a2b_hex(h) # hexbytes2bytes
base64
import base64 s = b'hello' h = base64.b16encode(s) # b'68656C6C6F' uppercase b = base64.b16decode(h)
7.5 base64
base64.b64encodebase64.b64decode
7.6 struct
from struct import Struct
from typing import BinaryIO, Iterable, Iterator, Tuple


def write_records(records: Iterable[tuple], format: str, f: BinaryIO) -> None:
    """Pack every record with the struct format and write it to f.

    Args:
        records: iterable of tuples, each matching the fields of format.
        format: struct format string, e.g. '<idd'.
        f: binary file-like object opened for writing.
    """
    record_struct = Struct(format)  # compile the format once, not per record
    for r in records:
        f.write(record_struct.pack(*r))


def read_records(format: str, f: BinaryIO) -> Iterator[Tuple]:
    """Lazily read fixed-size records from f.

    Args:
        format: struct format string the records were written with.
        f: binary file-like object opened for reading.

    Returns:
        A generator of unpacked tuples, one per record.
    """
    record_struct = Struct(format)
    # iter(callable, sentinel): keep reading size-byte chunks until f.read
    # returns b'' at end of file.
    chunks = iter(lambda: f.read(record_struct.size), b'')
    # Generator expression: nothing is read until the caller iterates.
    return (record_struct.unpack(chunk) for chunk in chunks)
- more to explore
Recipe 6.12
8 Functions
8.1 Keyword-only Arguments
def recv(maxsize, *, block):
    """Stub showing a keyword-only argument: block cannot be passed positionally."""
    pass


try:
    recv(1024, True)  # TypeError: everything after the bare * is keyword-only
except TypeError:
    pass
recv(1024, block=True)  # OK
8.2 Capture Variables
x = 10
# A default value is evaluated once, when the lambda is defined,
# so later rebinding of the global x does not affect it.
a = lambda y, x=x: x + y  # use x=x to bind at definition time
x = 20
a(5)  # => 15
8.3 Replace Single-method Classes with Closures
8.4 Callback Shared State
Recipe 7.10 four ways:
- single-method class
- closure
- coroutine: use coroutine.send as callback
- use
functools.partial
9 Class
9.1 String Representation
- __repr__: returns the code representation of an instance, and is usually the text you would type to recreate the instance, so that eval(repr(x)) == x.
- __str__: converts the instance to a string.
9.2 __format__
from datetime import date

_formats = {
    'ymd': '{d.year}-{d.month}-{d.day}',
    'mdy': '{d.month}/{d.day}/{d.year}',
    'dmy': '{d.day}/{d.month}/{d.year}',
}


class Date:
    """Simple date whose format() output is chosen by a code from _formats."""

    def __init__(self, year, month, day):
        self.year = year
        self.month = month
        self.day = day

    def __format__(self, code):
        """Render the date using the template registered under code.

        An empty code (plain format(d) or '{}') falls back to 'ymd'.

        Raises:
            KeyError: if code is not a key of _formats.
        """
        if code == '':
            code = 'ymd'
        return _formats[code].format(d=self)


# datetime.date has its own __format__ and never consults _formats,
# so the custom codes only work on the Date class above.
today = date.today()
d = Date(today.year, today.month, today.day)
format(d, 'mdy')                 # e.g. '12/21/2012'
'The date is {:ymd}'.format(d)   # e.g. 'The date is 2012-12-21'
9.3 Context Management
class Connection: def __enter__(self): self.fs = open('somefile.txt', 'rt') return self.fs def __exit__(self, exc_ty, exc_val, tb): self.fs.close() self.fs = None
9.4 Saving Memory __slots__
- Instances are built around a small fixed-sized array instead of a dictionary.
- A side effect of using slots is that it is no longer possible to add new attributes to instances.
class Date: __slots__ = ['year', 'month', 'day'] def __init__(self, year, month, day): self.year = year self.month = month self.day = day
9.5 Properties
class Object:
    """Minimal managed attribute with a getter, a setter and a deleter."""

    @property
    def attr(self):
        # Raises AttributeError if attr has never been assigned.
        return self.__attr

    @attr.setter
    def attr(self, value):
        self.__attr = value

    @attr.deleter
    def attr(self):
        # Runs on `del obj.attr`; the value is reset to None rather than removed.
        self.__attr = None
9.5.1 Extending a Property
class SubPerson(Person): @property def name(self): print('Getting name') return super().name @name.setter def name(self, value): print('Setting name to', value) # the only way to get to setter method is to access it as a class variable super(SubPerson, SubPerson).name.__set__(self, value) @name.deleter def name(self): print('Deleting name') super(SubPerson, SubPerson).name.__delete__(self)
Extending only
gettermethodclass SubPerson(Person): @Person.name.getter def name(self): print('Getting name') return super().name
9.6 super()
- To avoid double-invocation when involving multiple inheritance.
- Use
__mro__to see method resolution order.
9.6.1 MRO
The actual determination of the MRO list itself is made using a technique known as C3 Linearization.
- Child classes get checked before parents.
- Multiple parents get checked in the order listed.
- If there are two valid choices for the next class, pick the one from the first parent.
When you use the super() function, Python continues its search starting with the next class on the MRO. See Chapter 8.7 More details.
- Hint:
super(MyClass, self).__init__()provides the next__init__method according to the used Method Resolution Ordering(MRO)
9.6.2 Multiple Inheritance with Different Arguments to Constructors
class A: def __init__(self, a, **kw): super().__init__(**kw) print('A a', a) class B: def __init__(self, b, c=0, **kw): super().__init__(**kw) print('B b', b) print('B c', c) class C(A, B): def __init__(self, a, b, c, d): super().__init__(a=a, b=b, c=c) print('C d', d)
or
class C(A, B): def __init__(self, a, b, c): A.__init__(self, a) B.__init__(self, b, c) # should be careful with double-invocation
9.7 Descriptor
Use Descriptor to create a new kind of instance attribute with some extra functionality, such as type checking.
Descriptors provide the underlying magic for most of Python’s class features, such as @classmethod, @staticmethod, @property.
__get__(self, instance, cls)__set__(self, instance, value)__delete__(self, instance)
class String:
    """Descriptor that only lets str values be stored in the managed attribute.

    The value lives in the instance __dict__ under self.name.
    """

    def __init__(self, name=None):
        self.name = name

    def __set_name__(self, owner, name):
        # Called when the descriptor is assigned in a class body; lets
        # `attr = String()` pick up its own name when none was given.
        if self.name is None:
            self.name = name

    def __get__(self, instance, cls):
        if instance is None:  # called as class variable
            return self
        return instance.__dict__[self.name]

    def __set__(self, instance, value):
        if not isinstance(value, str):
            raise TypeError('Expected a string')
        instance.__dict__[self.name] = value

    def __delete__(self, instance):
        del instance.__dict__[self.name]


class Person:
    name = String('name')

    def __init__(self, name):
        self.name = name  # goes through String.__set__
9.7.1 Advanced Usage
# Descriptor for a type-checked attribute
class Typed:
    """Store a value in the instance __dict__ only if it has the expected type."""

    def __init__(self, name, expected_type):
        self.name = name
        self.expected_type = expected_type

    def __get__(self, instance, cls):
        if instance is None:  # accessed on the class itself
            return self
        return instance.__dict__[self.name]

    def __set__(self, instance, value):
        if not isinstance(value, self.expected_type):
            raise TypeError('Expected ' + str(self.expected_type))
        instance.__dict__[self.name] = value

    def __delete__(self, instance):
        del instance.__dict__[self.name]


# Class decorator that applies it to selected attributes
def typeassert(**kwargs):
    """Return a class decorator that installs a Typed descriptor per keyword.

    Args:
        **kwargs: attribute name -> expected type.
    """
    def decorate(cls):
        for name, expected_type in kwargs.items():
            # Attach a Typed descriptor to the class
            setattr(cls, name, Typed(name, expected_type))
        return cls
    return decorate


# Example use
@typeassert(name=str, shares=int, price=float)
class Stock:
    def __init__(self, name, shares, price):
        self.name = name
        self.shares = shares
        self.price = price
9.7.2 Lazy Properties
Define a read-only attribute as a property that only gets computed on access.
import math


class lazyproperty:
    """Decorator: compute an attribute on first access, then cache it."""

    def __init__(self, func):
        self.func = func

    def __get__(self, instance, cls):
        if instance is None:  # accessed on the class itself
            return self
        value = self.func(instance)
        # Store the result under the same name in the instance. This class
        # defines no __set__, so the instance attribute takes precedence and
        # __get__ is not called again for this instance.
        setattr(instance, self.func.__name__, value)
        return value


class Circle:
    def __init__(self, radius):
        self.radius = radius

    @lazyproperty
    def area(self):
        print('Computing area')
        return math.pi * self.radius**2
9.7.3 Data Model
# Base class. Uses a descriptor to set a value
class Descriptor:
    """Store the value in the instance __dict__ under self.name.

    Extra keyword options become attributes of the descriptor itself.
    """

    def __init__(self, name=None, **opts):
        self.name = name
        for key, value in opts.items():
            setattr(self, key, value)

    def __set__(self, instance, value):
        instance.__dict__[self.name] = value


# Descriptor for enforcing types
class Typed(Descriptor):
    expected_type = type(None)

    def __set__(self, instance, value):
        if not isinstance(value, self.expected_type):
            raise TypeError('expected ' + str(self.expected_type))
        super().__set__(instance, value)


# Descriptor for enforcing values
class Unsigned(Descriptor):
    def __set__(self, instance, value):
        if value < 0:
            raise ValueError('Expected >= 0')
        super().__set__(instance, value)


# Descriptor for enforcing a maximum length; requires a size=... option
class MaxSized(Descriptor):
    def __init__(self, name=None, **opts):
        if 'size' not in opts:
            raise TypeError('missing size option')
        super().__init__(name, **opts)

    def __set__(self, instance, value):
        if len(value) >= self.size:
            raise ValueError('size must be < ' + str(self.size))
        super().__set__(instance, value)


# Concrete checkers are built by mixing the pieces; super() chains the
# __set__ calls along the MRO, so every check runs before the value is stored.
class Integer(Typed):
    expected_type = int


class UnsignedInteger(Integer, Unsigned):
    pass


class Float(Typed):
    expected_type = float


class UnsignedFloat(Float, Unsigned):
    pass


class String(Typed):
    expected_type = str


class SizedString(String, MaxSized):
    pass
- Simplify the Specification by Class Decorator
# Class decorator to apply constraints def check_attributes(**kwargs): def decorate(cls): for key, value in kwargs.items(): if isinstance(value, Descriptor): value.name = key setattr(cls, key, value) else: setattr(cls, key, value(key)) return cls return decorate # Example @check_attributes(name=SizedString(size=8), shares=UnsignedInteger, price=UnsignedFloat) class Stock: def __init__(self, name, shares, price): self.name = name self.shares = shares self.price = price
- Simplify the Specification by Metaclass
# A metaclass that applies checking class checkedmeta(type): def __new__(cls, clsname, bases, methods): # Attach attribute names to the descriptors for key, value in methods.items(): if isinstance(value, Descriptor): value.name = key return type.__new__(cls, clsname, bases, methods) # Example class Stock(metaclass=checkedmeta): name = SizedString(size=8) shares = UnsignedInteger() # no need to give a name price = UnsignedFloat() def __init__(self, name, shares, price): self.name = name self.shares = shares self.price = price
- Decorator Version(Preferred Approach)
# Base class. Uses a descriptor to set a value
class Descriptor:
    """Store the value in the instance __dict__ under self.name."""

    def __init__(self, name=None, **opts):
        self.name = name
        for key, value in opts.items():
            setattr(self, key, value)

    def __set__(self, instance, value):
        instance.__dict__[self.name] = value


# Decorator for applying type checking
def Typed(expected_type, cls=None):
    """Class decorator that wraps cls.__set__ with an isinstance check.

    Used as @Typed(int): the first call has no cls yet, so it returns a
    function that waits for the class.
    """
    if cls is None:
        return lambda cls: Typed(expected_type, cls)

    super_set = cls.__set__  # the __set__ in effect before patching

    def __set__(self, instance, value):
        if not isinstance(value, expected_type):
            raise TypeError('expected ' + str(expected_type))
        super_set(self, instance, value)

    cls.__set__ = __set__
    return cls


@Typed(int)
class Integer(Descriptor):
    pass
9.8 Simplifying Initialization
class Structure:
    """Base class providing a generic __init__ driven by the _fields list."""

    # Class variable that specifies expected fields
    _fields = []

    def __init__(self, *args):
        if len(args) != len(self._fields):
            raise TypeError('Expected {} arguments'.format(len(self._fields)))

        # Set the arguments
        for name, value in zip(self._fields, args):
            setattr(self, name, value)


class Stock(Structure):
    _fields = ['name', 'shares', 'price']
- Downside: documentation and help features of IDEs. It can be solved by attaching or enforcing a type signature
9.9 Abstract Base Class
- Define an abc class:
class AbstractBase(metaclass=abc.ABCMeta) - Using
registerto bind other class which is already defined @abstractmethodcan work with@staticmethod,@classmethod
9.10 Implementing Custom Containers
Container:__contains__Iterable:__iter__Sized:__len__Sequence:__getitem__,__len__MutableSequence:__delitem__,__getitem__,__len__,__setitem__,insert
9.11 Proxy Class
Implement __getattr__, __setattr__, __delattr__
__getattr__method is actually a fallback method that only gets called when an attribute is not found.
9.12 Multiple Constructors
- Use
@classmethod, example:Date.today - Use
cls.__new__(cls)to create instance without initialization.
9.13 Mixin Class
- To enhance the functionality of existing classes with optional features.
- See Recipe 8.18, example:
ThreadedXMLRPCServer(ThreadingMixIn, SimpleXMLRPCServer) - Mixin classes are never meant to be instantiated directly.
- Mixin classes typically have no state of their own(not a restriction)
- Use
__slots__ = ()to serve as a strong hint that the mixin classes do not have their own instance data. - Use class decorator to patch method(preferred approach)
9.14 State Machine
Based on state design pattern. See Recipe 8.19, should use __class__
9.15 Calling a Method by Name
getattroperator.methodcaller()
9.16 Visitor Pattern
Recipe 8.21
9.16.1 Without Recursion
Recipe 8.22
- Use stack (like depth-first traversal) and generator
import types


class Node:
    pass


class NodeVisitor:
    """Visitor that walks a tree with an explicit stack instead of recursion.

    A visit_* method may be a generator that yields child nodes; the value
    computed for a child is sent back into the generator.
    """

    def visit(self, node):
        """Evaluate node and return the last result produced."""
        stack = [node]
        last_result = None
        while stack:
            try:
                last = stack[-1]
                if isinstance(last, types.GeneratorType):
                    # Resume the generator with the previous result and
                    # push whatever it yields next.
                    stack.append(last.send(last_result))
                    last_result = None
                elif isinstance(last, Node):
                    # Replace the node with the outcome of its visit method.
                    stack.append(self._visit(stack.pop()))
                else:
                    # A plain value: it becomes the result handed to the item below.
                    last_result = stack.pop()
            except StopIteration:
                # The generator on top of the stack has finished.
                stack.pop()
        return last_result

    def _visit(self, node):
        """Dispatch to visit_<ClassName>, falling back to generic_visit."""
        methname = 'visit_' + type(node).__name__
        meth = getattr(self, methname, None)
        if meth is None:
            meth = self.generic_visit
        return meth(node)

    def generic_visit(self, node):
        raise RuntimeError('No {} method'.format('visit_' + type(node).__name__))
9.17 Comparison
__le__, __ge__, __lt__, __gt__, __eq__
9.18 weakref
9.18.1 Avoid Cyclic Reference
weakref.ref
9.18.2 Cache Instances(like logging.getLogger)
Use weakref.WeakValueDictionary to store instances as weak reference
10 Metaprogramming
10.1 functools.wraps
- Preserves function metadata: __name__, __doc__, __annotations__.
- The original function is available in __wrapped__ (hint: @classmethod and @staticmethod store the original function in __func__).
10.2 Decorator with get/set
from functools import partial


def attach_wrapper(obj, func=None):
    """Attach func to obj as an attribute named after func.

    Works both as attach_wrapper(obj, func) and as the decorator
    @attach_wrapper(obj).

    Returns:
        func unchanged, or a one-argument decorator when func is omitted.
    """
    if func is None:
        # Decorator form: remember obj and wait for the function.
        return partial(attach_wrapper, obj)
    setattr(obj, func.__name__, func)
    return func
10.3 Decorator with Optional Arguments
example:
import logging
from functools import partial, wraps


def logged(func=None, *, level=logging.DEBUG, name=None, message=None):
    """Decorator that logs a message on every call of the wrapped function.

    Usable bare (@logged) or with options (@logged(level=..., name=...)).

    Args:
        func: the function to wrap; None when called with options only.
        level: logging level of the emitted record.
        name: logger name; defaults to the function's module.
        message: log message; defaults to the function's name.
    """
    if func is None:
        # Called with options only: return a decorator that remembers them.
        return partial(logged, level=level, name=name, message=message)

    logname = name if name else func.__module__
    log = logging.getLogger(logname)
    logmsg = message if message else func.__name__

    @wraps(func)
    def wrapper(*args, **kwargs):
        log.log(level, logmsg)
        return func(*args, **kwargs)

    return wrapper
10.4 Type Checking Decorator
from inspect import signature
from functools import wraps


def typeassert(*ty_args, **ty_kwargs):
    """Decorator factory enforcing argument types at call time.

    Types are given in the same positions / under the same names as the
    parameters of the decorated function; parameters without a type are
    not checked.

    Raises (from the wrapped call):
        TypeError: if a checked argument is not an instance of its type.
    """
    def decorate(func):
        # If in optimized mode, disable type checking
        if not __debug__:
            return func

        # Map function argument names to supplied types
        sig = signature(func)
        bound_types = sig.bind_partial(*ty_args, **ty_kwargs).arguments

        @wraps(func)
        def wrapper(*args, **kwargs):
            bound_values = sig.bind(*args, **kwargs)
            # Enforce type assertions across supplied arguments
            for name, value in bound_values.arguments.items():
                if name in bound_types:
                    if not isinstance(value, bound_types[name]):
                        raise TypeError('Argument {} must be {}'.format(
                            name, bound_types[name]))
            return func(*args, **kwargs)

        return wrapper

    return decorate


@typeassert(int, z=int)
def add(x, y, z=42):
    return x + y + z
10.5 Decorator as Functional Class
import types
from functools import wraps


class Profiled:
    """Class-based decorator that counts calls in the ncalls attribute."""

    def __init__(self, func):
        # Copies func's metadata onto this instance and sets __wrapped__ to func.
        wraps(func)(self)
        self.ncalls = 0

    def __call__(self, *args, **kwargs):
        self.ncalls += 1
        return self.__wrapped__(*args, **kwargs)

    def __get__(self, instance, cls):
        # Makes the decorator usable on methods: bind this callable to the
        # instance so it is passed along as the first argument.
        if instance is None:
            return self
        return types.MethodType(self, instance)


@Profiled
def add(x, y):
    return x + y


add(4, 5)
print(add.ncalls)
10.6 inspect
- signature
- getfullargspec (getargspec was removed in Python 3.11)
- Parameter
10.7 Using Decorators to Patch Class Definitions
from functools import wraps


def patch(cls):
    """Class decorator that wraps cls.method with a replacement.

    Returns:
        cls itself. Without the return, `@patch` would rebind the class
        name to None.
    """
    orig_method = cls.method

    @wraps(orig_method)  # keep the original name and docstring
    def new_method(self, *args, **kwargs):
        # Add behaviour before/after here; this version only forwards the call.
        return orig_method(self, *args, **kwargs)

    cls.method = new_method
    return cls
10.8 Enforcing an Argument Signature on *args and **kwargs
10.8.1 Creating a Function Signature
from inspect import Signature, Parameter # Make a signature for a func(x, y=42, *, z=None) parms = [ Parameter('x', Parameter.POSITIONAL_OR_KEYWORD), Parameter('y', Parameter.POSITIONAL_OR_KEYWORD, default=42), Parameter('z', Parameter.KEYWORD_ONLY, default=None) ] sig = Signature(parms) print(sig) def func(*args, **kwargs): bound_values = sig.bind(*args, **kwargs)
10.8.2 Enforcing Function Signatures
from inspect import Signature, Parameter


def make_sig(*names):
    """Build a Signature of positional-or-keyword parameters from names."""
    parms = [Parameter(name, Parameter.POSITIONAL_OR_KEYWORD)
             for name in names]
    return Signature(parms)


class Structure:
    """Base class whose __init__ validates arguments against __signature__."""

    __signature__ = make_sig()  # inspect.signature will lookup __signature__

    def __init__(self, *args, **kwargs):
        # bind() raises TypeError on missing, extra or duplicated arguments.
        bound_values = self.__signature__.bind(*args, **kwargs)
        for name, value in bound_values.arguments.items():
            setattr(self, name, value)


class Stock(Structure):
    # The inherited __init__ is enough; only the signature changes.
    __signature__ = make_sig('name', 'shares', 'price')
10.8.3 Metaclass Approach
from inspect import Signature, Parameter


def make_sig(*names):
    """Build a Signature of positional-or-keyword parameters from names."""
    parms = [
        Parameter(name, Parameter.POSITIONAL_OR_KEYWORD) for name in names
    ]
    return Signature(parms)


class StructureMeta(type):
    """Metaclass that derives __signature__ from the class's own _fields."""

    def __new__(cls, clsname, bases, clsdict):
        # Only _fields declared in this class body are considered.
        clsdict['__signature__'] = make_sig(*clsdict.get('_fields', []))
        return super().__new__(cls, clsname, bases, clsdict)


class Structure(metaclass=StructureMeta):
    _fields = []  # must match the name StructureMeta reads

    def __init__(self, *args, **kwargs):
        bound_values = self.__signature__.bind(*args, **kwargs)
        for name, value in bound_values.arguments.items():
            setattr(self, name, value)
10.9 Parsing and Analyzing Python Source
eval('2 + 3*4 + x')
10.9.1 exec
exec('for i in range(10): print(i)')
def test1():
    x = 0
    # exec works on a copy of the local variables, so the real x is untouched.
    exec('x += 1')
    print(x)  # => 0


def test2():
    x = 0
    loc = locals()
    # Hand the dict to exec explicitly. Relying on exec picking up the same
    # dict that locals() returned stopped working in Python 3.13, where each
    # locals() call in a function returns an independent snapshot.
    exec('x += 1', globals(), loc)
    x = loc['x']  # read the result back out of the dict
    print(x)  # => 1
10.9.2 ast
compile Python source code into an abstract syntax tree(AST)
import ast ex = ast.parse('2 + 3*4 + x', mode='eval') ast.dump(ex) # "Expression(body=BinOp(left=BinOp(left=Num(n=2), op=Add(), right=BinOp(left=Num(n=3), op=Mult(), right=Num(n=4))), op=Add(), right=Name(id='x', ctx=Load())))"
10.9.3 Rewriting AST to Achieve Performance Improvement
- see 9.24
10.10 dis
dis.dis- disassembled code:
some_func.__code__.co_code
10.11 Simple namedtuple
import operator


class StructTupleMeta(type):
    """Metaclass that adds one read-only property per name in _fields."""

    def __init__(cls, *args, **kwargs):
        super().__init__(*args, **kwargs)
        for n, name in enumerate(cls._fields):
            # After f = itemgetter(2), the call f(r) returns r[2]
            setattr(cls, name, property(operator.itemgetter(n)))


class StructTuple(tuple, metaclass=StructTupleMeta):
    """Tuple subclass whose items are also reachable by field name."""

    _fields = []

    def __new__(cls, *args):
        if len(args) != len(cls._fields):
            raise ValueError('{} arguments required'.format(len(cls._fields)))
        # tuple.__new__ takes a single iterable, hence args is passed unexpanded.
        return super().__new__(cls, args)


class Stock(StructTuple):
    _fields = ['name', 'shares', 'price']


s = Stock('ACME', 50, 91.1)
10.12 Multimethod 9.20
10.13 Avoiding Repetitive Property
from functools import partial


def typed_property(name, expected_type):
    """Create a property that type-checks on assignment.

    The value is stored on the instance under '_' + name.

    Raises (on assignment):
        TypeError: if the value is not an instance of expected_type.
    """
    storage_name = '_' + name

    @property
    def prop(self):
        return getattr(self, storage_name)

    @prop.setter
    def prop(self, value):
        if not isinstance(value, expected_type):
            raise TypeError('{} must be a {}'.format(name, expected_type))
        setattr(self, storage_name, value)

    return prop


# Pre-bound factories so a class body only has to supply the name.
String = partial(typed_property, expected_type=str)
Integer = partial(typed_property, expected_type=int)


# Example use
class Person:
    name = String('name')
    age = Integer('age')

    def __init__(self, name, age):
        self.name = name
        self.age = age
10.14 Defining Context Manager the Easy Way
import time
from contextlib import contextmanager


@contextmanager
def timethis(label):
    """Context manager that prints how long the with-block took.

    Output format: '<label>: <elapsed seconds>'.
    """
    # perf_counter is meant for measuring intervals; time.time() follows the
    # wall clock and can jump if the system time is adjusted.
    start = time.perf_counter()
    try:
        yield
    finally:
        # finally: report the time even when the block raises.
        end = time.perf_counter()
        print('{}: {}'.format(label, end - start))


# Example use (guarded so importing this code does not run the loop)
if __name__ == '__main__':
    with timethis('counting'):
        n = 10000000
        while n > 0:
            n -= 1
- All of the code prior to the yield executes as the __enter__() method of a context manager. All of the code after the yield executes as the __exit__() method.
- If there was an exception, it is raised at the yield statement.
11 Metaclass
11.1 Basic
When writing metaclasses, it is somewhat common to only define a __new__() or __init__() method, but not both.
class MyMeta(type):
    def __new__(mcs, clsname, bases, clsdict):
        # mcs is the metaclass itself (a class object)
        # clsname is name of class being defined
        # bases is tuple of base classes
        # clsdict is class dictionary
        return super().__new__(mcs, clsname, bases, clsdict)
or
class MyMeta(type): def __init__(cls, clsname, bases, clsdict): super().__init__(clsname, bases, clsdict)
- __prepare__: is called first and used to create the class namespace prior to the body of any class definition being processed. Normally, this method simply returns a dictionary or other mapping object.
- __new__: is invoked prior to class creation and is typically used when a metaclass wants to alter the class definition in some way.
- __init__: is invoked after a class has been created, and is useful if you want to write code that works with the fully formed class object.
11.2 NoInstances
class NoInstances(type):
    """Metaclass whose classes cannot be instantiated."""

    def __call__(self, *args, **kwargs):
        # Calling a class goes through its metaclass's __call__, so raising
        # here blocks every attempt to create an instance.
        raise TypeError("Can't instantiate directly")


# Example
class Spam(metaclass=NoInstances):
    @staticmethod
    def grok(x):
        print('Spam.grok')
11.3 Singleton
class Singleton(type):
    """Metaclass that hands out one shared instance per class."""

    def __init__(self, *args, **kwargs):
        # No instance exists yet for the class being created.
        self.__instance = None
        super().__init__(*args, **kwargs)

    def __call__(self, *args, **kwargs):
        # Build the instance on the first call only; later calls reuse it
        # (their arguments are ignored).
        if self.__instance is None:
            self.__instance = super().__call__(*args, **kwargs)
        return self.__instance
11.4 Cached Instances
import weakref


class Cached(type):
    """Metaclass that reuses live instances created with the same arguments."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Weak values: an entry disappears once nothing else references the instance.
        self.__cache = weakref.WeakValueDictionary()

    def __call__(self, *args):
        # The positional-argument tuple is the cache key.
        existing = self.__cache.get(args)
        if existing is not None:
            return existing
        created = super().__call__(*args)
        self.__cache[args] = created
        return created
11.5 OrderedDict for Class Body
The `__prepare__()` method is invoked immediately at the start of a class definition with the class name and base classes. It must then return a mapping object to use when processing the class body.
# Metaclass that uses an OrderedDict for class body
class OrderedMeta(type):
    """Record the definition order of ``Typed`` attributes in ``_order``."""

    def __new__(cls, clsname, bases, clsdict):
        namespace = dict(clsdict)
        order = []
        for attr, member in clsdict.items():
            if not isinstance(member, Typed):
                continue
            # Tell the descriptor which attribute name it was bound to.
            member._name = attr
            order.append(attr)
        namespace['_order'] = order
        return type.__new__(cls, clsname, bases, namespace)

    @classmethod
    def __prepare__(cls, clsname, bases):
        # The class body is executed inside this mapping.
        return OrderedDict()
11.6 Optional Arguments on Class Definitions
# ``debug`` and ``synchronize`` are extra class keywords; the metaclass must
# accept them in its __prepare__(), __new__() and __init__() methods.
class Spam(metaclass=MyMeta, debug=True, synchronize=True): ...
To support such keyword arguments in a metaclass, make sure you define them on the
__prepare__(), __new__(), and __init__() methods using keyword-only arguments
class MyMeta(type):
    """Metaclass accepting the optional ``debug`` and ``synchronize`` class keywords."""

    # Optional
    @classmethod
    def __prepare__(cls, name, bases, *, debug=False, synchronize=False):
        """Return the namespace mapping used while the class body executes."""
        # Custom processing
        ...
        return super().__prepare__(name, bases)

    # Required
    def __new__(cls, name, bases, ns, *, debug=False, synchronize=False):
        """Create the class object; the extra keywords are consumed here."""
        # Custom processing
        ...
        return super().__new__(cls, name, bases, ns)

    # Required
    def __init__(self, name, bases, ns, *, debug=False, synchronize=False):
        """Initialise the freshly created class; the extra keywords are consumed here."""
        # Custom processing
        ...
        super().__init__(name, bases, ns)
12 Packages and Modules
12.1 Lazy Import
# __init__.py
def A():
    """Create an ``A`` instance, importing the ``.a`` submodule only on first use."""
    # The import runs when A() is called, not when the package is imported.
    from .a import A as _impl
    return _impl()
12.2 Organize Add-on Packages into a Common Package
# Directory layout: two separate directories each provide part of ``spam``.
# foo-package/
#     spam/
#         blah.py
# bar-package/
#     spam/
#         grok.py
import sys

# Put both directories on the module search path.
sys.path += ['foo-package', 'bar-package']

import spam.blah
import spam.grok
12.3 Read Data File
pkgutil.get_data
12.4 Import Hooks
- See
10.11,10.12
13 Network
13.1 socketserver
13.2 CIDR network address
import ipaddress

# A /27 network holds 32 addresses.
net = ipaddress.ip_network("123.45.67.64/27")
list(net)            # every address from 123.45.67.64 up to 123.45.67.95
net.num_addresses    # 32

# An interface is one host address together with the network it belongs to.
inet = ipaddress.ip_interface("123.45.67.73/27")
inet.network         # IPv4Network('123.45.67.64/27')
inet.ip              # IPv4Address('123.45.67.73')
13.3 Simple WSGI
WSGI: Web Server Gateway Interface. The same application code can run under different web servers and frameworks.
from wsgiref.simple_server import make_server


def application(environ, start_response):
    """Minimal WSGI application: answer every request with a fixed HTML greeting."""
    status = '200 OK'
    headers = [('Content-Type', 'text/html')]
    start_response(status, headers)
    body = b'<h1>Hello, web!</h1>'
    return [body]


# Serve the application with the reference WSGI server (blocks forever).
httpd = make_server('', 8000, application)
print('Serving HTTP on port 8000...')
httpd.serve_forever()
- environ: HTTP request info
- start_response: a function that must be called to initiate a response
- return: response body
13.3.1 PathDispatcher
from urllib.parse import parse_qs
from wsgiref.simple_server import make_server


def notfound_404(environ, start_response):
    """Fallback WSGI handler: reply ``404 Not Found`` with a plain-text body."""
    start_response('404 Not Found', [('Content-type', 'text/plain')])
    return [b'Not Found']


def _request_params(environ):
    """Collect request parameters from ``environ`` into a plain dict.

    Reads a form-encoded request body (if any) followed by the query string.
    A key that occurs once maps to a string; a repeated key maps to a list
    of strings.  ``multipart/form-data`` bodies are not parsed.
    """
    fields = {}
    content_type = environ.get('CONTENT_TYPE', '')
    if content_type.split(';')[0].strip().lower() == 'application/x-www-form-urlencoded':
        try:
            length = int(environ.get('CONTENT_LENGTH') or 0)
        except ValueError:
            length = 0
        if length > 0:
            body = environ['wsgi.input'].read(length).decode('utf-8', 'replace')
            for key, values in parse_qs(body).items():
                fields.setdefault(key, []).extend(values)
    for key, values in parse_qs(environ.get('QUERY_STRING', '')).items():
        fields.setdefault(key, []).extend(values)
    return {key: values[0] if len(values) == 1 else values
            for key, values in fields.items()}


class PathDispatcher:
    """WSGI application that routes ``(method, path)`` pairs to handlers."""

    def __init__(self):
        # Maps (lower-cased HTTP method, path) -> WSGI handler.
        self.pathmap = {}

    def __call__(self, environ, start_response):
        """Dispatch one request; unknown routes go to ``notfound_404``."""
        path = environ['PATH_INFO']
        method = environ['REQUEST_METHOD'].lower()
        # The cgi module (cgi.FieldStorage) was removed in Python 3.13, so the
        # supplied parameters are parsed with urllib.parse instead.
        environ['params'] = _request_params(environ)
        handler = self.pathmap.get((method, path), notfound_404)
        return handler(environ, start_response)

    def register(self, method, path, function):
        """Route ``method`` + ``path`` to ``function`` and return ``function``."""
        self.pathmap[method.lower(), path] = function
        return function


def hello(environ, start_response):
    """Example handler returning a fixed HTML greeting."""
    start_response('200 OK', [('Content-Type', 'text/html')])
    return [b'<h1>Hello, web!</h1>']


dispatcher = PathDispatcher()
dispatcher.register('GET', '/hello', hello)

# Only start the (blocking) server when run as a script, not on import.
if __name__ == '__main__':
    httpd = make_server('', 8080, dispatcher)
    print('Serving on port 8080...')
    httpd.serve_forever()
13.4 XMLRPC
from xmlrpc.server import SimpleXMLRPCServer


class KeyValueServer:
    """In-memory key/value store exposed over XML-RPC."""

    # Names of the methods published to remote callers.
    _rpc_methods_ = ['get', 'set']

    def __init__(self, address):
        self._data = {}
        self._serv = SimpleXMLRPCServer(address, allow_none=True)
        for method_name in self._rpc_methods_:
            bound = getattr(self, method_name)
            self._serv.register_function(bound)

    def get(self, name):
        """Return the value stored under ``name`` (``KeyError`` if absent)."""
        return self._data[name]

    def set(self, name, value):
        """Store ``value`` under ``name``."""
        self._data[name] = value

    def serve_forever(self):
        """Handle requests until the process is stopped."""
        self._serv.serve_forever()


if __name__ == '__main__':
    kvserv = KeyValueServer(('', 15000))
    kvserv.serve_forever()
- client
from xmlrpc.client import ServerProxy

# Proxy for an XML-RPC server on localhost port 15000; allow_none=True lets
# None values travel over the wire.
s = ServerProxy('http://localhost:15000', allow_none=True)
# Attribute access on the proxy turns into remote method calls.
s.set('foo', 'bar')
s.get('foo')
13.5 Communicating Between Interpreters
multiprocessing.connection: support UNIX domain sockets
13.6 Simple Auth
import hmac
import os


def client_authenticate(connection, secret_key):
    '''
    Authenticate client to a remote service.
    connection represents a network connection.
    secret_key is a key known only to both client/server.

    Reads a 32-byte challenge from the connection and sends back its
    HMAC-SHA256 digest computed with secret_key (a bytes object).
    '''
    message = connection.recv(32)
    # digestmod is mandatory since Python 3.8; omitting it raises TypeError.
    # Both peers must use the same digest, so it is spelled out on each side.
    mac = hmac.new(secret_key, message, digestmod='sha256')
    digest = mac.digest()
    connection.send(digest)


def server_authenticate(connection, secret_key):
    '''
    Request client authentication.

    Sends a random 32-byte challenge and returns True when the reply equals
    the HMAC-SHA256 digest of that challenge under secret_key, else False.
    '''
    message = os.urandom(32)
    connection.send(message)
    mac = hmac.new(secret_key, message, digestmod='sha256')
    digest = mac.digest()
    response = connection.recv(len(digest))
    # Constant-time comparison, so timing does not leak how much matched.
    return hmac.compare_digest(digest, response)
13.7 SSL Wrapper
import ssl

KEYFILE = 'server_key.pem'    # Private key of the server
CERTFILE = 'server_cert.pem'  # Server certificate (given to client)

# ssl.wrap_socket() was deprecated in Python 3.7 and removed in 3.12; the
# supported way is to configure an SSLContext and wrap the socket through it.
ssl_context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
ssl_context.load_cert_chain(certfile=CERTFILE, keyfile=KEYFILE)

# ``s`` is the already-created server-side socket to be wrapped.
s_ssl = ssl_context.wrap_socket(s, server_side=True)
13.8 select
import select


def event_loop(handlers):
    """Poll ``handlers`` forever and dispatch I/O readiness to them.

    On every pass each handler is asked whether it wants to receive and/or
    send; those that do are handed to ``select.select`` and the ready ones
    get ``handle_receive()`` / ``handle_send()`` called.  Never returns.
    """
    while True:
        readers = [h for h in handlers if h.wants_to_receive()]
        writers = [h for h in handlers if h.wants_to_send()]
        # Blocks until at least one handler is ready.
        ready = select.select(readers, writers, [])
        for handler in ready[0]:
            handler.handle_receive()
        for handler in ready[1]:
            handler.handle_send()
13.9 Sending Large Arrays
# zero copy with memoryview
def send_from(arr, dest):
    """Send the raw bytes of ``arr`` over ``dest`` without copying them.

    ``arr`` must support the buffer protocol (e.g. ``array.array``);
    ``dest`` must provide a socket-style ``send()`` returning the number of
    bytes accepted.  Loops until every byte has been handed over.
    """
    view = memoryview(arr).cast('B')
    while len(view):
        nsent = dest.send(view)
        view = view[nsent:]


def recv_into(arr, source):
    """Fill ``arr`` in place with bytes read from ``source``.

    ``source`` must provide a socket-style ``recv_into()``.  Loops until the
    whole buffer has been filled.

    Raises:
        EOFError: the peer closed the connection before ``arr`` was full.
    """
    view = memoryview(arr).cast('B')
    while len(view):
        nrecv = source.recv_into(view)
        if nrecv == 0:
            # recv_into() returns 0 once the peer has closed; without this
            # check the loop would spin forever on an unchanged view.
            raise EOFError('connection closed with {} bytes still expected'
                           .format(len(view)))
        view = view[nrecv:]
14 Concurrency
14.1 Threading
- `Thread` methods: `start`, `is_alive`, `join`, `terminate` (user-defined; `threading.Thread` itself offers no way to kill a thread)
- `Thread` interface: `run`
14.1.1 Daemonic Thread
# Run countdown(10) in a background thread; daemon=True means the thread
# does not keep the interpreter alive once the main thread exits.
# (``Thread`` and ``countdown`` are expected to be defined/imported elsewhere.)
t = Thread(target=countdown, args=(10,), daemon=True)
t.start()
14.1.2 Storing Thread-Specific State
threading.local(): create a thread-local storage object
14.1.3 Threading Pool
from concurrent.futures import ThreadPoolExecutor
14.2 Synchronization Primitives
14.2.1 Event
Event instances are similar to a "sticky" flag that allows threads to wait for something to happen.
14.2.2 Avoid Deadlock
import threading
from contextlib import contextmanager

# Thread-local state to stored information on locks already acquired
_local = threading.local()


@contextmanager
def acquire(*locks):
    """Acquire several locks in a globally consistent (``id``-sorted) order.

    Raises ``RuntimeError('Lock Order Violation')`` when the calling thread
    already holds, through an enclosing ``acquire``, a lock whose ``id`` is
    not lower than every lock requested here.  All locks are released, in
    reverse order, when the ``with`` block ends.
    """
    # Always take locks in ascending id order, whatever order the caller used.
    ordered = sorted(locks, key=id)

    # Locks this thread already holds through enclosing acquire() blocks.
    held = getattr(_local, 'acquired', [])
    if held:
        highest_held = max(map(id, held))
        if highest_held >= id(ordered[0]):
            raise RuntimeError('Lock Order Violation')

    # Record the new locks before taking them.
    held += ordered
    _local.acquired = held

    try:
        for lock in ordered:
            lock.acquire()
        yield
    finally:
        # Release in reverse order of acquisition, then forget them.
        for lock in ordered[::-1]:
            lock.release()
        del held[-len(ordered):]


x_lock = threading.Lock()
y_lock = threading.Lock()


def thread_1():
    while True:
        with acquire(x_lock, y_lock):
            print('Thread-1')


def thread_2():
    while True:
        with acquire(y_lock, x_lock):
            print('Thread-2')
14.3 Message Queue
- ZeroMQ
- Celery
14.3.1 Actor Model
from queue import Queue
from threading import Thread, Event


# Sentinel used for shutdown
class ActorExit(Exception):
    pass


class Actor:
    """Minimal actor: a mailbox queue drained by a background thread."""

    def __init__(self):
        self._mailbox = Queue()

    def send(self, msg):
        '''
        Send a message to the actor
        '''
        self._mailbox.put(msg)

    def recv(self):
        '''
        Receive an incoming message
        '''
        msg = self._mailbox.get()
        # The ActorExit class itself travels through the mailbox as the
        # shutdown marker; seeing it turns into a raised ActorExit.
        if msg is not ActorExit:
            return msg
        raise ActorExit()

    def close(self):
        '''
        Close the actor, thus shutting it down
        '''
        self.send(ActorExit)

    def start(self):
        '''
        Start concurrent execution
        '''
        self._terminated = Event()
        worker = Thread(target=self._bootstrap, daemon=True)
        worker.start()

    def _bootstrap(self):
        # Thread entry point: run until shutdown, then flag termination.
        try:
            self.run()
        except ActorExit:
            pass
        finally:
            self._terminated.set()

    def join(self):
        """Block until the actor's thread has finished."""
        self._terminated.wait()

    def run(self):
        '''
        Run method to be implemented by the user
        '''
        while True:
            self.recv()
15 System Utilities
getpass: prompting for a password
15.1 Subprocess
import subprocess

# Run ``netstat -a`` and capture everything it writes to stdout as bytes.
out_bytes = subprocess.check_output(['netstat','-a'])
# Decode the captured bytes to text.
out_text = out_bytes.decode('utf-8')
15.2 Performance Counter
use time.perf_counter for wall-time, time.process_time for CPU time
15.3 CPU&memory Limits
import signal
import resource
import os


def time_exceeded(signo, frame):
    """Signal handler: announce the timeout and exit with status 1."""
    print("Time's up!")
    raise SystemExit(1)


def set_max_runtime(seconds):
    """Cap this process's CPU time at ``seconds`` (soft limit)."""
    # Install the signal handler and set a resource limit
    hard_limit = resource.getrlimit(resource.RLIMIT_CPU)[1]
    resource.setrlimit(resource.RLIMIT_CPU, (seconds, hard_limit))
    signal.signal(signal.SIGXCPU, time_exceeded)


def limit_memory(maxsize):
    """Cap this process's address space at ``maxsize`` bytes (soft limit)."""
    hard_limit = resource.getrlimit(resource.RLIMIT_AS)[1]
    resource.setrlimit(resource.RLIMIT_AS, (maxsize, hard_limit))
15.4 webbrowser
get, open, open_new, open_new_tab
16 Testing
16.1 Mock
- `unittest.mock.patch`: use it as a decorator, `@patch('somefunc')`, or as a context manager, `with patch('somefunc') as mock`
- patch value: `with patch('__main__.x', 'patched_value')`
16.2 Assert Regex
# Inside a unittest.TestCase method: the block must raise ValueError and the
# exception's string form must match the given regular expression.
# NOTE(review): as a regex, 'error*' means "erro" followed by zero or more
# "r"; if "error followed by anything" was meant, 'error.*' is the pattern
# to use -- confirm the intent.
with self.assertRaisesRegex(ValueError, 'error*'): ...
16.3 Raise from another exception
raise ... from e
16.4 Warnings
import warnings

# Issue a DeprecationWarning with the given message (second argument is the
# warning category).
warnings.warn('logfile argument deprecated', DeprecationWarning)
17 Debugging
`python3 -i`: starts an interactive shell as soon as a program terminates; from there you can use `pdb`
17.1 Traceback
- `traceback.print_exc(file=sys.stderr)`
- `traceback.print_stack(file=sys.stderr)`
17.2 Profiling
The first rule of optimization might be to "not do it"; the second rule is almost certainly "don't optimize the unimportant".
17.2.1 Decorator Version
import time
from functools import wraps


def timethis(func):
    """Decorator that prints ``'<module>.<name> : <seconds>'`` after each call.

    The wrapped function's return value is passed through unchanged; nothing
    is printed when the call raises.
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        began = time.perf_counter()
        result = func(*args, **kwargs)
        elapsed = time.perf_counter() - began
        print(f'{func.__module__}.{func.__name__} : {elapsed}')
        return result

    return wrapper
17.2.2 Contextmanager Version
import time
from contextlib import contextmanager


@contextmanager
def timeblock(label):
    """Time the enclosed ``with`` block and print ``'<label> : <seconds>'``.

    The report is printed even when the block raises.
    """
    began = time.perf_counter()
    try:
        yield
    finally:
        elapsed = time.perf_counter() - began
        print(f'{label} : {elapsed}')
17.2.3 time.process_time()
17.2.4 Tools
- pypy
- Numba