Python Cookbook

Table of Contents

1 Data Structures and Algorithms

1.1 Quick Summary

# unpacking: *_ swallows the middle items; the trailing tuple is unpacked in place
data = ["ACME", 50, 91.1, (2012, 12, 21)]
name, *_, (year, mon, day) = data

# fixed-length queue (N is a placeholder for the maximum length)
from collections import deque

d = deque(maxlen=N)

# heap related (items is a placeholder iterable)
import heapq

heapq.nlargest(N, items)  # faster than sorted(items, reverse=True)[:N] if N is small
heapq.nsmallest(N, items, key=None)
nums = [1, 8, 2, 23, 7, -4, 18, 23, 42, 37, 2]
heapq.heapify(nums)  # inplace function

# agg by values
prices = {"AAPL": 612.78, "IBM": 205.55, "FB": 10.75}
min_price = min(zip(prices.values(), prices.keys()))  # (value, key) pairs compare by value first => (10.75, 'FB')

# key-views support set operations
# NOTE(review): d and d2 are meant to be two dicts here, not the deque bound to d above
d.keys() - d2.keys()
d.keys() & d2.keys()
d.keys() | d2.keys()

# boolean selectors: compress keeps the items whose selector is true
from itertools import compress
alist = list(range(10))
list(compress(alist, [i % 2 == 0 for i in alist]))

1.1.1 others

  • slice, itertools.islice
  • collections.Counter
  • from operator import itemgetter, attrgetter
  • itertools.groupby
  • from collections import ChainMap

1.2 PriorityQueue Implementation

import heapq

class PriorityQueue:
    """Priority queue that pops the item with the highest priority first.

    Items pushed with equal priority are popped in insertion order.
    """

    def __init__(self):
        self._queue = []
        # Insertion counter: orders items that share a priority level and
        # keeps heapq from ever comparing the items themselves.
        self._index = 0

    def push(self, item, priority):
        """Add *item* with the given numeric *priority*."""
        # heapq is a min-heap, so the priority is negated to pop the largest first.
        heapq.heappush(self._queue, (-priority, self._index, item))
        self._index += 1

    def pop(self):
        """Remove and return the highest-priority item.

        Raises IndexError when the queue is empty.
        """
        return heapq.heappop(self._queue)[-1]

2 String Manipulation

2.1 Quick Summary

import re
from fnmatch import fnmatch

re.split(r"[./]", "btc.usd/coinbase")
# => ['btc', 'usd', 'coinbase'] (slower than chaining plain str.split calls)

# shell-like matching
fnmatch("foo.txt", "*.txt")

# searching & replacing
# str.find, re.findall, re.match
# str.replace, re.sub
# Case-Insensitive Option: flags=re.IGNORECASE


# For the Shortest Match
str_pat = re.compile(r"\"(.*)\"")  # default: greedy regular expression
text = 'Computer says "no." Phone says "yes."'
str_pat.findall(text)
# => ['no." Phone says "yes.']

str_pat = re.compile(r"\"(.*?)\"")  # nongreedy and produces the shortest match instead.
str_pat.findall(text)
# => ['no.', 'yes.']

# Aligning
# str.ljust, str.rjust, str.center
# f"{text:<20}", f"{text:>20}", f"{text:^20}"
text = "Hello World"
ftext = f"{text:=>20}"  # => "=========Hello World"
ftext = f"{text:*^20}"  # => "****Hello World*****"

# Byte Strings
bstr = b"Hello World"
barray = bytearray(b"Hello World")
bstr.decode("ascii") # => "Hello World"

2.1.1 others

  • The textwrap module is a straightforward way to clean up text for printing. textwrap.fill(s, 70, initial_indent=" ")

2.2 Tokenizing Text

Tokenizing is often the first step for more advanced kinds of text parsing and handling. https://docs.python.org/3/library/re.html#writing-a-tokenizer

text = "foo = 23 + 42 * 10"

# Token stream a tokenizer should produce for `text` once whitespace is dropped
tokens = [
    ("NAME", "foo"),
    ("EQ", "="),
    ("NUM", "23"),
    ("PLUS", "+"),
    ("NUM", "42"),
    ("TIMES", "*"),
    ("NUM", "10"),
]

import re

# One named group per token kind: (?P<KIND>...) makes the kind available
# afterwards as match.lastgroup.
NAME = r"(?P<NAME>[a-zA-Z_][a-zA-Z_0-9]*)"
NUM = r"(?P<NUM>\d+)"
PLUS = r"(?P<PLUS>\+)"
TIMES = r"(?P<TIMES>\*)"
EQ = r"(?P<EQ>=)"
WS = r"(?P<WS>\s+)"
# Alternation of all token patterns; earlier alternatives win on ties
master_pat = re.compile("|".join((NAME, NUM, PLUS, TIMES, EQ, WS)))

for match in master_pat.finditer("foo = 42"):
    kind, lexeme = match.lastgroup, match.group()
    print(kind, lexeme)  # NAME foo, WS , EQ =, ...
  • Syntax Parser: Recipe 2.19
  • For more complicated grammar: use pyparsing, PLY

3 TODO Numbers

3.1 round

round(1.29, 1)
# => 1.3
# A negative ndigits rounds to tens, hundreds, ...; as the two results below
# show, an exact tie goes to the even neighbour rather than always up.
round(1245, -1)
# => 1240
round(1275, -1)
# => 1280

3.2 Decimal

from decimal import Decimal, localcontext
a = Decimal('6.32')
b = Decimal('2.41')

with localcontext() as ctx:
    ctx.prec = 5
    print(a/b) # 2.6224

3.3 Formatting

x = 1234.56789
format(x, '0.2f')
# => '1234.57'   # round
format(x, '>10.1f')
# => '    1234.6'
format(x, '0,.1f')
# => '1,234.6'

x = 1234
bin(x)  # others: oct, hex
# => '0b10011010010'
format(x, 'b')  # others: o, x
# => '10011010010'  (same digits as bin(x), without the '0b' prefix)
int('10011010010', 2)
# => 1234

3.4 Bin, Oct, Hex Int

x = -1234
format(x, 'b')
#=> '-10011010010'
format(x, 'x')
#=> '-4d2'
format(2**32 + x, 'b')
#=> '11111111111111111111101100101110'
format(2**32 + x, 'x')
#=> 'fffffb2e'
int('4d2', 16)
#=> 1234
int('10011010010', 2)
#=> 1234

3.5 Bytes2Int

# bytes -> int
data = b'\x00\x124V\x00x\x90\xab\x00\xcd\xef\x01\x00#\x004'
x = int.from_bytes(data, 'little')  # or 'big'

# int -> bytes: first argument is the length of the result in bytes
x = 94522842520747284487117727783387188
x.to_bytes(16, 'little')

useful in cryptography or networking domains

  • struct module
  • int.bit_length()

3.6 Complex Math

a = complex(2, 4)
b = 3 - 5j
a.conjugate()
#=> (2-4j)
abs(a)
#=> 4.47213595499958
a * b
#=> (26+2j)

import cmath
cmath.sin(a)
#=> (24.83130584894638-11.356612711218174j)

import numpy as np
a = np.array([2 + 3j, 4 + 5j, 6 - 7j, 8 + 9j])
np.sin(a)

3.7 random

  • random.choice
  • random.sample
  • random.shuffle
  • random.randint
  • random.random: 0 to 1
  • random.getrandbits

3.7.1 seed

random.seed()  # Seed based on system time or os.urandom()
random.seed(12345)  # Seed based on integer given
random.seed(b'bytedata')  # Seed based on byte data

3.7.2 distribution

  • random.uniform
  • random.gauss

3.8 math.f***

  • math.fsum
  • math.fmod
  • math.fabs

4 Datetime

4.1 Finding Last Friday

from datetime import datetime

from dateutil.relativedelta import relativedelta
from dateutil.rrule import FR

d = datetime.now()
# FR(-1) moves back to the most recent Friday; per the dateutil docs the date
# is left unchanged when d already falls on a Friday.
print(d + relativedelta(weekday=FR(-1)))

4.2 Timezone

from datetime import datetime

import pytz
d = datetime.now() # no timezone info
print(d)
# => 2018-12-21 17:14:01.258941

shanghai = pytz.timezone('Asia/Shanghai')
loc_d = shanghai.localize(d) # Localize the date for Shanghai
print(loc_d)
# => 2018-12-21 17:14:01.258941+08:00

# Once the date has been localized, it can be converted to other time zones
utc_d = loc_d.astimezone(pytz.utc)
print(utc_d)
# => 2018-12-21 09:14:01.258941+00:00
  • datetime.replace
  • datetime.astimezone

5 Iterator

5.1 Manually Consuming an Iterator

iterable = iter(range(5))  # Invokes range.__iter__()
try:
    while True:
        line = next(iterable)  # Invokes iterable.__next__()
        print(line, end='')
except StopIteration:
    pass

# non exception version
# The iterator above is exhausted, so start a fresh one for this demo.
iterable = iter(range(5))
while True:
    # The default is returned instead of raising StopIteration; this only
    # works when None can never be a real item.
    line = next(iterable, None)
    if line is None:
        break
    print(line, end='')
  • Python’s iterator protocol requires __iter__() to return a special iterator object that implements a __next__() method to carry out the actual iteration.

5.2 Iterating Over Multi Sequences

a = [1, 2, 3]
b = ['w', 'x', 'y', 'z']

for i in zip(a, b):
    print(i)
#=> (1, 'w') (2, 'x') (3, 'y')

from itertools import zip_longest
for i in zip_longest(a, b):
    print(i)
#=> (1, 'w') (2, 'x') (3, 'y') (None, 'z')

5.3 dropwhile

Drop all of the initial comment lines.

5.4 Permutation & Combination

  • combinations, permutations, combinations_with_replacement

5.5 itertools.chain

Concatenate two iterables(copy-free)

5.6 Data Processing Pipelines

5.7 Flattening a Nested Sequence

from collections.abc import Iterable


def flatten(items, ignore_types=(str, bytes)):
    """Yield the leaf values of an arbitrarily nested iterable, depth first.

    Args:
        items: iterable that may contain further iterables.
        ignore_types: types that are yielded whole even though they are
            iterable (by default strings and bytes, which would otherwise
            be split into characters).
    """
    for x in items:
        if isinstance(x, Iterable) and not isinstance(x, ignore_types):
            # Pass ignore_types down so nested levels honour the caller's choice.
            yield from flatten(x, ignore_types)
        else:
            yield x


items = ['Dave', 'Paula', ['Thomas', 'Lewis']]
for x in flatten(items):
    print(x)

5.8 Merge Two Sorted Iterables

import heapq
a = [1, 4, 7, 10]
b = [2, 5, 6, 11]
for c in heapq.merge(a, b):
    print(c)

5.9 iter()

iter() optionally accepts a zero-argument callable and sentinel (terminating) value as inputs.

for chunk in iter(lambda: fs.read(10), ''):
    print(chunk)

6 I/O

6.1 Encoding

with open('somefile.txt', 'rt', encoding='latin-1') as f:
    ...

latin-1 encoding is notable in that it will never produce a decoding error when reading text of a possibly unknown encoding.

# Replace bad chars with Unicode U+fffd replacement char
open('sample.txt', 'rt', encoding='ascii', errors='replace')

# Ignore bad chars entirely
open('sample.txt', 'rt', encoding='ascii', errors='ignore')

6.2 readinto

import array
a = array.array('i', [0, 0, 0, 0, 0, 0, 0, 0])
with open('data.bin', 'rb') as f:
    f.readinto(a)

readinto() fills the contents of an existing buffer

  • One caution with using f.readinto() is that you must always check its return value, which is the number of bytes actually read.

6.3 io.StringIO, io.BytesIO

6.4 gzip.open, bz2.open

6.5 Iterating Over Fixed-Sized Records

from functools import partial

RECORD_SIZE = 32

with open('somefile.data', 'rb') as f:
    # iter(callable, sentinel) keeps calling f.read(RECORD_SIZE) until it
    # returns b'' (end of file).
    records = iter(partial(f.read, RECORD_SIZE), b'')
    for r in records:
        ...

6.6 In-memory Modification

6.6.1 mmap

Use the mmap module to memory map files for random access to its contents or to make in-place modifications.

  • mmap can also be used to exchange data between interpreters

6.6.2 memoryview

buf = bytearray(b'Hello World')
m1 = memoryview(buf)
m2 = m1[-5:]
#m2=> <memory at 0x100681390>
m2[:] = b'WORLD'
#buf=> bytearray(b'Hello WORLD')

6.7 os.path

# path is a placeholder for any filesystem path string
os.path.basename(path)
os.path.dirname(path)
os.path.expanduser(path)
os.path.splitext(path)  # Split the file extension
os.path.exists(path)
os.path.isfile(path) # isdir, islink
os.path.realpath('/usr/local/bin/python3') # => '/usr/local/bin/python3.3'
os.path.getsize(path) # getmtime
os.listdir(path)  # path must name a directory here
  • other module: glob, fnmatch used for filename matching

6.8 Changing Encoding of a File

import io
# decode a binary file
with open('some_binary_file.bin', 'rb') as open_file:
    fs = io.TextIOWrapper(open_file, encoding='utf8')
    text = fs.read()

# change encoding
import sys
sys.stdout.encoding  #=> 'UTF-8'
# use its detach() method to remove the existing text encoding layer before replacing it with a new one
sys.stdout = io.TextIOWrapper(sys.stdout.detach(), encoding='latin-1')
sys.stdout.encoding #=> 'latin-1'
  • layers on I/O:
f = open('sample.txt', 'w')
# a text-handling layer that encodes and decodes Unicode
f # => <_io.TextIOWrapper name='sample.txt' mode='w' encoding='UTF-8'>

# a buffered I/O layer that handles binary data
f.buffer # => <_io.BufferedWriter name='sample.txt'>
f.buffer.write(b'hello\n') # write bytes to a text file

# io.FileIO is a raw file representing the low-level file descriptor in the operating system
f.buffer.raw # => <_io.FileIO name='sample.txt' mode='wb'>
  • detach: disconnects the topmost layer of a file and returns the next lower layer.

6.9 File Descriptor

# Create a file object, but don't close underlying fd when done
f = open(fd, 'wt', closefd=False)

def echo_client(client_sock, addr):
    """Echo every line received on *client_sock* back to the sender.

    Args:
        client_sock: connected socket; it is closed before returning.
        addr: peer address, used only for the log line.
    """
    print('Got connection from', addr)

    try:
        # Make text-mode file wrappers for socket reading/writing, only works on Unix-based systems
        # Use the makefile() method of sockets instead to be cross platform
        # closefd=False: the wrappers must not close the descriptor the socket owns.
        client_in = open(client_sock.fileno(), 'rt', encoding='latin-1',
                         closefd=False)
        client_out = open(client_sock.fileno(), 'wt', encoding='latin-1',
                          closefd=False)

        # Echo lines back to the client using file I/O
        for line in client_in:
            client_out.write(line)
            client_out.flush()
    finally:
        # Close the socket even if reading or writing fails.
        client_sock.close()

6.10 Temporary Files

from tempfile import TemporaryFile, NamedTemporaryFile, TemporaryDirectory
with TemporaryFile('w+t', encoding='utf-8', errors='ignore') as f:
    f.write('Hello World\n')

with NamedTemporaryFile(
	'w+t', delete=False, prefix='mytemp', suffix='.txt', dir='/tmp') as f:
    print('filename is:', f.name)  #=> /tmp/mytemp2tmz4nl5.txt

with TemporaryDirectory() as dirname:
    print('dirname is:', dirname)

6.11 Serializing Python Objects

pickle is a Python-specific self-describing data encoding

6.11.1 Dealing with Multiple Objects

import pickle
with open('somedata', 'wb') as fs:
    pickle.dump([1, 2, 3, 4], fs)
    pickle.dump('hello', fs)

with open('somedata', 'rb') as fs:
    pickle.load(fs) # => [1, 2, 3, 4]
    pickle.load(fs) # => hello

6.11.2 Safety

pickle.load() should never be used on untrusted data

6.11.3 User-defined Classes

Certain kinds of objects can’t be pickled. These are typically objects that involve some sort of external system state, such as open files, open network connections, threads, processes, stack frames, and so forth. User-defined classes can sometimes work around these limitations by providing __getstate__() and __setstate__() methods

  • pickle.dump() will call __getstate__() to get an object that can be pickled

7 Encoding

7.1 csv

7.1.1 reader

from collections import namedtuple
import re
import csv
# newline='' lets the csv module handle line endings inside quoted fields itself
with open('stock.csv', newline='') as f:
    f_csv = csv.reader(f)
    headings = next(f_csv)
    # NOTE: the column headers must be valid Python identifiers for namedtuple
    Row = namedtuple('Row', headings)
    for r in f_csv:
        row = Row(*r)
        # Process row

7.1.2 DictReader

import csv
# newline='' lets the csv module handle line endings inside quoted fields itself
with open('stocks.csv', newline='') as f:
    f_csv = csv.DictReader(f)
    for row in f_csv:
        # process row (each row is a mapping keyed by the header names)
        ...

7.1.3 writer

  • writer.writerow and writer.writerows

7.1.4 DictWriter

  • writer.writeheader and writer.writerows

7.2 json2object

  • use object_pairs_hook and object_hook options
import json
from collections import OrderedDict
s = '{"name": "ACME", "shares": 50, "price": 490.1}'
data = json.loads(s, object_pairs_hook=OrderedDict)
# data => OrderedDict([('name', 'ACME'), ('shares', 50), ('price', 490.1)])

class JSONObject:
    """Expose the keys of a decoded JSON object as instance attributes.

    Intended as the ``object_hook`` of ``json.loads``: the decoded dict
    becomes the instance ``__dict__`` directly (no copy is made).
    """

    def __init__(self, d):
        self.__dict__ = d

    def __str__(self):
        return str(self.__dict__)


obj = json.loads(s, object_hook=JSONObject)
obj.name = 'def'

json.dumps(vars(obj)) # vars(obj) same as obj.__dict__
# or
json.dumps(obj, default=vars) # use vars as a serializing function

7.3 xml

from xml.etree.ElementTree import parse
doc = parse(xml_str)
  • lxml
  • for huge xml: Recipe 6.4
  • more: Recipe 6.3~6.7

7.4 hex encoding

  • binascii
import binascii

s = b'hello'
h = binascii.b2a_hex(s) # bytes2hexbytes b'68656c6c6f'
b = binascii.a2b_hex(h) # hexbytes2bytes
  • base64
import base64
s = b'hello'
h = base64.b16encode(s) # b'68656C6C6F' uppercase
b = base64.b16decode(h)

7.5 base64

  • base64.b64encode
  • base64.b64decode

7.6 struct

from struct import Struct
from typing import BinaryIO, Iterable, Iterator


def write_records(records: Iterable[tuple], format: str, f: BinaryIO) -> None:
    """Pack each record with the struct *format* and write it to *f*.

    Args:
        records: iterable of tuples, one per record.
        format: struct format string describing a single record.
        f: binary file-like object opened for writing.
    """
    record_struct = Struct(format)
    for r in records:
        f.write(record_struct.pack(*r))


def read_records(format: str, f: BinaryIO) -> Iterator[tuple]:
    """Lazily read fixed-size records from *f*.

    Returns a generator of tuples, one per record; nothing is read until
    the generator is consumed. A trailing chunk shorter than one record
    makes ``unpack`` raise ``struct.error``.
    """
    record_struct = Struct(format)
    # iter(callable, sentinel): keep reading record-sized chunks until EOF (b'')
    chunks = iter(lambda: f.read(record_struct.size), b'')
    return (record_struct.unpack(chunk) for chunk in chunks)

  • more to explore Recipe 6.12

8 Functions

8.1 Keyword-only Arguments

def recv(maxsize, *, block):
    """Placeholder showing a keyword-only parameter.

    Everything after the bare ``*`` (here ``block``) can only be supplied
    by keyword; passing it positionally raises TypeError.
    """
    return None

recv(1024, True)# TypeError
recv(1024, block=True) # OK

8.2 Capture Variables

x = 10
a = lambda y, x=x: x + y # use x=x to bind at definition time
x = 20
a(5) # => 15

8.3 Replace Single-method Classes with Closures

8.4 Callback Shared State

Recipe 7.10 four ways:

  • single-method class
  • closure
  • coroutine: use coroutine.send as callback
  • use functools.partial

9 Class

9.1 String Representation

  • __repr__: returns the code representation of an instance, and is usually the text you would type to recreate the instance. eval(repr(x)) == x
  • __str__: converts the instance to a string.

9.2 __format__

# Format codes understood by Date.__format__, mapped to str.format templates
_formats = {
    'ymd': '{d.year}-{d.month}-{d.day}',
    'mdy': '{d.month}/{d.day}/{d.year}',
    'dmy': '{d.day}/{d.month}/{d.year}',
}
from datetime import date


class Date(date):
    """A date whose format spec may be one of the codes in ``_formats``."""

    def __format__(self, code):
        """Render the date for format() / str.format().

        Codes found in ``_formats`` use the matching template; any other
        spec (including the empty one) falls back to date.__format__.
        """
        template = _formats.get(code)
        if template is None:
            return super().__format__(code)
        return template.format(d=self)


d = Date.today()
format(d, 'mdy')
'The date is {:ymd}'.format(d)

9.3 Context Management

class Connection:
    """Context manager that opens 'somefile.txt' on entry and closes it on exit."""

    def __enter__(self):
        """Open the file for text reading and return the file object."""
        self.fs = open('somefile.txt', 'rt')
        return self.fs

    def __exit__(self, exc_ty, exc_val, tb):
        """Close the file; returns None, so exceptions from the body propagate."""
        self.fs.close()
        self.fs = None

9.4 Saving Memory __slots__

  • Instances are built around a small fixed-sized array instead of a dictionary.
  • A side effect of using slots is that it is no longer possible to add new attributes to instances.
class Date:
    """Plain year/month/day record that stores its fields in slots."""

    # With __slots__ the instances get no per-instance __dict__, so only the
    # attributes listed here can ever be set.
    __slots__ = ['year', 'month', 'day']

    def __init__(self, year, month, day):
        self.year = year
        self.month = month
        self.day = day

9.5 Properties

class Object:
    """Minimal example of a property with getter, setter and deleter."""

    @property
    def attr(self):
        """Return the stored value (AttributeError if it was never set)."""
        return self.__attr

    @attr.setter
    def attr(self, value):
        self.__attr = value

    @attr.deleter
    def attr(self):
        # Triggered by `del obj.attr`; resets the value instead of removing it.
        self.__attr = None

9.5.1 Extending a Property

class SubPerson(Person):
    """Extend every accessor of the ``name`` property inherited from Person."""

    @property
    def name(self):
        print('Getting name')
        return super().name

    @name.setter
    def name(self, value):
        print('Setting name to', value)
        # the only way to get to setter method is to access it as a class variable
        super(SubPerson, SubPerson).name.__set__(self, value)

    @name.deleter
    def name(self):
        print('Deleting name')
        # Same trick as the setter: reach the parent's property object on the class.
        super(SubPerson, SubPerson).name.__delete__(self)
  • Extending only getter method

    class SubPerson(Person):
        # Person.name.getter copies the parent's property and swaps in a new
        # getter, so the inherited setter and deleter stay in place.
        @Person.name.getter
        def name(self):
            print('Getting name')
            return super().name
    

9.6 super()

  • To avoid double-invocation when involving multiple inheritance.
  • Use __mro__ to see method resolution order.

9.6.1 MRO

The actual determination of the MRO list itself is made using a technique known as C3 Linearization.

  • Child classes get checked before parents.
  • Multiple parents get checked in the order listed.
  • If there are two valid choices for the next class, pick the one from the first parent.

When you use the super() function, Python continues its search starting with the next class on the MRO. See Recipe 8.7 for more details.

  • Hint: super(MyClass, self).__init__() provides the next __init__ method according to the used Method Resolution Ordering(MRO)

9.6.2 Multiple Inheritance with Different Arguments to Constructors

class A:
    """Cooperative base: consumes ``a`` and forwards the rest along the MRO."""

    def __init__(self, a, **kw):
        super().__init__(**kw)
        print('A a', a)


class B:
    """Cooperative base: consumes ``b`` and ``c`` and forwards the rest."""

    def __init__(self, b, c=0, **kw):
        super().__init__(**kw)
        print('B b', b)
        print('B c', c)


class C(A, B):
    """Combine A and B; keyword arguments are routed to whichever base names them."""

    def __init__(self, a, b, c, d):
        # MRO is C -> A -> B -> object: A takes a, B takes b and c.
        super().__init__(a=a, b=b, c=c)
        print('C d', d)

or

 class C(A, B):
     def __init__(self, a, b, c):
         # Call each base initializer explicitly instead of going through super().
         # NOTE(review): this presumes A.__init__ and B.__init__ do not call
         # super().__init__() themselves - confirm against the bases in use.
         A.__init__(self, a)
         B.__init__(self, b, c)
# should be careful with double-invocation

9.7 Descriptor

Use Descriptor to create a new kind of instance attribute with some extra functionality, such as type checking. Descriptors provide the underlying magic for most of Python’s class features, such as @classmethod, @staticmethod, @property.

  • __get__(self, instance, cls)
  • __set__(self, instance, value)
  • __delete__(self, instance)
class String:
    """Descriptor for an instance attribute that only accepts ``str`` values.

    The value is kept in the owning instance's ``__dict__`` under *name*.
    """

    def __init__(self, name):
        self.name = name

    def __get__(self, instance, cls):
        if instance is None:  # called as class variable
            return self
        return instance.__dict__[self.name]

    def __set__(self, instance, value):
        if not isinstance(value, str):
            raise TypeError('Expected a string')
        instance.__dict__[self.name] = value

    def __delete__(self, instance):
        del instance.__dict__[self.name]


class Person:
    """Example owner class: ``name`` is validated by the String descriptor."""

    name = String('name')

    def __init__(self, name):
        self.name = name  # goes through String.__set__

9.7.1 Advanced Usage

# Descriptor for a type-checked attribute
class Typed:
    """Descriptor that stores a value in the instance dict after a type check."""

    def __init__(self, name, expected_type):
        self.name = name
        self.expected_type = expected_type

    def __get__(self, instance, cls):
        if instance is None:  # accessed on the class itself
            return self
        return instance.__dict__[self.name]

    def __set__(self, instance, value):
        if not isinstance(value, self.expected_type):
            raise TypeError('Expected ' + str(self.expected_type))
        instance.__dict__[self.name] = value

    def __delete__(self, instance):
        del instance.__dict__[self.name]


# Class decorator that applies it to selected attributes
def typeassert(**kwargs):
    """Return a class decorator attaching a Typed descriptor per keyword.

    Each keyword is an attribute name and its value the required type.
    """
    def decorate(cls):
        for name, expected_type in kwargs.items():
            # Attach a Typed descriptor to the class
            setattr(cls, name, Typed(name, expected_type))
        # Return only after every attribute has been wrapped.
        return cls

    return decorate


# Example use
@typeassert(name=str, shares=int, price=float)
class Stock:
    def __init__(self, name, shares, price):
        self.name = name
        self.shares = shares
        self.price = price

9.7.2 Lazy Properties

Define a read-only attribute as a property that only gets computed on access.

import math


class lazyproperty:
    """Descriptor that computes a value on first access and then caches it.

    Only ``__get__`` is defined, so once the result is stored in the
    instance under the same name, normal attribute lookup finds it there
    and this descriptor is no longer consulted.
    """

    def __init__(self, func):
        self.func = func

    def __get__(self, instance, cls):
        if instance is None:  # accessed on the class itself
            return self
        value = self.func(instance)
        # Shadow the descriptor with the computed value on the instance.
        setattr(instance, self.func.__name__, value)
        return value


class Circle:
    def __init__(self, radius):
        self.radius = radius

    @lazyproperty
    def area(self):
        print('Computing area')
        return math.pi * self.radius**2

9.7.3 Data Model

# Base class. Uses a descriptor to set a value
class Descriptor:
    """Base descriptor: stores the value in the instance dict under ``name``.

    Extra keyword options are set as attributes on the descriptor itself.
    """

    def __init__(self, name=None, **opts):
        self.name = name
        for key, value in opts.items():
            setattr(self, key, value)

    def __set__(self, instance, value):
        instance.__dict__[self.name] = value


# Descriptor for enforcing types
class Typed(Descriptor):
    expected_type = type(None)  # overridden by concrete subclasses

    def __set__(self, instance, value):
        if not isinstance(value, self.expected_type):
            raise TypeError('expected ' + str(self.expected_type))
        super().__set__(instance, value)


# Descriptor for enforcing values
class Unsigned(Descriptor):
    def __set__(self, instance, value):
        if value < 0:
            raise ValueError('Expected >= 0')
        super().__set__(instance, value)


class Integer(Typed):
    expected_type = int


class UnsignedInteger(Integer, Unsigned):
    # Checks chain through super(): the type check runs first, then the sign check.
    pass
  • Simplify the Specification by Class Decorator
    # Class decorator to apply constraints
    def check_attributes(**kwargs):
        """Return a class decorator that attaches one descriptor per keyword.

        Each value is either a ready Descriptor instance (which is then given
        its attribute name) or a callable that is invoked with the name.
        """
        def decorate(cls):
            for key, value in kwargs.items():
                if isinstance(value, Descriptor):
                    value.name = key
                    setattr(cls, key, value)
                else:
                    setattr(cls, key, value(key))
            return cls
        return decorate
    
    # Example
    @check_attributes(name=SizedString(size=8),
                      shares=UnsignedInteger,
                      price=UnsignedFloat)
    class Stock:
        def __init__(self, name, shares, price):
            # Each assignment goes through the descriptor attached by the decorator
            self.name = name
            self.shares = shares
            self.price = price
    
  • Simplify the Specification by Metaclass
    # A metaclass that applies checking
    class checkedmeta(type):
        """Metaclass that names every Descriptor found in the class body."""

        def __new__(cls, clsname, bases, methods):
            # Attach attribute names to the descriptors
            for key, value in methods.items():
                if isinstance(value, Descriptor):
                    value.name = key
            # Build the class only after every entry has been inspected.
            return type.__new__(cls, clsname, bases, methods)
    
    
    # Example
    class Stock(metaclass=checkedmeta):
        name = SizedString(size=8)
        shares = UnsignedInteger() # no need to give a name
        price = UnsignedFloat()

        def __init__(self, name, shares, price):
            self.name = name
            self.shares = shares
            self.price = price
    
  • Decorator Version(Preferred Approach)
    # Base class. Uses a descriptor to set a value
    class Descriptor:
        """Base descriptor: stores the value in the instance dict under ``name``."""

        def __init__(self, name=None, **opts):
            self.name = name
            for key, value in opts.items():
                setattr(self, key, value)

        def __set__(self, instance, value):
            instance.__dict__[self.name] = value


    # Decorator for applying type checking
    def Typed(expected_type, cls=None):
        """Class decorator that adds an isinstance check to ``cls.__set__``.

        Called with only *expected_type* it returns the actual decorator, so
        it can be used as ``@Typed(int)``.
        """
        if cls is None:
            return lambda cls: Typed(expected_type, cls)

        # Keep the original setter so the new one can delegate to it.
        super_set = cls.__set__

        def __set__(self, instance, value):
            if not isinstance(value, expected_type):
                raise TypeError('expected ' + str(expected_type))
            super_set(self, instance, value)

        cls.__set__ = __set__
        return cls


    @Typed(int)
    class Integer(Descriptor):
        pass
    

9.8 Simplifying Initialization

class Structure:
    """Base class providing a generic positional ``__init__``.

    Subclasses list their attribute names in ``_fields``; the constructor
    assigns the positional arguments to those names in order.
    """

    # Class variable that specifies expected fields
    _fields = []

    def __init__(self, *args):
        if len(args) != len(self._fields):
            raise TypeError('Expected {} arguments'.format(len(self._fields)))

        # Set the arguments
        for name, value in zip(self._fields, args):
            setattr(self, name, value)


class Stock(Structure):
    _fields = ['name', 'shares', 'price']
  • Downside: documentation and help features of IDEs. It can be solved by attaching or enforcing a type signature

9.9 Abstract Base Class

  • Define an abc class: class AbstractBase(metaclass=abc.ABCMeta)
  • Using register to bind other class which is already defined
  • @abstractmethod can work with @staticmethod, @classmethod

9.10 Implementing Custom Containers

  • Container: __contains__
  • Iterable: __iter__
  • Sized: __len__
  • Sequence: __getitem__, __len__
  • MutableSequence: __delitem__, __getitem__, __len__, __setitem__, insert

9.11 Proxy Class

Implement __getattr__, __setattr__, __delattr__

  • __getattr__ method is actually a fallback method that only gets called when an attribute is not found.

9.12 Multiple Constructors

  • Use @classmethod, example: Date.today
  • Use cls.__new__(cls) to create instance without initialization.

9.13 Mixin Class

  • To enhance the functionality of existing classes with optional features.
  • See Recipe 8.18, example: ThreadedXMLRPCServer(ThreadingMixIn, SimpleXMLRPCServer)
  • Mixin classes are never meant to be instantiated directly.
  • Mixin classes typically have no state of their own(not a restriction)
  • Use __slots__ = () to serve as a strong hint that the mixin classes do not have their own instance data.
  • Use class decorator to patch method(preferred approach)

9.14 State Machine

Based on state design pattern. See Recipe 8.19, should use __class__

9.15 Calling a Method by Name

  • getattr
  • operator.methodcaller()

9.16 Visitor Pattern

Recipe 8.21

9.16.1 Without Recursion

Recipe 8.22

  • Use stack (like depth-first traversal) and generator
import types


class Node:
    """Marker base class for tree nodes."""
    pass


class NodeVisitor:
    """Visitor that walks a tree with an explicit stack instead of recursion.

    ``visit_<ClassName>`` methods may be plain functions returning a value,
    or generators that ``yield`` a child node and receive that child's
    result back as the value of the yield expression.
    """

    def visit(self, node):
        """Visit *node* and return the final result."""
        stack = [node]
        last_result = None
        while stack:
            try:
                last = stack[-1]
                if isinstance(last, types.GeneratorType):
                    # Resume the pending visit, handing it the last child result.
                    stack.append(last.send(last_result))
                    last_result = None
                elif isinstance(last, Node):
                    stack.append(self._visit(stack.pop()))
                else:
                    # A plain value: the result of the most recent visit.
                    last_result = stack.pop()
            except StopIteration:
                # The generator on top of the stack has finished.
                stack.pop()
        return last_result

    def _visit(self, node):
        """Dispatch to ``visit_<ClassName>``, falling back to generic_visit."""
        methname = 'visit_' + type(node).__name__
        meth = getattr(self, methname, None)
        if meth is None:
            meth = self.generic_visit
        return meth(node)

    def generic_visit(self, node):
        """Fallback for node types without a dedicated method; always raises."""
        raise RuntimeError('No {} method'.format('visit_' + type(node).__name__))

9.17 Comparison

__le__, __ge__, __lt__, __gt__, __eq__

9.18 weakref

9.18.1 Avoid Cyclic Reference

weakref.ref

9.18.2 Cache Instances(like logging.getLogger)

Use weakref.WeakValueDictionary to store instances as weak reference

10 Metaprogramming

10.1 functools.wraps

  • Preserving function metadata, __name__, __doc__, __annotations__.
  • original function in __wrapped__ (hint: @classmethod and @staticmethod store original function in __func__)

10.2 Decorator with get/set

def attach_wrapper(obj, func=None):
    """Attach *func* to *obj* as an attribute named after the function.

    Usable directly, ``attach_wrapper(obj, func)``, or as a decorator,
    ``@attach_wrapper(obj)``. Returns *func* unchanged.
    """
    if func is None:
        # Decorator form: remember obj and wait for the function.
        from functools import partial
        return partial(attach_wrapper, obj)
    setattr(obj, func.__name__, func)
    return func

10.3 Decorator with Optional Arguments

example:

import logging
from functools import partial, wraps


def logged(func=None, *, level=logging.DEBUG, name=None, message=None):
    """Decorator that emits a log record each time the function is called.

    Works both bare (``@logged``) and with options (``@logged(level=...)``).

    Args:
        func: the function to wrap; None when used with options.
        level: logging level of the emitted record.
        name: logger name; defaults to the function's module.
        message: log message; defaults to the function's name.
    """
    if func is None:
        # Called with options only: return a decorator that remembers them.
        return partial(logged, level=level, name=name, message=message)
    logname = name if name else func.__module__
    log = logging.getLogger(logname)
    logmsg = message if message else func.__name__

    @wraps(func)
    def wrapper(*args, **kwargs):
        log.log(level, logmsg)
        return func(*args, **kwargs)

    return wrapper

10.4 Type Checking Decorator

from inspect import signature
from functools import wraps


def typeassert(*ty_args, **ty_kwargs):
    """Decorator enforcing argument types at call time.

    Types are given positionally or by keyword, mirroring the decorated
    function's own parameters; parameters without a type are not checked.
    Arguments left to their defaults are not checked either.
    """
    def decorate(func):
        # If in optimized mode, disable type checking
        if not __debug__:
            return func
        # Map function argument names to supplied types
        sig = signature(func)
        bound_types = sig.bind_partial(*ty_args, **ty_kwargs).arguments

        @wraps(func)
        def wrapper(*args, **kwargs):
            bound_values = sig.bind(*args, **kwargs)
            # Enforce type assertions across supplied arguments
            for name, value in bound_values.arguments.items():
                if name in bound_types:
                    if not isinstance(value, bound_types[name]):
                        raise TypeError('Argument {} must be {}'.format(
                            name, bound_types[name]))
            return func(*args, **kwargs)
        return wrapper
    return decorate


@typeassert(int, z=int)
def add(x, y, z=42):
    return x + y + z

10.5 Decorator as Functional Class

import types
from functools import wraps


class Profiled:
    """Class-based decorator that counts calls in ``ncalls``."""

    def __init__(self, func):
        # Copies the function metadata onto the instance and records the
        # original function as self.__wrapped__.
        wraps(func)(self)
        self.ncalls = 0

    def __call__(self, *args, **kwargs):
        self.ncalls += 1
        return self.__wrapped__(*args, **kwargs)

    def __get__(self, instance, cls):
        # Makes the decorator usable on methods: bind to the instance so
        # that it is passed as the first argument.
        if instance is None:
            return self
        return types.MethodType(self, instance)


@Profiled
def add(x, y):
    return x + y


add(4, 5)
print(add.ncalls)

10.6 inspect

  • signature
  • getargspec (removed in Python 3.11; use getfullargspec or signature)
  • Parameter

10.7 Using Decorators to Patch Class Definitions

def patch(cls):
    """Class decorator that replaces ``cls.method`` with a wrapper around it.

    Returns the patched class, so the decorated name stays bound to it.
    """
    orig_method = cls.method

    def new_method(self):
        return orig_method(self)

    cls.method = new_method
    # A class decorator must return the class; otherwise the name becomes None.
    return cls

10.8 Enforcing an Argument Signature on *args and **kwargs

10.8.1 Creating a Function Signature

from inspect import Signature, Parameter

# Make a signature for a func(x, y=42, *, z=None)

# Parameter objects in declaration order: x, y=42, then keyword-only z=None
parms = [
    Parameter(name='x', kind=Parameter.POSITIONAL_OR_KEYWORD),
    Parameter(name='y', kind=Parameter.POSITIONAL_OR_KEYWORD, default=42),
    Parameter(name='z', kind=Parameter.KEYWORD_ONLY, default=None),
]
sig = Signature(parms)
print(sig)


def func(*args, **kwargs):
    """Accept anything syntactically, then validate the call against ``sig``.

    Raises TypeError when the arguments do not fit the signature.
    """
    sig.bind(*args, **kwargs)

10.8.2 Enforcing Function Signatures

from inspect import Signature, Parameter
def make_sig(*names):
    """Build a Signature whose parameters are the given names, in order."""
    parms = [Parameter(name, Parameter.POSITIONAL_OR_KEYWORD)
             for name in names]
    return Signature(parms)


class Structure:
    """Base class whose ``__init__`` validates arguments against ``__signature__``."""

    __signature__ = make_sig() # inspect.signature will lookup __signature__

    def __init__(self, *args, **kwargs):
        # bind() raises TypeError on missing, extra or duplicated arguments.
        bound_values = self.__signature__.bind(*args, **kwargs)
        for name, value in bound_values.arguments.items():
            setattr(self, name, value)


class Stock(Structure):
    __signature__ = make_sig('name', 'shares', 'price')

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

10.8.3 Metaclass Approach

from inspect import Signature, Parameter


def make_sig(*names):
    """Build a Signature whose parameters are the given names, in order."""
    parms = [
        Parameter(name, Parameter.POSITIONAL_OR_KEYWORD) for name in names
    ]
    return Signature(parms)


class StructureMeta(type):
    """Metaclass that derives ``__signature__`` from the class's ``_fields``."""

    def __new__(cls, clsname, bases, clsdict):
        clsdict['__signature__'] = make_sig(*clsdict.get('_fields', []))
        return super().__new__(cls, clsname, bases, clsdict)


class Structure(metaclass=StructureMeta):
    # Must be spelled _fields: that is the key the metaclass reads.
    _fields = []

    def __init__(self, *args, **kwargs):
        # bind() raises TypeError when the call does not match the signature.
        bound_values = self.__signature__.bind(*args, **kwargs)
        for name, value in bound_values.arguments.items():
            setattr(self, name, value)

10.9 Parsing and Analyzing Python Source

  • eval('2 + 3*4 + x')

10.9.1 exec

  • exec('for i in range(10): print(i)')
def test1():
    """Show that exec() cannot rebind a function's local variable."""
    x = 0
    exec('x += 1')
    print(x) # => 0

def test2():
    """Read the result of exec() back from an explicit namespace dict."""
    x = 0
    loc = locals()
    # Pass the dict explicitly so the result lands in loc on every Python
    # version; since 3.13 each locals() call returns an independent snapshot.
    exec('x += 1', globals(), loc)
    x = loc['x']
    print(x) # => 1

10.9.2 ast

compile Python source code into an abstract syntax tree(AST)

import ast
ex = ast.parse('2 + 3*4 + x', mode='eval')
ast.dump(ex)
# "Expression(body=BinOp(left=BinOp(left=Num(n=2), op=Add(), right=BinOp(left=Num(n=3), op=Mult(), right=Num(n=4))), op=Add(), right=Name(id='x', ctx=Load())))"

10.9.3 Rewriting AST to Achieve Performance Improvement

  • see 9.24

10.10 dis

  • dis.dis
  • disassembled code: some_func.__code__.co_code

10.11 Simple namedtuple

import operator

class StructTupleMeta(type):
    def __init__(cls, *args, **kwargs):
	super().__init__(*args, **kwargs)
	for n, name in enumerate(cls._fields):
	    setattr(cls, name, property(operator.itemgetter(n)))
	    # After f = itemgetter(2), the call f(r) returns r[2]

class StructTuple(tuple, metaclass=StructTupleMeta):
    _fields = []

    def __new__(cls, *args):
	if len(args) != len(cls._fields):
	    raise ValueError('{} arguments required'.format(len(cls._fields)))
	return super().__new__(cls, args)

class Stock(StructTuple):
    # Field names, in positional order; each becomes a read-only property.
    _fields = ['name', 'shares', 'price']

# One positional argument per field is required (ValueError otherwise).
s = Stock('ACME', 50, 91.1)

10.12 Multimethod 9.20

10.13 Avoiding Repetitive Property

def typed_property(name, expected_type):
    '''
    Create a property that type-checks every assignment.

    The value is stored on the instance under '_' + name. Assigning a value
    that is not an instance of expected_type raises TypeError.
    '''
    storage_name = '_' + name

    @property
    def prop(self):
        return getattr(self, storage_name)

    @prop.setter
    def prop(self, value):
        if not isinstance(value, expected_type):
            raise TypeError('{} must be a {}'.format(name, expected_type))
        setattr(self, storage_name, value)

    return prop

from functools import partial
# Pre-bind expected_type so each factory only needs the attribute name.
String = partial(typed_property, expected_type=str)
Integer = partial(typed_property, expected_type=int)

# Example use
class Person:
    '''
    Example class whose attributes are type-checked properties.
    '''
    name = String('name')
    age = Integer('age')

    def __init__(self, name, age):
        # Both assignments go through the property setters, so a wrong
        # type raises TypeError here.
        self.name = name
        self.age = age

10.14 Defining Context Manager the Easy Way

import time
from contextlib import contextmanager

@contextmanager
def timethis(label):
    '''
    Context manager that prints how long the enclosed block took.

    Prints '<label>: <seconds>' when the block exits, including when it
    exits through an exception (which then propagates to the caller).
    '''
    # perf_counter() is the clock meant for measuring elapsed intervals.
    start = time.perf_counter()
    try:
        yield
    finally:
        end = time.perf_counter()
        print('{}: {}'.format(label, end - start))

# Example use
with timethis('counting'):
    # Busy loop whose run time is reported when the block exits.
    n = 10000000
    while n > 0:
        n -= 1
  • All of the code prior to the yield executes as the __enter__() method of a context manager. All of the code after the yield executes as the __exit__() method.
  • If there was an exception, it is raised at the yield statement.

11 Metaclass

11.1 Basic

When writing metaclasses, it is somewhat common to only define a __new__() or __init__() method, but not both.

class MyMeta(type):
    '''
    Skeleton metaclass that hooks class creation through __new__().
    '''

    def __new__(cls, clsname, bases, clsdict):
        # cls is the metaclass itself
        # clsname is name of class being defined
        # bases is tuple of base classes
        # clsdict is class dictionary
        return super().__new__(cls, clsname, bases, clsdict)

or

class MyMeta(type):
    '''
    Skeleton metaclass that hooks class creation through __init__().
    '''

    def __init__(cls, clsname, bases, clsdict):
        # cls is the class object that has just been created
        super().__init__(clsname, bases, clsdict)
__prepare__
is called first and used to create the class namespace prior to the body of any class definition being processed. Normally, this method simply returns a dictionary or other mapping object
__new__
is invoked prior to class creation and is typically used when a metaclass wants to alter the class definition in some way
__init__
is invoked after a class has been created, and is useful if you want to write code that works with the fully formed class object.

11.2 NoInstances

class NoInstances(type):
    '''
    Metaclass whose classes cannot be instantiated.

    Calling such a class raises TypeError.
    '''

    def __call__(self, *args, **kwargs):
        raise TypeError("Can't instantiate directly")

# Example
class Spam(metaclass=NoInstances):
    '''
    Example class that can only be used through its static methods.
    '''

    @staticmethod
    def grok(x):
        print('Spam.grok')

11.3 Singleton

class Singleton(type):
    '''
    Metaclass that gives each of its classes exactly one instance.

    The first call creates the instance; every later call returns that same
    object and ignores its arguments.
    '''

    def __init__(self, *args, **kwargs):
        # One slot per class created by this metaclass.
        self.__instance = None
        super().__init__(*args, **kwargs)

    def __call__(self, *args, **kwargs):
        if self.__instance is None:
            self.__instance = super().__call__(*args, **kwargs)
        return self.__instance

11.4 Cached Instances

import weakref

class Cached(type):
    '''
    Metaclass that reuses instances created with the same arguments.

    Instances are kept in a WeakValueDictionary keyed by the positional
    argument tuple. The arguments must therefore be hashable and the
    instances must support weak references.
    '''

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.__cache = weakref.WeakValueDictionary()

    def __call__(self, *args):
        # A single get() avoids the window between "in" and indexing in
        # which a weakly held entry could disappear.
        obj = self.__cache.get(args)
        if obj is None:
            obj = super().__call__(*args)
            self.__cache[args] = obj
        return obj

11.5 OrderedDict for Class Body

The __prepare__() method is invoked immediately at the start of a class definition with the class name and base classes. It must then return a mapping object to use when processing the class body.

# Metaclass that uses an OrderedDict for class body
class OrderedMeta(type):
    '''
    Metaclass that records the definition order of Typed attributes.

    The class body is collected in an OrderedDict. Every Typed value found
    in it has its _name set to the attribute name, and the names are stored,
    in definition order, in the class attribute _order.

    NOTE(review): Typed is not defined in this snippet; it is presumably the
    descriptor base class from an earlier section — confirm.
    '''

    def __new__(cls, clsname, bases, clsdict):
        d = dict(clsdict)
        order = []
        for name, value in clsdict.items():
            if isinstance(value, Typed):
                value._name = name
                order.append(name)
        d['_order'] = order
        return type.__new__(cls, clsname, bases, d)

    @classmethod
    def __prepare__(cls, clsname, bases):
        # Imported locally so the snippet is self-contained.
        from collections import OrderedDict
        return OrderedDict()

11.6 Optional Arguments on Class Definitions

# Keyword arguments other than metaclass= are handed to the metaclass's
# __prepare__(), __new__() and __init__() methods.
class Spam(metaclass=MyMeta, debug=True, synchronize=True):
    ...

To support such keyword arguments in a metaclass, make sure you define them on the __prepare__(), __new__(), and __init__() methods using keyword-only arguments

class MyMeta(type):
    '''
    Metaclass that accepts debug= and synchronize= on the class statement.

    Each hook declares the options as keyword-only arguments and does not
    forward them to type, which would reject them.
    '''

    # Optional
    @classmethod
    def __prepare__(cls, name, bases, *, debug=False, synchronize=False):
        # Custom processing
        ...
        return super().__prepare__(name, bases)

    # Required
    def __new__(cls, name, bases, ns, *, debug=False, synchronize=False):
        # Custom processing
        ...
        return super().__new__(cls, name, bases, ns)

    # Required
    def __init__(self, name, bases, ns, *, debug=False, synchronize=False):
        # Custom processing
        ...
        super().__init__(name, bases, ns)

12 Packages and Modules

12.1 Lazy Import

# __init__.py
def A():
    '''
    Import the sibling module a at call time and return a new A instance.
    '''
    from .a import A as _A
    return _A()

12.2 Organize Add-on Packages into a Common Package

# foo-package/
#   spam/
#     blah.py
# bar-package/
#   spam/
#     grok.py
import sys
# Put both top-level directories on the import search path.
sys.path.extend(['foo-package', 'bar-package'])
# NOTE(review): neither spam/ directory lists an __init__.py above, so the two
# presumably merge into a single namespace package — confirm.
import spam.blah
import spam.grok

12.3 Read Data File

  • pkgutil.get_data

12.4 Import Hooks

  • See 10.11, 10.12

13 Network

13.1 socketserver

13.2 CIDR network address

import ipaddress
net = ipaddress.ip_network('123.45.67.64/27')
list(net)  # every address in the block: 123.45.67.64 through 123.45.67.95
net.num_addresses  # => 32

# An interface is a host address together with its network.
inet = ipaddress.ip_interface('123.45.67.73/27')
inet.network  # => IPv4Network('123.45.67.64/27')
inet.ip  # => IPv4Address('123.45.67.73')

13.3 Simple WSGI

WSGI: Web Server Gateway Interface. Same code for different web framework.

def application(environ, start_response):
    '''
    Minimal WSGI application: reply to every request with a fixed greeting.

    Returns the response body as a list holding one bytes object.
    '''
    status = '200 OK'
    headers = [('Content-Type', 'text/html')]
    start_response(status, headers)
    body = b'<h1>Hello, web!</h1>'
    return [body]

from wsgiref.simple_server import make_server
# Serve `application` with the reference WSGI server on port 8000.
httpd = make_server('', 8000, application)
print('Serving HTTP on port 8000...')
httpd.serve_forever()  # blocks while handling requests
  • environ: http request info
  • start_response: a function that must be called to initiate a response
  • return: response body

13.3.1 PathDispatcher

import cgi


def notfound_404(environ, start_response):
    '''
    Fallback WSGI handler: reply with a plain-text 404 response.
    '''
    headers = [('Content-type', 'text/plain')]
    start_response('404 Not Found', headers)
    body = b'Not Found'
    return [body]


class PathDispatcher:
    '''
    WSGI application that routes requests by (HTTP method, path).

    Handlers are ordinary WSGI callables added with register(). Before a
    handler runs, the request parameters are parsed and stored as a dict in
    environ['params']: a field that occurs once maps to a string, a
    repeated field maps to a list of strings. Unregistered routes are
    answered by notfound_404.

    Parameters are parsed with urllib.parse instead of the cgi module,
    which was removed from the standard library in Python 3.13. The query
    string is always read; the body is read only for non-GET/HEAD requests
    of type application/x-www-form-urlencoded. Multipart bodies are not
    parsed.
    '''

    def __init__(self):
        # Maps (lower-cased method, path) to a WSGI handler.
        self.pathmap = {}

    def __call__(self, environ, start_response):
        from urllib.parse import parse_qs

        path = environ['PATH_INFO']
        method = environ['REQUEST_METHOD'].lower()

        encoded = [environ.get('QUERY_STRING', '')]
        content_type = environ.get('CONTENT_TYPE', '').split(';', 1)[0]
        is_form = (content_type.strip().lower()
                   == 'application/x-www-form-urlencoded')
        if is_form and method not in ('get', 'head'):
            try:
                length = int(environ.get('CONTENT_LENGTH') or 0)
            except ValueError:
                length = 0
            if length > 0:
                body = environ['wsgi.input'].read(length)
                # Body fields come first, followed by query-string fields.
                encoded.insert(0, body.decode('utf-8', 'replace'))

        fields = parse_qs('&'.join(part for part in encoded if part))
        environ['params'] = {
            key: values[0] if len(values) == 1 else values
            for key, values in fields.items()
        }

        try:
            handler = self.pathmap[method, path]
        except KeyError:
            handler = notfound_404
        return handler(environ, start_response)

    def register(self, method, path, function):
        '''
        Route `method` requests for `path` to `function`; return `function`.
        '''
        self.pathmap[method.lower(), path] = function
        return function


def hello(environ, start_response):
    '''
    WSGI handler that answers with a fixed HTML greeting.
    '''
    response_headers = [('Content-Type', 'text/html')]
    start_response('200 OK', response_headers)
    page = b'<h1>Hello, web!</h1>'
    return [page]


from wsgiref.simple_server import make_server
dispatcher = PathDispatcher()
# Route GET /hello to the hello handler; other routes fall back to the 404 handler.
dispatcher.register('GET', '/hello', hello)
httpd = make_server('', 8080, dispatcher)
print('Serving on port 8080...')
httpd.serve_forever()  # blocks while handling requests

13.4 XMLRPC

from xmlrpc.server import SimpleXMLRPCServer

class KeyValueServer:
    '''
    In-memory key/value store exposed over XML-RPC.

    Every method named in _rpc_methods_ is registered with the server under
    its own name.
    '''
    _rpc_methods_ = ['get', 'set']

    def __init__(self, address):
        self._data = {}
        # allow_none=True because set() returns None.
        self._serv = SimpleXMLRPCServer(address, allow_none=True)
        for name in self._rpc_methods_:
            self._serv.register_function(getattr(self, name))

    def get(self, name):
        '''Return the value stored under name (KeyError if absent).'''
        return self._data[name]

    def set(self, name, value):
        '''Store value under name.'''
        self._data[name] = value

    def serve_forever(self):
        '''Handle requests until the server is stopped.'''
        self._serv.serve_forever()

if __name__ == '__main__':
    # Start the server on port 15000 only when run as a script.
    kvserv = KeyValueServer(('', 15000))
    kvserv.serve_forever()
  • client
from xmlrpc.client import ServerProxy
# allow_none=True mirrors the server setting: set() returns None.
s = ServerProxy('http://localhost:15000', allow_none=True)
s.set('foo', 'bar')
s.get('foo')

13.5 Communicating Between Interpreters

  • multiprocessing.connection: support UNIX domain sockets

13.6 Simple Auth

import hmac
import os

def client_authenticate(connection, secret_key, digestmod='sha256'):
    '''
    Authenticate client to a remote service.
    connection represents a network connection.
    secret_key is a key known only to both client/server.
    digestmod names the hash used for the HMAC; it must match the server's.
    '''
    message = connection.recv(32)
    # hmac.new() requires an explicit digestmod on Python 3.8+.
    mac = hmac.new(secret_key, message, digestmod)
    connection.send(mac.digest())


def server_authenticate(connection, secret_key, digestmod='sha256'):
    '''
    Request client authentication.

    Sends a random 32-byte challenge, reads back the client's HMAC of it and
    returns True when it matches the HMAC computed locally with secret_key.
    digestmod names the hash used for the HMAC; it must match the client's.
    '''
    message = os.urandom(32)
    connection.send(message)
    # hmac.new() requires an explicit digestmod on Python 3.8+.
    digest = hmac.new(secret_key, message, digestmod).digest()
    response = connection.recv(len(digest))
    # Constant-time comparison.
    return hmac.compare_digest(digest, response)

13.7 SSL Wrapper

import ssl
KEYFILE = 'server_key.pem'  # Private key of the server
CERTFILE = 'server_cert.pem'  # Server certificate (given to client)
# ssl.wrap_socket() was removed in Python 3.12; use an SSLContext instead.
context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
context.load_cert_chain(certfile=CERTFILE, keyfile=KEYFILE)
# s is the plain socket to wrap; it is not created in this snippet.
s_ssl = context.wrap_socket(s, server_side=True)

13.8 select

import select

def event_loop(handlers):
    '''
    Run a select()-based event loop over handlers; never returns normally.

    On each pass every handler is asked whether it wants to receive or
    send. Those that do are passed to select.select(), and the ones it
    reports ready get handle_receive() or handle_send() called. Handlers
    are given to select() directly, so each needs a fileno() method.
    '''
    while True:
        wants_recv = [h for h in handlers if h.wants_to_receive()]
        wants_send = [h for h in handlers if h.wants_to_send()]
        can_recv, can_send, _ = select.select(wants_recv, wants_send, [])
        for h in can_recv:
            h.handle_receive()
        for h in can_send:
            h.handle_send()

13.9 Sending Large Arrays

# zero copy with memoryview
def send_from(arr, dest):
    '''
    Send the raw bytes of the buffer arr through dest without copying.

    dest.send() is called repeatedly with the unsent remainder until
    everything has been written.
    '''
    view = memoryview(arr)
    if view.nbytes == 0:
        # cast() rejects zero-length views, and there is nothing to send.
        return
    view = view.cast('B')
    while len(view):
        nsent = dest.send(view)
        view = view[nsent:]


def recv_into(arr, source):
    '''
    Fill the writable buffer arr with bytes read from source, in place.

    source.recv_into() is called repeatedly until arr is full. Raises
    EOFError if source reports 0 bytes before that, instead of looping
    forever.
    '''
    view = memoryview(arr)
    if view.nbytes == 0:
        # cast() rejects zero-length views, and there is nothing to read.
        return
    view = view.cast('B')
    while len(view):
        nrecv = source.recv_into(view)
        if nrecv == 0:
            raise EOFError(
                'source closed with {} bytes still expected'.format(len(view)))
        view = view[nrecv:]

14 Concurrency

14.1 Threading

  • Thread methods: start, is_alive, join (threading.Thread has no terminate(); to stop a thread, have it poll an exit flag)
  • Thread interface: run

14.1.1 Daemonic Thread

# A daemon thread does not keep the interpreter alive at exit.
# countdown is the thread's target function; it is not defined in this snippet.
t = Thread(target=countdown, args=(10,), daemon=True)
t.start()

14.1.2 Storing Thread-Specific State

  • threading.local() : create a thread-local storage object

14.1.3 Threading Pool

  • from concurrent.futures import ThreadPoolExecutor

14.2 Synchronization Primitives

14.2.1 Event

Event instances are similar to a "sticky" flag that allows threads to wait for something to happen.

14.2.2 Avoid Deadlock

import threading
from contextlib import contextmanager

# Thread-local state that stores information on locks already acquired
_local = threading.local()


@contextmanager
def acquire(*locks):
    '''
    Acquire several locks in a deadlock-free order; release them on exit.

    Locks are always taken in ascending id() order. A thread that already
    holds locks through acquire() may only add locks with a larger id();
    anything else raises RuntimeError('Lock Order Violation'). Calling
    acquire() with no locks is a no-op.
    '''
    # Sort locks by object identifier
    locks = sorted(locks, key=id)

    # Make sure lock order of previously acquired locks is not violated
    acquired = getattr(_local, 'acquired', [])
    if acquired and locks and max(id(lock) for lock in acquired) >= id(locks[0]):
        raise RuntimeError('Lock Order Violation')
    _local.acquired = acquired

    # Track what was really taken, so a failure partway through does not
    # release locks this call never acquired.
    taken = []
    try:
        for lock in locks:
            lock.acquire()
            taken.append(lock)
            acquired.append(lock)
        yield
    finally:
        # Release locks in reverse order of acquisition
        for lock in reversed(taken):
            lock.release()
        if taken:
            # Guarded: with nothing taken, a [-0:] slice would clear the list.
            del acquired[-len(taken):]


import threading
x_lock = threading.Lock()
y_lock = threading.Lock()

def thread_1():
    '''
    Loop forever, printing 'Thread-1' while holding x_lock and y_lock.
    '''
    while True:
        with acquire(x_lock, y_lock):
            print('Thread-1')

def thread_2():
    '''
    Loop forever, printing 'Thread-2' while holding y_lock and x_lock.

    The locks are named in the opposite order to thread_1.
    '''
    while True:
        with acquire(y_lock, x_lock):
            print('Thread-2')

14.3 Message Queue

  • ZeroMQ
  • Celery

14.3.1 Actor Model

from queue import Queue
from threading import Thread, Event


# Sentinel used for shutdown
class ActorExit(Exception):
    pass


class Actor:
    '''
    Minimal actor: a mailbox queue drained by run() in a daemon thread.

    Subclasses override run() and call recv() to fetch messages. close()
    queues the ActorExit class itself as a sentinel; recv() turns it into an
    ActorExit exception, which ends the thread.
    '''

    def __init__(self):
        self._mailbox = Queue()

    def send(self, msg):
        '''
        Send a message to the actor
        '''
        self._mailbox.put(msg)

    def recv(self):
        '''
        Receive an incoming message
        '''
        msg = self._mailbox.get()
        if msg is ActorExit:
            raise ActorExit()
        return msg

    def close(self):
        '''
        Close the actor, thus shutting it down
        '''
        self.send(ActorExit)

    def start(self):
        '''
        Start concurrent execution
        '''
        self._terminated = Event()
        t = Thread(target=self._bootstrap)
        t.daemon = True
        t.start()

    def _bootstrap(self):
        try:
            self.run()
        except ActorExit:
            pass
        finally:
            # Set however run() ended, so join() is always released.
            self._terminated.set()

    def join(self):
        '''
        Block until the actor's thread has finished (call start() first)
        '''
        self._terminated.wait()

    def run(self):
        '''
        Run method to be implemented by the user
        '''
        while True:
            msg = self.recv()

15 System Utilities

  • getpass: prompting for a password

15.1 Subprocess

import subprocess
# check_output() returns the command's standard output as bytes.
out_bytes = subprocess.check_output(['netstat','-a'])
out_text = out_bytes.decode('utf-8')

15.2 Performance Counter

use time.perf_counter for wall-time, time.process_time for CPU time

15.3 CPU&memory Limits

import signal
import resource
import os


def time_exceeded(signo, frame):
    '''
    Signal handler: announce the timeout and exit with status 1.
    '''
    message = "Time's up!"
    print(message)
    raise SystemExit(1)


def set_max_runtime(seconds):
    '''
    Limit this process's CPU time to `seconds` (soft limit).

    The hard limit is left as it was, and time_exceeded is installed as the
    SIGXCPU handler.
    '''
    limits = resource.getrlimit(resource.RLIMIT_CPU)
    hard_limit = limits[1]
    resource.setrlimit(resource.RLIMIT_CPU, (seconds, hard_limit))
    signal.signal(signal.SIGXCPU, time_exceeded)

def limit_memory(maxsize):
    '''
    Set the soft address-space limit to `maxsize`, keeping the hard limit.
    '''
    _, hard_limit = resource.getrlimit(resource.RLIMIT_AS)
    new_limits = (maxsize, hard_limit)
    resource.setrlimit(resource.RLIMIT_AS, new_limits)

15.4 webbrowser

get, open, open_new, open_new_tab

16 Testing

16.1 Mock

  • unittest.mock.patch
  • @patch('somefunc') or with patch('somefunc') as mock
  • patch value: with patch('__main__.x', 'patched_value')

16.2 Assert Regex

# NOTE(review): as a regex, 'error*' matches "erro" followed by zero or more
# "r" characters; confirm whether 'error.*' was intended.
with self.assertRaisesRegex(ValueError, 'error*'):
    ...

16.3 Raise from another exception

  • raise ... from e

16.4 Warnings

import warnings
# The second argument is the warning category.
warnings.warn('logfile argument deprecated', DeprecationWarning)

17 Debugging

  • python3 -i: starts an interactive shell as soon as the program terminates, from which pdb can be used

17.1 Traceback

  • traceback.print_exc(file=sys.stderr)
  • traceback.print_stack(file=sys.stderr)

17.2 Profiling

The first rule of optimization might be "don't do it"; the second rule is almost certainly "don't optimize the unimportant".

17.2.1 Decorator Version

import time
from functools import wraps

def timethis(func):
    '''
    Decorator that prints how long each call to func took.

    Prints '<module>.<name> : <seconds>' after every call that returns
    normally, then passes the return value through.
    '''
    @wraps(func)
    def wrapper(*args, **kwargs):
        start = time.perf_counter()
        r = func(*args, **kwargs)
        end = time.perf_counter()
        print('{}.{} : {}'.format(func.__module__, func.__name__, end - start))
        return r

    return wrapper

17.2.2 Contextmanager Version

import time
from contextlib import contextmanager

@contextmanager
def timeblock(label):
    '''
    Context manager that prints how long the enclosed block took.

    Prints '<label> : <seconds>' when the block exits, including when it
    exits through an exception (which then propagates to the caller).
    '''
    start = time.perf_counter()
    try:
        yield
    finally:
        end = time.perf_counter()
        print('{} : {}'.format(label, end - start))

17.2.3 time.process_time()

17.2.4 Tools

  • pypy
  • Numba