Python Basics

 

Contents

    '1. Collections': [List, Dictionary, Set, Tuple, Range, Enumerate, Iterator, Generator],

    '2. Types':       [Type, String, Regular_Exp, Format, Numbers, Combinatorics, Datetime],

    '3. Syntax':      [Args, Inline, Closure, Decorator, Class, Duck_Type, Enum, Exception],

    '4. System':      [Exit, Print, Input, Command_Line_Arguments, Open, Path, OS_Commands],

    '5. Data':        [JSON, Pickle, CSV, SQLite, Bytes, Struct, Array, Memory_View, Deque],

    '6. Advanced':    [Threading, Operator, Introspection, Metaprograming, Eval, Coroutine],

    '7. Libraries':   [Progress_Bar, Plot, Table, Curses, Logging, Scraping, Web, Profile,

                       NumPy, Image, Audio, Games, Data]

Main

if __name__ == '__main__':     # Runs main() if file wasn't imported.

    main()

List

<list> = <list>[from_inclusive : to_exclusive : ±step_size]

<list>.append(<el>)            # Or: <list> += [<el>]

<list>.extend(<collection>)    # Or: <list> += <collection>

<list>.sort()

<list>.reverse()

<list> = sorted(<collection>)

<iter> = reversed(<list>)

sum_of_elements  = sum(<collection>)

elementwise_sum  = [sum(pair) for pair in zip(list_a, list_b)]

sorted_by_second = sorted(<collection>, key=lambda el: el[1])

sorted_by_both   = sorted(<collection>, key=lambda el: (el[1], el[0]))

flatter_list     = list(itertools.chain.from_iterable(<list>))

product_of_elems = functools.reduce(lambda out, el: out * el, <collection>)

list_of_chars    = list(<str>)

Module operator provides functions itemgetter() and mul() that offer the same functionality as lambda expressions above.

<list>.insert(<int>, <el>)     # Inserts item at index and moves the rest to the right.

<el>  = <list>.pop([<int>])    # Returns and removes item at index or from the end.

<int> = <list>.count(<el>)     # Returns number of occurrences. Also works on strings.

<int> = <list>.index(<el>)     # Returns index of the first occurrence or raises ValueError.

<list>.remove(<el>)            # Removes first occurrence of the item or raises ValueError.

<list>.clear()                 # Removes all items. Also works on dictionary and set.

Dictionary

<view> = <dict>.keys()                          # Coll. of keys that reflects changes.

<view> = <dict>.values()                        # Coll. of values that reflects changes.

<view> = <dict>.items()                         # Coll. of key-value tuples that reflects chgs.

value  = <dict>.get(key, default=None)          # Returns default if key is missing.

value  = <dict>.setdefault(key, default=None)   # Returns and writes default if key is missing.

<dict> = collections.defaultdict(<type>)        # Creates a dict with default value of type.

<dict> = collections.defaultdict(lambda: 1)     # Creates a dict with default value 1.

<dict> = dict(<collection>)                     # Creates a dict from coll. of key-value pairs.

<dict> = dict(zip(keys, values))                # Creates a dict from two collections.

<dict> = dict.fromkeys(keys [, value])          # Creates a dict from collection of keys.

<dict>.update(<dict>)                           # Adds items. Replaces ones with matching keys.

value = <dict>.pop(key)                         # Removes item or raises KeyError.

{k for k, v in <dict>.items() if v == value}    # Returns set of keys that point to the value.

{k: v for k, v in <dict>.items() if k in keys}  # Returns a dictionary, filtered by keys.

Set

<set> = set()

<set>.add(<el>)                                 # Or: <set> |= {<el>}

<set>.update(<collection> [, ...])              # Or: <set> |= <set>

<set>  = <set>.union(<coll.>)                   # Or: <set> | <set>

<set>  = <set>.intersection(<coll.>)            # Or: <set> & <set>

<set>  = <set>.difference(<coll.>)              # Or: <set> - <set>

<set>  = <set>.symmetric_difference(<coll.>)    # Or: <set> ^ <set>

<bool> = <set>.issubset(<coll.>)                # Or: <set> <= <set>

<bool> = <set>.issuperset(<coll.>)              # Or: <set> >= <set>

<el> = <set>.pop()                              # Raises KeyError if empty.

<set>.remove(<el>)                              # Raises KeyError if missing.

<set>.discard(<el>)                             # Doesn't raise an error.

Frozen Set

<frozenset> = frozenset(<collection>)

Tuple

<tuple> = ()

<tuple> = (<el>,)                           # Or: <el>,

<tuple> = (<el_1>, <el_2> [, ...])          # Or: <el_1>, <el_2> [, ...]

Range

<range> = range(to_exclusive)

<range> = range(from_inclusive, to_exclusive)

<range> = range(from_inclusive, to_exclusive, ±step_size)

from_inclusive = <range>.start

to_exclusive   = <range>.stop

String

<str>  = <str>.strip()                       # Strips all whitespace characters from both ends.

<str>  = <str>.strip('<chars>')              # Strips all passed characters from both ends.

<list> = <str>.split()                       # Splits on one or more whitespace characters.

<list> = <str>.split(sep=None, maxsplit=-1)  # Splits on 'sep' str at most 'maxsplit' times.

<list> = <str>.splitlines(keepends=False)    # Splits on [\n\r\f\v\x1c\x1d\x1e\x85] and '\r\n'.

<str>  = <str>.join(<coll_of_strings>)       # Joins elements using string as a separator.

<bool> = <sub_str> in <str>                  # Checks if string contains a substring.

<bool> = <str>.startswith(<sub_str>)         # Pass tuple of strings for multiple options.

<bool> = <str>.endswith(<sub_str>)           # Pass tuple of strings for multiple options.

<int>  = <str>.find(<sub_str>)               # Returns start index of the first match or -1.

<int>  = <str>.index(<sub_str>)              # Same but raises ValueError if missing.

<str>  = <str>.replace(old, new [, count])   # Replaces 'old' with 'new' at most 'count' times.

<str>  = <str>.translate(<table>)            # Use `str.maketrans(<dict>)` to generate table.

<str>  = chr(<int>)                          # Converts int to Unicode char.

<int>  = ord(<str>)                          # Converts Unicode char to int.

Regex

import re

<str>   = re.sub(<regex>, new, text, count=0)  # Substitutes all occurrences with 'new'.

<list>  = re.findall(<regex>, text)            # Returns all occurrences as strings.

<list>  = re.split(<regex>, text, maxsplit=0)  # Use brackets in regex to include the matches.

<Match> = re.search(<regex>, text)             # Searches for first occurrence of the pattern.

<Match> = re.match(<regex>, text)              # Searches only at the beginning of the text.

<iter>  = re.finditer(<regex>, text)           # Returns all occurrences as match objects.

Numbers

Types

<int>      = int(<float/str/bool>)       # Or: math.floor(<float>)

<float>    = float(<int/str/bool>)       # Or: <real>e±<int>

<complex>  = complex(real=0, imag=0)     # Or: <real> ± <real>j

<Fraction> = fractions.Fraction(0, 1)    # Or: Fraction(numerator=0, denominator=1)

<Decimal>  = decimal.Decimal(<str/int>)  # Or: Decimal((sign, digits, exponent))

'int(<str>)' and 'float(<str>)' raise ValueError on malformed strings.

Basic Functions

<num> = pow(<num>, <num>)                # Or: <num> ** <num>

<num> = abs(<num>)                       # <float> = abs(<complex>)

<num> = round(<num> [, ±ndigits])        # `round(126, -1) == 130`

Math

from math import e, pi, inf, nan, isinf, isnan

from math import sin, cos, tan, asin, acos, atan, degrees, radians

from math import log, log10, log2

Statistics

from statistics import mean, median, variance, stdev, pvariance, pstdev

Random

from random import random, randint, choice, shuffle, gauss, seed

<float> = random()                       # A float inside [0, 1).

<int>   = randint(from_inc, to_inc)      # An int inside [from_inc, to_inc].

<el>    = choice(<list>)                 # Keeps the list intact.

Bin, Hex

<int> = ±0b<bin>                         # Or: ±0x<hex>

<int> = int('±<bin>', 2)                 # Or: int('±<hex>', 16)

<int> = int('±0b<bin>', 0)               # Or: int('±0x<hex>', 0)

<str> = bin(<int>)                       # Returns '[-]0b<bin>'.

Bitwise Operators

<int> = <int> & <int>                    # And

<int> = <int> | <int>                    # Or

<int> = <int> ^ <int>                    # Xor (0 if both bits equal)

<int> = <int> << n_bits                  # Left shift (>> for right)

<int> = ~<int>                           # Not (also: -<int> - 1)

Datetime

Module 'datetime' provides 'date' <D>, 'time' <T>, 'datetime' <DT> and 'timedelta' <TD> classes. All are immutable and hashable.

Time and datetime objects can be 'aware' <a>, meaning they have defined timezone, or 'naive' <n>, meaning they don't.

If object is naive, it is presumed to be in the system's timezone.

from datetime import date, time, datetime, timedelta

from dateutil.tz import UTC, tzlocal, gettz, datetime_exists, resolve_imaginary

Constructors

<D>  = date(year, month, day)

<T>  = time(hour=0, minute=0, second=0, microsecond=0, tzinfo=None, fold=0)

<DT> = datetime(year, month, day, hour=0, minute=0, second=0, ...)

<TD> = timedelta(days=0, seconds=0, microseconds=0, milliseconds=0,

                 minutes=0, hours=0, weeks=0)

Use '<D/DT>.weekday()' to get the day of the week (Mon == 0).

'fold=1' means the second pass in case of time jumping back for one hour.

'<DTa> = resolve_imaginary(<DTa>)' fixes DTs that fall into the missing hour.

Now

<D/DTn>  = D/DT.today()                     # Current local date or naive datetime.

<DTn>    = DT.utcnow()                      # Naive datetime from current UTC time.

<DTa>    = DT.now(<tzinfo>)                 # Aware datetime from current tz time.

To extract time use '<DTn>.time()', '<DTa>.time()' or '<DTa>.timetz()'.

Timezone

<tzinfo> = UTC                              # UTC timezone. London without DST.

<tzinfo> = tzlocal()                        # Local timezone. Also gettz().

<tzinfo> = gettz('<Continent>/<City>')      # 'Continent/City_Name' timezone or None.

<DTa>    = <DT>.astimezone(<tzinfo>)        # Datetime, converted to the passed timezone.

<Ta/DTa> = <T/DT>.replace(tzinfo=<tzinfo>)  # Unconverted object with a new timezone.

Encode

<D/T/DT> = D/T/DT.fromisoformat('<iso>')    # Object from ISO string. Raises ValueError.

<DT>     = DT.strptime(<str>, '<format>')   # Datetime from str, according to format.

<D/DTn>  = D/DT.fromordinal(<int>)          # D/DTn from days since the Gregorian NYE 1.

<DTn>    = DT.fromtimestamp(<real>)         # Local time DTn from seconds since the Epoch.

<DTa>    = DT.fromtimestamp(<real>, <tz.>)  # Aware datetime from seconds since the Epoch.

ISO strings come in following forms: 'YYYY-MM-DD', 'HH:MM:SS.ffffff[±<offset>]', or both separated by an arbitrary character. Offset is formatted as: 'HH:MM'.

Epoch on Unix systems is: '1970-01-01 00:00 UTC', '1970-01-01 01:00 CET', …

Decode

<str>    = <D/T/DT>.isoformat(sep='T')      # Also timespec='auto/hours/minutes/seconds'.

<str>    = <D/T/DT>.strftime('<format>')    # Custom string representation.

<int>    = <D/DT>.toordinal()               # Days since Gregorian NYE 1, ignoring time and tz.

<float>  = <DTn>.timestamp()                # Seconds since the Epoch, from DTn in local tz.

<float>  = <DTa>.timestamp()                # Seconds since the Epoch, from DTa.

Format

>>> from datetime import datetime

>>> dt = datetime.strptime('2015-05-14 23:39:00.00 +0200', '%Y-%m-%d %H:%M:%S.%f %z')

>>> dt.strftime("%A, %dth of %B '%y, %I:%M%p %Z")

"Thursday, 14th of May '15, 11:39PM UTC+02:00"

When parsing, '%z' also accepts '±HH:MM'.

For abbreviated weekday and month use '%a' and '%b'.

Arguments

Inside Function Call

<function>(<positional_args>)                  # f(0, 0)

<function>(<keyword_args>)                     # f(x=0, y=0)

<function>(<positional_args>, <keyword_args>)  # f(0, y=0)

Inside Function Definition

def f(<nondefault_args>):                      # def f(x, y):

def f(<default_args>):                         # def f(x=0, y=0):

def f(<nondefault_args>, <default_args>):      # def f(x, y=0):

#Splat Operator

Inside Function Call

Splat expands a collection into positional arguments, while splatty-splat expands a dictionary into keyword arguments.

args   = (1, 2)

kwargs = {'x': 3, 'y': 4, 'z': 5}

func(*args, **kwargs)

Is the same as:

func(1, 2, x=3, y=4, z=5)

Inside Function Definition

Splat combines zero or more positional arguments into a tuple, while splatty-splat combines zero or more keyword arguments into a dictionary.

Lambda

<func> = lambda: <return_value>

<func> = lambda <arg_1>, <arg_2>: <return_value>

Comprehensions

<list> = [i+1 for i in range(10)]                         # [1, 2, ..., 10]

<set>  = {i for i in range(10) if i > 5}                  # {6, 7, 8, 9}

<iter> = (i+5 for i in range(10))                         # (5, 6, ..., 14)

<dict> = {i: i*2 for i in range(10)}                      # {0: 0, 1: 2, ..., 9: 18}

>>> [l+r for l in 'abc' for r in 'abc']

['aa', 'ab', 'ac', ..., 'cc']

Map, Filter, Reduce

<iter> = map(lambda x: x + 1, range(10))                  # (1, 2, ..., 10)

<iter> = filter(lambda x: x > 5, range(10))               # (6, 7, 8, 9)

<obj>  = reduce(lambda out, x: out + x, range(10))        # 45

Reduce must be imported from functools module.

Any, All

<bool> = any(<collection>)                                # False if empty.

<bool> = all(el[1] for el in <collection>)                # True if empty.

Conditional Expression

<obj> = <exp_if_true> if <condition> else <exp_if_false>

>>> [a if a else 'zero' for a in (0, 1, 2, 3)]

['zero', 1, 2, 3]

Namedtuple, Enum, Dataclass

from collections import namedtuple

Point     = namedtuple('Point', 'x y')

point     = Point(0, 0)

from enum import Enum

Direction = Enum('Direction', 'n e s w')

direction = Direction.n

from dataclasses import make_dataclass

Creature  = make_dataclass('Creature', ['loc', 'dir'])

creature  = Creature(Point(0, 0), Direction.n)

Decorator

A decorator takes a function, adds some functionality and returns it.

@decorator_name

def function_that_gets_passed_to_decorator():

    ...

Debugger Example

Decorator that prints function's name every time it gets called.

from functools import wraps

def debug(func):

    @wraps(func)

    def out(*args, **kwargs):

        print(func.__name__)

        return func(*args, **kwargs)

    return out

@debug

def add(x, y):

    return x + y

CSV

Text file format for storing spreadsheets.

import csv

Read

<reader> = csv.reader(<file>)       # Also: `dialect='excel', delimiter=','`.

<list>   = next(<reader>)           # Returns next row as a list of strings.

<list>   = list(<reader>)           # Returns list of remaining rows.

File must be opened with a 'newline=""' argument, or newlines embedded inside quoted fields will not be interpreted correctly!

Write

<writer> = csv.writer(<file>)       # Also: `dialect='excel', delimiter=','`.

<writer>.writerow(<collection>)     # Encodes objects using `str(<el>)`.

<writer>.writerows(<coll_of_coll>)  # Appends multiple rows.

File must be opened with a 'newline=""' argument, or '\r' will be added in front of every '\n' on platforms that use '\r\n' line endings!

Parameters

'dialect' - Master parameter that sets the default values.

'delimiter' - A one-character string used to separate fields.

'quotechar' - Character for quoting fields that contain special characters.

'doublequote' - Whether quotechars inside fields get doubled or escaped.

'skipinitialspace' - Whether whitespace after delimiter gets stripped.

'lineterminator' - Specifies how writer terminates rows.

'quoting' - Controls the amount of quoting: 0 - as necessary, 1 - all.

'escapechar' - Character for escaping 'quotechar' if 'doublequote' is False.

SQLite

import sqlite3

<conn> = sqlite3.connect(<path>)                # Also ':memory:'.

<conn>.close()                                  # Closes the connection.

Read

Returned values can be of type str, int, float, bytes or None.

<cursor> = <conn>.execute('<query>')            # Can raise a subclass of sqlite3.Error.

<tuple>  = <cursor>.fetchone()                  # Returns next row. Also next(<cursor>).

<list>   = <cursor>.fetchall()                  # Returns remaining rows. Also list(<cursor>).

Write

<conn>.execute('<query>')                       # Can raise a subclass of sqlite3.Error.

<conn>.commit()                                 # Saves all changes since the last commit.

<conn>.rollback()                               # Discards all changes since the last commit.

Or:

with <conn>:                                    # Exits the block with commit() or rollback(),

    <conn>.execute('<query>')                   # depending on whether an exception occurred.

Placeholders

Passed values can be of type str, int, float, bytes, None, bool, datetime.date or datetime.datetme.

Bools will be stored and returned as ints and dates as ISO formatted strings.

<conn>.execute('<query>', <list/tuple>)         # Replaces '?'s in query with values.

<conn>.execute('<query>', <dict/namedtuple>)    # Replaces ':<key>'s with values.

<conn>.executemany('<query>', <coll_of_above>)  # Runs execute() multiple times.

MySQL

$ pip3 install mysql-connector

from mysql import connector

<conn>   = connector.connect(host=<str>, …)     # `user=<str>, password=<str>, database=<str>`.

<cursor> = <conn>.cursor()                      # Only cursor has execute method.

<cursor>.execute('<query>')                     # Can raise a subclass of connector.Error.

<cursor>.execute('<query>', <list/tuple>)       # Replaces '%s's in query with values.

<cursor>.execute('<query>', <dict/namedtuple>)  # Replaces '%(<key>)s's with values.

Array

from array import array

<array> = array('<typecode>', <collection>)    # Array from collection of numbers.

<array> = array('<typecode>', <bytes>)         # Array from bytes object.

<array> = array('<typecode>', <array>)         # Treats array as a sequence of numbers.

<bytes> = bytes(<array>)                       # Or: <array>.tobytes()

<file>.write(<array>)                          # Writes array to the binary file.

Coroutines

Coroutines have a lot in common with threads, but unlike threads, they only give up control when they call another coroutine and they don’t use as much memory.

Coroutine definition starts with 'async' and its call with 'await'.

'asyncio.run(<coroutine>)' is the main entry point for asynchronous programs.

Functions wait(), gather() and as_completed() can be used when multiple coroutines need to be started at the same time.

Asyncio module also provides its own Queue, Event, Lock and Semaphore classes.

Runs a terminal game where you control an asterisk that must avoid numbers:

import asyncio, collections, curses, enum, random

P = collections.namedtuple('P', 'x y')         # Position

D = enum.Enum('D', 'n e s w')                  # Direction

def main(screen):

    curses.curs_set(0)                         # Makes cursor invisible.

    screen.nodelay(True)                       # Makes getch() non-blocking.

    asyncio.run(main_coroutine(screen))        # Starts running asyncio code.

async def main_coroutine(screen):

    state = {'*': P(0, 0), **{id_: P(30, 10) for id_ in range(10)}}

    moves = asyncio.Queue()

    coros = (*(random_controller(id_, moves) for id_ in range(10)),

             human_controller(screen, moves),

             model(moves, state, *screen.getmaxyx()),

             view(state, screen))

    await asyncio.wait(coros, return_when=asyncio.FIRST_COMPLETED)


async def random_controller(id_, moves):

    while True:

        d = random.choice(list(D))

        moves.put_nowait((id_, d))

        await asyncio.sleep(random.random() / 2)


async def human_controller(screen, moves):

    while True:

        ch = screen.getch()

        key_mappings = {259: D.n, 261: D.e, 258: D.s, 260: D.w}

        if ch in key_mappings:

            moves.put_nowait(('*', key_mappings[ch]))

        await asyncio.sleep(0.01)  


async def model(moves, state, height, width):

    while state['*'] not in {p for id_, p in state.items() if id_ != '*'}:

        id_, d = await moves.get()

        p      = state[id_]

        deltas = {D.n: P(0, -1), D.e: P(1, 0), D.s: P(0, 1), D.w: P(-1, 0)}

        new_p  = P(p.x + deltas[d].x, p.y + deltas[d].y)

        if 0 <= new_p.x < width-1 and 0 <= new_p.y < height:

            state[id_] = new_p


async def view(state, screen):

    while True:

        screen.clear()

        for id_, p in state.items():

            screen.addstr(p.y, p.x, str(id_))

        await asyncio.sleep(0.01)  


if __name__ == '__main__':

    curses.wrapper(main)

Libraries

#Progress Bar

# $ pip3 install tqdm

>>> from tqdm import tqdm

>>> from time import sleep

>>> for el in tqdm([1, 2, 3], desc='Processing'):

...     sleep(1)

Processing: 100%|████████████████████| 3/3 [00:03<00:00,  1.00s/it]

#Plot

# $ pip3 install matplotlib

import matplotlib.pyplot as plt

plt.plot(<x_data>, <y_data> [, label=<str>])   # Or: plt.plot(<y_data>)

plt.legend()                                   # Adds a legend.

plt.savefig(<path>)                            # Saves the figure.

plt.show()                                     # Displays the figure.

plt.clf()                                      # Clears the figure.

#Table

Prints a CSV file as an ASCII table:

# $ pip3 install tabulate

import csv, tabulate

with open('test.csv', encoding='utf-8', newline='') as file:

    rows   = csv.reader(file)

    header = [a.title() for a in next(rows)]

    table  = tabulate.tabulate(rows, header)

    print(table)

#Curses

Runs a basic file explorer in the terminal:

from curses import wrapper, ascii, A_REVERSE, KEY_UP, KEY_DOWN, KEY_LEFT, KEY_RIGHT, KEY_ENTER

from os import listdir, path, chdir


def main(screen):

    ch, first, selected, paths = 0, 0, 0, listdir()

    while ch != ascii.ESC:

        height, _ = screen.getmaxyx()

        screen.clear()

        for y, a_path in enumerate(paths[first : first+height]):

            screen.addstr(y, 0, a_path, A_REVERSE * (selected == first + y))

        ch = screen.getch()

        selected += (ch == KEY_DOWN) - (ch == KEY_UP)

        selected = max(0, min(len(paths)-1, selected))

        first += (first <= selected - height) - (first > selected)

        if ch in [KEY_LEFT, KEY_RIGHT, KEY_ENTER, 10, 13]:

            new_dir = '..' if ch == KEY_LEFT else paths[selected]

            if path.isdir(new_dir):

                chdir(new_dir)

                first, selected, paths = 0, 0, listdir()


if __name__ == '__main__':

    wrapper(main)

#Logging

# $ pip3 install loguru

from loguru import logger

logger.add('debug_{time}.log', colorize=True)  # Connects a log file.

logger.add('error_{time}.log', level='ERROR')  # Another file for errors or higher.

logger.<level>('A logging message.')

Levels: 'debug', 'info', 'success', 'warning', 'error', 'critical'.

Exceptions

Exception description, stack trace and values of variables are appended automatically.


try:

    ...

except <exception>:

    logger.exception('An error happened.')

Rotation

Argument that sets a condition when a new log file is created.


rotation=<int>|<datetime.timedelta>|<datetime.time>|<str>

'<int>' - Max file size in bytes.

'<timedelta>' - Max age of a file.

'<time>' - Time of day.

'<str>' - Any of above as a string: '100 MB', '1 month', 'monday at 12:00', …

Retention

Sets a condition which old log files get deleted.


retention=<int>|<datetime.timedelta>|<str>

'<int>' - Max number of files.

'<timedelta>' - Max age of a file.

'<str>' - Max age as a string: '1 week, 3 days', '2 months', …

#Scraping

Scrapes Python's URL, version number and logo from its Wikipedia page:

# $ pip3 install requests beautifulsoup4

import requests, bs4, sys


WIKI_URL = 'https://en.wikipedia.org/wiki/Python_(programming_language)'

try:

    html       = requests.get(WIKI_URL).text

    document   = bs4.BeautifulSoup(html, 'html.parser')

    table      = document.find('table', class_='infobox vevent')

    python_url = table.find('th', text='Website').next_sibling.a['href']

    version    = table.find('th', text='Stable release').next_sibling.strings.__next__()

    logo_url   = table.find('img')['src']

    logo       = requests.get(f'https:{logo_url}').content

    with open('test.png', 'wb') as file:

        file.write(logo)

    print(python_url, version)

except requests.exceptions.ConnectionError:

    print("You've got problems with connection.", file=sys.stderr)

#Web

# $ pip3 install bottle

from bottle import run, route, static_file, template, post, request, response

import json

Run

run(host='localhost', port=8080)        # Runs locally.

run(host='0.0.0.0', port=80)            # Runs globally.

Static Request

@route('/img/<image>')

def send_image(image):

    return static_file(image, 'img_dir/', mimetype='image/png')

Dynamic Request

@route('/<sport>')

def send_page(sport):

    return template('<h1>{{title}}</h1>', title=sport)

REST Request

@post('/<sport>/odds')

def odds_handler(sport):

    team = request.forms.get('team')

    home_odds, away_odds = 2.44, 3.29

    response.headers['Content-Type'] = 'application/json'

    response.headers['Cache-Control'] = 'no-cache'

    return json.dumps([team, home_odds, away_odds])

Test:

# $ pip3 install requests

>>> import threading, requests

>>> threading.Thread(target=run, daemon=True).start()

>>> url = 'http://localhost:8080/football/odds'

>>> data = {'team': 'arsenal f.c.'}

>>> response = requests.post(url, data=data)

>>> response.json()

['arsenal f.c.', 2.44, 3.29]

#Profiling

Stopwatch

from time import time

start_time = time()                     # Seconds since the Epoch.

...

duration = time() - start_time

High performance:

from time import perf_counter

start_time = perf_counter()             # Seconds since the restart.

...

duration = perf_counter() - start_time

Timing a Snippet

>>> from timeit import timeit

>>> timeit("''.join(str(i) for i in range(100))",

...        number=10000, globals=globals(), setup='pass')

0.34986

Profiling by Line

# $ pip3 install line_profiler memory_profiler

@profile

def main():

    a = [*range(10000)]

    b = {*range(10000)}

main()

$ kernprof -lv test.py

Line #   Hits     Time  Per Hit   % Time  Line Contents

=======================================================

     1                                    @profile

     2                                    def main():

     3      1    955.0    955.0     43.7      a = [*range(10000)]

     4      1   1231.0   1231.0     56.3      b = {*range(10000)}

$ python3 -m memory_profiler test.py

Line #         Mem usage      Increment   Line Contents

=======================================================

     1        37.668 MiB     37.668 MiB   @profile

     2                                    def main():

     3        38.012 MiB      0.344 MiB       a = [*range(10000)]

     4        38.477 MiB      0.465 MiB       b = {*range(10000)}

Call Graph

Generates a PNG image of a call graph with highlighted bottlenecks:

# $ pip3 install pycallgraph2

from pycallgraph2 import output, PyCallGraph

from datetime import datetime

filename = f'profile-{datetime.now():%Y%m%d%H%M%S}.png'

drawer = output.GraphvizOutput(output_file=filename)

with PyCallGraph(drawer):

    <code_to_be_profiled>

#NumPy

Array manipulation mini-language. It can run up to one hundred times faster than the equivalent Python code. An even faster alternative that runs on a GPU is called CuPy.


# $ pip3 install numpy

import numpy as np

<array> = np.array(<list>)

<array> = np.arange(from_inclusive, to_exclusive, ±step_size)

<array> = np.ones(<shape>)

<array> = np.random.randint(from_inclusive, to_exclusive, <shape>)

<array>.shape = <shape>

<view>  = <array>.reshape(<shape>)

<view>  = np.broadcast_to(<array>, <shape>)

<array> = <array>.sum(axis)

indexes = <array>.argmin(axis)

Shape is a tuple of dimension sizes.

Axis is an index of the dimension that gets collapsed. Leftmost dimension has index 0.

Indexing

<el>       = <2d_array>[row_index, column_index]

<1d_view>  = <2d_array>[row_index]

<1d_view>  = <2d_array>[:, column_index]

<1d_array> = <2d_array>[row_indexes, column_indexes]

<2d_array> = <2d_array>[row_indexes]

<2d_array> = <2d_array>[:, column_indexes]

<2d_bools> = <2d_array> ><== <el>

<1d_array> = <2d_array>[<2d_bools>]

Broadcasting

Broadcasting is a set of rules by which NumPy functions operate on arrays of different sizes and/or dimensions.


left  = [[0.1], [0.6], [0.8]]        # Shape: (3, 1)

right = [ 0.1 ,  0.6 ,  0.8 ]        # Shape: (3)

1. If array shapes differ in length, left-pad the shorter shape with ones:

left  = [[0.1], [0.6], [0.8]]        # Shape: (3, 1)

right = [[0.1 ,  0.6 ,  0.8]]        # Shape: (1, 3) <- !

2. If any dimensions differ in size, expand the ones that have size 1 by duplicating their elements:

left  = [[0.1, 0.1, 0.1], [0.6, 0.6, 0.6], [0.8, 0.8, 0.8]]  # Shape: (3, 3) <- !

right = [[0.1, 0.6, 0.8], [0.1, 0.6, 0.8], [0.1, 0.6, 0.8]]  # Shape: (3, 3) <- !

3. If neither non-matching dimension has size 1, raise an error.

Example

For each point returns index of its nearest point ([0.1, 0.6, 0.8] => [1, 2, 1]):

>>> points = np.array([0.1, 0.6, 0.8])

 [ 0.1,  0.6,  0.8]

>>> wrapped_points = points.reshape(3, 1)

[[ 0.1],

 [ 0.6],

 [ 0.8]]

>>> distances = wrapped_points - points

[[ 0. , -0.5, -0.7],

 [ 0.5,  0. , -0.2],

 [ 0.7,  0.2,  0. ]]

>>> distances = np.abs(distances)

[[ 0. ,  0.5,  0.7],

 [ 0.5,  0. ,  0.2],

 [ 0.7,  0.2,  0. ]]

>>> i = np.arange(3)

[0, 1, 2]

>>> distances[i, i] = np.inf

[[ inf,  0.5,  0.7],

 [ 0.5,  inf,  0.2],

 [ 0.7,  0.2,  inf]]

>>> distances.argmin(1)

[1, 2, 1]

#Image

# $ pip3 install pillow

from PIL import Image

<Image> = Image.new('<mode>', (width, height))  # Also: `color=<int/tuple/str>`.

<Image> = Image.open(<path>)                    # Identifies format based on file contents.

<Image> = <Image>.convert('<mode>')             # Converts image to the new mode.

<Image>.save(<path>)                            # Selects format based on the path extension.

<Image>.show()                                  # Opens image in default preview app.

<int/tuple> = <Image>.getpixel((x, y))          # Returns a pixel.

<Image>.putpixel((x, y), <int/tuple>)           # Writes a pixel to the image.

<ImagingCore> = <Image>.getdata()               # Returns a sequence of pixels.

<Image>.putdata(<list/ImagingCore>)             # Writes a sequence of pixels.

<Image>.paste(<Image>, (x, y))                  # Writes an image to the image.

<2d_array> = np.array(<Image_L>)                # Creates NumPy array from greyscale image.

<3d_array> = np.array(<Image_RGB>)              # Creates NumPy array from color image.

<Image>    = Image.fromarray(<array>)           # Creates image from NumPy array of floats.

Modes

'1' - 1-bit pixels, black and white, stored with one pixel per byte.

'L' - 8-bit pixels, greyscale.

'RGB' - 3x8-bit pixels, true color.

'RGBA' - 4x8-bit pixels, true color with transparency mask.

'HSV' - 3x8-bit pixels, Hue, Saturation, Value color space.

Examples

Creates a PNG image of a rainbow gradient:

WIDTH, HEIGHT = 100, 100

size = WIDTH * HEIGHT

hues = (255 * i/size for i in range(size))

img = Image.new('HSV', (WIDTH, HEIGHT))

img.putdata([(int(h), 255, 255) for h in hues])

img.convert('RGB').save('test.png')

Adds noise to a PNG image:

from random import randint

add_noise = lambda value: max(0, min(255, value + randint(-20, 20)))

img = Image.open('test.png').convert('HSV')

img.putdata([(add_noise(h), s, v) for h, s, v in img.getdata()])

img.convert('RGB').save('test.png')

Image Draw

from PIL import ImageDraw

<ImageDraw> = ImageDraw.Draw(<Image>)

<ImageDraw>.point((x, y), fill=None)

<ImageDraw>.line((x1, y1, x2, y2 [, ...]), fill=None, width=0, joint=None) 

<ImageDraw>.arc((x1, y1, x2, y2), from_deg, to_deg, fill=None, width=0)

<ImageDraw>.rectangle((x1, y1, x2, y2), fill=None, outline=None, width=0)

<ImageDraw>.polygon((x1, y1, x2, y2 [, ...]), fill=None, outline=None)

<ImageDraw>.ellipse((x1, y1, x2, y2), fill=None, outline=None, width=0)

Use 'fill=<color>' to set the primary color.

Use 'outline=<color>' to set the secondary color.

Color can be specified as an int, tuple, '#rrggbb[aa]' string or a color name.

#Animation

Creates a GIF of a bouncing ball:

# $ pip3 install imageio

from PIL import Image, ImageDraw

import imageio

WIDTH, R = 126, 10

frames = []

for velocity in range(1, 16):

    y = sum(range(velocity))

    frame = Image.new('L', (WIDTH, WIDTH))

    draw  = ImageDraw.Draw(frame)

    draw.ellipse((WIDTH/2-R, y, WIDTH/2+R, y+R*2), fill='white')

    frames.append(frame)

frames += reversed(frames[1:-1])

imageio.mimsave('test.gif', frames, duration=0.03)

#Audio

import wave

<Wave_read>  = wave.open('<path>', 'rb')        # Opens the WAV file.

framerate    = <Wave_read>.getframerate()       # Number of frames per second.

nchannels    = <Wave_read>.getnchannels()       # Number of samples per frame.

sampwidth    = <Wave_read>.getsampwidth()       # Sample size in bytes.

nframes      = <Wave_read>.getnframes()         # Number of frames.

<params>     = <Wave_read>.getparams()          # Immutable collection of above.

<bytes>      = <Wave_read>.readframes(nframes)  # Returns next 'nframes' frames.

<Wave_write> = wave.open('<path>', 'wb')        # Truncates existing file.

<Wave_write>.setframerate(<int>)                # 44100 for CD, 48000 for video.

<Wave_write>.setnchannels(<int>)                # 1 for mono, 2 for stereo.

<Wave_write>.setsampwidth(<int>)                # 2 for CD quality sound.

<Wave_write>.setparams(<params>)                # Sets all parameters.

<Wave_write>.writeframes(<bytes>)               # Appends frames to the file.

Bytes object contains a sequence of frames, each consisting of one or more samples.

In a stereo signal, the first sample of a frame belongs to the left channel.

Each sample consists of one or more bytes that, when converted to an integer, indicate the displacement of a speaker membrane at a given moment.

If sample width is one, then the integer should be encoded unsigned.

For all other sizes, the integer should be encoded signed with little-endian byte order.

Sample Values

┏━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━┯━━━━━━━━━━━━━┓

┃ sampwidth │     min     │ zero │     max     ┃

┠───────────┼─────────────┼──────┼─────────────┨

┃     1     │           0 │  128 │         255 ┃

┃     2     │      -32768 │    0 │       32767 ┃

┃     3     │    -8388608 │    0 │     8388607 ┃

┃     4     │ -2147483648 │    0 │  2147483647 ┃

┗━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━┷━━━━━━━━━━━━━┛

Read Float Samples from WAV File

def read_wav_file(filename):

    def get_int(bytes_obj):

        an_int = int.from_bytes(bytes_obj, 'little', signed=sampwidth!=1)

        return an_int - 128 * (sampwidth == 1)

    with wave.open(filename, 'rb') as file:

        sampwidth = file.getsampwidth()

        frames = file.readframes(-1)

    bytes_samples = (frames[i : i+sampwidth] for i in range(0, len(frames), sampwidth))

    return [get_int(b) / pow(2, sampwidth * 8 - 1) for b in bytes_samples]

Write Float Samples to WAV File

def write_to_wav_file(filename, float_samples, nchannels=1, sampwidth=2, framerate=44100):

    def get_bytes(a_float):

        a_float = max(-1, min(1 - 2e-16, a_float))

        a_float += sampwidth == 1

        a_float *= pow(2, sampwidth * 8 - 1)

        return int(a_float).to_bytes(sampwidth, 'little', signed=sampwidth!=1) 

    with wave.open(filename, 'wb') as file:

        file.setnchannels(nchannels)

        file.setsampwidth(sampwidth)

        file.setframerate(framerate)

        file.writeframes(b''.join(get_bytes(f) for f in float_samples))

Examples

Saves a sine wave to a mono WAV file:

from math import pi, sin

samples_f = (sin(i * 2 * pi * 440 / 44100) for i in range(100000))

write_to_wav_file('test.wav', samples_f)

Adds noise to a mono WAV file:

from random import random

add_noise = lambda value: value + (random() - 0.5) * 0.03

samples_f = (add_noise(f) for f in read_wav_file('test.wav'))

write_to_wav_file('test.wav', samples_f)

Plays a WAV file:

# $ pip3 install simpleaudio

from simpleaudio import play_buffer

with wave.open('test.wav', 'rb') as file:

    p = file.getparams()

    frames = file.readframes(-1)

    play_buffer(frames, p.nchannels, p.sampwidth, p.framerate)

Text to Speech

# $ pip3 install pyttsx3

import pyttsx3

engine = pyttsx3.init()

engine.say('Sally sells seashells by the seashore.')

engine.runAndWait()

#Synthesizer

Plays Popcorn by Gershon Kingsley:

# $ pip3 install simpleaudio

import math, struct, simpleaudio

from itertools import repeat, chain

F  = 44100

P1 = '71♩,69♪,,71♩,66♪,,62♩,66♪,,59♩,,,'

P2 = '71♩,73♪,,74♩,73♪,,74♪,,71♪,,73♩,71♪,,73♪,,69♪,,71♩,69♪,,71♪,,67♪,,71♩,,,'

get_pause   = lambda seconds: repeat(0, int(seconds * F))

sin_f       = lambda i, hz: math.sin(i * 2 * math.pi * hz / F)

get_wave    = lambda hz, seconds: (sin_f(i, hz) for i in range(int(seconds * F)))

get_hz      = lambda key: 8.176 * 2 ** (int(key) / 12)

parse_note  = lambda note: (get_hz(note[:2]), 1/4 if '♩' in note else 1/8)

get_samples = lambda note: get_wave(*parse_note(note)) if note else get_pause(1/8)

samples_f   = chain.from_iterable(get_samples(n) for n in f'{P1}{P1}{P2}'.split(','))

samples_b   = b''.join(struct.pack('<h', int(f * 30000)) for f in samples_f)

simpleaudio.play_buffer(samples_b, 1, 2, F)

#Pygame

Basic Example

# $ pip3 install pygame

import pygame as pg

pg.init()

screen = pg.display.set_mode((500, 500))

rect = pg.Rect(240, 240, 20, 20)

while all(event.type != pg.QUIT for event in pg.event.get()):

    deltas = {pg.K_UP: (0, -1), pg.K_RIGHT: (1, 0), pg.K_DOWN: (0, 1), pg.K_LEFT: (-1, 0)}

    for key_code, is_pressed in enumerate(pg.key.get_pressed()):

        rect = rect.move(deltas[key_code]) if key_code in deltas and is_pressed else rect

    screen.fill((0, 0, 0))

    pg.draw.rect(screen, (255, 255, 255), rect)

    pg.display.flip()

Rectangle

Object for storing rectangular coordinates.


<Rect> = pg.Rect(x, y, width, height)           # Floats get truncated into ints.

<int>  = <Rect>.x/y/centerx/centery/…           # Top, right, bottom, left. Allows assignments.

<tup.> = <Rect>.topleft/center/…                # Topright, bottomright, bottomleft.

<Rect> = <Rect>.move((x, y))                    # Use move_ip() to move in place.

<bool> = <Rect>.collidepoint((x, y))            # Checks if rectangle contains a point.

<bool> = <Rect>.colliderect(<Rect>)             # Checks if two rectangles overlap.

<int>  = <Rect>.collidelist(<list_of_Rect>)     # Returns index of first colliding Rect or -1.

<list> = <Rect>.collidelistall(<list_of_Rect>)  # Returns indexes of all colliding Rects.

Surface

Object for representing images.


<Surf> = pg.display.set_mode((width, height))   # Returns display surface.

<Surf> = pg.Surface((width, height), …)         # New RGB surface. Add `pg.SRCALPHA` for RGBA.

<Surf> = pg.image.load('<path>')                # Loads the image. Format depends on source.

<Surf> = <Surf>.subsurface(<Rect>)              # Returns a subsurface.

<Surf>.fill(color)                              # Tuple, Color('#rrggbb[aa]') or Color(<name>).

<Surf>.set_at((x, y), color)                    # Updates pixel.

<Surf>.blit(<Surf>, (x, y))                     # Draws passed surface to the surface.

from pygame.transform import scale, ...

<Surf> = scale(<Surf>, (width, height))         # Returns scaled surface.

<Surf> = rotate(<Surf>, degrees)                # Returns rotated and scaled surface.

<Surf> = flip(<Surf>, x_bool, y_bool)           # Returns flipped surface.

from pygame.draw import line, ...

line(<Surf>, color, (x1, y1), (x2, y2), width)  # Draws a line to the surface.

arc(<Surf>, color, <Rect>, from_rad, to_rad)    # Also: ellipse(<Surf>, color, <Rect>)

rect(<Surf>, color, <Rect>)                     # Also: polygon(<Surf>, color, points)

Font

<Font> = pg.font.SysFont('<name>', size)        # Loads the system font or default if missing.

<Font> = pg.font.Font('<path>', size)           # Loads the TTF file. Pass None for default.

<Surf> = <Font>.render(text, antialias, color)  # Background color can be specified at the end.

Sound

<Sound> = pg.mixer.Sound('<path>')              # Loads the WAV file.

<Sound>.play()                                  # Starts playing the sound.

Basic Mario Brothers Example

import collections, dataclasses, enum, io, itertools as it, pygame as pg, urllib.request

from random import randint


P = collections.namedtuple('P', 'x y')          # Position

D = enum.Enum('D', 'n e s w')                   # Direction

SIZE, MAX_SPEED = 50, P(5, 10)                  # Screen size, Speed limit


def main():

    def get_screen():

        pg.init()

        return pg.display.set_mode(2 * [SIZE*16])

    def get_images():

        url = 'https://gto76.github.io/python-cheatsheet/web/mario_bros.png'

        img = pg.image.load(io.BytesIO(urllib.request.urlopen(url).read()))

        return [img.subsurface(get_rect(x, 0)) for x in range(img.get_width() // 16)]

    def get_mario():

        Mario = dataclasses.make_dataclass('Mario', 'rect spd facing_left frame_cycle'.split())

        return Mario(get_rect(1, 1), P(0, 0), False, it.cycle(range(3)))

    def get_tiles():

        positions = [p for p in it.product(range(SIZE), repeat=2) if {*p} & {0, SIZE-1}] + \

            [(randint(1, SIZE-2), randint(2, SIZE-2)) for _ in range(SIZE**2 // 10)]

        return [get_rect(*p) for p in positions]

    def get_rect(x, y):

        return pg.Rect(x*16, y*16, 16, 16)

    run(get_screen(), get_images(), get_mario(), get_tiles())


def run(screen, images, mario, tiles):

    clock = pg.time.Clock()

    while all(event.type != pg.QUIT for event in pg.event.get()):

        keys = {pg.K_UP: D.n, pg.K_RIGHT: D.e, pg.K_DOWN: D.s, pg.K_LEFT: D.w}

        pressed = {keys.get(i) for i, on in enumerate(pg.key.get_pressed()) if on}

        update_speed(mario, tiles, pressed)

        update_position(mario, tiles)

        draw(screen, images, mario, tiles, pressed)

        clock.tick(28)


def update_speed(mario, tiles, pressed):

    x, y = mario.spd

    x += 2 * ((D.e in pressed) - (D.w in pressed))

    x -= x // abs(x) if x else 0

    y += 1 if D.s not in get_boundaries(mario.rect, tiles) else (D.n in pressed) * -10

    mario.spd = P(*[max(-limit, min(limit, s)) for limit, s in zip(MAX_SPEED, P(x, y))])


def update_position(mario, tiles):

    x, y = mario.rect.topleft

    n_steps = max(abs(s) for s in mario.spd)

    for _ in range(n_steps):

        mario.spd = stop_on_collision(mario.spd, get_boundaries(mario.rect, tiles))

        x, y = x + mario.spd.x/n_steps, y + mario.spd.y/n_steps

        mario.rect.topleft = x, y


def get_boundaries(rect, tiles):

    deltas = {D.n: P(0, -1), D.e: P(1, 0), D.s: P(0, 1), D.w: P(-1, 0)}

    return {d for d, delta in deltas.items() if rect.move(delta).collidelist(tiles) != -1}


def stop_on_collision(spd, bounds):

    return P(x=0 if (D.w in bounds and spd.x < 0) or (D.e in bounds and spd.x > 0) else spd.x,

             y=0 if (D.n in bounds and spd.y < 0) or (D.s in bounds and spd.y > 0) else spd.y)


def draw(screen, images, mario, tiles, pressed):

    def get_frame_index():

        if D.s not in get_boundaries(mario.rect, tiles):

            return 4

        return next(mario.frame_cycle) if {D.w, D.e} & pressed else 6

    screen.fill((85, 168, 255))

    mario.facing_left = (D.w in pressed) if {D.w, D.e} & pressed else mario.facing_left

    screen.blit(images[get_frame_index() + mario.facing_left * 9], mario.rect)

    for rect in tiles:

        screen.blit(images[18 if {*rect.topleft} & {0, (SIZE-1)*16} else 19], rect)

    pg.display.flip()


if __name__ == '__main__':

    main()

#Pandas

# $ pip3 install pandas

import pandas as pd

from pandas import Series, DataFrame

Series

Ordered dictionary with a name.


>>> Series([1, 2], index=['x', 'y'], name='a')

x    1

y    2

Name: a, dtype: int64

<Sr> = Series(<list>)                         # Assigns RangeIndex starting at 0.

<Sr> = Series(<dict>)                         # Takes dictionary's keys for index.

<Sr> = Series(<dict/Series>, index=<list>)    # Only keeps items with keys specified in index.

<el> = <Sr>.loc[key]                          # Or: <Sr>.iloc[index]

<Sr> = <Sr>.loc[keys]                         # Or: <Sr>.iloc[indexes]

<Sr> = <Sr>.loc[from_key : to_key_inclusive]  # Or: <Sr>.iloc[from_i : to_i_exclusive]

<el> = <Sr>[key/index]                        # Or: <Sr>.key

<Sr> = <Sr>[keys/indexes]                     # Or: <Sr>[<key_range/range>]

<Sr> = <Sr>[bools]                            # Or: <Sr>.i/loc[bools]

<Sr> = <Sr> ><== <el/Sr>                      # Returns a Series of bools.

<Sr> = <Sr> +-*/ <el/Sr>                      # Items with non-matching keys get value NaN.

<Sr> = <Sr>.append(<Sr>)                      # Or: pd.concat(<coll_of_Sr>)

<Sr> = <Sr>.combine_first(<Sr>)               # Adds items that are not yet present.

<Sr>.update(<Sr>)                             # Updates items that are already present.

Aggregate, Transform, Map:

<el> = <Sr>.sum/max/mean/idxmax/all()         # Or: <Sr>.aggregate(<agg_func>)

<Sr> = <Sr>.rank/diff/cumsum/ffill/interpl()  # Or: <Sr>.agg/transform(<trans_func>)

<Sr> = <Sr>.fillna(<el>)                      # Or: <Sr>.apply/agg/transform/map(<map_func>)

The way 'aggregate()' and 'transform()' find out whether the passed function accepts an element or the whole Series is by passing it a single value at first and if it raises an error, then they pass it the whole Series.

>>> sr = Series([1, 2], index=['x', 'y'])

x    1

y    2

┏━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━┓

┃             │    'sum'    │   ['sum']   │ {'s': 'sum'}  ┃

┠─────────────┼─────────────┼─────────────┼───────────────┨

┃ sr.apply(…) │      3      │    sum  3   │     s  3      ┃

┃ sr.agg(…)   │             │             │               ┃

┗━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━┛

┏━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━┓

┃             │    'rank'   │   ['rank']  │ {'r': 'rank'} ┃

┠─────────────┼─────────────┼─────────────┼───────────────┨

┃ sr.apply(…) │             │      rank   │               ┃

┃ sr.agg(…)   │     x  1    │   x     1   │    r  x  1    ┃

┃ sr.trans(…) │     y  2    │   y     2   │       y  2    ┃

┗━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━┛

Last result has a hierarchical index. Use '<Sr>[key_1, key_2]' to get its values.

DataFrame

Table with labeled rows and columns.


>>> DataFrame([[1, 2], [3, 4]], index=['a', 'b'], columns=['x', 'y'])

   x  y

a  1  2

b  3  4

<DF>    = DataFrame(<list_of_rows>)           # Rows can be either lists, dicts or series.

<DF>    = DataFrame(<dict_of_columns>)        # Columns can be either lists, dicts or series.

<el>    = <DF>.loc[row_key, column_key]       # Or: <DF>.iloc[row_index, column_index]

<Sr/DF> = <DF>.loc[row_key/s]                 # Or: <DF>.iloc[row_index/es]

<Sr/DF> = <DF>.loc[:, column_key/s]           # Or: <DF>.iloc[:, column_index/es]

<DF>    = <DF>.loc[row_bools, column_bools]   # Or: <DF>.iloc[row_bools, column_bools]

<Sr/DF> = <DF>[column_key/s]                  # Or: <DF>.column_key

<DF>    = <DF>[row_bools]                     # Keeps rows as specified by bools.

<DF>    = <DF>[<DF_of_bools>]                 # Assigns NaN to False values.

<DF>    = <DF> ><== <el/Sr/DF>                # Returns DF of bools. Sr is treated as a row.

<DF>    = <DF> +-*/ <el/Sr/DF>                # Items with non-matching keys get value NaN.

<DF>    = <DF>.set_index(column_key)          # Replaces row keys with values from a column.

<DF>    = <DF>.reset_index()                  # Moves row keys to a column named index.

<DF>    = <DF>.filter('<regex>', axis=1)      # Only keeps columns whose key matches the regex.

<DF>    = <DF>.melt(id_vars=column_key/s)     # Converts DataFrame from wide to long format.

Merge, Join, Concat:

>>> l = DataFrame([[1, 2], [3, 4]], index=['a', 'b'], columns=['x', 'y'])

   x  y 

a  1  2 

b  3  4 

>>> r = DataFrame([[4, 5], [6, 7]], index=['b', 'c'], columns=['y', 'z'])

   y  z

b  4  5

c  6  7

┏━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━┯━━━━━━━━━━━━┯━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━┓

┃                        │    'outer'    │   'inner'  │   'left'   │       Description        ┃

┠────────────────────────┼───────────────┼────────────┼────────────┼──────────────────────────┨

┃ l.merge(r, on='y',     │    x   y   z  │ x   y   z  │ x   y   z  │ Joins/merges on column.  ┃

┃            how=…)      │ 0  1   2   .  │ 3   4   5  │ 1   2   .  │ Also accepts left_on and ┃

┃                        │ 1  3   4   5  │            │ 3   4   5  │ right_on parameters.     ┃

┃                        │ 2  .   6   7  │            │            │ Uses 'inner' by default. ┃

┠────────────────────────┼───────────────┼────────────┼────────────┼──────────────────────────┨

┃ l.join(r, lsuffix='l', │    x yl yr  z │            │ x yl yr  z │ Joins/merges on row keys.┃

┃           rsuffix='r', │ a  1  2  .  . │ x yl yr  z │ 1  2  .  . │ Uses 'left' by default.  ┃

┃           how=…)       │ b  3  4  4  5 │ 3  4  4  5 │ 3  4  4  5 │ If r is a series, it is  ┃

┃                        │ c  .  .  6  7 │            │            │ treated as a column.     ┃

┠────────────────────────┼───────────────┼────────────┼────────────┼──────────────────────────┨

┃ pd.concat([l, r],      │    x   y   z  │     y      │            │ Adds rows at the bottom. ┃

┃           axis=0,      │ a  1   2   .  │     2      │            │ Uses 'outer' by default. ┃

┃           join=…)      │ b  3   4   .  │     4      │            │ A series is treated as a ┃

┃                        │ b  .   4   5  │     4      │            │ column. Use l.append(r)  ┃

┃                        │ c  .   6   7  │     6      │            │ to add a row instead.    ┃

┠────────────────────────┼───────────────┼────────────┼────────────┼──────────────────────────┨

┃ pd.concat([l, r],      │    x  y  y  z │            │            │ Adds columns at the      ┃

┃           axis=1,      │ a  1  2  .  . │ x  y  y  z │            │ right end. Uses 'outer'  ┃

┃           join=…)      │ b  3  4  4  5 │ 3  4  4  5 │            │ by default. A series is  ┃

┃                        │ c  .  .  6  7 │            │            │ treated as a column.     ┃

┠────────────────────────┼───────────────┼────────────┼────────────┼──────────────────────────┨

┃ l.combine_first(r)     │    x   y   z  │            │            │ Adds missing rows and    ┃

┃                        │ a  1   2   .  │            │            │ columns. Also updates    ┃

┃                        │ b  3   4   5  │            │            │ items that contain NaN.  ┃

┃                        │ c  .   6   7  │            │            │ R must be a DataFrame.   ┃

┗━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━┷━━━━━━━━━━━━┷━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━┛

Aggregate, Transform, Map:

<Sr> = <DF>.sum/max/mean/idxmax/all()         # Or: <DF>.apply/agg/transform(<agg_func>)

<DF> = <DF>.rank/diff/cumsum/ffill/interpl()  # Or: <DF>.apply/agg/transform(<trans_func>)

<DF> = <DF>.fillna(<el>)                      # Or: <DF>.applymap(<map_func>)

All operations operate on columns by default. Use 'axis=1' parameter to process the rows instead.

>>> df = DataFrame([[1, 2], [3, 4]], index=['a', 'b'], columns=['x', 'y'])

   x  y

a  1  2

b  3  4

┏━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━┓

┃             │    'sum'    │   ['sum']   │ {'x': 'sum'}  ┃

┠─────────────┼─────────────┼─────────────┼───────────────┨

┃ df.apply(…) │             │       x  y  │               ┃

┃ df.agg(…)   │     x  4    │  sum  4  6  │     x  4      ┃

┃             │     y  6    │             │               ┃

┗━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━┛

┏━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━┓

┃             │    'rank'   │   ['rank']  │ {'x': 'rank'} ┃

┠─────────────┼─────────────┼─────────────┼───────────────┨

┃ df.apply(…) │      x  y   │      x    y │        x      ┃

┃ df.agg(…)   │   a  1  1   │   rank rank │     a  1      ┃

┃ df.trans(…) │   b  2  2   │ a    1    1 │     b  2      ┃

┃             │             │ b    2    2 │               ┃

┗━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━┛

Use '<DF>[col_key_1, col_key_2][row_key]' to get the fifth result's values.

Encode, Decode:

<DF> = pd.read_json/html('<str/path/url>')

<DF> = pd.read_csv/pickle/excel('<path/url>')

<DF> = pd.read_sql('<table_name/query>', <connection>)

<DF> = pd.read_clipboard()

<dict> = <DF>.to_dict(['d/l/s/sp/r/i'])

<str>  = <DF>.to_json/html/csv/markdown/latex([<path>])

<DF>.to_pickle/excel(<path>)

<DF>.to_sql('<table_name>', <connection>)

GroupBy

Object that groups together rows of a dataframe based on the value of the passed column.


>>> df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 6]], index=list('abc'), columns=list('xyz'))

>>> df.groupby('z').get_group(3)

   x  y

a  1  2

>>> df.groupby('z').get_group(6)

   x  y

b  4  5

c  7  8

<GB> = <DF>.groupby(column_key/s)             # DF is split into groups based on passed column.

<DF> = <GB>.get_group(group_key/s)            # Selects a group by value of grouping column.

Aggregate, Transform, Map:

<DF> = <GB>.sum/max/mean/idxmax/all()         # Or: <GB>.apply/agg(<agg_func>)

<DF> = <GB>.rank/diff/cumsum/ffill()          # Or: <GB>.aggregate(<trans_func>)  

<DF> = <GB>.fillna(<el>)                      # Or: <GB>.transform(<map_func>)

>>> gb = df.groupby('z')

      x  y  z

3: a  1  2  3

6: b  4  5  6

   c  7  8  6

┏━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━┓

┃             │    'sum'    │    'rank'   │   ['rank']  │ {'x': 'rank'} ┃

┠─────────────┼─────────────┼─────────────┼─────────────┼───────────────┨

┃ gb.agg(…)   │      x   y  │      x  y   │      x    y │        x      ┃

┃             │  z          │   a  1  1   │   rank rank │     a  1      ┃

┃             │  3   1   2  │   b  1  1   │ a    1    1 │     b  1      ┃

┃             │  6  11  13  │   c  2  2   │ b    1    1 │     c  2      ┃

┃             │             │             │ c    2    2 │               ┃

┠─────────────┼─────────────┼─────────────┼─────────────┼───────────────┨

┃ gb.trans(…) │      x   y  │      x  y   │             │               ┃

┃             │  a   1   2  │   a  1  1   │             │               ┃

┃             │  b  11  13  │   b  1  1   │             │               ┃

┃             │  c  11  13  │   c  1  1   │             │               ┃

┗━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━┛

Rolling

Object for rolling window calculations.


<R_Sr/R_DF/R_GB> = <Sr/DF/GB>.rolling(window_size)  # Also: `min_periods=None, center=False`.

<R_Sr/R_DF>      = <R_DF/R_GB>[column_key/s]        # Or: <R>.column_key

<Sr/DF/DF>       = <R_Sr/R_DF/R_GB>.sum/max/mean()  # Or: <R>.apply/agg(<agg_func/str>)

#Plotly

# $ pip3 install plotly kaleido

from plotly.express import line

<Figure> = line(<DF>, x=<col_name>, y=<col_name>)        # Or: line(x=<list>, y=<list>)

<Figure>.update_layout(margin=dict(t=0, r=0, b=0, l=0))  # Or: paper_bgcolor='rgba(0, 0, 0, 0)'

<Figure>.write_html/json/image('<path>')                 # Also: <Figure>.show()

Covid deaths by continent:

Apr 2020May 2020Jun 2020Jul 2020Aug 2020Sep 2020Oct 2020Nov 20200200400600

ContinentSouth AmericaNorth AmericaEuropeAsiaAfricaOceaniaDateTotal Deaths per Million

covid = pd.read_csv('https://covid.ourworldindata.org/data/owid-covid-data.csv', 

                    usecols=['iso_code', 'date', 'total_deaths', 'population'])

continents = pd.read_csv('https://datahub.io/JohnSnowLabs/country-and-continent-codes-' + \

                         'list/r/country-and-continent-codes-list-csv.csv',

                         usecols=['Three_Letter_Country_Code', 'Continent_Name'])

df = pd.merge(covid, continents, left_on='iso_code', right_on='Three_Letter_Country_Code')

df = df.groupby(['Continent_Name', 'date']).sum().reset_index()

df['Total Deaths per Million'] = df.total_deaths * 1e6 / df.population

df = df[('2020-03-14' < df.date) & (df.date < '2020-11-25')]

df = df.rename({'date': 'Date', 'Continent_Name': 'Continent'}, axis='columns')

line(df, x='Date', y='Total Deaths per Million', color='Continent').show()

Confirmed covid cases, Dow Jones, gold, and Bitcoin price:

Mar 2020Apr 2020May 2020Jun 2020Jul 2020Aug 2020Sep 2020Oct 2020Nov 2020010M20M30M40M50M60M050100150200

Total CasesBitcoinGoldDow JonesTotal Cases%

import pandas as pd

import plotly.graph_objects as go


def main():

    display_data(wrangle_data(*scrape_data()))


def scrape_data():

    def scrape_covid():

        url = 'https://covid.ourworldindata.org/data/owid-covid-data.csv'

        df = pd.read_csv(url, usecols=['location', 'date', 'total_cases'])

        return df[df.location == 'World'].set_index('date').total_cases

    def scrape_yahoo(slug):

        url = f'https://query1.finance.yahoo.com/v7/finance/download/{slug}' + \

              '?period1=1579651200&period2=1608850800&interval=1d&events=history'

        df = pd.read_csv(url, usecols=['Date', 'Close'])

        return df.set_index('Date').Close

    return scrape_covid(), scrape_yahoo('BTC-USD'), scrape_yahoo('GC=F'), scrape_yahoo('^DJI')


def wrangle_data(covid, bitcoin, gold, dow):

    df = pd.concat([bitcoin, gold, dow], axis=1)

    df = df.sort_index().interpolate()

    df = df.rolling(10, min_periods=1, center=True).mean()

    df = df.loc['2020-02-23':'2020-11-25']

    df = (df / df.iloc[0]) * 100

    return pd.concat([covid, df], axis=1, join='inner')


def display_data(df):

    df.columns = ['Total Cases', 'Bitcoin', 'Gold', 'Dow Jones']

    figure = go.Figure()

    for col_name in df:

        yaxis = 'y1' if col_name == 'Total Cases' else 'y2'

        trace = go.Scatter(x=df.index, y=df[col_name], name=col_name, yaxis=yaxis)

        figure.add_trace(trace)

    figure.update_layout(

        yaxis1=dict(title='Total Cases', rangemode='tozero'),

        yaxis2=dict(title='%', rangemode='tozero', overlaying='y', side='right'),

        legend=dict(x=1.1)

    ).show()


if __name__ == '__main__':

    main()

#PySimpleGUI

# $ pip3 install PySimpleGUI

import PySimpleGUI as sg

layout = [[sg.Text("What's your name?")], [sg.Input()], [sg.Button('Ok')]]

window = sg.Window('Window Title', layout)

event, values = window.read()

print(f'Hello {values[0]}!' if event == 'Ok' else '')

#Appendix

Cython

Library that compiles Python code into C.


# $ pip3 install cython

import pyximport; pyximport.install()

import <cython_script>

<cython_script>.main()

Definitions:

All 'cdef' definitions are optional, but they contribute to the speed-up.

Script needs to be saved with a 'pyx' extension.

cdef <type> <var_name> = <el>

cdef <type>[n_elements] <var_name> = [<el_1>, <el_2>, ...]

cdef <type/void> <func_name>(<type> <arg_name_1>, ...):

cdef class <class_name>:

    cdef public <type> <attr_name>

    def __init__(self, <type> <arg_name>):

        self.<attr_name> = <arg_name>

cdef enum <enum_name>: <member_name_1>, <member_name_2>, ...

PyInstaller

$ pip3 install pyinstaller

$ pyinstaller script.py                        # Compiles into './dist/script' directory.

$ pyinstaller script.py --onefile              # Compiles into './dist/script' console app.

$ pyinstaller script.py --windowed             # Compiles into './dist/script' windowed app.

$ pyinstaller script.py --add-data '<path>:.'  # Adds file to the root of the executable.

File paths need to be updated to 'os.path.join(sys._MEIPASS, <path>)'.

Basic Script Template

#!/usr/bin/env python3

#

# Usage: .py

#


from sys import argv, exit

from collections import defaultdict, namedtuple

from dataclasses import make_dataclass

from enum import Enum

import functools as ft, itertools as it, operator as op, re



def main():

    pass



###

##  UTIL

#


def read_file(filename):

    with open(filename, encoding='utf-8') as file:

        return file.readlines()



if __name__ == '__main__':

    main()

#Index

Only available in the PDF.

Ctrl+F / ⌘F is usually sufficient.

Searching '#<title>' will limit the search to the titles.

Признаки "плохого кода"

Кратко рассмотрим 12 признаков, когда код можно улучшить: 1. Duplicated Code  — иногда повторяющийся код не всегда несет в себе пользу. Выде...