Pythonic Patterns
Generator Functions
Lazy Sequences
Suppose you need to process a million records. Loading them all into a list
could exhaust memory. A generator function uses yield to produce values one at
a time, on demand, so it can process millions of records while using almost no memory.
Basic yield
Use yield instead of return to create a generator.
# Basic Generator Function with yield
print("=== Basic Generator ===\n")

# Inclusive upper bound shared by both functions below.
# NOTE(review): the right-hand side of this assignment was missing in the
# source; 5 is a placeholder that keeps the demo output short -- confirm.
limit = 5


# Regular function returns all at once
def get_numbers_list():
    """Return the integers 1..limit as a fully built list."""
    result = []
    for i in range(1, limit + 1):
        result.append(i)
    return result


numbers = get_numbers_list()
print(f"List function: {numbers}")
print(f"Type: {type(numbers)}")


# Generator function yields one at a time
def get_numbers_gen():
    """Yield the integers 1..limit one at a time."""
    for i in range(1, limit + 1):
        yield i  # Pause and emit value


gen = get_numbers_gen()
print(f"\nGenerator function: {gen}")
print(f"Type: {type(gen)}")

# Iterate to get values
print("\nIterating:")
for num in gen:
    print(f" Got: {num}")
print("\n=== yield vs return ===")


def with_return():
    """Return 1; the later return statements are deliberately unreachable."""
    return 1
    return 2  # Never reached!
    return 3


def with_yield():
    """Yield 1, then 2, then 3, pausing after each value."""
    yield 1
    yield 2  # Reached on second next()
    yield 3  # Reached on third next()


print(f"return: {with_return()}")
print(f"yield (list): {list(with_yield())}")
print("\n=== Step by Step ===")


def count_to_three():
    """Yield 1, 2 and 3, printing a trace message whenever the body runs."""
    print(" Starting...")
    yield 1
    print(" After first yield...")
    yield 2
    print(" After second yield...")
    yield 3
    print(" Done!")


print("Creating generator:")
gen = count_to_three()  # Nothing printed yet!
print("(Generator created but not started)")
print("\nIterating:")
for value in gen:
    print(f" => Got {value}")
# Basic Generator Function with yield
print("=== Basic Generator ===\n")

# Inclusive upper bound shared by both functions below.
# NOTE(review): the right-hand side of this assignment was missing in the
# source; 5 is a placeholder that keeps the demo output short -- confirm.
limit = 5


# Regular function returns all at once
def get_numbers_list():
    """Return the integers 1..limit as a fully built list."""
    result = []
    for i in range(1, limit + 1):
        result.append(i)
    return result


numbers = get_numbers_list()
print(f"List function: {numbers}")
print(f"Type: {type(numbers)}")


# Generator function yields one at a time
def get_numbers_gen():
    """Yield the integers 1..limit one at a time."""
    for i in range(1, limit + 1):
        yield i  # Pause and emit value


gen = get_numbers_gen()
print(f"\nGenerator function: {gen}")
print(f"Type: {type(gen)}")

# Iterate to get values
print("\nIterating:")
for num in gen:
    print(f" Got: {num}")
print("\n=== yield vs return ===")


def with_return():
    """Return 1; the later return statements are deliberately unreachable."""
    return 1
    return 2  # Never reached!
    return 3


def with_yield():
    """Yield 1, then 2, then 3, pausing after each value."""
    yield 1
    yield 2  # Reached on second next()
    yield 3  # Reached on third next()


print(f"return: {with_return()}")
print(f"yield (list): {list(with_yield())}")
print("\n=== Step by Step ===")


def count_to_three():
    """Yield 1, 2 and 3, printing a trace message whenever the body runs."""
    print(" Starting...")
    yield 1
    print(" After first yield...")
    yield 2
    print(" After second yield...")
    yield 3
    print(" Done!")


print("Creating generator:")
gen = count_to_three()  # Nothing printed yet!
print("(Generator created but not started)")
print("\nIterating:")
for value in gen:
    print(f" => Got {value}")
yield pauses the function and hands a value back to the caller. The next iteration resumes the function right after that yield.
Lazy evaluation
Values are computed only when needed.
# Lazy Evaluation
print("=== Lazy Evaluation ===\n")


# This function does "work"
def process(n):
    """Print a message showing that n is being processed, then return n * 2."""
    print(f" [Processing {n}...]")
    return n * 2


# List: processes ALL items immediately
print("Creating list (eager):")
list_result = [process(i) for i in range(5)]
print(f"List created: {list_result}")
print("\nUsing first 2 items:")
for item in list_result[:2]:
    print(f" Using: {item}")
print("\n" + "=" * 40)


# Generator: processes items on demand
def process_gen(items):
    """Yield process(item) for each item, calling process only on demand."""
    for item in items:
        yield process(item)


print("\nCreating generator (lazy):")
gen_result = process_gen(range(5))
print(f"Generator created: {gen_result}")
print("(Nothing processed yet!)")
print("\nUsing first 2 items:")
count = 0
for item in gen_result:
    print(f" Using: {item}")
    count += 1
    # Stop after two items so the remaining three are never processed.
    if count >= 2:
        break
print("\nNotice: Only processed items 0 and 1!")
print("\n=== Why Lazy is Better ===")
# Imagine processing millions of records
# List: Must process ALL before using ANY
# Generator: Process only what you need
print("""
List (eager):
✗ Creates all items upfront
✗ Uses memory for all items
✗ Wastes work if you only need some
Generator (lazy):
✓ Creates items on demand
✓ Uses minimal memory
✓ Can stop early, saving work
""")
Nothing runs until you iterate. Each next() runs to the next yield.
Generator state
Generators remember their state between yields.
# Generator State
print("=== Generators Maintain State ===\n")


# Generator remembers where it was
def counter(start=0):
    """Yield start, start + 1, start + 2, ... without ever finishing."""
    n = start
    while True:
        yield n
        n += 1


# Create counter starting at 10
count = counter(10)
print("Getting values one at a time:")
print(f" next(): {next(count)}")  # 10
print(f" next(): {next(count)}")  # 11
print(f" next(): {next(count)}")  # 12
print(f" next(): {next(count)}")  # 13
print("\nGenerator remembered it was at 13!")
print("\n=== State with Multiple Variables ===")


def fibonacci():
    """Yield the Fibonacci numbers 0, 1, 1, 2, 3, ... without ever finishing."""
    a, b = 0, 1
    while True:
        yield a
        a, b = b, a + b


fib = fibonacci()
print("Fibonacci sequence:")
for _ in range(10):
    print(f" {next(fib)}", end="")
print()
print("\n=== Each Generator is Independent ===")
gen1 = counter(0)
gen2 = counter(100)
print("Two independent counters:")
print(f" gen1: {next(gen1)}, {next(gen1)}, {next(gen1)}")  # 0, 1, 2
print(f" gen2: {next(gen2)}, {next(gen2)}, {next(gen2)}")  # 100, 101, 102
print(f" gen1 continues: {next(gen1)}")  # 3
print(f" gen2 continues: {next(gen2)}")  # 103
print("\n=== Generator is Exhaustible ===")


def limited():
    """Yield 1, 2 and 3, then finish."""
    yield 1
    yield 2
    yield 3


gen = limited()
print("Limited generator:")
print(f" {list(gen)}")  # [1, 2, 3]
print(f" Again: {list(gen)}")  # [] - Empty! Already exhausted
Local variables persist. The function resumes exactly where it left off.
Memory efficiency
Compare memory usage: list vs generator.
# Memory Efficiency
import sys

print("=== Memory Comparison ===\n")
# Create a range of numbers
n = 1000000  # One million
# List: stores all values
number_list = [i for i in range(n)]
# sys.getsizeof is shallow: it measures the list object itself, not the
# int objects the list refers to.
list_size = sys.getsizeof(number_list)
print(f"List of {n:,} numbers:")
print(f" Memory: {list_size:,} bytes ({list_size / 1024 / 1024:.2f} MB)")


# Generator: stores only the function state
def number_gen(n):
    """Yield the integers 0..n-1 one at a time."""
    for i in range(n):
        yield i


gen = number_gen(n)
gen_size = sys.getsizeof(gen)
print(f"\nGenerator for {n:,} numbers:")
print(f" Memory: {gen_size:,} bytes")
print(f"\nRatio: List uses {list_size / gen_size:.0f}x more memory!")
print("\n=== Why Generators Use Less Memory ===")
print("""
List stores:
[0, 1, 2, 3, 4, ..., 999999]
→ All 1,000,000 values in memory
Generator stores:
- Current position (integer)
- Local variables
- Code reference
→ Just a few hundred bytes, regardless of size!
""")
print("=== Processing Large Data ===\n")


# Simulating reading a large file line by line
def read_large_data():
    """Yield 100 simulated lines of the form 'Line <i>: ' plus fifty x's."""
    for i in range(100):  # Simulating 100 "lines"
        yield f"Line {i}: " + "x" * 50  # Each "line" is 50+ chars


# With generator: one line at a time
print("Processing 'large file' with generator:")
for i, line in enumerate(read_large_data()):
    if i >= 3:  # Only show first 3
        print(" ...")
        break
    print(f" {line[:40]}...")
print("\n=== Infinite Sequences ===")


# Impossible with a list!
def infinite_counter():
    """Yield 0, 1, 2, ... without ever finishing."""
    n = 0
    while True:
        yield n
        n += 1


counter = infinite_counter()
print("Infinite counter (showing first 10):")
print(" ", end="")
for i in range(10):
    print(next(counter), end=" ")
print("...")
print("\nA list of infinite numbers would crash!")
print("A generator works because values are computed on demand.")
Lists store all of their values at once. Generators keep only their current state and produce one value at a time.
The iteration protocol
Understand StopIteration and manual iteration.
# StopIteration and the Iteration Protocol
print("=== How Iteration Works ===\n")


# A simple generator
def count_to_three():
    """Yield 1, 2 and 3, then finish."""
    yield 1
    yield 2
    yield 3


gen = count_to_three()
# Using next() manually
print("Manual iteration with next():")
print(f" next(gen): {next(gen)}")  # 1
print(f" next(gen): {next(gen)}")  # 2
print(f" next(gen): {next(gen)}")  # 3
# What happens when exhausted?
print("\nWhen exhausted:")
try:
    # Only the call that can raise StopIteration sits inside the try.
    value = next(gen)
except StopIteration:
    print(" StopIteration raised!")
else:
    print(f" Got: {value}")
print("\n=== for Loop Handles StopIteration ===")
# for loop catches StopIteration automatically
print("for loop (clean):")
for num in count_to_three():
    print(f" {num}")
print(" (Loop ended cleanly)")
print("\n=== Default Value for next() ===")
gen = count_to_three()
print("Using next() with default:")
# Use default to avoid exception
print(f" {next(gen, 'done')}")  # 1
print(f" {next(gen, 'done')}")  # 2
print(f" {next(gen, 'done')}")  # 3
print(f" {next(gen, 'done')}")  # 'done' - no exception!
print(f" {next(gen, 'done')}")  # 'done'
print("\n=== The Iteration Protocol ===")
# iter() and next() are the protocol
my_list = [10, 20, 30]
iterator = iter(my_list)
print("List iteration protocol:")
print(f" iter([10,20,30]): {iterator}")
print(f" next(): {next(iterator)}")
print(f" next(): {next(iterator)}")
print(f" next(): {next(iterator)}")
print("\n=== Generators ARE Iterators ===")
gen = count_to_three()
print(f"Generator: {gen}")
print(f"iter(gen): {iter(gen)}")
print(f"Same object: {gen is iter(gen)}")
print("""
Generators implement the iterator protocol:
- __iter__() returns self
- __next__() runs until yield or raises StopIteration
""")
A generator raises StopIteration when it is exhausted. for loops handle this automatically.
Exercise: practical.py
Real-world generator patterns: file processing, infinite sequences