Generators are memory-efficient iterators that produce values on-demand using the yield
keyword.
# Basic generator function
def count_up_to(n):
    """Yield the integers 1 through n, one at a time."""
    current = 1
    while current <= n:
        yield current
        current += 1

# Using the generator
counter = count_up_to(5)
for num in counter:
    print(num)  # Prints 1, 2, 3, 4, 5
# Generator vs List comparison
def numbers_list(n):
    """Return 0..n-1 as a fully materialized list."""
    return list(range(n))  # the entire result is resident in memory

def numbers_generator(n):
    """Yield 0..n-1 lazily, one value per iteration."""
    yield from range(n)

# Memory usage difference
big_list = numbers_list(1000000)  # holds a million ints at once
big_gen = numbers_generator(1000000)  # only a paused generator frame
# Generator expression: comprehension syntax, but with parentheses
squares = (x ** 2 for x in range(10))
print(type(squares))  # <class 'generator'>
for square in squares:
    print(square)  # 0, 1, 4, 9, 16, 25, 36, 49, 64, 81

# Lazy evaluation keeps memory flat even over a huge range
even_squares = (x ** 2 for x in range(1000000) if x % 2 == 0)
print(sum(even_squares))  # sum() consumes one value at a time
# Chaining generators
def fibonacci():
    """Yield the Fibonacci sequence forever: 0, 1, 1, 2, 3, ..."""
    current, following = 0, 1
    while True:
        yield current
        current, following = following, current + following

# Take first 10 fibonacci numbers
fib_gen = fibonacci()
first_10_fib = (next(fib_gen) for _ in range(10))
print(list(first_10_fib))  # [0, 1, 1, 2, 3, 5, 8, 13, 21, 34]
# Generator with send() method
def echo_generator():
    """Echo back each value delivered via send() as an "Echo: ..." string."""
    while True:
        message = yield  # pause here; send() resumes and binds message
        if message is not None:
            yield f"Echo: {message}"

gen = echo_generator()
next(gen)  # advance to the first yield so send() is legal
print(gen.send("Hello"))  # Echo: Hello
# Generator with return value
def generator_with_return():
    """Yield 1, 2, 3; the return value rides on StopIteration.value."""
    for value in (1, 2, 3):
        yield value
    return "Generator finished"

gen = generator_with_return()
try:
    while True:
        print(next(gen))
except StopIteration as e:
    print(f"Return value: {e.value}")
# Generator delegation with yield from
def inner_generator():
    """Yield the values 1, 2, 3."""
    for item in (1, 2, 3):
        yield item

def outer_generator():
    """Wrap inner_generator's values between "start" and "end" markers."""
    yield "start"
    yield from inner_generator()  # hand control to the inner generator
    yield "end"

for value in outer_generator():
    print(value)  # start, 1, 2, 3, end
# File processing generator
def read_large_file(file_path):
    """Stream a text file lazily, yielding each line stripped of whitespace."""
    with open(file_path, 'r') as handle:
        yield from map(str.strip, handle)
# CSV processing
def process_csv_rows(file_path):
    """Yield each CSV data row as a dict keyed by the header columns."""
    with open(file_path, 'r') as handle:
        columns = next(handle).strip().split(',')  # first line is the header
        for row in handle:
            yield dict(zip(columns, row.strip().split(',')))
# Infinite sequence generators
def primes():
    """Generate prime numbers infinitely via trial division.

    Yields:
        int: the next prime, starting from 2.

    Keeps every prime found so far and tests each odd candidate only
    against primes up to sqrt(candidate).
    """
    yield 2
    primes_found = [2]
    # BUG FIX: the original used range(3, None, 2), but range() does not
    # accept None as a stop value (TypeError on the second next() call).
    # range() cannot express an unbounded sequence, so count odd numbers
    # by hand instead.
    candidate = 3
    while True:
        is_prime = True
        for prime in primes_found:
            if prime * prime > candidate:
                break  # no divisor <= sqrt(candidate): candidate is prime
            if candidate % prime == 0:
                is_prime = False
                break
        if is_prime:
            primes_found.append(candidate)
            yield candidate
        candidate += 2  # even numbers > 2 are never prime

# Get first 10 primes
prime_gen = primes()
first_10_primes = [next(prime_gen) for _ in range(10)]
print(first_10_primes)  # [2, 3, 5, 7, 11, 13, 17, 19, 23, 29]
# Building data processing pipelines
def read_numbers(filename):
    """Yield one float per line of *filename*."""
    with open(filename, 'r') as handle:
        yield from (float(entry.strip()) for entry in handle)
def filter_positive(numbers):
    """Pass through only the values strictly greater than zero."""
    yield from (value for value in numbers if value > 0)
def square_numbers(numbers):
    """Yield the square of each incoming number."""
    yield from (value ** 2 for value in numbers)
def take(n, iterable):
    """Take the first n items from iterable.

    Args:
        n: maximum number of items to yield.
        iterable: any iterable; may be a shared iterator.

    Yields:
        The first n items (fewer if the iterable is shorter).

    BUG FIX: the original enumerate-based loop pulled item n from the
    underlying iterator before breaking, silently discarding one element
    per call when the caller reuses the iterator (batch_generator lost
    one item between every pair of batches). This version never pulls
    more than the n items it yields.
    """
    it = iter(iterable)
    for _ in range(n):
        try:
            yield next(it)
        except StopIteration:
            return  # source exhausted before n items
# Pipeline usage
# numbers = read_numbers('data.txt')
# positive = filter_positive(numbers)
# squared = square_numbers(positive)
# first_5 = take(5, squared)
# result = list(first_5)
# Or chain them together
def process_pipeline(filename, n):
    """Read numbers from *filename*, keep positives, square them, return first n as a list."""
    stream = read_numbers(filename)
    stream = filter_positive(stream)
    stream = square_numbers(stream)
    return list(take(n, stream))
# Batch processing generator
def batch_generator(iterable, batch_size):
    """Group items into batches of up to batch_size (last may be shorter).

    Args:
        iterable: the source of items.
        batch_size: maximum items per yielded list.

    Yields:
        list: consecutive batches covering every item exactly once.

    BUG FIX: the original built each batch via take(), whose enumerate
    loop consumed one extra item from the shared iterator per full batch,
    so one element was dropped between batches (e.g. 5, 11, 17, 23 for
    range(25) with batch_size=5). Pull items directly instead.
    """
    source = iter(iterable)
    while True:
        batch = []
        for _ in range(batch_size):
            try:
                batch.append(next(source))
            except StopIteration:
                break  # source exhausted mid-batch
        if not batch:
            return  # nothing left: stop cleanly
        yield batch
# Process data in batches
data = range(25)
for chunk in batch_generator(data, 5):
    print(f"Processing batch: {chunk}")
    # Process each batch...
# Stateful generator
def running_average():
    """Yield the running mean of every value delivered via send()."""
    running_sum = 0
    seen = 0
    current_mean = None
    while True:
        incoming = yield current_mean  # send() resumes here with a value
        if incoming is not None:
            running_sum += incoming
            seen += 1
            current_mean = running_sum / seen

avg_gen = running_average()
next(avg_gen)  # advance to the first yield so send() works
print(avg_gen.send(10))  # 10.0
print(avg_gen.send(20))  # 15.0
print(avg_gen.send(30))  # 20.0
# Generator-based state machine
def traffic_light():
    """Cycle endlessly through the red -> green -> yellow states."""
    phases = (
        ("Red light", "red"),
        ("Green light", "green"),
        ("Yellow light", "yellow"),
    )
    while True:
        for announcement, state_name in phases:
            print(announcement)
            yield state_name

light = traffic_light()
for _ in range(6):
    state = next(light)
    print(f"Current state: {state}")
# Cooperative multitasking with generators
def task1():
    """Print five numbered steps, yielding control after each one."""
    for step in range(5):
        print(f"Task 1 - Step {step}")
        yield

def task2():
    """Print three numbered steps, yielding control after each one."""
    for step in range(3):
        print(f"Task 2 - Step {step}")
        yield

def scheduler(tasks):
    """Round-robin over tasks, advancing each once per pass until all finish."""
    while tasks:
        for task in list(tasks):  # snapshot: removal below mutates tasks
            try:
                next(task)
            except StopIteration:
                tasks.remove(task)

# Run tasks cooperatively
scheduler([task1(), task2()])