Generators

What are Generators?

Generators are memory-efficient iterators: a function that contains the yield keyword produces its values lazily, one at a time and only on demand, instead of building them all up front.

# Basic generator function
def count_up_to(n):
    """Yield the integers 1, 2, 3, ... for as long as they do not exceed ``n``.

    Nothing is yielded when ``n`` is smaller than 1.
    """
    current = 1
    while True:
        if not current <= n:
            return  # Passed the upper bound: the generator is finished
        yield current
        current = current + 1

# Using the generator: calling count_up_to(5) only creates the generator
# object; its body runs step by step as the for loop asks for each value.
counter = count_up_to(5)
for num in counter:
    print(num)  # Prints 1, 2, 3, 4, 5

# Generator vs List comparison
def numbers_list(n):
    """Return the integers 0 .. n-1 as a fully built list."""
    # The whole list exists in memory before this function returns
    return list(range(n))

def numbers_generator(n):
    """Lazily yield the integers 0 .. n-1, one per request."""
    pending = iter(range(n))
    for value in pending:
        # Only the current value is handed out; no list is ever built
        yield value

# Memory usage difference: the list holds all one million integers at once,
# while the generator object only keeps the state needed to produce the next one.
big_list = numbers_list(1000000)      # Uses ~40MB (rough figure; varies by platform and Python version)
big_gen = numbers_generator(1000000)  # Uses minimal memory; no values produced yet

Generator Expressions

# Generator expression (like list comprehension but with parentheses)
squares = (x**2 for x in range(10))
print(type(squares))  # <class 'generator'>

# Iterating consumes the generator; after this loop `squares` is exhausted
# and iterating it again would produce nothing.
for square in squares:
    print(square)  # 0, 1, 4, 9, 16, 25, 36, 49, 64, 81

# Generator expressions are memory efficient
even_squares = (x**2 for x in range(1000000) if x % 2 == 0)
print(sum(even_squares))  # Calculates sum without storing all values

# Chaining generators
def fibonacci():
    """Yield the Fibonacci numbers 0, 1, 1, 2, 3, 5, ... without ever stopping.

    The sequence is infinite, so the consumer decides how many values to take.
    """
    current = 0
    upcoming = 1
    while True:
        yield current
        following = current + upcoming
        current = upcoming
        upcoming = following

# Take first 10 fibonacci numbers
fib_gen = fibonacci()
# A generator expression that pulls from another generator: each of its ten
# steps calls next(fib_gen) once, so nothing is computed until list() runs.
# (itertools.islice(fib_gen, 10) is the standard-library way to do the same.)
first_10_fib = (next(fib_gen) for _ in range(10))
print(list(first_10_fib))  # [0, 1, 1, 2, 3, 5, 8, 13, 21, 34]

Advanced Generator Features

# Generator with send() method
def echo_generator():
    """Coroutine-style generator that echoes back every value sent to it.

    Usage: prime it once with ``next(gen)`` (which yields ``None``); after
    that each ``gen.send(value)`` returns ``f"Echo: {value}"``.  Sending
    ``None`` (or calling ``next(gen)`` again) yields ``None``.

    A single ``yield`` both returns the reply to the previous ``send()`` and
    receives the next value.  Using two separate yields (one to receive, one
    to reply) would silently swallow every second value sent in.
    """
    reply = None
    while True:
        received = yield reply
        reply = f"Echo: {received}" if received is not None else None

gen = echo_generator()
# send() can only deliver a value to a generator that is paused at a yield,
# so the generator must first be advanced to its first yield.
next(gen)  # Prime the generator
print(gen.send("Hello"))  # Echo: Hello

# Generator with return value
def generator_with_return():
    """Yield 1, 2 and 3, then finish with a return value.

    The returned string is not yielded; it is attached to the StopIteration
    raised on exhaustion, as its ``value`` attribute.
    """
    for item in (1, 2, 3):
        yield item
    return "Generator finished"

gen = generator_with_return()
try:
    # Keep pulling values by hand; the loop only ends when next() raises.
    while True:
        print(next(gen))
except StopIteration as e:
    # The generator's `return` value travels on the exception as e.value
    # (a plain for loop would discard it).
    print(f"Return value: {e.value}")

# Generator delegation with yield from
def inner_generator():
    """Yield the values 1, 2 and 3, in that order."""
    for item in (1, 2, 3):
        yield item

def outer_generator():
    """Yield "start", then every value from inner_generator(), then "end"."""
    yield "start"
    # yield from re-yields each value of the sub-generator as if this
    # generator had produced it, and resumes here once it is exhausted.
    yield from inner_generator()  # Delegate to inner generator
    yield "end"

# The caller sees one flat stream; the delegation is invisible from outside.
for value in outer_generator():
    print(value)  # start, 1, 2, 3, end

Practical Generator Examples

# File processing generator
def read_large_file(file_path):
    """Yield the lines of a text file one at a time.

    Each line is yielded with leading and trailing whitespace (including the
    newline) removed.  Only one line is read per step, so the file is never
    loaded into memory as a whole; it is closed when the generator finishes.
    """
    with open(file_path, 'r') as handle:
        # readline() returns '' only at end of file, which ends the loop
        for raw_line in iter(handle.readline, ''):
            yield raw_line.strip()

# CSV processing
def process_csv_rows(file_path):
    """Yield each data row of a CSV file as a dict keyed by the header row.

    The first row is taken as the header.  Every following row is yielded as
    ``{column_name: value}`` with all values as strings; if a row has more
    or fewer fields than the header, the extra ones are dropped (``zip``
    stops at the shorter of the two).

    Parsing is done by the standard ``csv`` module, so quoted fields that
    contain commas or newlines are handled correctly.  Completely blank
    lines are skipped.  An empty file yields nothing.

    Args:
        file_path: Path of the CSV file to read.

    Yields:
        dict: One mapping per data row.
    """
    import csv  # Local import keeps this snippet self-contained

    # newline='' is what the csv module expects: it lets the reader deal
    # with line endings itself, including newlines inside quoted fields.
    with open(file_path, 'r', newline='') as file:
        reader = csv.reader(file)
        # A bare next(reader) on an empty file would raise StopIteration
        # inside this generator, which Python turns into a RuntimeError.
        header = next(reader, None)
        if header is None:
            return
        for values in reader:
            if not values:
                continue  # csv.reader returns [] for a blank line
            yield dict(zip(header, values))

# Infinite sequence generators
def primes():
    """Generate prime numbers infinitely.

    Yields 2, 3, 5, 7, 11, ... in ascending order and never terminates, so
    the consumer decides when to stop.  Primality is checked by trial
    division against the primes found so far.
    """
    from itertools import count  # Local import keeps this snippet self-contained

    yield 2
    primes_found = [2]

    # count(3, 2) is the unbounded sequence 3, 5, 7, ...; range() cannot be
    # used here because it requires a concrete integer stop value.
    for candidate in count(3, 2):
        is_prime = True
        for prime in primes_found:
            if prime * prime > candidate:
                # No divisor up to sqrt(candidate) exists, so none exists at all
                break
            if candidate % prime == 0:
                is_prime = False
                break

        if is_prime:
            primes_found.append(candidate)
            yield candidate

# Get first 10 primes: the generator is infinite, so the number of next()
# calls made here is what bounds the work.
prime_gen = primes()
first_10_primes = [next(prime_gen) for _ in range(10)]
print(first_10_primes)  # [2, 3, 5, 7, 11, 13, 17, 19, 23, 29]

Generator Pipelines

# Building data processing pipelines
def read_numbers(filename):
    """Generator to read numbers from file.

    Expects one number per line.  Blank or whitespace-only lines (such as a
    trailing empty line at the end of the file) are skipped instead of
    crashing the pipeline.

    Args:
        filename: Path of the text file to read.

    Yields:
        float: The value parsed from each non-blank line.

    Raises:
        ValueError: If a non-blank line cannot be parsed as a float.
    """
    with open(filename, 'r') as f:
        for line in f:
            text = line.strip()
            if not text:
                continue  # float('') would raise ValueError
            yield float(text)

def filter_positive(numbers):
    """Lazily pass through only the values that are strictly greater than zero."""
    for value in numbers:
        if not value > 0:
            continue  # Zero and negative values are dropped
        yield value

def square_numbers(numbers):
    """Lazily yield the square of every value in ``numbers``, in order."""
    yield from (value ** 2 for value in numbers)

def take(n, iterable):
    """Take first n items from iterable.

    Yields at most ``n`` items and then stops *without* pulling anything
    further from ``iterable``.  This matters when ``iterable`` is an
    iterator shared with other code (e.g. when taking successive batches
    from it): fetching an extra item just to find out that the limit was
    reached would silently lose that item.

    Args:
        n: Maximum number of items to yield; values <= 0 yield nothing.
        iterable: Any iterable or iterator.

    Yields:
        The first ``n`` items of ``iterable`` (fewer if it is shorter).
    """
    if n <= 0:
        return  # Do not touch the source at all
    for taken, item in enumerate(iterable, start=1):
        yield item
        if taken >= n:
            # Stop right after the n-th item, before the for loop would
            # request (and thereby consume) the next one.
            return

# Pipeline usage
# numbers = read_numbers('data.txt')
# positive = filter_positive(numbers)
# squared = square_numbers(positive)
# first_5 = take(5, squared)
# result = list(first_5)

# Or chain them together
def process_pipeline(filename, n):
    """Run the full pipeline on ``filename`` and return the result as a list.

    The stages are chained lazily: numbers are read from the file, filtered
    to the positive ones, squared, and the first ``n`` results are taken.
    Only the final ``list()`` call drives the chain.
    """
    raw_values = read_numbers(filename)
    positives = filter_positive(raw_values)
    squares = square_numbers(positives)
    return list(take(n, squares))

# Batch processing generator
def batch_generator(iterable, batch_size):
    """Group items into batches.

    Yields consecutive lists of ``batch_size`` items from ``iterable``; the
    last batch may be shorter.  Every item appears in exactly one batch, in
    its original order.

    Args:
        iterable: Any iterable or iterator.
        batch_size: Number of items per batch; values <= 0 yield nothing.

    Yields:
        list: The next batch of items.
    """
    from itertools import islice  # Local import keeps this snippet self-contained

    if batch_size <= 0:
        return
    # One shared iterator, so each batch continues where the previous stopped
    iterator = iter(iterable)
    while True:
        # islice stops after exactly batch_size items and does not read ahead,
        # so no item is lost between two batches.
        batch = list(islice(iterator, batch_size))
        if not batch:
            break
        yield batch

# Process data in batches: each `batch` is a list of up to 5 items
data = range(25)
for batch in batch_generator(data, 5):
    print(f"Processing batch: {batch}")
    # Process each batch...

Generator State and Coroutines

# Stateful generator
def running_average():
    """Coroutine-style generator that reports the mean of all values sent so far.

    Prime it with ``next(gen)``; each ``gen.send(value)`` then returns the
    updated average.  Sending ``None`` changes nothing and returns the
    current average again (``None`` until the first real value arrives).
    """
    running_total = 0
    samples = 0
    current_mean = None

    while True:
        incoming = yield current_mean
        if incoming is None:
            continue  # Nothing new to account for
        running_total += incoming
        samples += 1
        current_mean = running_total / samples

avg_gen = running_average()
# The first next() runs the generator up to its first yield (which returns
# the initial average, None); only then can send() deliver values.
next(avg_gen)  # Prime the generator

# Each send() resumes the generator with a value and returns the new average
print(avg_gen.send(10))    # 10.0
print(avg_gen.send(20))    # 15.0
print(avg_gen.send(30))    # 20.0

# Generator-based state machine
def traffic_light():
    """Endless traffic-light state machine.

    Cycles red -> green -> yellow -> red ...; on every step it prints the
    announcement for the new state and then yields the state name.
    """
    phases = (
        ("Red light", "red"),
        ("Green light", "green"),
        ("Yellow light", "yellow"),
    )
    while True:
        for announcement, state in phases:
            print(announcement)
            yield state

light = traffic_light()
# Six steps walk through the red -> green -> yellow cycle exactly twice;
# the generator remembers where it paused between the next() calls.
for _ in range(6):
    state = next(light)
    print(f"Current state: {state}")

# Cooperative multitasking with generators
def task1():
    """Cooperative task with five steps; prints a line and yields control after each."""
    step = 0
    while step < 5:
        print(f"Task 1 - Step {step}")
        yield  # Hand control back to whoever is driving this task
        step += 1

def task2():
    """Cooperative task with three steps; prints a line and yields control after each."""
    step = 0
    while step < 3:
        print(f"Task 2 - Step {step}")
        yield  # Hand control back to whoever is driving this task
        step += 1

def scheduler(tasks):
    """Simple round-robin scheduler.

    Runs every task one step at a time, in turn, until all of them have
    finished.  A task is any generator/iterator: one ``next()`` call is one
    step, and raising ``StopIteration`` means the task is done.

    The tasks are copied into a private run queue, so the caller's
    collection is left untouched and any iterable of tasks is accepted.

    Args:
        tasks: Iterable of generators (or other iterators) to run.
    """
    from collections import deque  # Local import keeps this snippet self-contained

    ready = deque(tasks)
    while ready:
        task = ready.popleft()
        try:
            next(task)
        except StopIteration:
            continue  # Finished: simply do not re-queue it
        # Still alive: back of the queue, so the others get their turn first
        ready.append(task)

# Run tasks cooperatively: the scheduler alternates between the two tasks one
# step at a time; task2 has fewer steps, so task1 finishes its remaining
# steps on its own.
scheduler([task1(), task2()])
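💡 When to Use Generators: reach for a generator when a sequence is large or infinite, when data arrives as a stream (such as the lines of a file), or when processing steps are chained into a pipeline — in short, whenever values are consumed once, in order, and do not all need to be in memory at the same time.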
← Decorators Next: Async Programming →