Iterator Protocol — __iter__, __next__, StopIteration
The iterator protocol is at the heart of iteration in Python. This section explains how for loops work internally and how to implement custom iterators.
Iterable vs Iterator Difference
| | Iterable | Iterator |
|---|---|---|
| Definition | Object that has __iter__() method | Object that has both __iter__() and __next__() |
| Examples | list, tuple, str, dict, set | enumerate, zip, map, filter, generator |
| Characteristic | Can be iterated multiple times | Consumed once, maintains state |
| Relationship | Returns an iterator when iter() is called | Is also an iterable (returns itself) |
# Iterable: can be iterated multiple times, because every for loop asks it
# for a brand-new iterator.
my_list = [1, 2, 3]
for x in my_list:  # First iteration
    pass
for x in my_list:  # Second iteration — starts from the beginning again
    print(x)  # 1, 2, 3

# Iterator: consumed once
my_iter = iter(my_list)
print(next(my_iter))  # 1
print(next(my_iter))  # 2
print(next(my_iter))  # 3
# A fourth next() raises StopIteration. Catch it here so the demonstration
# keeps running instead of aborting the script at this line.
try:
    next(my_iter)
except StopIteration:
    print("StopIteration!")

# An iterator is also an iterable
my_iter2 = iter(my_list)
for x in my_iter2:  # Can use for directly on an iterator
    print(x)

# Iterating again yields nothing (exhausted)
for x in my_iter2:
    print(x)  # No output
__iter__ / __next__ Protocol
# Iterable protocol
class MyIterable:
    """Minimal iterable: hands out a fresh iterator on every ``iter()`` call.

    Args:
        data: Sequence to iterate over (must support ``len()`` and integer
            indexing). Defaults to an empty tuple.
    """

    def __init__(self, data=()):
        # Stored so that __iter__ has something to give to the iterator.
        self.data = data

    def __iter__(self):
        """Return an iterator"""
        return MyIterator(self.data)


# Iterator protocol
class MyIterator:
    """Minimal iterator: walks ``data`` by index and remembers its position.

    Args:
        data: Sequence to walk (must support ``len()`` and integer indexing).
            Defaults to an empty tuple.
    """

    def __init__(self, data=()):
        self.data = data
        self.index = 0  # position of the next element to return

    def __iter__(self):
        """Return the iterator itself (it is also an iterable)"""
        return self

    def __next__(self):
        """Return the next value, raise StopIteration when exhausted"""
        if self.index >= len(self.data):
            raise StopIteration
        value = self.data[self.index]
        self.index += 1
        return value
StopIteration Exception
An iterator raises StopIteration when all values have been returned, signaling the end of iteration.
class CountDown:
    """Single-use iterator that yields start, start - 1, ..., 1."""

    def __init__(self, start: int):
        self.current = start

    def __iter__(self):
        # The object is its own iterator, so one instance supports one pass.
        return self

    def __next__(self) -> int:
        """Return the current number and step down; stop once it is <= 0."""
        if self.current > 0:
            emitted = self.current
            self.current = emitted - 1
            return emitted
        raise StopIteration


# The for loop ends quietly when __next__ raises StopIteration.
countdown = CountDown(3)
for n in countdown:
    print(n)  # 3, 2, 1
iter() / next() Built-in Functions
import random

# iter(iterable) hands back an iterator over the iterable
it = iter([10, 20, 30])
print(next(it))  # 10
print(next(it))  # 20

# next(iterator, default): the default is returned once the iterator is
# exhausted, so no StopIteration escapes
print(next(it))  # 30
print(next(it, "done"))  # "done" (instead of StopIteration)


# iter(callable, sentinel) — two-argument form: the callable is invoked
# repeatedly and iteration stops as soon as it returns the sentinel
def _roll_die():
    return random.randint(1, 6)


roll_until_six = iter(_roll_die, 6)
for n in roll_until_six:
    print(f"Roll: {n}")
print("Got a 6!")
# Practical pattern: read a file line by line
with open("data.txt") as f:
    # readline() returns "" only at end of file, so "" serves as the sentinel
    # for the iter(callable, sentinel) form.
    remaining_lines = iter(f.readline, "")
    for line in remaining_lines:
        process(line)
Custom Iterator Class Implementation
Example 1: Range Iterator
class Range:
    """Re-iterable arithmetic progression modelled on the built-in range().

    Every call to ``__iter__`` creates an independent ``RangeIterator``, so
    the same object can be traversed repeatedly.

    Raises:
        ValueError: If ``step`` is 0.
    """

    def __init__(self, start: int, stop: int, step: int = 1):
        if step == 0:
            raise ValueError("step cannot be 0.")
        self.start, self.stop, self.step = start, stop, step

    def __iter__(self) -> "RangeIterator":
        return RangeIterator(self.start, self.stop, self.step)

    def __len__(self) -> int:
        """Return how many values one pass produces (never negative)."""
        # Normalise to a distance to cover and a positive stride, then take
        # the ceiling of their quotient.
        if self.step > 0:
            distance, stride = self.stop - self.start, self.step
        else:
            distance, stride = self.start - self.stop, -self.step
        return max(0, (distance + stride - 1) // stride)

    def __repr__(self) -> str:
        return f"Range({self.start}, {self.stop}, {self.step})"
class RangeIterator:
    """One-pass cursor over the values start, start + step, ... short of stop."""

    def __init__(self, start: int, stop: int, step: int):
        self.current = start
        self.stop = stop
        self.step = step

    def __iter__(self) -> "RangeIterator":
        return self

    def __next__(self) -> int:
        """Return the current value and advance, or raise StopIteration."""
        position = self.current
        # The end test depends on the direction of travel.
        if self.step > 0:
            finished = position >= self.stop
        elif self.step < 0:
            finished = position <= self.stop
        else:
            finished = False
        if finished:
            raise StopIteration
        self.current = position + self.step
        return position
# Iterable and iterator are separate classes, so every pass over r starts
# from a fresh iterator.
r = Range(1, 10, 2)
for _ in range(2):
    print(list(r))  # [1, 3, 5, 7, 9] on both passes — reusable!
print(len(r))  # 5
Example 2: Fibonacci Iterator
class Fibonacci:
"""Infinite Fibonacci sequence iterator"""
def __init__(self, limit: int | None = None):
self.limit = limit
def __iter__(self) -> "FibIterator":
return FibIterator(self.limit)
class FibIterator:
def __init__(self, limit: int | None):
self.a, self.b = 0, 1
self.limit = limit
self.count = 0
def __iter__(self) -> "FibIterator":
return self
def __next__(self) -> int:
if self.limit is not None and self.count >= self.limit:
raise StopIteration
value = self.a
self.a, self.b = self.b, self.a + self.b
self.count += 1
return value
import itertools

# First 10 Fibonacci numbers
first_ten = Fibonacci(10)
print(list(first_ten))  # [0, 1, 1, 2, 3, 5, 8, 13, 21, 34]

# Without a limit the sequence never ends; takewhile cuts it off at the
# first value that is not under 100.
unbounded = Fibonacci()
fib_under_100 = list(itertools.takewhile(lambda x: x < 100, unbounded))
print(fib_under_100)  # [0, 1, 1, 2, 3, 5, 8, 13, 21, 34, 55, 89]
Example 3: File Chunk Iterator
from pathlib import Path
class ChunkReader:
"""Iterator that reads large files in fixed-size chunks"""
def __init__(self, filepath: str | Path, chunk_size: int = 4096):
self.filepath = Path(filepath)
self.chunk_size = chunk_size
def __iter__(self) -> "ChunkReaderIterator":
return ChunkReaderIterator(self.filepath, self.chunk_size)
class ChunkReaderIterator:
    """One-pass iterator yielding successive binary chunks of a file.

    The file is opened in ``__init__`` and closed when the end of the file is
    reached, when ``close()`` is called, or as a fallback in ``__del__``.

    Args:
        filepath: File to read; opened in binary mode.
        chunk_size: Number of bytes requested per ``read()`` call.
    """

    def __init__(self, filepath: Path, chunk_size: int):
        self.chunk_size = chunk_size
        self._file = open(filepath, "rb")

    def __iter__(self) -> "ChunkReaderIterator":
        return self

    def __next__(self) -> bytes:
        """Return the next chunk; raise StopIteration once the file is done."""
        # An exhausted iterator must keep raising StopIteration. Without this
        # guard a further next() would read the closed file and raise
        # ValueError instead.
        if self._file.closed:
            raise StopIteration
        chunk = self._file.read(self.chunk_size)
        if not chunk:
            self.close()
            raise StopIteration
        return chunk

    def close(self) -> None:
        """Close the underlying file; safe to call more than once."""
        if not self._file.closed:
            self._file.close()

    def __del__(self):
        """Close file on destruction"""
        # _file is missing if open() failed inside __init__.
        if hasattr(self, "_file"):
            self.close()
# Usage: stream the file chunk by chunk instead of loading it whole
chunk_source = ChunkReader("large_file.bin")
for chunk in chunk_source:
    process_chunk(chunk)
How for Loops Work Internally
# Actual behavior of a for loop
my_list = [1, 2, 3]
for x in my_list:
    print(x)

# The code above works internally like this:
_iter = iter(my_list)  # calls __iter__()
while True:
    try:
        x = next(_iter)  # calls __next__()
    except StopIteration:
        break  # End iteration
    # The loop body runs outside the try block: only the StopIteration from
    # next() ends the loop. One raised by the body itself propagates, exactly
    # as it would from a real for loop.
    print(x)

# The in operator calls __contains__ when the container defines it (list and
# set both do) and falls back to iterating over the object otherwise.
print(3 in [1, 2, 3, 4, 5])  # True — O(n) search
print(3 in {1, 2, 3, 4, 5})  # True — O(1) hash search

# list(iterable) — exhausts the iterator while building a list
from io import StringIO
f = StringIO("line1\nline2\nline3")
lines = list(f)  # ['line1\n', 'line2\n', 'line3']
Practical Example: CSV Paging Iterator
import csv
from pathlib import Path
from typing import Iterator
class CsvPageIterator:
"""Iterator that reads a CSV file in page-sized chunks"""
def __init__(self, filepath: str | Path, page_size: int = 100):
self.filepath = Path(filepath)
self.page_size = page_size
def __iter__(self) -> Iterator[list[dict]]:
page: list[dict] = []
with open(self.filepath, newline="", encoding="utf-8") as f:
reader = csv.DictReader(f)
for row in reader:
page.append(dict(row))
if len(page) >= self.page_size:
yield page
page = []
if page: # Last page
yield page
def __len__(self) -> int:
"""Total page count (requires full file scan)"""
total_rows = sum(1 for _ in open(self.filepath)) - 1 # Exclude header
return (total_rows + self.page_size - 1) // self.page_size
# Usage: pages are numbered from 1 for display
pager = CsvPageIterator("users.csv", page_size=50)
numbered_pages = enumerate(pager, start=1)
for page_num, page in numbered_pages:
    print(f"Page {page_num}: processing {len(page)} rows")
    for row in page:
        process_row(row)
Expert Tips
Tip 1: Separating iterable and iterator enables reuse
# Bad example: iterable and iterator in the same class
class BadRange:
    def __iter__(self):
        # Every call returns this same object, so its state is shared:
        # cannot be reused once exhausted.
        return self


# Good example: separated
class GoodRange:
    def __iter__(self):
        # A new iterator object per call; "..." stands in for its arguments.
        return GoodRangeIterator(...)


class GoodRangeIterator:
    def __iter__(self):
        return self

    def __next__(self):
        # Placeholder body — the real stepping logic is omitted in this sketch.
        ...
Tip 2: Provide hints with __length_hint__
class MyIterator:
    """Sketch showing only the ``__length_hint__`` hook of an iterator."""

    def __init__(self, data):
        self.data, self.index = data, 0

    def __length_hint__(self) -> int:
        """Estimate how many elements are left (need not be exact)."""
        consumed = self.index
        total = len(self.data)
        return total - consumed
Tip 3: Using the two-argument form of iter()
import struct
# Read a binary file 4 bytes at a time
uint_format = struct.Struct("I")
with open("data.bin", "rb") as f:
    # read() returns b"" at end of file, which is the sentinel that stops iter()
    four_byte_chunks = iter(lambda: f.read(4), b"")
    for chunk in four_byte_chunks:
        (value,) = uint_format.unpack(chunk)
        print(value)