Loop in chunks¶
Read a CSV file in chunks¶
In [1]:
import pandas as pd

# read the CSV three rows at a time; each chunk is a regular DataFrame
for chunk in pd.read_csv('chunk-data.csv', chunksize=3):
    print(chunk)
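The chunks can also feed a running aggregate so only one chunk is in memory at a time. A minimal sketch, assuming chunk-data.csv has a numeric column named value (the column name is hypothetical):
In [ ]:
import pandas as pd

# accumulate a sum one chunk at a time instead of loading the whole file;
# 'value' is a hypothetical column name used only for illustration
total = 0
for chunk in pd.read_csv('chunk-data.csv', chunksize=3):
    total += chunk['value'].sum()
print(total)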
Chunked file read with a custom function¶
Create a generator function to read a large file lazily. The toy generator below shows that the function body only runs as each value is requested.
In [ ]:
def f123():
    print('A')
    yield 1
    print('B')
    yield 2
    print('C')
    yield 3

# calling the generator function only creates a generator object;
# nothing in the body runs until we iterate over it
print(type(f123()))
print(f123())

for item in f123():
    print(item)
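Stepping through the same generator with next() makes the laziness explicit: each call runs the body only up to the next yield.
In [ ]:
gen = f123()
# nothing is printed when the generator object is created
print(next(gen))  # runs up to the first yield: prints 'A', then 1
print(next(gen))  # resumes after the first yield: prints 'B', then 2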
In [20]:
def read_file(file_object):
    # yield one line at a time until the file is exhausted
    while True:
        data = file_object.readline()
        if not data:
            break
        yield data

with open('chunk-data.csv') as file:
    gen_file = read_file(file)
    print(gen_file)
    # just read the first three lines
    print('call read_line')
    print(next(gen_file))
    print('call read_line')
    print(next(gen_file))
    print('call read_line')
    print(next(gen_file))
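To process the whole file, the same generator can be consumed with a for loop, which calls next() behind the scenes and stops cleanly at the end of the file. A minimal sketch reusing read_file from the cell above:
In [ ]:
with open('chunk-data.csv') as file:
    # the for loop keeps calling next() until the generator is exhausted
    for line in read_file(file):
        print(line, end='')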
Custom iterator¶
In [27]:
class PrintNumber:
    def __init__(self, max):
        print('init called')
        self.max = max

    def __iter__(self):
        print('iter called')
        self.num = 0
        return self

    def __next__(self):
        print('next called')
        if self.num >= self.max:
            raise StopIteration
        self.num += 1
        return self.num

printNum = PrintNumber(3)
printNumIter = iter(printNum)

# prints '1'
print(next(printNumIter))
# prints '2'
print(next(printNumIter))
# prints '3'
print(next(printNumIter))

# raises StopIteration
try:
    print(type(next(printNumIter)))
except StopIteration:
    print('no more data available')
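The same class also works directly in a for loop, which calls iter() and __next__() for us and absorbs the StopIteration. A short usage sketch:
In [ ]:
# the for statement calls __iter__ once, then __next__ until StopIteration
for num in PrintNumber(3):
    print(num)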