itertools
这里主要介绍itertools的常用函数
accumulate(iterable[, func])
将一个二元操作的函数作用于一个可迭代对象上,每次循环计算时,函数的两个参数一个是可迭代对象中当前值,另一个是上次计算得到的结果。函数的返回值是一个由每次计算得到的结果组成的可迭代对象。
相当于如下功能:
def accumulate(iterable, func=operator.add):
    """Yield running results of applying *func* cumulatively to *iterable*.

    The first item is yielded unchanged; every later output is
    ``func(previous_output, item)``. An empty *iterable* yields nothing.

    Args:
        iterable: Source of values.
        func: Two-argument callable combining the previous result with the
            current item. Defaults to ``operator.add`` (running totals).

    Yields:
        One accumulated result per input item.
    """
    # accumulate([1,2,3,4,5]) --> 1 3 6 10 15
    # accumulate([1,2,3,4,5], operator.mul) --> 1 2 6 24 120
    it = iter(iterable)
    try:
        total = next(it)
    except StopIteration:
        # Empty input: finish quietly rather than leak StopIteration
        # out of the generator body.
        return
    yield total
    for element in it:
        total = func(total, element)
        yield total
二元操作函数可以是如下:
min(): 计算最小值
max(): 计算最大值
operator.mul(): 叠乘
operator.add(): 叠加
使用示例:
>>> data = [3, 4, 6, 2, 1, 9, 0, 7, 5, 8]
>>> list(accumulate(data, operator.mul)) # 叠乘
[3, 12, 72, 144, 144, 1296, 0, 0, 0, 0]
>>> list(accumulate(data, max)) # 计算最大值
[3, 4, 6, 6, 6, 9, 9, 9, 9, 9]
# 将上一个结果乘1.05,然后加上下一个迭代值
>>> cashflows = [1000, -90, -90, -90, -90]
>>> list(accumulate(cashflows, lambda bal, pmt: bal*1.05 + pmt))
[1000, 960.0, 918.0, 873.9000000000001, 827.5950000000001]
# 这个示例相当于单目迭代运算
>>> logistic_map = lambda x, _: r * x * (1 - x)
>>> r = 3.8
>>> x0 = 0.4
>>> inputs = repeat(x0, 36) # 初始化值
>>> [format(x, '.2f') for x in accumulate(inputs, logistic_map)]
['0.40', '0.91', '0.30', '0.81', '0.60', '0.92', '0.29', '0.79', '0.63',
'0.88', '0.39', '0.90', '0.33', '0.84', '0.52', '0.95', '0.18', '0.57',
'0.93', '0.25', '0.71', '0.79', '0.63', '0.88', '0.39', '0.91', '0.32',
'0.83', '0.54', '0.95', '0.20', '0.60', '0.91', '0.30', '0.80', '0.60']
chain(*iterables)
将多个可迭代对象进行合并,相当于如下代码:
def chain(*iterables):
    """Yield every element of each iterable in turn, as one flat stream.

    Args:
        *iterables: Any number of iterables, consumed left to right.

    Yields:
        The elements of the first iterable, then the second, and so on.
    """
    # chain('ABC', 'DEF') --> A B C D E F
    for it in iterables:
        yield from it
使用示例:
>>> from itertools import chain
>>> chain([1, 2, 3], [4, 5, 6])
<itertools.chain object at 0x7f751ad90b70>
>>> a = chain([1, 2, 3], [4, 5, 6])
>>> for i in a:
... print(i)
...
1
2
3
4
5
6
combinations(iterable, r)
将可迭代对象中每r个元素按序进行组合,功能相当于:
def combinations(iterable, r):
    """Yield all length-*r* combinations of the elements of *iterable*.

    Elements are treated as distinct by position, and each combination
    keeps the input order. Nothing is yielded when *r* exceeds the number
    of elements.

    Args:
        iterable: Source elements (materialised into a tuple up front).
        r: Length of each combination.

    Yields:
        Tuples of length *r*.
    """
    # combinations('ABCD', 2) --> AB AC AD BC BD CD
    # combinations(range(4), 3) --> 012 013 023 123
    pool = tuple(iterable)
    n = len(pool)
    if r > n:
        return
    indices = list(range(r))
    yield tuple(pool[i] for i in indices)
    while True:
        # Find the rightmost index that has not yet reached its maximum.
        for i in reversed(range(r)):
            if indices[i] != i + n - r:
                break
        else:
            # Every index is at its maximum: all combinations emitted.
            return
        indices[i] += 1
        # Reset everything to the right to the smallest increasing run.
        for j in range(i+1, r):
            indices[j] = indices[j-1] + 1
        yield tuple(pool[i] for i in indices)
使用示例:
>>> from itertools import combinations
>>> comb = combinations('abcd', 3)
>>> for i in comb:
... print(i)
...
('a', 'b', 'c')
('a', 'b', 'd')
('a', 'c', 'd')
('b', 'c', 'd')
combinations_with_replacement(iterable, r)
按照顺序从可迭代对象中取r个元素进行组合,允许使用重复的元素,功能相当于:
def combinations_with_replacement(iterable, r):
    """Yield length-*r* combinations of *iterable*, allowing repeated elements.

    Each combination keeps the input order. Nothing is yielded when the
    input is empty and *r* is non-zero.

    Args:
        iterable: Source elements (materialised into a tuple up front).
        r: Length of each combination.

    Yields:
        Tuples of length *r*.
    """
    # combinations_with_replacement('ABC', 2) --> AA AB AC BB BC CC
    pool = tuple(iterable)
    n = len(pool)
    if not n and r:
        return
    indices = [0] * r
    yield tuple(pool[i] for i in indices)
    while True:
        # Find the rightmost index that can still be advanced.
        for i in reversed(range(r)):
            if indices[i] != n - 1:
                break
        else:
            return
        # Advance it and set every index to its right to the same value,
        # which keeps the index list non-decreasing.
        indices[i:] = [indices[i] + 1] * (r - i)
        yield tuple(pool[i] for i in indices)
使用示例:
>>> from itertools import combinations_with_replacement
>>> a = combinations_with_replacement('abc', 2)
>>> for i in a:
... print(i)
...
('a', 'a')
('a', 'b')
('a', 'c')
('b', 'b')
('b', 'c')
('c', 'c')
compress(data, selectors)
返回data中与selectors里值为True的位置相对应的元素,相当于如下代码:
def compress(data, selectors):
    """Return an iterator over the items of *data* whose selector is truthy.

    *data* and *selectors* are paired positionally with ``zip``, so the
    result stops at the end of the shorter of the two.

    Args:
        data: Iterable of candidate items.
        selectors: Iterable of flags; an item is kept when its flag is truthy.

    Returns:
        A generator producing the selected items.
    """
    # compress('ABCDEF', [1,0,1,0,1,1]) --> A C E F
    return (d for d, s in zip(data, selectors) if s)
使用示例:
>>> from itertools import compress
>>> a = compress('abcdef', [1, 0, 1, 0, 1, 1])
>>> for i in a:
... print(i)
...
a
c
e
f
count(start=0, step=1)
从start开始每次加step组成一个可迭代对象,相当于:
def count(start=0, step=1):
    """Yield ``start``, ``start + step``, ``start + 2*step``, ... without end.

    Args:
        start: First value produced.
        step: Amount added after each value.

    Yields:
        An endless arithmetic progression; the caller must stop iterating.
    """
    # count(10) --> 10 11 12 13 14 ...
    # count(2.5, 0.5) -> 2.5 3.0 3.5 ...
    n = start
    while True:
        yield n
        n += step
cycle(iterable)
循环迭代,依次从一个可迭代对象中取元素,当到达最后一个元素之后又返回至第一个元素,相当于:
def cycle(iterable):
    """Yield the elements of *iterable* over and over again.

    The elements are remembered during the first pass and replayed from
    that saved copy afterwards. An empty *iterable* yields nothing.

    Args:
        iterable: Source elements; consumed exactly once.

    Yields:
        The elements in order, repeating indefinitely.
    """
    # cycle('ABCD') --> A B C D A B C D A B C D ...
    saved = []
    for element in iterable:
        yield element
        saved.append(element)
    # `while saved` (rather than `while True`) lets an empty input terminate.
    while saved:
        for element in saved:
            yield element
dropwhile(predicate, iterable)
从第一个元素开始,丢弃使predicate为True的元素,直到遇到第一个使predicate为False的元素,然后返回该元素及其之后的所有元素。相当于:
def dropwhile(predicate, iterable):
    """Skip leading items while *predicate* is true, then yield the rest.

    Once an item fails the predicate, that item and every later item are
    yielded without being tested again.

    Args:
        predicate: One-argument callable applied to the leading items.
        iterable: Source of items.

    Yields:
        The first item for which *predicate* is false and all items after it.
    """
    # dropwhile(lambda x: x<5, [1,4,6,4,1]) --> 6 4 1
    iterable = iter(iterable)
    for x in iterable:
        if not predicate(x):
            yield x
            break
    # The shared iterator resumes right after the first rejected item.
    for x in iterable:
        yield x
使用示例:
>>> from itertools import dropwhile
>>> dropwhile(lambda x: x<5, [1, 4, 6, 4, 1])
<itertools.dropwhile object at 0x7f94c3507888>
>>> for x in dropwhile(lambda x: x<5, [1, 4, 6, 4, 1]):
... print(x)
...
6
4
1
filterfalse(predicate, iterable)
只返回使predicate为False的元素(即移除所有使predicate为True的元素),相当于:
def filterfalse(predicate, iterable):
    """Yield the items of *iterable* for which *predicate* is false.

    Args:
        predicate: One-argument callable, or ``None`` to test the
            truthiness of each item directly.
        iterable: Source of items.

    Yields:
        Items ``x`` where ``predicate(x)`` is falsy.
    """
    # filterfalse(lambda x: x%2, range(10)) --> 0 2 4 6 8
    if predicate is None:
        predicate = bool
    for x in iterable:
        if not predicate(x):
            yield x
groupby(iterable, key=None)
按照key定义的规则对可迭代对象进行分组,相当于:
class groupby:
    """Iterator of ``(key, group)`` pairs over runs of consecutive equal keys.

    A new group starts each time the key value changes, so equal keys that
    are not adjacent end up in separate groups. Each group is a generator
    drawing from the same underlying iterator as the ``groupby`` object
    itself; advancing to the next group moves past any unread items of the
    current one.
    """
    # [k for k, g in groupby('AAAABBBCCDAABBB')] --> A B C D A B
    # [list(g) for k, g in groupby('AAAABBBCCD')] --> AAAA BBB CC D

    def __init__(self, iterable, key=None):
        """Store the source iterator and key function.

        Args:
            iterable: Source of items.
            key: One-argument callable computing the grouping key; ``None``
                groups by the items themselves.
        """
        if key is None:
            key = lambda x: x
        self.keyfunc = key
        self.it = iter(iterable)
        # One fresh sentinel shared by all three attributes, so the first
        # __next__ call sees currkey == tgtkey and fetches the first item.
        self.tgtkey = self.currkey = self.currvalue = object()

    def __iter__(self):
        """Return self; the object is its own iterator."""
        return self

    def __next__(self):
        """Advance to the next run and return ``(key, group_iterator)``."""
        # Skip whatever remains of the current run.
        while self.currkey == self.tgtkey:
            self.currvalue = next(self.it)    # Exit on StopIteration
            self.currkey = self.keyfunc(self.currvalue)
        self.tgtkey = self.currkey
        return (self.currkey, self._grouper(self.tgtkey))

    def _grouper(self, tgtkey):
        """Yield the items of the current run whose key equals *tgtkey*."""
        while self.currkey == tgtkey:
            yield self.currvalue
            try:
                self.currvalue = next(self.it)
            except StopIteration:
                # Source exhausted: end the group without raising.
                return
            self.currkey = self.keyfunc(self.currvalue)
使用示例:
>>> from itertools import groupby
>>> a = ['aa', 'ab', 'abc', 'bcd', 'abcde']
>>> for i, k in groupby(a, len):
... print(i, list(k))
...
2 ['aa', 'ab']
3 ['abc', 'bcd']
5 ['abcde']
>>> from itertools import groupby
>>> qs = [{'data': 1}, {'data': 2}]
>>> [(name, list(group)) for name, group in groupby(qs, lambda p: p['data'])]
[(1, [{'data': 1}]), (2, [{'data': 2}])]
islice(iterable, stop)
islice(iterable, start, stop[, step])
通过起始位置、结束位置和步长从可迭代对象中取出元素,相当于:
def islice(iterable, *args):
    """Yield selected items of *iterable*, like slicing but for any iterator.

    Call as ``islice(iterable, stop)`` or
    ``islice(iterable, start, stop[, step])``; the positional arguments are
    interpreted exactly as by ``slice()``. ``None`` means "from the
    beginning" for *start*, "until exhausted" for *stop*, and ``1`` for
    *step*.

    The generator ends normally when the selection is complete; a bare
    ``next()`` inside a generator body would otherwise turn exhaustion
    into ``RuntimeError`` on Python 3.7+ (PEP 479). ``stop=0`` selects
    nothing rather than being mistaken for "no limit". No item beyond the
    last needed position is pulled from *iterable*.

    Args:
        iterable: Source of items.
        *args: ``stop`` or ``start, stop[, step]``.

    Yields:
        The items at positions ``start, start + step, ...`` below ``stop``.

    Raises:
        ValueError: If *start* or *stop* is negative, or *step* is not
            positive. Raised when iteration begins, since this is a
            generator function.
    """
    # islice('ABCDEFG', 2) --> A B
    # islice('ABCDEFG', 2, 4) --> C D
    # islice('ABCDEFG', 2, None) --> C D E F G
    # islice('ABCDEFG', 0, None, 2) --> A C E G
    s = slice(*args)
    # Compare against None explicitly: 0 is a legitimate stop value.
    start = 0 if s.start is None else s.start
    stop = s.stop
    step = 1 if s.step is None else s.step
    if start < 0 or (stop is not None and stop < 0):
        raise ValueError(
            'Indices for islice() must be None or a non-negative integer')
    if step <= 0:
        raise ValueError(
            'Step for islice() must be a positive integer or None')
    it = iter(iterable)
    if stop is None:
        positions = enumerate(it)
    else:
        # range() goes first so zip() stops before touching the source
        # once the last relevant position has been passed.
        positions = zip(range(max(start, stop)), it)
    next_i = start
    for i, element in positions:
        if i == next_i:
            yield element
            next_i += step
permutations(iterable, r=None)
从可迭代对象中取出任意r个元素排列组合,返回所有可能的结果,相当于:
def permutations(iterable, r=None):
    """Yield all length-*r* orderings of the elements of *iterable*.

    Elements are treated as distinct by position. Nothing is yielded when
    *r* exceeds the number of elements.

    Args:
        iterable: Source elements (materialised into a tuple up front).
        r: Length of each permutation; ``None`` means the full length.

    Yields:
        Tuples of length *r*.
    """
    # permutations('ABCD', 2) --> AB AC AD BA BC BD CA CB CD DA DB DC
    # permutations(range(3)) --> 012 021 102 120 201 210
    pool = tuple(iterable)
    n = len(pool)
    r = n if r is None else r
    if r > n:
        return
    indices = list(range(n))
    # cycles[i] counts down how many choices remain for position i.
    cycles = list(range(n, n-r, -1))
    yield tuple(pool[i] for i in indices[:r])
    while n:
        for i in reversed(range(r)):
            cycles[i] -= 1
            if cycles[i] == 0:
                # Position i is exhausted: rotate its element to the end
                # and reset its counter, then carry on to position i-1.
                indices[i:] = indices[i+1:] + indices[i:i+1]
                cycles[i] = n - i
            else:
                j = cycles[i]
                indices[i], indices[-j] = indices[-j], indices[i]
                yield tuple(pool[i] for i in indices[:r])
                break
        else:
            # Every position wrapped around: all permutations emitted.
            return
product(*args, repeat=1)
返回多个可迭代对象的笛卡尔积,相当于:
def product(*args, repeat=1):
    """Yield the Cartesian product of the input iterables as tuples.

    The rightmost iterable varies fastest. All results are built in memory
    before the first one is yielded.

    Args:
        *args: Input iterables (each materialised into a tuple).
        repeat: Number of times the whole sequence of inputs is repeated,
            e.g. ``product(A, repeat=2)`` behaves like ``product(A, A)``.

    Yields:
        One tuple per combination, with one element from each pool.
    """
    # product('ABCD', 'xy') --> Ax Ay Bx By Cx Cy Dx Dy
    # product(range(2), repeat=3) --> 000 001 010 011 100 101 110 111
    pools = [tuple(pool) for pool in args] * repeat
    result = [[]]
    for pool in pools:
        # Extend every partial result with every element of this pool.
        result = [x+[y] for x in result for y in pool]
    for prod in result:
        yield tuple(prod)
使用示例:
>>> from itertools import product
>>> a = (1, 2, 3)
>>> b = ('A', 'B', 'C')
>>> c = ('d', 'e', 'f')
>>> pros = product(a, b, c)
>>> for elem in pros:
... print(elem)
...
(1, 'A', 'd')
(1, 'A', 'e')
(1, 'A', 'f')
(1, 'B', 'd')
(1, 'B', 'e')
(1, 'B', 'f')
(1, 'C', 'd')
(1, 'C', 'e')
(1, 'C', 'f')
(2, 'A', 'd')
(2, 'A', 'e')
(2, 'A', 'f')
(2, 'B', 'd')
(2, 'B', 'e')
(2, 'B', 'f')
(2, 'C', 'd')
(2, 'C', 'e')
(2, 'C', 'f')
(3, 'A', 'd')
(3, 'A', 'e')
(3, 'A', 'f')
(3, 'B', 'd')
(3, 'B', 'e')
(3, 'B', 'f')
(3, 'C', 'd')
(3, 'C', 'e')
(3, 'C', 'f')
repeat(object[, times])
重复一个对象times次,如果没有定义times则一直重复,相当于:
def repeat(object, times=None):
    """Yield *object* again and again.

    Args:
        object: The value to produce (the same object every time).
        times: Number of repetitions; ``None`` repeats without end.

    Yields:
        *object*, ``times`` times or indefinitely.
    """
    # repeat(10, 3) --> 10 10 10
    if times is None:
        while True:
            yield object
    else:
        for i in range(times):
            yield object
starmap(function, iterable)
将可迭代对象的每个元素作为参数执行function,相当于:
def starmap(function, iterable):
    """Call *function* with each item of *iterable* unpacked as arguments.

    Args:
        function: Callable to invoke.
        iterable: Iterable of argument sequences; each is passed as
            ``function(*args)``.

    Yields:
        The return value of each call.
    """
    # starmap(pow, [(2,5), (3,2), (10,3)]) --> 32 9 1000
    for args in iterable:
        yield function(*args)
takewhile(predicate, iterable)
从可迭代对象的第一个元素开始,返回满足predicate为True的值,当predicate值为False则终止,相当于:
def takewhile(predicate, iterable):
    """Yield leading items while *predicate* holds; stop at the first failure.

    The first item that fails the predicate is consumed from *iterable*
    but not yielded.

    Args:
        predicate: One-argument callable tested against each item.
        iterable: Source of items.

    Yields:
        The items before the first one for which *predicate* is false.
    """
    # takewhile(lambda x: x<5, [1,4,6,4,1]) --> 1 4
    for x in iterable:
        if predicate(x):
            yield x
        else:
            break
tee(iterable, n=2)
从 iterable 创建 n 个独立的迭代器,以元组的形式返回,n 的默认值是 2,相当于:
def tee(iterable, n=2):
    """Split one iterable into *n* iterators that each see every item.

    Each returned iterator has its own queue. When an iterator's queue is
    empty it pulls one value from the shared source and appends it to
    every queue, so the others will see it in turn.

    Args:
        iterable: Source of items; consumed through a single shared iterator.
        n: Number of iterators to create.

    Returns:
        A tuple of *n* generators.
    """
    it = iter(iterable)
    deques = [collections.deque() for i in range(n)]

    def gen(mydeque):
        while True:
            if not mydeque:             # when the local deque is empty
                try:
                    newval = next(it)   # fetch a new value and
                except StopIteration:
                    return
                for d in deques:        # load it to all the deques
                    d.append(newval)
            yield mydeque.popleft()

    return tuple(gen(d) for d in deques)
使用示例:
>>> from itertools import tee
>>> iter1, iter2 = tee('abcde')
>>> list(iter1)
['a', 'b', 'c', 'd', 'e']
>>> list(iter2)
['a', 'b', 'c', 'd', 'e']
>>> tee('abcde', 3)
(<itertools._tee object at 0x7f94c3507dc8>, <itertools._tee object at 0x7f94c3507d48>, <itertools._tee object at 0x7f94c3507e08>)
zip_longest(*iterables, fillvalue=None)
依次从每个iterables中取出一个元素进行组合,当短的iterable取完了时用fillvalue进行填充,相当于:
class ZipExhausted(Exception):
    """Internal signal that the last of the input iterables has run out."""


def zip_longest(*args, fillvalue=None):
    """Zip iterables together, padding the shorter ones with *fillvalue*.

    Iteration continues until the longest input is exhausted. ``fillvalue``
    is an explicit keyword-only parameter, so a misspelled keyword raises
    ``TypeError`` instead of being silently ignored.

    Args:
        *args: Input iterables.
        fillvalue: Value substituted for inputs that have run out.

    Yields:
        Tuples with one element per input iterable.
    """
    # zip_longest('ABCD', 'xy', fillvalue='-') --> Ax By C- D-
    # Number of inputs that may run out before the overall zip must stop.
    counter = len(args) - 1

    def sentinel():
        # Runs once per input, at the moment that input is exhausted.
        nonlocal counter
        if not counter:
            # This was the last input still running.
            raise ZipExhausted
        counter -= 1
        yield fillvalue

    fillers = repeat(fillvalue)
    # Each input is followed by its sentinel and then endless padding.
    iterators = [chain(it, sentinel(), fillers) for it in args]
    try:
        while iterators:
            yield tuple(map(next, iterators))
    except ZipExhausted:
        pass