python 使用标题名称和列号的组合选择列,支持通配符,正则表达式,切片,索引和电子表格样式的co
Posted
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了python 使用标题名称和列号的组合选择列,支持通配符,正则表达式,切片,索引和电子表格样式的co相关的知识,希望对你有一定的参考价值。
# -*- coding: utf-8 -*-
"""
Allows header names to be used in slice strings
===============================================
Translates column header names in slice strings to indices. Configurable
support for single & double-quoted names, wildcards, regular expressions,
spreadsheet-style alpha ID column headers, column numbers, and hybrid
combinations of these types.
headers is a list of strings or
a dictionary { column-index: name, ...}
where column index is zero-based if origin is 0 or unit-based if origin is 1
"""
import re
from collections import OrderedDict, Sequence
from inspect import getargspec
#import pyparsing as pp
#from pyparsing import (CharsNotIn, Group, Literal, Or, Regex, ParseException,
# ZeroOrMore)
from toolz import curry, identity, keymap
from slugify import UniqueSlugify
from alphaid import AlphaID
# Type of a compiled regular expression; get_indices() uses it in an
# isinstance() check to recognise pattern objects among the selectors.
REGEX = type(re.compile(''))
# Default slugifier: one lower-casing UniqueSlugify instance wrapped in
# toolz.curry and created once at import time.
# NOTE(review): every caller that falls back to this default shares the same
# instance; if UniqueSlugify remembers the slugs it has issued, that state
# carries over between calls -- confirm this is intended.
UNIQUE_SLUGS = curry(UniqueSlugify(to_lower=True))
# Names of the separator options. Not referenced by the code visible here;
# presumably consumed by the slice-string parser -- TODO confirm.
SEPARATORS = ('list_sep', 'range_sep', 'step_sep')
def regex(pattern):
    """Compile *pattern* and return the resulting regular-expression object."""
    compiled = re.compile(pattern)
    return compiled
def wildcard(value):
    """Translate a wildcard expression into a compiled regular expression.

    ``*`` stands for any run of characters (including none) and ``%`` for
    exactly one character.  Every other character is matched literally, so
    header names containing regex metacharacters (``.``, ``(``, ``+`` ...)
    neither change the meaning of the pattern nor make it fail to compile.

    The pattern is not anchored; use ``.match()`` to anchor it at the start
    of the header name.

    :param value: wildcard expression, e.g. ``'col*'`` or ``'q%_total'``
    :returns: compiled regular-expression object
    """
    wildcards = {'*': '.*', '%': '.'}
    # Escape everything that is not one of the two wildcard characters.
    pattern = ''.join(wildcards.get(char) or re.escape(char) for char in value)
    return re.compile(pattern)
def slug(value, **kwds):
    """Return the slug for *value*.

    The original passed *value* to the ``UniqueSlugify`` constructor, which
    produced a slugifier object (or an error) rather than a slug.  A
    slugifier is now built from *kwds* and then called on *value*.

    :param value: text to slugify
    :param kwds: options forwarded unchanged to ``UniqueSlugify``
    :returns: whatever the slugifier returns for *value*
    """
    slugifier = UniqueSlugify(**kwds)
    return slugifier(value)
def slugfunct(slugs, **kwds):
    """Return the callable used to normalise header names.

    :param slugs: when falsy, header names are left untouched and
                  ``toolz.identity`` is returned
    :param kwds: options for ``UniqueSlugify``; when none are given the
                 default ``to_lower=True`` is used
    :returns: ``identity`` or a curried ``UniqueSlugify`` instance
    """
    if not slugs:
        return identity
    # Build a fresh slugifier for every call instead of handing out one
    # module-wide instance, so uniqueness bookkeeping from one set of headers
    # cannot leak into the next.
    options = kwds if kwds else {'to_lower': True}
    return curry(UniqueSlugify(**options))
def get_headers(headers=None, slugs=True, origin=0, **slugkwds):
    """Build the lookup that maps header names to column indices.

    :param headers: one of

        * a falsy value -- 99 generated column IDs are used,
        * something ``int()`` accepts -- that many generated column IDs,
        * a dict -- returned with its keys passed through the slug function,
        * an iterable of names -- numbered consecutively from *origin*.
    :param slugs: when true, names are normalised with the slug function
                  returned by ``slugfunct``
    :param origin: index assigned to the first column (0 or 1)
    :param slugkwds: extra options forwarded to ``slugfunct``
    :returns: mapping of header name to column index
    :raises TypeError: if *headers* cannot be enumerated into a mapping
    """
    hdrs = headers
    if not hdrs:
        # No headers supplied: fall back to 99 generated columns.
        hdrs = 99
    if is_int(hdrs):
        # NOTE(review): AlphaID presumably generates spreadsheet-style IDs
        # (A, B, ...) -- confirm against the alphaid module.
        alphaids = AlphaID(hdrs)
        return alphaids.as_dict(origin)
    # Named to_slug so the module-level slug() function is not shadowed.
    to_slug = slugfunct(slugs, **slugkwds)
    if isinstance(hdrs, dict):
        return keymap(to_slug, hdrs)
    try:
        if slugs:
            hdrs = map(to_slug, hdrs)
        return OrderedDict((name, index)
                           for index, name in enumerate(hdrs, origin))
    except (TypeError, ValueError):
        raise TypeError('headers type must be [str, ...]'
                        ' or OrderedDict((str, int), ...)')
def get_indices(values, headers):
    """Translate column selectors into indices and slices.

    Each item of *values* is handled according to its type:

    * anything ``is_int`` accepts is yielded unchanged,
    * a compiled regular expression yields the index of every header whose
      name it matches (``.match``, i.e. anchored at the start),
    * a non-string sequence becomes ``slice(*parts)``, with header names in
      it replaced by their indices,
    * anything else is looked up in *headers* directly.

    :param values: iterable of selectors
    :param headers: mapping of header name to column index
    :returns: generator of indices and ``slice`` objects
    :raises KeyError: if a header name is not present in *headers*
    """
    for value in values:
        try:
            if is_int(value):
                yield value
            elif isinstance(value, REGEX):
                for key in headers:
                    if value.match(key):
                        yield headers[key]
            elif not isinstance(value, str) and isinstance(value, Sequence):
                yield slice(*(i if is_int(i) else headers[i] for i in value))
            else:
                yield headers[value]
        except (TypeError, ValueError):
            # Best effort: a selector that cannot be interpreted (e.g. an
            # unhashable value) is skipped rather than aborting the rest.
            continue
def is_int(value):
    """Return True when ``int(value)`` succeeds, False otherwise.

    Only conversion failures are treated as "not an int": ``TypeError``
    (unsupported type), ``ValueError`` (unparsable text, NaN) and
    ``OverflowError`` (infinity).  Anything else, including
    ``KeyboardInterrupt`` and ``SystemExit``, propagates to the caller.

    :param value: any object
    :returns: bool
    """
    try:
        int(value)
    except (TypeError, ValueError, OverflowError):
        return False
    return True
def indices2slices(indices):
    """
    Yield a minimized sequence of slices from a sequence of indices.

    Consecutive indices with a constant difference are merged into a single
    ``(start, stop, step)`` tuple.  ``stop`` is the last index covered by the
    run (inclusive) and ``step`` is ``None`` when the difference is 1.

    :param indices: sequence or any other iterable of integer indices;
                    iterables without ``len()`` (e.g. generators) are
                    materialized into a list first
    :returns: ((start1, stop1, step1), (start2, stop2, step2) ...); for a
              single index the one-element container itself is yielded, and
              nothing is yielded for empty input
    """
    try:
        count = len(indices)
    except TypeError:
        # Generators and other iterators have no len() and cannot be indexed.
        indices = list(indices)
        count = len(indices)
    if count == 1:
        # Kept as-is for backward compatibility: a lone index is passed
        # through in its container instead of as a (start, stop, step) tuple.
        yield indices
    elif count > 1:
        start, stop, step = indices[0], None, None
        for previous, value in zip(indices, indices[1:]):
            span = value - previous
            if step and span != step:
                # The stride changed: emit the finished run and start a new
                # one at the current index.
                yield (start, stop, None if step == 1 else step)
                start, stop, step = value, value, None
            else:
                start, stop, step = start, value, span
        yield (start, stop, None if step == 1 else step)
以上是关于python 使用标题名称和列号的组合选择列,支持通配符,正则表达式,切片,索引和电子表格样式的co的主要内容,如果未能解决你的问题,请参考以下文章