DataFrame对行列的基本操作实战

Posted 2020-09-24 小鸟的士林

tags:

篇首语：本文由小常识网(cha138.com)小编为大家整理，主要介绍了DataFrame对行列的基本操作实战相关的知识，希望对你有一定的参考价值。

1、pandas对行列的基本操作命令：

import numpy as np
import pandas as pd
from pandas import Series, DataFrame

# Quick reference of basic pandas row/column selection commands.
# The bare expressions below are meant to be evaluated one at a time in a
# REPL/notebook; run as a script they simply compute and discard the result.

ser = Series(np.arange(3.))

data = DataFrame(np.arange(16).reshape(4, 4), index=list('abcd'), columns=list('wxyz'))

data['w']  # select column 'w' dict-style; returns a Series

data.w  # select column 'w' via attribute access; returns a Series

data[['w']]  # select column 'w' with a list of labels; returns a DataFrame

data[['w', 'z']]  # select columns 'w' and 'z'

data[0:2]  # rows at positions 0 and 1; positional slices are half-open (end excluded)

# Second row only (counting from 0), returned as a one-row DataFrame.
# A slice is required here: plain data[1] is treated as a column lookup
# and raises an error (KeyError) for this frame.
data[1:2]

data.iloc[1:2]  # another way to get the second row; same DataFrame as data[1:2]

# Slicing by index labels is inclusive on BOTH ends, i.e. row 'b' is included.
data['a':'b']
data.iloc[0]  # first row of data (replaces the removed data.irow(0))
data.iloc[:, 0]  # first column of data (replaces the removed data.icol(0))

data.head()  # first rows of data, five by default; use data.head(10) for ten
data.tail()  # last rows of data, five by default; use data.tail(10) for ten

ser.iloc[0]  # first element of ser (replaces the removed ser.iget_value(0))
# Last element of ser. With an integer index, ser[-1] is ambiguous (label vs.
# position), so positional access via .iloc is used instead.
ser.iloc[-1]

data.iloc[-1]  # last row of the DataFrame; returns a Series
data.iloc[-1:]  # last row of the DataFrame; returns a DataFrame

data.loc['a', ['w', 'x']]  # row 'a', columns 'w' and 'x'; use when labels are known

data.iat[1, 1]  # scalar at second row, second column; use when positions are known

2、对列的操作实战

import pandas as pd
import numpy as np
# Build a 3x5 matrix of odd numbers with labelled rows and columns.
data = pd.DataFrame(np.arange(1, 31, 2).reshape(3, 5),
                    index=['one', 'two', 'three'], columns=['a', 'b', 'c', 'd', 'e'])
print('index', data.index)
print('data', data)
"""
index Index(['one', 'two', 'three'], dtype='object')
data         a   b   c   d   e
one     1   3   5   7   9
two    11  13  15  17  19
three  21  23  25  27  29
"""

# Column operations:

# Get a single column (three equivalent spellings, each returns a Series).
col_a = data.get('a')
col_a = data.a
col_a = data['a']
print('col_a', type(col_a), col_a)
# A list of labels returns a DataFrame instead of a Series.
col_a = data[['a']]
print('col_a', type(col_a), col_a)
"""
col_a <class 'pandas.core.series.Series'> 
one       1
two      11
three    21
Name: a, dtype: int32
col_a <class 'pandas.core.frame.DataFrame'>         
        a
one     1
two    11
three  21
"""

# .ix has been removed from pandas: use .iloc for positions and .loc for labels.
cols = data.iloc[:, [0, 1, 2]]  # column names unknown, only positions known
print("cols1",cols)
cols = data.iloc[1, [0]]  # row 2, column 1 (list selector -> Series)
print("cols2",type(cols), cols)
cols = data.iloc[1, 0]  # row 2, column 1 (scalar selectors -> scalar)
print("cols3",type(cols), cols)
"""
cols1         
        a   b   c
one     1   3   5
two    11  13  15
three  21  23  25
cols2 <class 'pandas.core.series.Series'> 
a    11
Name: two, dtype: int32
cols3 <class 'numpy.int32'> 11
"""
cols = data.iloc[[1, 2], [0]]  # rows 2 and 3, column 1
print("cols4",type(cols), cols)
cols = data.iloc[0:2, [0, 2]]  # rows 1-2 (position 2 excluded), columns 1 and 3
print("cols5",type(cols), cols)
cols = data.iloc[1:2, 2:4]  # row 2 only, columns 3-4 (both slices exclude their end)
print("cols6",type(cols), cols)
# Boolean row mask combined with a positional column: translate the position
# to its label so that .loc can be used (.iloc rejects a boolean Series).
cols = data.loc[data.a > 5, data.columns[3]]  # column 4 of rows where column 'a' > 5
print("cols7",type(cols), cols)
"""
cols4 <class 'pandas.core.frame.DataFrame'>         
        a
two    11
three  21
cols5 <class 'pandas.core.frame.DataFrame'>       
      a   c
one   1   5
two  11  15
cols6 <class 'pandas.core.frame.DataFrame'>       
     c   d
two  15  17
cols7 <class 'pandas.core.series.Series'> 
two      17
three    27
Name: d, dtype: int32
"""


# Mixing row positions with column labels: convert positions to labels first.
cols = data.loc[data.index[1:3], ['a', 'b']]
print("cols8",type(cols), cols)

# Label slices on the index include both the start and the end label.
cols = data.loc['one':'two', data.columns[[0, 1]]]
print("cols9",type(cols), cols)

# Row labels with (repeated) column positions; the column is returned twice.
cols = data.loc[['one', 'three'], data.columns[[2, 2]]]
print("cols10",type(cols), cols)
"""
cols8 <class 'pandas.core.frame.DataFrame'>         
        a   b
two    11  13
three  21  23
cols9 <class 'pandas.core.frame.DataFrame'>       
      a   b
one   1   3
two  11  13
cols10 <class 'pandas.core.frame.DataFrame'>         
        c   c
one     5   5
three  25  25
"""

3、对行的操作实战

# Row operations. With plain [] indexing, rows must be selected with a slice
# such as data[1:2]; data[1] or data['one'] is treated as a column lookup.
rows = data.iloc[1]  # second row (position 1) as a Series; rows = data[1] is an error
rows = data[1:2]  # second row as a DataFrame; equivalent to rows = data.iloc[1:2]
print("rows1",type(rows), rows)
rows = data['one':'two']  # slicing by index labels is inclusive on both ends, unlike positional slices
print("rows2",type(rows), rows)
"""
rows1 <class 'pandas.core.frame.DataFrame'>       
      a   b   c   d   e
two  11  13  15  17  19
rows2 <class 'pandas.core.frame.DataFrame'>       
      a   b   c   d   e
one   1   3   5   7   9
two  11  13  15  17  19
"""
# Last row of the DataFrame, returned as a DataFrame. .iloc is purely
# positional, so this works regardless of whether the row index is numeric.
# Alternatives: `data[-1:]` (DataFrame) or `data.iloc[-1]` (Series).
rows = data.iloc[-1:]
print("rows3",type(rows), rows)
rows = data[-1:]  # same result as above: last row as a DataFrame
print("rows4",type(rows), rows)
"""
rows3 <class 'pandas.core.frame.DataFrame'>         
        a   b   c   d   e
three  21  23  25  27  29
rows4 <class 'pandas.core.frame.DataFrame'>         
        a   b   c   d   e
three  21  23  25  27  29
"""
rows = data.head(1)   # first row of the DataFrame
print("rows5",type(rows), rows)
rows = data.tail(1)   # last row of the DataFrame
print("rows6",type(rows), rows)
"""
rows5 <class 'pandas.core.frame.DataFrame'>      
     a  b  c  d  e
one  1  3  5  7  9
rows6 <class 'pandas.core.frame.DataFrame'>         
        a   b   c   d   e
three  21  23  25  27  29
"""

以上是关于DataFrame对行列的基本操作实战的主要内容，如果未能解决你的问题，请参考以下文章