0 1 2
0 0 123 122
1 1 124 321
2 2 125 333
D:\ProgramData\Anaconda3_32\lib\site-packages\ipykernel_launcher.py:1: ParserWarning: Falling back to the 'python' engine because the 'c' engine does not support regex separators (separators > 1 char and different from '\s+' are interpreted as regex); you can avoid this warning by specifying engine='python'.
"""Entry point for launching an IPython kernel.
Table 5-1. Metacharacters
.       single character, except newline
\d      digit
\D      non-digit character
\s      whitespace character
\S      non-whitespace character
\n      new line character
\t      tab character
\uxxxx  unicode character specified by the hexadecimal number xxxx
ch05_06.txt
##### LOG FILE
This file has been generated by automatic system
white,red,blue,green,animal
12-Feb-2015: Counting of animals inside the house
1,5,2,3,cat
2,7,8,5,dog
13-Feb-2015: Counting of animals outside the house
3,3,6,7,horse
2,2,8,3,duck
4,4,2,1,mouse
# Read the CSV three rows at a time and collect, for every chunk, the sum of
# its 'white' column. Series.set_value() was removed in pandas 1.0, so the
# per-chunk sums are gathered in a list and turned into a Series at the end;
# the resulting index (0, 1, ...) is the chunk number, as before.
white_sums = []
pieces = read_csv('ch05_01.csv', chunksize=3)
for piece in pieces:
    print(piece)
    white_sums.append(piece['white'].sum())
out = Series(white_sums)
print(out)
white red blue green animal
0 1 5 2 3 cat
1 2 7 8 5 dog
2 3 3 6 7 horse
white red blue green animal
3 2 2 8 3 duck
4 4 4 2 1 mouse
0 6
1 6
dtype: int64
Unnamed: 0 ball mug paper pen pencil
0 blue 6.0 NaN NaN 6.0 NaN
1 green NaN NaN NaN NaN NaN
2 red NaN NaN NaN NaN NaN
3 white 20.0 NaN NaN 20.0 NaN
4 yellow 19.0 NaN NaN 19.0 NaN
up down right left
white 0.821537 0.070376 0.131624 0.718632
black 0.723164 0.409424 0.554343 0.361086
red 0.671485 0.993762 0.316291 0.999724
blue 0.834104 0.565786 0.922212 0.166467
1 2 3 4 5 6 7 8 9 10
# Wrap the DataFrame's HTML table in a minimal page and write it to disk.
s = [
    '<HTML>',
    '<HEAD><TITLE>My DataFrame</TITLE></HEAD>',
    '<BODY>',
    frame.to_html(),
    '</BODY></HTML>',
]
html = ''.join(s)
# The context manager closes the file even if the write fails.
with open('myFrame.html', 'w') as html_file:
    html_file.write(html)
Unnamed: 0 up down right left
0 white 0.821537 0.070376 0.131624 0.718632
1 black 0.723164 0.409424 0.554343 0.361086
2 red 0.671485 0.993762 0.316291 0.999724
3 blue 0.834104 0.565786 0.922212 0.166467
# Nome Exp Livelli
0 1 Fabio Nelli 17521 NaN
1 2 admin 9029 NaN
2 3 BrunoOrsini 2124 NaN
3 4 Berserker 700 NaN
4 5 Dnocioni 543 NaN
5 6 albertosallusti 409 NaN
6 7 Jon 231 NaN
7 8 Mr.Y 180 NaN
8 9 michele sisinni 157 NaN
9 10 Selina 136 NaN
10 11 Massimo 127 NaN
11 12 Beniamino Feula 122 NaN
12 13 stefano gustin 121 NaN
13 14 Maurizio Andreoli 111 NaN
14 15 Pietro Baima 108 NaN
15 16 Cecilia Lalatta Costerbosa 108 NaN
16 17 Leonardo Zampi 108 NaN
17 18 Davide Aloisi 106 NaN
18 19 gildalombardi 105 NaN
19 20 Telerobotlabs 104 NaN
20 21 Marco Contigiani 101 NaN
21 22 berillio 58 NaN
22 23 ron 55 NaN
23 24 Titanic4wd 43 NaN
24 25 deg 40 NaN
25 26 al45 40 NaN
26 27 il_mix 38 NaN
27 28 AndreaC 35 NaN
28 29 Sergio fly 32 NaN
29 30 bigazzi 32 NaN
.. ... ... ... ...
220 221 pozi 3 NaN
221 222 mattia 3 NaN
222 223 mauro.menegazzi 3 NaN
223 224 cico89 3 NaN
224 225 eta38 3 NaN
225 226 Chinje Chang 3 NaN
226 227 fraschettin 2 NaN
227 228 Rocco 2 NaN
228 229 Dimitri 2 NaN
229 230 Arturo 2 NaN
230 231 Paolo Indennidate 2 NaN
231 232 fabioroberto 2 NaN
232 233 ycomyca 2 NaN
233 234 bdb 2 NaN
234 235 paolotirispetta 2 NaN
235 236 Roberto72 2 NaN
236 237 Christian76 2 NaN
237 238 paolos46 2 NaN
238 239 Giolat90 2 NaN
239 240 giampyypmaig 1 NaN
240 241 Marco Corbetta 1 NaN
241 242 softeng 1 NaN
242 243 strechum 1 NaN
243 244 an6991 1 NaN
244 245 plato 1 NaN
245 246 CarloAlberto98 1 NaN
246 247 cris 1 NaN
247 248 emilibassi 1 NaN
248 249 mehrbano 1 NaN
249 250 NIKITA PANCHAL 1 NaN
[250 rows x 4 columns]
5.5 从XML读取数据 101
books.xml <?xml version="1.0"?>
272103_1_EnRoss, Mark Computer 23.56 2014-22-01 272103_1_EnBracket, Barbara Computer 35.95 2014-12-16
def etree2df(root):
    """Convert a two-level XML tree into a pandas DataFrame.

    Every child of ``root`` is treated as one record. The tags of the first
    record's children become the column names, and each record's child texts
    are assigned to those columns by position.

    Args:
        root: Root element of a parsed XML tree. Both
            ``xml.etree.ElementTree`` elements and lxml elements (including
            ``lxml.objectify``) are accepted.

    Returns:
        A DataFrame with one row per record, indexed 0..n-1. A record with
        fewer children than the first one gets missing values in the
        trailing columns. An empty DataFrame is returned when ``root`` has
        no children.
    """

    def children(node):
        # Element.getchildren() was removed from the standard library in
        # Python 3.9. lxml offers iterchildren(); plain iteration is NOT
        # used for lxml because objectify elements iterate over same-tag
        # siblings rather than children. Stdlib elements have no
        # iterchildren(), and for them iteration does yield the children.
        iterchildren = getattr(node, "iterchildren", None)
        if iterchildren is not None:
            return list(iterchildren())
        return list(node)

    records = children(root)
    if not records:
        return pd.DataFrame()

    column_names = [field.tag for field in children(records[0])]
    # Build all rows first and create the frame once: DataFrame.append() was
    # removed in pandas 2.0 and copied the whole frame on every call.
    rows = [
        dict(zip(column_names, (field.text for field in children(record))))
        for record in records
    ]
    return pd.DataFrame(rows, columns=column_names)
1 2
# Convert the parsed XML tree to a DataFrame and display it.
temp = etree2df(root)
print(temp)
Author Title Genre Price PublishDate
0 272103_1_EnRoss, Mark XML Cookbook Computer 23.56 2014-22-01
1 272103_1_EnBracket, Barbara XML for Dummies Computer 35.95 2014-12-16
5.6 读写Microsoft Excel文件 103
1 2
# Load the first worksheet of the Excel workbook into a DataFrame and show it.
temp = pd.read_excel('data.xls')
print(temp)
white red green black
a 12 23 17 18
b 22 16 19 18
c 14 23 22 21
down left right up
black 5 7 6 4
blue 13 15 14 12
red 9 11 10 8
white 1 3 2 0
1 2 3 4 5 6 7 8 9
# The standard-library json module replaces the deprecated pandas.json.
import json

try:
    # Public location since pandas 1.0.
    from pandas import json_normalize
except ImportError:
    # Older pandas releases only expose it here.
    from pandas.io.json import json_normalize

# Parse the JSON document; the context manager guarantees the file is closed.
with open('books.json', 'r') as file:
    text = json.load(file)

# Flatten the nested 'books' records into a table.
temp = json_normalize(text, 'books')
print(temp)
print()

# Same table, with the parent-level 'writer' and 'nationality' keys repeated
# on every book row.
temp = json_normalize(text, 'books', ['writer', 'nationality'])
print(temp)
price title
0 23.56 XML Cookbook
1 50.70 Python Fundamentals
2 12.30 The NumPy library
3 28.60 Java Enterprise
4 31.35 HTML5
5 28.00 Python for Dummies
price title writer nationality
0 23.56 XML Cookbook Mark Ross USA
1 50.70 Python Fundamentals Mark Ross USA
2 12.30 The NumPy library Mark Ross USA
3 28.60 Java Enterprise Barbara Bracket UK
4 31.35 HTML5 Barbara Bracket UK
5 28.00 Python for Dummies Barbara Bracket UK
D:\ProgramData\Anaconda3_32\lib\site-packages\ipykernel_launcher.py:4: FutureWarning: pandas.json is deprecated and will be removed in a future version.
You can access loads as pandas.io.json.loads
after removing the cwd from sys.path.
5.8 HDF5格式 107
1 2 3 4 5 6 7
from pandas.io.pytables import HDFStore

# 4x4 frame holding the integers 0..15, labelled by colour and direction.
frame = pd.DataFrame(
    np.arange(16).reshape(4, 4),
    index=['white', 'black', 'red', 'blue'],
    columns=['up', 'down', 'right', 'left'],
)

# Open (or create) the HDF5 file and save the frame under the key 'obj1'.
store = HDFStore('mydata.h5')
store['obj1'] = frame
# Bare expression: displays the store summary when run in a notebook cell.
store
index white red blue black green
0 0 0 1 2 3 4
1 1 5 6 7 8 9
2 2 10 11 12 13 14
3 3 15 16 17 18 19
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
import sqlite3

# Schema of the demo table: two text columns, one real and one integer.
query = """ CREATE TABLE test (a VARCHAR(20), b VARCHAR(20), c REAL, d INTEGER );"""

# In-memory database: it exists only for the lifetime of this connection.
con = sqlite3.connect(':memory:')
con.execute(query)
con.commit()

data = [
    ('white', 'up', 1, 3),
    ('black', 'down', 2, 8),
    ('green', 'up', 4, 4),
    ('red', 'down', 5, 5),
]
# Placeholders let sqlite3 bind the values instead of formatting them into SQL.
stmt = "INSERT INTO test VALUES(?,?,?,?)"
# Using the connection as a context manager commits on success and rolls the
# whole batch back if any insert fails.
with con:
    con.executemany(stmt, data)