数据加载存储与文件格式
Posted wangshuang1631
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了数据加载存储与文件格式相关的知识,希望对你有一定的参考价值。
写在前面的话:
示例中用到的所有数据都可以从 GitHub 下载,直接打包下载即可。
地址是:http://github.com/pydata/pydata-book
还有一定要说明的:
我使用的是 Python 2.7。书中的部分代码有错误,下面的代码均已在我自己的 Python 2.7 环境中调试通过。
# coding: utf-8
from pandas import Series, DataFrame
import pandas as pd
import numpy as np
# Reading delimited text files into DataFrames.
# Bare expressions such as `df` are notebook-style "show me the value" lines.

# With no options, read_csv splits on commas and infers the header row.
df = pd.read_csv('D:\\Source Code\\pydata-book-master\\ch06\\ex1.csv')
df
# read_table parses the same file once the delimiter is given explicitly.
pd.read_table('D:\\Source Code\\pydata-book-master\\ch06\\ex1.csv', sep=',')

# header=None: treat no line as a header and let pandas number the columns.
pd.read_csv('D:\\Source Code\\pydata-book-master\\ch06\\ex2.csv', header=None)
# names=...: supply the column labels explicitly instead.
names = ['a', 'b', 'c', 'd', 'message']
pd.read_csv('D:\\Source Code\\pydata-book-master\\ch06\\ex2.csv', names=names)
# index_col promotes the named column to the row index.
pd.read_csv('D:\\Source Code\\pydata-book-master\\ch06\\ex2.csv', names=names, index_col='message')

# A list of columns for index_col builds a hierarchical (multi-level) index.
parsed = pd.read_csv('D:\\Source Code\\pydata-book-master\\ch06\\csv_mindex.csv', index_col=['key1', 'key2'])

# Look at the raw lines first; the context manager closes the handle, which
# the original `list(open(...))` never did.
with open('D:\\Source Code\\pydata-book-master\\ch06\\ex3.txt') as ex3_file:
    list(ex3_file)
# The separator is the regular expression \s+ (one or more whitespace chars).
result = pd.read_table('D:\\Source Code\\pydata-book-master\\ch06\\ex3.txt', sep='\\s+')
result

# skiprows takes 0-based line numbers: drop the 1st, 3rd and 4th lines.
pd.read_csv('D:\\Source Code\\pydata-book-master\\ch06\\ex4.csv', skiprows=[0, 2, 3])

# Missing-value handling.
result = pd.read_csv('D:\\Source Code\\pydata-book-master\\ch06\\ex5.csv')
result
pd.isnull(result)
# na_values adds extra strings that should be parsed as missing.
result = pd.read_csv('D:\\Source Code\\pydata-book-master\\ch06\\ex5.csv', na_values=['NULL'])
result
# A dict gives per-column missing-value markers. The surrounding braces were
# lost when this code was extracted, which made the line a syntax error.
sentinels = {'message': ['foo', 'NA'], 'something': ['two']}
pd.read_csv('D:\\Source Code\\pydata-book-master\\ch06\\ex5.csv', na_values=sentinels)
# Reading a large file piece by piece.
result = pd.read_csv('D:\\Source Code\\pydata-book-master\\ch06\\ex6.csv')
result
# nrows limits how many data rows are read.
pd.read_csv('D:\\Source Code\\pydata-book-master\\ch06\\ex6.csv', nrows=5)
# chunksize returns an iterator that yields DataFrames of up to 1000 rows
# instead of loading the whole file at once.
chunker = pd.read_csv('D:\\Source Code\\pydata-book-master\\ch06\\ex6.csv', chunksize=1000)
chunker

# Accumulate the value counts of the 'key' column across all chunks.
# An explicit dtype keeps the empty accumulator numeric on every pandas
# version (the default dtype of an empty Series has changed over time).
tot = Series([], dtype='float64')
for piece in chunker:
    # fill_value=0 treats keys missing on either side as a count of zero.
    tot = tot.add(piece['key'].value_counts(), fill_value=0)
# Series.order() has been removed from pandas; sort_values() is the replacement.
tot = tot.sort_values(ascending=False)
tot[:10]
# Writing data out in text format.
data = pd.read_csv('D:\\Source Code\\pydata-book-master\\ch06\\ex5.csv')
data
# Round trip: write the frame to disk and read it back.
data.to_csv('D:\\out.csv')
pd.read_csv('D:\\out.csv')

# Writing to sys.stdout prints the CSV text instead of creating a file.
import sys
data.to_csv(sys.stdout, sep='|')                     # custom delimiter
data.to_csv(sys.stdout, na_rep='NULL')               # marker for missing values
data.to_csv(sys.stdout, index=False, header=False)   # no row or column labels
# The keyword for selecting columns is `columns`; the old `cols` spelling has
# been removed from pandas.
data.to_csv(sys.stdout, index=False, columns=['a', 'b', 'c'])

# A Series can be written and restored in the same way.
dates = pd.date_range('1/1/2000', periods=7)
ts = Series(np.arange(7), index=dates)
# header=False is explicit because the default header setting of
# Series.to_csv differs between pandas versions.
ts.to_csv('D:\\out.csv', header=False)
# Series.from_csv() has been removed from pandas. Read the two-column file
# with read_csv and take its single data column back out as a Series.
pd.read_csv('D:\\out.csv', header=None, index_col=0, parse_dates=True).iloc[:, 0]
# Parsing a delimited file by hand with the standard-library csv module.
import csv

# Print every parsed row. `with` closes the file when the loop is done.
with open('D:\\Source Code\\pydata-book-master\\ch06\\ex7.csv') as f:
    reader = csv.reader(f)
    for line in reader:
        # print(...) with one argument behaves the same on Python 2 and 3.
        print(line)

# Read all rows into memory, then split header from data.
with open('D:\\Source Code\\pydata-book-master\\ch06\\ex7.csv') as f:
    lines = list(csv.reader(f))
# The header is the first *row* (`lines[0]`). The original used `line[0]`,
# i.e. the first field of whatever row the loop above visited last.
header, values = lines[0], lines[1:]
# zip(*values) transposes the rows into columns; pair each column with its
# header name. (The dict braces were lost when this code was extracted.)
data_dict = dict(zip(header, zip(*values)))
data_dict
# JSON: convert between a JSON string and Python objects.
import json

# The object/record braces were lost when this code was extracted, which left
# a string that json.loads() rejects. They are restored here; every other
# character of the literal is unchanged.
obj = (
    '{"names":"www0","places":["aa","bb","cc","dd"],"pet":null,'
    '"siblings":[{"name":"wang","age":25,"pet":"Zuko"},'
    '{"name":"zhang","age":33,"pet":"Cisco"}]}'
)
# JSON text -> Python dict/list/None.
result = json.loads(obj)
result
# Python object -> JSON text.
asjson = json.dumps(result)
asjson
# A list of dicts becomes a DataFrame; `columns` selects a subset of the
# fields ('pet' is dropped).
siblings = DataFrame(result['siblings'], columns=['name', 'age'])
siblings
# HTML: download a page and parse it into an lxml element tree.
from contextlib import closing

from lxml.html import parse

# urllib2 exists only on Python 2; fall back to its Python 3 location so the
# same script runs on both.
try:
    from urllib2 import urlopen
except ImportError:
    from urllib.request import urlopen

# closing() guarantees the HTTP response is released after parsing; the
# Python 2 response object is not itself a context manager.
with closing(urlopen('http://finance.yahoo.com/q/op?s=AAPL+Options')) as response:
    parsed = parse(response)
# Root element of the parsed document.
doc = parsed.getroot()
from lxml import objectify
# XML: turn every <INDICATOR> record of the file into one DataFrame row.
path = 'D:\\Source Code\\pydata-book-master\\ch06\\mta_perf\\Performance_MNR.xml'
# Binary mode hands the parser the raw bytes so it can apply the document's
# own encoding; `with` closes the handle, which the original never did.
with open(path, 'rb') as xml_file:
    parsed = objectify.parse(xml_file)
root = parsed.getroot()

data = []
for elt in root.INDICATOR:
    # One dict per record: child tag name -> value converted to a Python
    # object. (The `{}` was lost when this code was extracted.)
    el_data = {}
    for child in elt.getchildren():
        el_data[child.tag] = child.pyval
    data.append(el_data)
# A list of dicts becomes a DataFrame with one column per distinct tag.
perf = DataFrame(data)
perf
# Binary storage: persist a DataFrame with pickle and load it back.
frame = pd.read_csv('D:\\Source Code\\pydata-book-master\\ch06\\ex1.csv')
frame
# DataFrame.save() / .load() have been removed from pandas; to_pickle() and
# pd.read_pickle() are their replacements.
frame.to_pickle('D:\\Source Code\\pydata-book-master\\ch06\\\\aa')
# NOTE: unpickling can execute arbitrary code; only load pickle files that
# come from a source you trust.
pd.read_pickle('D:\\Source Code\\pydata-book-master\\ch06\\\\aa')
import requests
# Web API: fetch a JSON document over HTTP and decode it.
url = 'http://gc.ditu.aliyun.com/regeocoding?l=39.938133,116.395739&type=001'
# requests applies no timeout by default, so an unresponsive server would
# block this call indefinitely; fail after 10 seconds instead.
resp = requests.get(url, timeout=10)
resp
# Decode the response body (JSON text) into Python objects.
data = json.loads(resp.text)
以上是关于数据加载存储与文件格式的主要内容,如果未能解决你的问题,请参考以下文章:
利用Python进行数据分析 第6章 数据加载存储与文件格式