python 将CSV文件导入db
Posted
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了python 将CSV文件导入db相关的知识,希望对你有一定的参考价值。
#!/usr/bin/env python
import os
import argparse
import shutil
import subprocess
import re
import string
import codecs
os.getcwd()
class Helper():
@staticmethod
def which(program):
import os
def is_exe(fpath):
return os.path.isfile(fpath) and os.access(fpath, os.X_OK)
fpath, fname = os.path.split(program)
if fpath:
if is_exe(program):
return program
else:
for path in os.environ["PATH"].split(os.pathsep):
path = path.strip('"')
exe_file = os.path.join(path, program)
if is_exe(exe_file):
return exe_file
return None
@staticmethod
## separate as array keys to have data piped to next command
def readSubOutput(call):
try:
ps = False;
for idx, val in enumerate(call):
if(idx == len(call)-1) :
if ps == False:
return subprocess.check_output(val)
else:
return subprocess.check_output(val, stdin=ps.stdout)
else:
ps = subprocess.Popen(val, stdout=subprocess.PIPE)
return None
except subprocess.CalledProcessError, e:
print "Error - Last Call"
print call[0]
print " Stdout output:\n"
print e.output
exit(1)
# add arguments to script
parser = argparse.ArgumentParser(description='Create a table for csv and import data requires csvkit')
parser.add_argument("-i", "--host",type=str, help="The host for database.")
parser.add_argument("-u", "--username",type=str, help="The username for database.")
parser.add_argument("-p", "--password",type=str, help="The pasword for database.")
parser.add_argument("-db", "--db",type=str, help="The database name.")
parser.add_argument("-e", "--encoding",type=str, help="File char enconding.")
parser.add_argument("-d", "--delimiter",type=str, help="Delimiter.")
parser.add_argument("--nocreate", help="Create target table", action="store_true")
parser.add_argument("--truncate", help="Truncate table before load", action="store_true")
parser.add_argument("-t", "--tablename",type=str, help="Table name.")
parser.add_argument("filename", help="Import csv filename.")
args = parser.parse_args()
#requried program
programs = ["csvsql","csvstack","mysql"]
helper = Helper()
for x in programs:
inpath = helper.which(x)
if inpath == "None":
print(sys.stderr, "Could not find " + x + " program required")
sys.exit(1)
# default to the following if not provided
tablename = ("", args.tablename)[bool(args.tablename)]
encoding = ("utf8", args.encoding)[bool(args.encoding)]
delimiter = (",", args.delimiter)[bool(args.delimiter)]
#check if filenma of directory it is try to stack
filename = os.path.abspath(args.filename).strip()
files = []
if os.path.isdir(filename):
for file in os.listdir(filename):
if file.endswith(".csv") and file.find("stacked") == -1 :
files.append(filename + "/" + file)
print "stacking files in " + args.filename
filename += "/stacked.csv"
f = codecs.open(filename , "w",encoding)
exitcode = subprocess.call(["csvstack",'-e',encoding] + files, stdout=f)
f.close()
#change new lines to match what we expect
print "changing line endings"
with codecs.open(filename , "r", encoding) as f:
filename = filename + "-replaced.csv"
print filename
out = codecs.open(filename , "w",encoding)
for line in f:
out.write(re.sub("\n", "\r\n", line))
out.close()
#TODO: not a great way to do this butexitcode of csvstact
# seems to be no use
statinfo = os.stat(filename)
if statinfo.st_size == 0:
print "stack failed zero length stack file"
exit(1)
# if create table is specified use csvsql to get a create table statement
csvsqlCommand = ["csvsql" ,"-i","mysql","--table",tablename,"-e",encoding,"-d",delimiter]
if delimiter == "tabs":
csvsqlCommand = ["csvsql" ,"-i","mysql","--table",tablename,"-e",encoding,"--tabs"]
delimiter = "\\t"
sql = helper.readSubOutput([
["head","-n","1000",filename],
csvsqlCommand,
])
# csvsql is too consertive with columns widths so try to fix this
#TODO: can do something better than this
sql = re.sub("VARCHAR\(.*\)","VARCHAR(255)",sql)
sql = re.sub("DECIMAL NOT NULL","DECIMAL (12,4) NOT NULL",sql)
#construct a load data statement
loadDataSql = ("LOAD DATA LOCAL INFILE '" + filename + "' INTO TABLE `" + tablename + "` "
"CHARACTER SET " + encoding + " "
"FIELDS TERMINATED BY '" + delimiter +"' "
"ENCLOSED BY '\"' "
"ESCAPED BY '\' "
"LINES TERMINATED BY '\\r\\n' "
"IGNORE 1 LINES "
"(|cols|)"
)
#TODO: not 100% safe but shoulde work for our puroses
f = codecs.open(filename, "r", "utf-8")
cols = f.readline()
#get rid of UTF-8 BOM
cols = cols.replace(u'\ufeff', '')
cols = re.sub("[\r\n]","",cols)
if delimiter == "\\t":
cols = re.sub("[\t]",",",cols)
cols = string.split(cols,",")
cols = ["`" + string.strip(x.replace(" ","_")) + "`" for x in cols]
cols = " , ".join(cols)
loadDataSql = loadDataSql.replace("|cols|",cols)
#if no db provided just show sql needed to create table and load data otherwise run te commands on the db
if args.db == None :
print sql
print loadDataSql
else:
#check for fields required if inserting to db
if args.host == None :
"Host required if inserting to DB"
exit(1)
if args.tablename == None :
"Tablename required if inserting to DB"
exit(1)
#create table and or load data
if args.nocreate != True:
print "Creating table.."
print helper.readSubOutput([
['mysql','-h',args.host,'-u',args.username,'--password='+args.password,args.db,'-e',sql]
])
if args.truncate == True:
print "Truncating table " + args.tablename
truncateSql = "TRUNCATE TABLE " + args.tablename
print helper.readSubOutput([
['mysql','-h',args.host,'-u',args.username,'--password='+args.password,args.db,'-e',truncateSql]
])
print "Load data into file."
print loadDataSql
print helper.readSubOutput([
['mysql','--show-warnings','-h',args.host,'-u',args.username,'--password='+args.password,args.db,'-e',loadDataSql]
])
print "Done";
以上是关于python 将CSV文件导入db的主要内容,如果未能解决你的问题,请参考以下文章
在 Python 中将 CSV 记录导入 MySQL 数据库
如何使用 Spring Boot 将 CSV 文件导入 MYSQL
如何将导入 python 的数据从 csv 文件转换为时间序列?