python 此脚本将 OpenRefine 的 JSON 文件作为输入,并返回相同的文件,其中每个“转换”和每个“批量编辑”操作都会被记录到一个新列中
Posted
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了一个 Python 脚本:它将 OpenRefine 的 JSON 文件作为输入,并返回相同的文件,其中每个“转换”和每个“批量编辑”操作都会被记录到一个新列中,希望对你有一定的参考价值。
#!/usr/bin/python3
import json
# Load the exported operation history (a JSON array of operation objects).
# JSON text is UTF-8, so decode it explicitly instead of relying on the
# platform's locale encoding, which would corrupt non-ASCII values.
with open("test.json", "r", encoding="utf-8") as infile:
    data = json.load(infile)
def transform_to_addcolumn(data, count=None):
    """Build a column-addition operation that documents a text transform.

    The returned dict is a shallow copy of ``data`` rewritten so that its
    ``op`` is ``"core/column-addition"`` and its ``expression`` is a jython
    snippet returning the text ``"<expression> on cell <column>"``.

    Args:
        data: Operation dict; must contain ``"expression"`` and
            ``"columnName"``. It is not modified.
        count: Position used to derive the description suffix
            (``count + 1``), ``columnInsertIndex`` (``count - 1``) and
            ``newColumnName`` (``"transform<count>"``). When omitted, the
            module-level name ``count`` is used, which is what callers
            relied on before this parameter existed.

    Returns:
        A new dict describing the column-addition operation.

    Raises:
        KeyError: If ``"expression"`` or ``"columnName"`` is missing.
        NameError: If ``count`` is omitted and no module-level ``count``
            exists.
    """
    if count is None:
        # Backward compatibility: existing callers set a module-level
        # `count` (the driver loop variable) instead of passing it in.
        try:
            count = globals()["count"]
        except KeyError:
            raise NameError("name 'count' is not defined") from None

    expression = data["expression"]
    # Strip only the leading language prefix; a blanket replace() would also
    # mangle any "grel:" text that appears inside the expression itself.
    if expression.startswith("grel:"):
        expression = expression[len("grel:"):]

    data_trans = dict(data)
    data_trans["op"] = "core/column-addition"
    # The trailing space after the closing triple quote is part of the
    # emitted expression and is kept as-is.
    data_trans["expression"] = 'jython:return """%s on cell %s""" ' % (
        expression, data["columnName"])
    data_trans["onError"] = "store-error"
    data_trans["description"] = "store operations in a column" + str(count + 1)
    # NOTE(review): count == 0 yields columnInsertIndex == -1; confirm that
    # this is the intended insert position.
    data_trans["columnInsertIndex"] = count - 1
    data_trans["newColumnName"] = "transform" + str(count)
    data_trans["baseColumnName"] = data["columnName"]

    del data_trans["columnName"]
    # These keys are dropped from the result; tolerate inputs that do not
    # carry them rather than failing with KeyError.
    data_trans.pop("repeat", None)
    data_trans.pop("repeatCount", None)
    return data_trans
def massedit_to_addcolumn(data, count=None):
    """Build a column-addition operation that documents a mass edit.

    The returned dict is a shallow copy of ``data`` rewritten so that its
    ``op`` is ``"core/column-addition"`` and its ``expression`` is a jython
    snippet returning ``"MASS EDIT <from> TO <to> ON COLUMN <column>"``.
    Every entry of ``data["edits"]`` is included: the ``from`` values of one
    edit are joined with ``", "`` and separate edits are joined with ``"; "``.
    For a single edit with a single ``from`` value the text is identical to
    the earlier first-edit-only form.

    Args:
        data: Operation dict; must contain ``"edits"`` (a list of dicts with
            ``"from"`` and ``"to"``) and ``"columnName"``. It is not
            modified.
        count: Position used to derive the description suffix
            (``count + 1``), ``columnInsertIndex`` (``count - 1``) and
            ``newColumnName`` (``"transform<count>"``). When omitted, the
            module-level name ``count`` is used, which is what callers
            relied on before this parameter existed.

    Returns:
        A new dict describing the column-addition operation.

    Raises:
        KeyError: If ``"edits"``, ``"columnName"`` or an edit's ``"from"`` /
            ``"to"`` is missing.
        NameError: If ``count`` is omitted and no module-level ``count``
            exists.
    """
    if count is None:
        # Backward compatibility: existing callers set a module-level
        # `count` (the driver loop variable) instead of passing it in.
        try:
            count = globals()["count"]
        except KeyError:
            raise NameError("name 'count' is not defined") from None

    # Describe every edit, not just the first source value of the first
    # edit, so no part of the mass edit is silently lost.
    clauses = [
        "%s TO %s" % (", ".join(str(value) for value in edit["from"]), edit["to"])
        for edit in data["edits"]
    ]
    summary = "MASS EDIT %s ON COLUMN %s" % (
        "; ".join(clauses), data["columnName"])

    data_trans = dict(data)
    data_trans["op"] = "core/column-addition"
    # The trailing space after the closing triple quote is part of the
    # emitted expression and is kept as-is.
    data_trans["expression"] = 'jython:return """%s""" ' % summary
    data_trans["onError"] = "store-error"
    data_trans["description"] = "store operations in a column" + str(count + 1)
    # NOTE(review): count == 0 yields columnInsertIndex == -1; confirm that
    # this is the intended insert position.
    data_trans["columnInsertIndex"] = count - 1
    data_trans["newColumnName"] = "transform" + str(count)
    data_trans["baseColumnName"] = data["columnName"]

    del data_trans["edits"]
    del data_trans["columnName"]
    return data_trans
# Map each operation type that must be documented to the function that
# builds its companion column-addition operation.
converters = {
    "core/text-transform": transform_to_addcolumn,
    "core/mass-edit": massedit_to_addcolumn,
}

# Build the output in a fresh list instead of inserting into `data` while
# iterating over it; each documenting operation directly follows the
# operation it describes.
documented = []
for el in data:
    documented.append(el)
    # The converter functions are called without an explicit position, so
    # they pick up the module-level name `count`: the index of `el` in the
    # output list, counting the documenting operations added so far.
    count = len(documented) - 1
    convert = converters.get(el["op"])
    if convert is not None:
        documented.append(convert(el))
data = documented

print(data)
with open("new_openrefine_operations.json", "w") as outfile:
    json.dump(data, outfile, indent=4)
以上是关于该 Python 脚本(将 OpenRefine 的 JSON 文件作为输入,并返回相同的文件,其中每个“转换”和每个“批量编辑”操作都会被记录到一个新列中)的主要内容,如果未能解决你的问题,请参考以下文章
RPythonOpen Refine采集pdf数据,清理数据和格式化数据
python refine.py
Python学习15:Open读取文件
Ruby的细化(refine关键字)
python脚本0b文件处理
将二级目录下的文件合并成一个文件的Python小脚本