WDL-生成配置文件
Posted 我家大宝最可爱
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了WDL-生成配置文件相关的知识,希望对你有一定的参考价值。
import re, os
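# Generates a JSON feature-config file. The input is a text file of column
# definitions, one ",name type" entry per line (presumably copied from an ODPS table schema).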
class Handler_FG:
    """Build a JSON feature config ("fg" file) from a list of column definitions.

    The input file holds one column per line in the form ``,name type`` or
    ``,name type comment 'v1,v2,...'``.  For every column one JSON object is
    rendered as text and collected in ``self.result``; ``create_fg`` then
    writes them all under a top-level ``"features"`` array.

    ``self.result`` is only initialised in ``__init__``, so features
    accumulate across repeated ``create_feature`` / ``create_fg`` calls on the
    same instance.
    """

    # Feature names that get hashing/embedding settings instead of relying on
    # a vocabulary.
    _ID_FEATURES = ('user_id', 'seller_id', 'item_id', 'cate_id', 'cate_level1_id')

    def __init__(self):
        # Literal text emitted after "default_value": for each supported type.
        # The string default is the two-character text "" (an empty JSON string).
        self.default_match = {
            "bigint": 0,
            "double": 0.0,
            "string": "\"\"",
        }
        # Rendered JSON objects, one per feature, in insertion order.
        self.result = []

    def create_feature(self, feature_name, value_type, comment):
        """Render one feature as a JSON object and append it to ``self.result``.

        Args:
            feature_name: Column name, written as ``"feature_name"``.
            value_type: One of the keys of ``self.default_match``.
            comment: ``None``, or a list of strings.  For ``string`` columns
                the items become the quoted ``"vocabulary"`` entries; for any
                other type they are emitted verbatim as ``"boundaries"``.

        Raises:
            KeyError: If ``value_type`` is not a key of ``self.default_match``.
        """
        default_value = self.default_match[value_type]

        # The literal braces are kept out of the .format() templates so they
        # do not need escaping.
        pieces = [
            '\n\t\t{',
            '\n\t\t\t"feature_name":"{0}",'.format(feature_name),
            '\n\t\t\t"value_type":"{0}",'.format(value_type),
            '\n\t\t\t"default_value":{0}'.format(default_value),
        ]

        if value_type == 'string':
            # NOTE(review): the nesting of the next two settings was
            # reconstructed from a copy of this code that had lost its
            # indentation; confirm that embedding_dimension is meant for id
            # features only.
            if feature_name in self._ID_FEATURES:
                pieces.append(',\n\t\t\t"hash_bucket_size":1000')
                pieces.append(',\n\t\t\t"embedding_dimension":8')
            if comment is not None:
                vocabulary = ','.join('"{0}"'.format(item) for item in comment)
                pieces.append(',\n\t\t\t"vocabulary":[{0}]'.format(vocabulary))
        elif comment is not None:
            # Numeric column: the comment items are bucket boundaries and are
            # written unquoted.
            pieces.append(',\n\t\t\t"boundaries":[{0}]'.format(','.join(comment)))

        pieces.append('\n\t\t}')
        self.result.append(''.join(pieces))

    @staticmethod
    def _parse_type_spec(spec):
        """Split the text after the column name into ``(value_type, comment)``.

        ``comment`` is ``None`` when the spec has no ``comment`` clause or the
        quoted comment is empty; otherwise it is the quoted text split on
        commas.

        Raises:
            ValueError: If a ``comment`` clause is present but the spec does
                not have the shape ``type comment 'text'``.
        """
        if 'comment' not in spec:
            return spec.strip(), None

        # maxsplit=2 keeps a comment that itself contains spaces in one piece;
        # sep=None tolerates tabs and repeated blanks between the tokens.
        parts = spec.split(None, 2)
        if len(parts) != 3:
            raise ValueError("malformed column definition: %r" % (spec,))
        value_type, _, quoted = parts

        # Drop the surrounding quote characters.
        text = quoted.strip()[1:-1]
        if text == '':
            return value_type, None
        return value_type, text.split(',')

    def create_fg(self, feature_file='feature.txt', fg_name='fg.json'):
        """Read column definitions from ``feature_file`` and write ``fg_name``.

        Only lines containing ``,name type`` are picked up; anything else in
        the file is ignored.

        Args:
            feature_file: Path of the column-definition text file.
            fg_name: Path of the JSON file to create (overwritten if present).

        Raises:
            TypeError: If ``feature_file`` does not exist (exception type kept
                for backward compatibility).
            KeyError: If a column uses a type missing from ``self.default_match``.
            ValueError: If a column's ``comment`` clause is malformed.
        """
        if not os.path.isfile(feature_file):
            raise TypeError(feature_file + " does not exist")

        with open(feature_file, 'r') as file:
            odps_columns = file.read()

        # The pattern needs a newline after every column; without this the
        # last column is silently lost when the file has no trailing newline.
        if not odps_columns.endswith('\n'):
            odps_columns += '\n'

        for raw_name, raw_spec in re.findall(r',(.*?) (.*?)\n', odps_columns):
            value_type, comment = self._parse_type_spec(raw_spec)
            self.create_feature(raw_name.strip(), value_type, comment)

        with open(fg_name, 'w') as out:
            out.write("{\n")
            out.write('\t"features":[\n')
            out.write(','.join(self.result))
            out.write('\n\t]')
            out.write('\n}')
if __name__ == "__main__":
    # Script entry point: read ./feature.txt and write ./fg.json, the default
    # paths of create_fg().
    hf = Handler_FG()
    hf.create_fg()
特征文件(默认文件名为 feature.txt),用来生成配置文件。每行的格式为“,字段名 类型”,内容如下:
,age double
,workclass string
,fnlwgt string
,education string
,education_num double
,marital_status string
,occupation string
,relationship string
,race string
,gender string
,capital_gain double
,capital_loss double
,hours_per_week double
,native_country string
,income_bracket string
以上是关于WDL-生成配置文件的主要内容,如果未能解决你的问题,请参考以下文章