WDL-生成配置文件

Posted 我家大宝最可爱

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了WDL-生成配置文件相关的知识,希望对你有一定的参考价值。

import re, os

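# Generates the JSON feature config (fg.json by default).
# The input is a column listing, presumably copied from an ODPS table
# definition (one ",<name> <type> [comment '<v1,v2,...>']" entry per line).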

class Handler_FG:
    """Build a JSON feature config from a plain-text column listing.

    Each input line is expected to look like one of::

        ,<feature_name> <value_type>
        ,<feature_name> <value_type> comment '<v1,v2,...>'

    Every parsed column becomes one JSON object in the ``"features"`` array
    of the output file. Rendered objects accumulate in ``self.result``, so
    reusing one instance for several ``create_fg`` calls appends to the
    entries produced earlier.
    """

    def __init__(self):
        # JSON text written as "default_value" for each supported type.
        # The string default is the two-character literal "" so that it
        # renders as an empty JSON string.
        self.default_match = {
            "bigint": 0,
            "double": 0.0,
            "string": '""',
        }
        # Rendered JSON object snippets, one per feature, in input order.
        self.result = []

    def create_feature(self, feature_name, value_type, comment):
        """Render one feature as a JSON object and append it to ``self.result``.

        Args:
            feature_name: Column name, written as ``"feature_name"``.
            value_type: A key of ``self.default_match`` ("bigint", "double"
                or "string").
            comment: ``None``, or a list of strings. For string features it
                is written as ``"vocabulary"`` (quoted items); for any other
                type it is written as ``"boundaries"`` (items unquoted).

        Raises:
            KeyError: If ``value_type`` is not in ``self.default_match``.
        """
        default_value = self.default_match[value_type]
        pt = '\n\t\t{'
        pt += '\n\t\t\t"feature_name":"{0}",'.format(feature_name)
        pt += '\n\t\t\t"value_type":"{0}",'.format(value_type)
        pt += '\n\t\t\t"default_value":{0}'.format(default_value)

        if value_type == 'string':
            # These id columns additionally get hashing/embedding settings.
            if feature_name in ('user_id', 'seller_id', 'item_id',
                                'cate_id', 'cate_level1_id'):
                pt += ',\n\t\t\t"hash_bucket_size":1000'
                pt += ',\n\t\t\t"embedding_dimension":8'
            if comment is not None:
                vocabulary = ','.join('"{0}"'.format(x) for x in comment)
                pt += ',\n\t\t\t"vocabulary":[{0}]'.format(vocabulary)
        elif comment is not None:
            # Values are emitted unquoted, so they must already be valid
            # JSON literals (e.g. numbers).
            pt += ',\n\t\t\t"boundaries":[{0}]'.format(','.join(comment))
        pt += '\n\t\t}'

        self.result.append(pt)

    def create_fg(self, feature_file='feature.txt', fg_name='fg.json'):
        """Parse ``feature_file`` and write the JSON config to ``fg_name``.

        Args:
            feature_file: Path of the column listing to read.
            fg_name: Path of the JSON file to write (overwritten).

        Raises:
            TypeError: If ``feature_file`` does not exist (exception type
                kept as-is for existing callers).
            ValueError: If a line mentions ``comment`` but has no comment
                value after it.
            KeyError: If a column has an unsupported value type.
        """
        if not os.path.isfile(feature_file):
            raise TypeError(feature_file + " does not exist")

        with open(feature_file, 'r') as file:
            odps_columns = file.read()

        # ",<name> <rest of line>"; the \Z alternative keeps the last column
        # when the file does not end with a newline.
        item_fields = re.findall(r',(.*?) (.*?)(?:\n|\Z)', odps_columns)
        for f, t in item_fields:
            feature_name = f.strip()
            if 'comment' in t:
                # Split at most twice so the comment value may contain spaces.
                value_type, _, comment = t.split(None, 2)
                value_type = value_type.strip()
                # Drop the surrounding quote characters.
                comment = comment.strip()[1:-1]
                if comment == '':
                    comment = None
                else:
                    comment = [item.strip() for item in comment.split(',')]
            else:
                value_type = t.strip()
                comment = None

            self.create_feature(feature_name, value_type, comment)

        with open(fg_name, 'w') as f:
            f.write("{\n")
            f.write('\t"features":[\n')
            f.write(','.join(self.result))
            f.write('\n\t]')
            f.write('\n}')


if __name__ == "__main__":
    # Script entry point: read ./feature.txt and write ./fg.json using the
    # default arguments of create_fg().
    handler = Handler_FG()
    handler.create_fg()



特征文件(默认文件名为 feature.txt,每行格式为",字段名 类型"),用来生成配置文件:

    ,age double
    ,workclass string
    ,fnlwgt string
    ,education string
    ,education_num double
    ,marital_status string
    ,occupation string
    ,relationship string
    ,race string
    ,gender string
    ,capital_gain double
    ,capital_loss double
    ,hours_per_week double
    ,native_country string
    ,income_bracket string

以上是关于WDL-生成配置文件的主要内容,如果未能解决你的问题,请参考以下文章

WDL-特征生成

WDL-训练模型

WDL数据加载

sh wdl设置别名/ ln

WDL-训练模型

WDL数据加载