python buffalo py任务脚本模板

Posted

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了python buffalo py任务脚本模板相关的知识,希望对你有一定的参考价值。

#!/usr/bin/env python3
#===============================================================================
#
#         FILE: app_ad_data_market_gdt_virtual_cost.py
#        
#        USAGE: ./app_ad_data_market_gdt_virtual_cost.py 20160226
#
#  DESCRIPTION:-- 市场部广点通直投虚拟金投放数据
#
#      OPTIONS: ---./
# REQUIREMENTS: ---
#         BUGS: ---
#        NOTES: ---
#       AUTHOR: congyihao@jd.com
#      COMPANY: JD.COM
#      VERSION: 3.0
#      CREATED: 04/27/2018 14:00:00
#     REVIEWER: ---
#     REVISION: ---
#    SRC_TABLE: ---
#    TGT_TABLE: ---
#===============================================================================

import os,sys
sys.path.append(os.getenv('HIVE_TASK'))
from HiveTask import HiveTask
from Calendar import *


# Task context object provided by the HiveTask module; it exposes the data day
# string, a calendar helper and the SQL runner used by this script.
ht = HiveTask()

# Month boundaries taken from the task calendar. They are not referenced by
# the statements in this script.
monthFirst = ht.calendar.getMonthFirst()
monthLast = ht.calendar.getMonthLast()

# Data day with the '-' separators stripped; used for the `ftime` filters.
date1 = "".join(ht.data_day_str.split('-'))

# Step 1: clear the target partition for the data day before it is rewritten.
# The statement text is assembled from a fixed settings preamble plus the
# ALTER TABLE statement carrying the partition value.
drop_preamble = """
set mapreduce.job.running.reduce.limit=400;
set mapreduce.job.running.map.limit=500;
set mapreduce.map.memory.mb=2048;
set mapreduce.reduce.memory.mb=4096;
set mapreduce.job.reduce.slowstart.completedmaps=1.0;
set mapred.output.compress=true;
set hive.exec.compress.output=true;
set mapred.output.compression.codec=com.hadoop.compression.lzo.LzopCodec ;
set io.compression.codecs=com.hadoop.compression.lzo.LzopCodec ;
set hive.merge.smallfiles.avgsize=134217728;
set hive.merge.mapfiles=true;
set hive.merge.mapredfiles=true;
set hive.auto.convert.join = true ;
use app;
"""
drop_statement = (
    "alter table app_ad_data_market_gdt_virtual_cost drop partition(dt = '"
    + ht.data_day_str
    + "');\n"
)
ht.exec_sql(schema_name='app', sql=drop_preamble + drop_statement)

# Step 2: remove the partition directory on HDFS as well. stderr is discarded
# and the shell command always exits 0, so a missing directory is not an error.
partition_dir = "app_ad_data_market_gdt_virtual_cost/dt={0}".format(ht.data_day_str)
os.system(
    "hadoop fs -rm -r /user/jd_ad/app.db/{0} 2>/dev/null;exit 0".format(partition_dir)
)
# Rebuild the `dt` partition of app.app_ad_data_market_gdt_virtual_cost for the
# data day.
#
# The inner query is a UNION ALL of two branches that are re-aggregated by the
# outer query:
#   * branch 1 (t1/t2/t3): cost, exposure and click sums from
#     app.app_szad_w_ads_ad_detail_all_day, joined to advertiser and ad info;
#     all order metrics are emitted as 0.0.
#   * branch 2 (t5/t4): order counts and amounts from
#     ad.ad_base_model_m14_plat_chan_cart_ord_click_det, joined to ad info;
#     all cost/exposure/click metrics are emitted as 0.
#
# Fixes relative to the previous version of this statement:
#   1. Branch 2 did not return `parent_name`, so the two UNION ALL branches had
#      29 vs 28 columns. Branch 2 now returns a typed NULL placeholder in the
#      same position. The source of `parent_name` for order rows is still the
#      open TODO kept in the query, so order rows carry a NULL parent_name
#      (and therefore aggregate separately from cost rows) until it is resolved.
#   2. Branch 2 aliases its first two columns with the same names as branch 1,
#      so both sides of the union expose an identical schema.
#   3. The outer query selects `t.parent_name`, so it is now also listed in the
#      outer GROUP BY.
#   4. `chan_effect_ord_first_ord_line_cnt` used count(distinct <constant>),
#      which can only produce 0 or 1; it now counts the matching rows, in line
#      with `chan_effect_ord_line_cnt` which uses count(0).
sql = """
set mapreduce.job.running.reduce.limit=400;
set mapreduce.job.running.map.limit=500;
set mapreduce.map.memory.mb=2048;
set mapreduce.reduce.memory.mb=4096;
set mapreduce.job.reduce.slowstart.completedmaps=1.0;
set mapred.output.compress=true;
set hive.exec.compress.output=true;
set mapred.output.compression.codec=com.hadoop.compression.lzo.LzopCodec ;
set io.compression.codecs=com.hadoop.compression.lzo.LzopCodec ;
set hive.merge.smallfiles.avgsize=134217728;
set hive.merge.mapfiles=true;
set hive.merge.mapredfiles=true;
set hive.auto.convert.join = true ;
set hive.exec.dynamic.partition.mode=nonstrict;
use app;

insert overwrite table app.app_ad_data_market_gdt_virtual_cost partition(dt = '"""+ ht.data_day_str + """')
SELECT
    t.division_id_new AS division_id_new,
    t.division_name_new AS division_name_new,
    t.adv_first_dept_id AS adv_first_dept_id,
    t.adv_first_dept_name AS adv_first_dept_name,
    t.device_type AS device_type,
    t.ad_uin AS ad_uin,
    t.jd_pin AS jd_pin,
    t.parent_name AS parent_name,
    t.url AS url,
    t.cost_type AS cost_type,
    sum(t.consumption) AS consumption, -- 总消耗金额
    sum(t.cash_cost) AS cash_cost, -- 现金消耗金额
    sum(t.commission_free_cost) AS commission_free_cost, -- 免佣消耗金额
    sum(t.virt_cost) AS virt_cost,
    sum(t.t38_money) AS t38_money,
    sum(t.t12_money) AS t12_money,
    sum(t.t03_money) AS t03_money,
    t.cpm_price AS cpm_price,
    t.cpc_price AS cpc_price,
    sum(t.expose_nums) AS expose_nums,
    sum(t.effect_click_nums) AS effect_click_nums,
    sum(t.chan_effect_parent_ord_cnt) AS chan_effect_parent_ord_cnt,
    sum(t.chan_effect_parent_ord_first_ord_cnt) AS chan_effect_parent_ord_first_ord_cnt,
    sum(t.chan_effect_sub_ord_cnt) AS chan_effect_sub_ord_cnt,
    sum(t.chan_effect_sub_ord_first_ord_cnt) AS chan_effect_sub_ord_first_ord_cnt,
    sum(t.chan_effect_ord_line_cnt) AS chan_effect_ord_line_cnt,
    sum(t.chan_effect_ord_first_ord_line_cnt) AS chan_effect_ord_first_ord_line_cnt,
    sum(t.chan_effect_ord_line_price) AS chan_effect_ord_line_price,
    sum(t.chan_effect_ord_first_ord_line_price) AS chan_effect_ord_first_ord_line_price
FROM (
    SELECT
        t2.division_id_new AS division_id_new,
        t2.division_name_new AS division_name_new,
        t2.adv_first_dept_id AS adv_first_dept_id,
        t2.adv_first_dept_name AS adv_first_dept_name,
        t3.device_type AS device_type,
        t1.ad_uin AS ad_uin,
        t1.jd_pin AS jd_pin,
        t1.parent_name AS parent_name,
        t3.url AS url,
        t3.cost_type AS cost_type,
        sum(t1.cash_cost+t1.virt_cost+t1.subsidy_money+t1.t38_money+t1.t12_money+t1.t03_money) AS consumption, -- 总消耗金额
        sum(t1.cash_cost) AS cash_cost, -- 现金消耗金额
        sum(t1.commission_free_cost) AS commission_free_cost, -- 免佣消耗金额
        sum(t1.virt_cost) AS virt_cost,
        sum(t1.t38_money) AS t38_money,
        sum(t1.t12_money) AS t12_money,
        sum(t1.t03_money) AS t03_money,
        t3.cpm_price AS cpm_price,
        t3.cpc_price AS cpc_price,
        sum(t1.expose_nums) AS expose_nums,
        sum(t1.effect_click_nums) AS effect_click_nums,
        0.0 AS chan_effect_parent_ord_cnt,
        0.0 AS chan_effect_parent_ord_first_ord_cnt,
        0.0 AS chan_effect_sub_ord_cnt,
        0.0 AS chan_effect_sub_ord_first_ord_cnt,
        0.0 AS chan_effect_ord_line_cnt,
        0.0 AS chan_effect_ord_first_ord_line_cnt,
        0.0 AS chan_effect_ord_line_price,
        0.0 AS chan_effect_ord_first_ord_line_price
    FROM (
        SELECT
            ad_uin AS ad_uin,
            jd_pin AS jd_pin,
            parent_name AS parent_name,
            cash_cost AS cash_cost,
            virt_cost AS virt_cost,
            subsidy_money AS subsidy_money,
            t38_money AS t38_money,
            t12_money AS t12_money,
            t03_money AS t03_money,
            commission_free_cost AS commission_free_cost,
            expose_nums AS expose_nums,
            effect_click_nums AS effect_click_nums,
            ad_id AS ad_id
        FROM
            app.app_szad_w_ads_ad_detail_all_day
        WHERE
            ftime = '""" + date1 + """'
        AND prd_type = 'Operation'
        AND parent_name NOT IN('京东自动化投放运营','京东动态创意投放运营','自研中间页投放运营','京东自动化投放运营测试')
    ) AS t1
    LEFT OUTER JOIN (
        SELECT
            division_id_new AS division_id_new,
            division_name_new AS division_name_new,
            adv_first_dept_id AS adv_first_dept_id,
            adv_first_dept_name AS adv_first_dept_name,
            ad_uin AS ad_uin
        FROM
            dim.dim_jd_advertiser_info
        WHERE
            dt = '""" + ht.data_day_str + """'
        AND dp ='GDT'
    ) AS t2
    ON
        t1.ad_uin = t2.ad_uin
    LEFT OUTER JOIN (
        SELECT
            device_type AS device_type,
            cost_type AS cost_type,
            url AS url,
            cpm_price AS cpm_price,
            cpc_price AS cpc_price,
            ad_id AS ad_id
        FROM
            fdm.fdm_szad_w_ad_info_day
        WHERE
            ftime = '""" + date1 + """'
        AND platform_type = '京东直投'
        AND media_type = '广点通'
    ) AS t3
    ON
        t1.ad_id = t3.ad_id
    GROUP BY
        t2.division_id_new,
        t2.division_name_new,
        t2.adv_first_dept_id,
        t2.adv_first_dept_name,
        t3.device_type,
        t1.ad_uin,
        t1.jd_pin,
        t1.parent_name,
        t3.url,
        t3.cost_type,
        t3.cpm_price,
        t3.cpc_price

    UNION ALL

    -- TODO parent_name取值逻辑
    SELECT
        t5.adv_division_id_new AS division_id_new,
        t5.adv_division_name_new AS division_name_new,
        t5.adv_first_dept_id AS adv_first_dept_id,
        t5.adv_first_dept_name AS adv_first_dept_name,
        t4.device_type AS device_type,
        t5.ad_uin AS ad_uin,
        t5.jd_pin AS jd_pin,
        CAST(NULL AS STRING) AS parent_name, -- placeholder until the TODO above is resolved
        t4.url AS url,
        t4.cost_type AS cost_type, -- 扣费类型
        0.0 AS consumption,
        0.0 AS cash_cost,
        0.0 AS commission_free_cost,
        0.0 AS virt_cost,
        0.0 AS t38_money,
        0.0 AS t12_money,
        0.0 AS t03_money,
        t4.cpm_price AS cpm_price,
        t4.cpc_price AS cpc_price,
        0 AS expose_nums,
        0 AS effect_click_nums,
        count(distinct t5.ord_parent_sale_ord_id) AS chan_effect_parent_ord_cnt, --渠道有效订单量(父订单)
        count(distinct case when  t5.ord_first_ord_flag='0' then t5.ord_parent_sale_ord_id else null end)
        as chan_effect_parent_ord_first_ord_cnt, -- 渠道有效首次购订单量(父订单)
        count(distinct t5.ord_sale_ord_id) AS chan_effect_sub_ord_cnt, -- 渠道有效订单量(子订单)
        count(distinct case when  t5.ord_first_ord_flag='0' then t5.ord_sale_ord_id else null end)
        as chan_effect_sub_ord_first_ord_cnt, -- 渠道有效首次购订单量(子订单)
        count(0) AS chan_effect_ord_line_cnt, -- 渠道有效订单行量(订单行)
        count(case when  t5.ord_first_ord_flag='0' then 0 else null end) AS chan_effect_ord_first_ord_line_cnt, --渠道有效首次购订单行量(订单行)
        sum(t5.ord_after_prefr_amount) as chan_effect_ord_line_price, -- 渠道有效订单金额(订单行)
        sum(case when t5.ord_first_ord_flag='0' then t5.ord_after_prefr_amount else 0 end)
        as chan_effect_ord_first_ord_line_price -- 渠道有效首次购订单金额(订单行)
    FROM (
        SELECT
            clk_adv_division_id_new AS adv_division_id_new,
            clk_adv_division_name_new AS adv_division_name_new,
            clk_adv_first_dept_id AS adv_first_dept_id,
            clk_adv_first_dept_name AS adv_first_dept_name,
            clk_ad_uin AS ad_uin,
            clk_advertise_pin AS jd_pin,
            ord_parent_sale_ord_id AS ord_parent_sale_ord_id,
            ord_first_ord_flag AS ord_first_ord_flag,
            ord_sale_ord_id AS ord_sale_ord_id,
            ord_after_prefr_amount AS ord_after_prefr_amount,
            clk_ad_id AS ad_id
        FROM
            ad.ad_base_model_m14_plat_chan_cart_ord_click_det
        WHERE
            dt = '""" + ht.data_day_str + """'
        AND dp = 'external_clk'
        AND clk_adsystem_type = 'GDT'
        AND clk_adv_account_type in ('accountMchVirtual', 'accountTransfer')
        AND ord_sale_ord_valid_flag = '1' --有效口径
    ) AS t5
    LEFT OUTER JOIN(
        SELECT
            device_type AS device_type,
            cost_type AS cost_type,
            url AS url,
            cpm_price AS cpm_price,
            cpc_price AS cpc_price,
            ad_id AS ad_id
        FROM
            fdm.fdm_szad_w_ad_info_day
        WHERE
            ftime = '""" + date1 + """'
        AND platform_type = '京东直投'
        AND media_type = '广点通'
    ) AS t4
    ON
        t5.ad_id = t4.ad_id
    GROUP BY
        t5.adv_division_id_new,
        t5.adv_division_name_new,
        t5.adv_first_dept_id,
        t5.adv_first_dept_name,
        t4.device_type,
        t5.ad_uin,
        t5.jd_pin,
        t4.url,
        t4.cost_type,
        t4.cpm_price,
        t4.cpc_price
) AS t
GROUP BY
    t.division_id_new,
    t.division_name_new,
    t.adv_first_dept_id,
    t.adv_first_dept_name,
    t.device_type,
    t.ad_uin,
    t.jd_pin,
    t.parent_name,
    t.url,
    t.cost_type,
    t.cpm_price,
    t.cpc_price

;
"""
ht.exec_sql(schema_name='app', sql=sql)

以上是关于python buffalo py任务脚本模板的主要内容,如果未能解决你的问题,请参考以下文章

运行Sqoop任务的通用模板:Python2脚本实现(待优化)

markdown buffalo任务配置

琉璃小屋-Pycharm设置.py文件头部模板

教你如何在win10下自动运行python程序脚本

运行Sqoop任务的通用脚本Python2实现(待优化)

用shell把写需要定时执行的脚本写进计划任务