python buffalo py任务脚本模板
Posted
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了python buffalo py任务脚本模板相关的知识,希望对你有一定的参考价值。
#!/usr/bin/env python3
#===============================================================================
#
# FILE: app_ad_data_market_gdt_virtual_cost.py
#
# USAGE: ./app_ad_data_market_gdt_virtual_cost.py 20160226
#
# DESCRIPTION:-- 市场部广点通直投虚拟金投放数据
#
# OPTIONS: ---./
# REQUIREMENTS: ---
# BUGS: ---
# NOTES: ---
# AUTHOR: congyihao@jd.com
# COMPANY: JD.COM
# VERSION: 3.0
# CREATED: 04/27/2018 14:00:00
# REVIEWER: ---
# REVISION: ---
# SRC_TABLE: ---
# TGT_TABLE: ---
#===============================================================================
import os,sys
sys.path.append(os.getenv('HIVE_TASK'))
from HiveTask import HiveTask
from Calendar import *
# Task handle from the project-local HiveTask module; every SQL statement in
# this script is executed through ht.exec_sql, and the run date is read from
# ht.data_day_str.
ht = HiveTask()
# NOTE(review): monthFirst / monthLast are not referenced anywhere else in the
# visible script. They are kept because the calendar calls may have side
# effects that cannot be verified from here.
monthFirst = ht.calendar.getMonthFirst()
monthLast = ht.calendar.getMonthLast()
# Run date with the dashes stripped; used as the `ftime` filter value in the
# main query, while the dashed ht.data_day_str is used for `dt` partitions.
# Presumably data_day_str is YYYY-MM-DD, making this YYYYMMDD -- confirm.
date1=ht.data_day_str.replace('-','')
# Statements executed before the load: session settings, then the DROP of the
# target partition for the run date so the job can be re-run for the same day.
# The empty first and last entries reproduce the leading and trailing newline
# of the statement text once the pieces are joined.
drop_statements = (
    "",
    "set mapreduce.job.running.reduce.limit=400;",
    "set mapreduce.job.running.map.limit=500;",
    "set mapreduce.map.memory.mb=2048;",
    "set mapreduce.reduce.memory.mb=4096;",
    "set mapreduce.job.reduce.slowstart.completedmaps=1.0;",
    "set mapred.output.compress=true;",
    "set hive.exec.compress.output=true;",
    "set mapred.output.compression.codec=com.hadoop.compression.lzo.LzopCodec ;",
    "set io.compression.codecs=com.hadoop.compression.lzo.LzopCodec ;",
    "set hive.merge.smallfiles.avgsize=134217728;",
    "set hive.merge.mapfiles=true;",
    "set hive.merge.mapredfiles=true;",
    "set hive.auto.convert.join = true ;",
    "use app;",
    "alter table app_ad_data_market_gdt_virtual_cost drop partition(dt = '"
    + ht.data_day_str
    + "');",
    "",
)
sqldrop = "\n".join(drop_statements)
ht.exec_sql(sql=sqldrop, schema_name='app')
# Best-effort removal of the partition directory on HDFS for the run date.
# The shell suffix discards stderr and forces exit status 0, and the return
# value of os.system is not inspected, so a failed or no-op removal is ignored.
tableName = "app_ad_data_market_gdt_virtual_cost/dt={}".format(ht.data_day_str)
partition_dir = "/user/jd_ad/app.db/" + tableName
os.system("hadoop fs -rm -r {} 2>/dev/null;exit 0".format(partition_dir))
# Main load: overwrite the run-date partition of
# app.app_ad_data_market_gdt_virtual_cost with the UNION ALL of two branches,
# re-aggregated by the outer query:
#   * cost branch  (t1/t2/t3): cost, exposure and click sums per advertiser/ad,
#     with all order metrics set to 0.0;
#   * order branch (t5/t4): order counts and amounts per advertiser/ad, with
#     all cost metrics set to 0.
#
# Fixes relative to the previous version of this statement:
#   1. The order branch returned 28 columns against 29 in the cost branch
#      because it had no parent_name column, so the UNION ALL was invalid.
#      It now emits a typed NULL placeholder in the parent_name position.
#      NOTE(review): the in-SQL TODO about how parent_name should be derived
#      for orders is still open; until it is resolved, order rows carry a NULL
#      parent_name and therefore land in separate output rows from cost rows.
#   2. The first two order-branch columns are aliased division_id_new /
#      division_name_new so column names match across both UNION ALL branches.
#   3. The outer SELECT projects the non-aggregated t.parent_name, so it is
#      now listed in the outer GROUP BY as well.
#   4. chan_effect_ord_first_ord_line_cnt used count(distinct <constant 0>),
#      which can only yield 0 or 1; it is now a plain conditional count,
#      parallel to count(0) used for chan_effect_ord_line_cnt.
#
# `dt` filters use the dashed ht.data_day_str; `ftime` filters use date1
# (the same date with dashes removed).
sql = """
set mapreduce.job.running.reduce.limit=400;
set mapreduce.job.running.map.limit=500;
set mapreduce.map.memory.mb=2048;
set mapreduce.reduce.memory.mb=4096;
set mapreduce.job.reduce.slowstart.completedmaps=1.0;
set mapred.output.compress=true;
set hive.exec.compress.output=true;
set mapred.output.compression.codec=com.hadoop.compression.lzo.LzopCodec ;
set io.compression.codecs=com.hadoop.compression.lzo.LzopCodec ;
set hive.merge.smallfiles.avgsize=134217728;
set hive.merge.mapfiles=true;
set hive.merge.mapredfiles=true;
set hive.auto.convert.join = true ;
set hive.exec.dynamic.partition.mode=nonstrict;
use app;
insert overwrite table app.app_ad_data_market_gdt_virtual_cost partition(dt = '"""+ ht.data_day_str + """')
SELECT
t.division_id_new AS division_id_new,
t.division_name_new AS division_name_new,
t.adv_first_dept_id AS adv_first_dept_id,
t.adv_first_dept_name AS adv_first_dept_name,
t.device_type AS device_type,
t.ad_uin AS ad_uin,
t.jd_pin AS jd_pin,
t.parent_name AS parent_name,
t.url AS url,
t.cost_type AS cost_type,
sum(t.consumption) AS consumption, -- 总消耗金额
sum(t.cash_cost) AS cash_cost, -- 现金消耗金额
sum(t.commission_free_cost) AS commission_free_cost, -- 免佣消耗金额
sum(t.virt_cost) AS virt_cost,
sum(t.t38_money) AS t38_money,
sum(t.t12_money) AS t12_money,
sum(t.t03_money) AS t03_money,
t.cpm_price AS cpm_price,
t.cpc_price AS cpc_price,
sum(t.expose_nums) AS expose_nums,
sum(t.effect_click_nums) AS effect_click_nums,
sum(t.chan_effect_parent_ord_cnt) AS chan_effect_parent_ord_cnt,
sum(t.chan_effect_parent_ord_first_ord_cnt) AS chan_effect_parent_ord_first_ord_cnt,
sum(t.chan_effect_sub_ord_cnt) AS chan_effect_sub_ord_cnt,
sum(t.chan_effect_sub_ord_first_ord_cnt) AS chan_effect_sub_ord_first_ord_cnt,
sum(t.chan_effect_ord_line_cnt) AS chan_effect_ord_line_cnt,
sum(t.chan_effect_ord_first_ord_line_cnt) AS chan_effect_ord_first_ord_line_cnt,
sum(t.chan_effect_ord_line_price) AS chan_effect_ord_line_price,
sum(t.chan_effect_ord_first_ord_line_price) AS chan_effect_ord_first_ord_line_price
FROM (
SELECT
t2.division_id_new AS division_id_new,
t2.division_name_new AS division_name_new,
t2.adv_first_dept_id AS adv_first_dept_id,
t2.adv_first_dept_name AS adv_first_dept_name,
t3.device_type AS device_type,
t1.ad_uin AS ad_uin,
t1.jd_pin AS jd_pin,
t1.parent_name AS parent_name,
t3.url AS url,
t3.cost_type AS cost_type,
sum(t1.cash_cost+t1.virt_cost+t1.subsidy_money+t1.t38_money+t1.t12_money+t1.t03_money) AS consumption, -- 总消耗金额
sum(t1.cash_cost) AS cash_cost, -- 现金消耗金额
sum(t1.commission_free_cost) AS commission_free_cost, -- 免佣消耗金额
sum(t1.virt_cost) AS virt_cost,
sum(t1.t38_money) AS t38_money,
sum(t1.t12_money) AS t12_money,
sum(t1.t03_money) AS t03_money,
t3.cpm_price AS cpm_price,
t3.cpc_price AS cpc_price,
sum(t1.expose_nums) AS expose_nums,
sum(t1.effect_click_nums) AS effect_click_nums,
0.0 AS chan_effect_parent_ord_cnt,
0.0 AS chan_effect_parent_ord_first_ord_cnt,
0.0 AS chan_effect_sub_ord_cnt,
0.0 AS chan_effect_sub_ord_first_ord_cnt,
0.0 AS chan_effect_ord_line_cnt,
0.0 AS chan_effect_ord_first_ord_line_cnt,
0.0 AS chan_effect_ord_line_price,
0.0 AS chan_effect_ord_first_ord_line_price
FROM (
SELECT
ad_uin AS ad_uin,
jd_pin AS jd_pin,
parent_name AS parent_name,
cash_cost AS cash_cost,
virt_cost AS virt_cost,
subsidy_money AS subsidy_money,
t38_money AS t38_money,
t12_money AS t12_money,
t03_money AS t03_money,
commission_free_cost AS commission_free_cost,
expose_nums AS expose_nums,
effect_click_nums AS effect_click_nums,
ad_id AS ad_id
FROM
app.app_szad_w_ads_ad_detail_all_day
WHERE
ftime = '""" + date1 + """'
AND prd_type = 'Operation'
AND parent_name NOT IN('京东自动化投放运营','京东动态创意投放运营','自研中间页投放运营','京东自动化投放运营测试')
) AS t1
LEFT OUTER JOIN (
SELECT
division_id_new AS division_id_new,
division_name_new AS division_name_new,
adv_first_dept_id AS adv_first_dept_id,
adv_first_dept_name AS adv_first_dept_name,
ad_uin AS ad_uin
FROM
dim.dim_jd_advertiser_info
WHERE
dt = '""" + ht.data_day_str + """'
AND dp ='GDT'
) AS t2
ON
t1.ad_uin = t2.ad_uin
LEFT OUTER JOIN (
SELECT
device_type AS device_type,
cost_type AS cost_type,
url AS url,
cpm_price AS cpm_price,
cpc_price AS cpc_price,
ad_id AS ad_id
FROM
fdm.fdm_szad_w_ad_info_day
WHERE
ftime = '""" + date1 + """'
AND platform_type = '京东直投'
AND media_type = '广点通'
) AS t3
ON
t1.ad_id = t3.ad_id
GROUP BY
t2.division_id_new,
t2.division_name_new,
t2.adv_first_dept_id,
t2.adv_first_dept_name,
t3.device_type,
t1.ad_uin,
t1.jd_pin,
t1.parent_name,
t3.url,
t3.cost_type,
t3.cpm_price,
t3.cpc_price
UNION ALL
-- TODO parent_name取值逻辑
SELECT
t5.adv_division_id_new AS division_id_new,
t5.adv_division_name_new AS division_name_new,
t5.adv_first_dept_id AS adv_first_dept_id,
t5.adv_first_dept_name AS adv_first_dept_name,
t4.device_type AS device_type,
t5.ad_uin AS ad_uin,
t5.jd_pin AS jd_pin,
CAST(NULL AS STRING) AS parent_name,
t4.url AS url,
t4.cost_type AS cost_type, -- 扣费类型
0.0 AS consumption,
0.0 AS cash_cost,
0.0 AS commission_free_cost,
0.0 AS virt_cost,
0.0 AS t38_money,
0.0 AS t12_money,
0.0 AS t03_money,
t4.cpm_price AS cpm_price,
t4.cpc_price AS cpc_price,
0 AS expose_nums,
0 AS effect_click_nums,
count(distinct t5.ord_parent_sale_ord_id) AS chan_effect_parent_ord_cnt, --渠道有效订单量(父订单)
count(distinct case when t5.ord_first_ord_flag='0' then t5.ord_parent_sale_ord_id else null end)
as chan_effect_parent_ord_first_ord_cnt, -- 渠道有效首次购订单量(父订单)
count(distinct t5.ord_sale_ord_id) AS chan_effect_sub_ord_cnt, -- 渠道有效订单量(子订单)
count(distinct case when t5.ord_first_ord_flag='0' then t5.ord_sale_ord_id else null end)
as chan_effect_sub_ord_first_ord_cnt, -- 渠道有效首次购订单量(子订单)
count(0) AS chan_effect_ord_line_cnt, -- 渠道有效订单行量(订单行)
count(case when t5.ord_first_ord_flag='0' then 0 else null end) AS chan_effect_ord_first_ord_line_cnt, --渠道有效首次购订单行量(订单行)
sum(t5.ord_after_prefr_amount) as chan_effect_ord_line_price, -- 渠道有效订单金额(订单行)
sum(case when t5.ord_first_ord_flag='0' then t5.ord_after_prefr_amount else 0 end)
as chan_effect_ord_first_ord_line_price -- 渠道有效首次购订单金额(订单行)
FROM (
SELECT
clk_adv_division_id_new AS adv_division_id_new,
clk_adv_division_name_new AS adv_division_name_new,
clk_adv_first_dept_id AS adv_first_dept_id,
clk_adv_first_dept_name AS adv_first_dept_name,
clk_ad_uin AS ad_uin,
clk_advertise_pin AS jd_pin,
ord_parent_sale_ord_id AS ord_parent_sale_ord_id,
ord_first_ord_flag AS ord_first_ord_flag,
ord_sale_ord_id AS ord_sale_ord_id,
ord_after_prefr_amount AS ord_after_prefr_amount,
clk_ad_id AS ad_id
FROM
ad.ad_base_model_m14_plat_chan_cart_ord_click_det
WHERE
dt = '""" + ht.data_day_str + """'
AND dp = 'external_clk'
AND clk_adsystem_type = 'GDT'
AND clk_adv_account_type in ('accountMchVirtual', 'accountTransfer')
AND ord_sale_ord_valid_flag = '1' --有效口径
) AS t5
LEFT OUTER JOIN(
SELECT
device_type AS device_type,
cost_type AS cost_type,
url AS url,
cpm_price AS cpm_price,
cpc_price AS cpc_price,
ad_id AS ad_id
FROM
fdm.fdm_szad_w_ad_info_day
WHERE
ftime = '""" + date1 + """'
AND platform_type = '京东直投'
AND media_type = '广点通'
) AS t4
ON
t5.ad_id = t4.ad_id
GROUP BY
t5.adv_division_id_new,
t5.adv_division_name_new,
t5.adv_first_dept_id,
t5.adv_first_dept_name,
t4.device_type,
t5.ad_uin,
t5.jd_pin,
t4.url,
t4.cost_type,
t4.cpm_price,
t4.cpc_price
) AS t
GROUP BY
t.division_id_new,
t.division_name_new,
t.adv_first_dept_id,
t.adv_first_dept_name,
t.device_type,
t.ad_uin,
t.jd_pin,
t.parent_name,
t.url,
t.cost_type,
t.cpm_price,
t.cpc_price
;
"""
ht.exec_sql(schema_name = 'app', sql = sql)
以上是关于python buffalo py任务脚本模板的主要内容,如果未能解决你的问题,请参考以下文章