使基于雪花 Javascript 的过程查询更快
Posted
技术标签:
【中文标题】使基于雪花 Javascript 的过程查询更快【英文标题】:Making Snowflake Javascript based procedure query faster 【发布时间】:2020-06-29 18:54:16 【问题描述】:我有一个存储过程,我刚刚从 PL/SQL 转换为 Snowflake javascript。它每分钟插入大约 100 条记录。总记录数约为 700。因为很难知道 Snowflake 中的问题出在哪里,所以我会随着整体功能的进展插入日志语句。我还将消息推送到在底部返回的数组。但是,我在 PL/SQL 中插入日志表类型的东西,它几乎没有产生性能差异。我承认我的进度加载减慢了进程,但我怀疑它是主要贡献者。
脚本创建一个表格,给定日期,显示它对应的财政季度。这对于未显示的其他查询很有帮助。我有一个简单的循环,从第一季度开始到最后一个季度结束,并将相应的季度放入查找表中。
按照所写的运行需要 9 分钟,但在 Oracle 中,用时不到一秒。
我想知道如何让它运行得更快:
create or replace procedure periodic_load()
RETURNS varchar
LANGUAGE javascript
execute as owner
as
$$
var result = "";
var messages = new Array();
try
/**
Constants shared between functions
*/
var SINGLE_QUOTE_CHAR="'";
var DOUBLE_QUOTE_CHAR="\"";
var COMMA_CHAR=",";
var LEFT_PARENTHESIS="(";
var RIGHT_PARENTHESIS=")";
var ESCAPED_SINGLE_QUOTE_CHAR="\\'";
var ESCAPED_DOUBLE_QUOTE_CHAR="\\\"";
var CONSOLE_LOG_USED = true;
var IS_SNOWFLAKE = false;
/*
Execute Snowflake SQL or simulate the execution thereof
@parmam sqlTextIn,binds...
sqlTextIn: String of the sql command to run.
binds: zero or more parameters to bind to the execution of the command.
*/
function execute_with_log()
var result = null;
messages.push('@@@'+"execute_with_log()");
messages.push('@@@'+"EXECUTE_WITH_LOG(BP1)");
var argumentsArray = Array.prototype.slice.apply(arguments);
var sqlTextIn = argumentsArray[0];
messages.push('@@@'+'EXECUTE_WITH_LOG argument count: '+arguments.length);
if(!IS_SNOWFLAKE)
messages.push('@@@'+ "EXECUTE_WITH_LOG(BP2)");
console.log('SKIPPING SNOWFLAKE SQL: '+sqlTextIn);
else
messages.push('@@@'+ " EXECUTE_WITH_LOG(BP3)");
var statementResult;
var logMessage = sqlTextIn;
if(argumentsArray.length==1)
messages.push('@@@'+ " EXECUTE_WITH_LOG(BP4)");
messages.push('@@@'+" ** NO BIND PARAMETERS DETECTED **");
else
messages.push('@@@'+ " EXECUTE_WITH_LOG(BP5)");
for(var bindParmCounter = 1; bindParmCounter < argumentsArray.length; bindParmCounter++)
messages.push('@@@'+" ,"+argumentsArray[bindParmCounter]);
messages.push('@@@'+ " EXECUTE_WITH_LOG(BP6)");
log_message('I',logMessage);
if(argumentsArray.length===1)
messages.push('@@@'+ " EXECUTE_WITH_LOG(BP7)");
statement = snowflake.createStatement( sqlText: sqlTextIn );
else
messages.push('@@@'+ " EXECUTE_WITH_LOG(BP8)");
var bindsIn = argumentsArray.slice(1,argumentsArray.length);
for(var bindParmCounter = 0; bindParmCounter < bindsIn.length; bindParmCounter++)
messages.push('@@@bindsIn['+bindParmCounter+"]="+bindsIn[bindParmCounter]);
messages.push('@@@bindsIn['+bindParmCounter+"] type ="+bindsIn[bindParmCounter].getName());
statement = snowflake.createStatement(
sqlText: sqlTextIn,
binds: bindsIn
);
messages.push('@@@'+ " EXECUTE_WITH_LOG(BP9) sqlTextIn="+sqlTextIn);
result = statement.execute();
messages.push('@@@'+ " After execute BP10 =");
commit();
messages.push('@@@'+ " After commit BP11 =");
return result;
function commit()
messages.push('@@@'+ " commit");
statement = snowflake.createStatement(
sqlText: 'commit'
);
statement.execute();
return messages;
function log_message(severity,message)
messages.push('@@@'+"log_message(severity,message): severity="+severity+" message="+message);
var result = null;
if(!IS_SNOWFLAKE)
console.log(severity+": "+message);
messages.push('@@@'+severity+": "+message);
else
var record = 'severity': severity,'date_time': value: 'current_timestamp::timestamp_ntz',useQuote:false,message:message;
try
var escapeStep1=message.replaceAll(SINGLE_QUOTE_CHAR,ESCAPED_SINGLE_QUOTE_CHAR);
var escapeStep2=escapeStep1.replaceAll(DOUBLE_QUOTE_CHAR,ESCAPED_DOUBLE_QUOTE_CHAR);
quotedValue=SINGLE_QUOTE_CHAR+escapeStep2+SINGLE_QUOTE_CHAR;
var quotedSeverity = SINGLE_QUOTE_CHAR+severity+SINGLE_QUOTE_CHAR;
var sql_command = "insert into LOG_MESSAGES(severity,date_time,message) values("+quotedSeverity+",current_timestamp::timestamp_ntz,"+quotedValue+")";
statement = snowflake.createStatement( sqlText: sql_command);
var sql_command = "commit";
statement = snowflake.createStatement( sqlText: sql_command);
catch(error)
messages.push('@@@'+'FAILURE: '+error);
return result;
function truncate_table(tableName)
messages.push('@@@'+"(truncate_table()");
var result = execute_with_log("truncate table "+tableName);
messages.push('@@@'+'I','End truncate_table()');
return result;
function fql()
messages.push('@@@'+"begin fql()");
log_message('I','Begin fql()');
var table_name='fiscal_quarter_list';
truncate_table(table_name);
execute(
"insert into fiscal_quarter_list (fiscal_quarter_id,fiscal_quarter_name,fiscal_year,start_date,end_date,last_mod_date_stamp) ("
+" select fiscal_quarter_id,fiscal_quarter_name,fiscal_year,min(start_date) start_date,max(end_date) end_date,current_date from cdw_fiscal_periods cfp"
+" where (cfp.start_date >= add_months(sysdate(),-24) and sysdate() >= cfp.end_date ) or "
+" (cfp.start_date <= sysdate() and sysdate() < cfp.end_date) "
+" group by fiscal_quarter_id,fiscal_quarter_name,fiscal_year "
+" order by fiscal_quarter_id desc "
+" fetch first 8 rows only "
+")"
);
log_message('I','End fql()');
/*
Function to increment a Date object by one standard day
Sourced from https://***.com/questions/563406/add-days-to-javascript-date
*/
function addDaysInJs(dateIn, days)
var result = new Date(dateIn);
result.setDate(result.getDate() + days);
return result;
function dtfq()
messages.push('@@@'+"dtfq()");
tableName = 'date_to_fiscal_quarter';
var firstDate;
var runningDate;
log_message('I','Begin dtfq');
truncate_table(tableName);
var result = null;
var resultSet = execute_with_log(" SELECT FISCAL_QUARTER_ID, FISCAL_QUARTER_NAME,try_to_date(START_DATE) as START_DATE, try_to_date(END_DATE) as END_DATE"
+ " FROM FISCAL_QUARTER_LIST "
+ " ORDER BY START_DATE ");
log_message('D','resultSet ='+resultSet);
log_message('D','resultSet typeof='+typeof resultSet);
while(resultSet.next())
messages.push('@@@'+"bp1 dtfq() loop start_date="+resultSet.getColumnValue("START_DATE")+" end_date="+resultSet.getColumnValue("END_DATE"));
firstDate = resultSet.getColumnValue("START_DATE");
lastDate = resultSet.getColumnValue("END_DATE");
runningDate=new Date(firstDate);
lastDate = new Date(lastDate);
log_message('D','Start date='+firstDate);
while (runningDate <= lastDate)
var fiscalQuarterId=resultSet.getColumnValue("FISCAL_QUARTER_ID")
var fiscalQuarterName=resultSet.getColumnValue("FISCAL_QUARTER_NAME")
messages.push('@@@'+"bp2 dtfq() runningDate="+runningDate+' fiscalQuarterId='+fiscalQuarterId+' fiscalQuarterName='+fiscalQuarterName);
log_message('D','Fiscal quarter id='+fiscalQuarterId);
/*
execute_with_log(" insert into sc_hub_date_to_fiscal_quarter(date_stamp,) "
+" values(try_to_date(?)) "
,runningDate.toISOString());
*/
execute_with_log(" insert into sc_hub_date_to_fiscal_quarter(date_stamp,fiscal_quarter_id,fiscal_quarter_name) "
+" values(?,?,?)"
,runningDate.toISOString()
,fiscalQuarterId
,fiscalQuarterName);
runningDate = addDaysInJs(runningDate, 1);
log_message('I','End dtfq Success');
return result;
/*
Execute Snowflake SQL or simulate the execution thereof
@parmam sqlTextIn,binds...
sqlTextIn: String of the sql command to run.
binds: zero or more parameters to bind to the execution of the command.
*/
function execute()
messages.push('@@@'+"execute():");
var result = null;
var argumentsArray = Array.prototype.slice.apply(arguments);
var sqlTextIn = argumentsArray[0];
if(!IS_SNOWFLAKE)
console.log('SKIPPING SNOWFLAKE SQL: '+sqlTextIn);
messages.push('@@@'+'SKIPPING SNOWFLAKE SQL: '+sqlTextIn);
else
messages.push('@@@'+'USING SNOWFLAKE SQL: '+sqlTextIn);
var statementResult;
if(argumentsArray.length>2)
messages.push('@@@'+'Has bind arguments: ');
var bindsIn = argumentsArray.slice(2,argumentsArray.length);
statement = snowflake.createStatement(
sqlText: sqlTextIn,
binds: bindsIn
);
else
messages.push('@@@'+'Has no bind arguments: ');
messages.push('@@@'+'###sqlText='+sqlTextIn+'###');
statement = snowflake.createStatement( sqlText: sqlTextIn );
result = statement.execute();
messages.push('@@@'+'statement.execute succeeded');
log_message('I',sqlTextIn);
return result;
String.prototype.replaceAll = function(target, replacement)
return this.split(target).join(replacement);
;
Object.prototype.getName = function()
var funcNameRegex = /function (.1,)\(/;
var results = (funcNameRegex).exec((this).constructor.toString());
return (results && results.length > 1) ? results[1] : "";
;
dtfq();
catch(error)
messages.push('@@@'+error);
finally
result = messages.join("\n");
return result;
$$
;
call periodic_load()
【问题讨论】:
看起来好像您正在日志中或其他地方进行逐行插入。无论您是否使用 begin 和 commit 进行包装,性能都会很慢。如果您将它们缓存在一个变量中并将所有插入放在一个事务中,那么性能会更快。如果一次插入的行太多,您可以一次将它们捆绑 1000 或 100 个,具体取决于行的大小。 【参考方案1】:此处并未完全说明用例,但您的存储过程似乎仅针对源表输入行中遇到的每个日期范围生成(分解)并将一系列日期插入表中。
这可以直接使用 SQL (with recursive CTEs) 来实现,这将比线性存储过程迭代更有效地运行:
create table destination_table (fiscal_quarter_id integer, fiscal_quarter_name string, date_stamp date);
insert into destination_table
with source_table(fiscal_quarter_id, fiscal_quarter_name, start_date, end_date) as (
select 1, 'Q1', '2020-01-01'::date, '2020-03-31'::date union all
select 2, 'Q2', '2020-04-01'::date, '2020-06-30'::date union all
select 3, 'Q3', '2020-07-01'::date, '2020-09-30'::date union all
select 4, 'Q4', '2020-10-01'::date, '2020-12-31'::date
), recursive_expand as (
select
fiscal_quarter_id, fiscal_quarter_name, start_date, end_date,
start_date as date_stamp
from source_table
union all
select
fiscal_quarter_id, fiscal_quarter_name, start_date, end_date,
dateadd(day, 1, date_stamp)::date date_stamp
from recursive_expand
where date_stamp < end_date
)
select fiscal_quarter_id, fiscal_quarter_name, date_stamp
from recursive_expand
order by date_stamp asc;
该示例将 366 行插入到涵盖所有四个季度的日期的 destination_table
(2020
是闰年)中。
@Greg Pavlik's comment 解释了为什么存储过程由于执行整个语句而变慢(从雪花查询处理服务中独立提交、编译、计划、执行和返回的每个语句都会增加很多开销)。如果您仍想为您的用例继续使用存储过程 API,一个想法是进行两项特定更改:
-
将所有生成的数据行存储到一个数组中,而不是像这样直接插入它们(由于内存限制,这只适用于几百行,而不是超过):
function dtfq()
var all_rows = [];
// … iteration and other logic here …
all_rows.push([fiscalQuarterId, fiscalQuarterName, runningDate]);
// … iteration and other logic ends here (minus inserts) …
return all_rows;
-
插入使用带有
n
值容器的单个生成的INSERT
语句生成的n
行列表。此类代码的示例can be seen in this answer。
【讨论】:
以上是关于使基于雪花 Javascript 的过程查询更快的主要内容,如果未能解决你的问题,请参考以下文章