使基于雪花 Javascript 的过程查询更快

Posted

技术标签:

【中文标题】使基于雪花 Javascript 的过程查询更快【英文标题】:Making Snowflake Javascript based procedure query faster 【发布时间】:2020-06-29 18:54:16 【问题描述】:

我有一个存储过程,我刚刚从 PL/SQL 转换为 Snowflake javascript。它每分钟插入大约 100 条记录。总记录数约为 700。因为很难知道 Snowflake 中的问题出在哪里,所以我会随着整体功能的进展插入日志语句。我还将消息推送到在底部返回的数组。但是,我在 PL/SQL 中插入日志表类型的东西,它几乎没有产生性能差异。我承认我的进度加载减慢了进程,但我怀疑它是主要贡献者。

脚本创建一个表格,给定日期,显示它对应的财政季度。这对于未显示的其他查询很有帮助。我有一个简单的循环,从第一季度开始到最后一个季度结束,并将相应的季度放入查找表中。

按照所写的运行需要 9 分钟,但在 Oracle 中,用时不到一秒。

我想知道如何让它运行得更快:

create or replace procedure periodic_load()
    RETURNS varchar
    LANGUAGE javascript
    execute as owner 
    as 
    $$
    var result = "";
    var messages = new Array();
    try 
        /**
        Constants shared between functions
        */
        var SINGLE_QUOTE_CHAR="'";
        var DOUBLE_QUOTE_CHAR="\"";
        var COMMA_CHAR=",";
        var LEFT_PARENTHESIS="(";
        var RIGHT_PARENTHESIS=")";
        var ESCAPED_SINGLE_QUOTE_CHAR="\\'";
        var ESCAPED_DOUBLE_QUOTE_CHAR="\\\"";
        var CONSOLE_LOG_USED = true;
        var IS_SNOWFLAKE = false; 


        /*
        Execute Snowflake SQL or simulate the execution thereof
        @parmam  sqlTextIn,binds...
        sqlTextIn: String of the sql command to run.
        binds: zero or more parameters to bind to the execution of the command.
        */
        function execute_with_log() 
            var result = null;
            messages.push('@@@'+"execute_with_log()");
            messages.push('@@@'+"EXECUTE_WITH_LOG(BP1)");

            var argumentsArray = Array.prototype.slice.apply(arguments); 
            var sqlTextIn = argumentsArray[0];
            messages.push('@@@'+'EXECUTE_WITH_LOG argument count: '+arguments.length);
            if(!IS_SNOWFLAKE)  
                messages.push('@@@'+ "EXECUTE_WITH_LOG(BP2)");
                console.log('SKIPPING SNOWFLAKE SQL: '+sqlTextIn);
             else 
                messages.push('@@@'+ " EXECUTE_WITH_LOG(BP3)");
                var statementResult;
                var logMessage = sqlTextIn;
                if(argumentsArray.length==1) 
                    messages.push('@@@'+ " EXECUTE_WITH_LOG(BP4)");
                    messages.push('@@@'+" ** NO BIND PARAMETERS DETECTED **");
                 else 
                    messages.push('@@@'+ " EXECUTE_WITH_LOG(BP5)");
                    for(var bindParmCounter = 1; bindParmCounter < argumentsArray.length; bindParmCounter++) 
                        messages.push('@@@'+" ,"+argumentsArray[bindParmCounter]);    
                    
                
                messages.push('@@@'+ " EXECUTE_WITH_LOG(BP6)");
                log_message('I',logMessage);
                if(argumentsArray.length===1) 
                    messages.push('@@@'+ " EXECUTE_WITH_LOG(BP7)");
                    statement = snowflake.createStatement(  sqlText: sqlTextIn );
                 else 
                    messages.push('@@@'+ " EXECUTE_WITH_LOG(BP8)");
                    var bindsIn = argumentsArray.slice(1,argumentsArray.length);
                    for(var bindParmCounter = 0; bindParmCounter < bindsIn.length; bindParmCounter++) 
                        messages.push('@@@bindsIn['+bindParmCounter+"]="+bindsIn[bindParmCounter]);    
                        messages.push('@@@bindsIn['+bindParmCounter+"] type ="+bindsIn[bindParmCounter].getName());    
                    
                    statement = snowflake.createStatement(
                        
                            sqlText: sqlTextIn,
                            binds: bindsIn
                        
                    );
                

                messages.push('@@@'+ " EXECUTE_WITH_LOG(BP9) sqlTextIn="+sqlTextIn);
                result = statement.execute();
                messages.push('@@@'+ " After execute BP10 =");
                commit();
                messages.push('@@@'+ " After commit BP11 =");

            
            return result;
        

        function commit() 
            messages.push('@@@'+ " commit");
            statement = snowflake.createStatement(
                
                    sqlText: 'commit'
                
            );
            statement.execute();
            return messages;
        

        function log_message(severity,message) 
            messages.push('@@@'+"log_message(severity,message): severity="+severity+" message="+message);
            var result = null;
            if(!IS_SNOWFLAKE) 
                console.log(severity+": "+message);
                messages.push('@@@'+severity+": "+message);
             else 
                var record = 'severity': severity,'date_time': value: 'current_timestamp::timestamp_ntz',useQuote:false,message:message;
                try 
                    var escapeStep1=message.replaceAll(SINGLE_QUOTE_CHAR,ESCAPED_SINGLE_QUOTE_CHAR);
                    var escapeStep2=escapeStep1.replaceAll(DOUBLE_QUOTE_CHAR,ESCAPED_DOUBLE_QUOTE_CHAR);
                    quotedValue=SINGLE_QUOTE_CHAR+escapeStep2+SINGLE_QUOTE_CHAR;

                    var quotedSeverity = SINGLE_QUOTE_CHAR+severity+SINGLE_QUOTE_CHAR;

                    var sql_command = "insert into LOG_MESSAGES(severity,date_time,message) values("+quotedSeverity+",current_timestamp::timestamp_ntz,"+quotedValue+")";
                    statement = snowflake.createStatement(  sqlText: sql_command);

                    var sql_command = "commit";
                    statement = snowflake.createStatement(  sqlText: sql_command);
                 catch(error) 
                    messages.push('@@@'+'FAILURE: '+error);
                
            
            return result;
        

        function truncate_table(tableName) 
            messages.push('@@@'+"(truncate_table()");
            var result = execute_with_log("truncate table "+tableName); 
            messages.push('@@@'+'I','End truncate_table()');
            return result;
        

        function fql() 
            messages.push('@@@'+"begin fql()");
            log_message('I','Begin fql()');
            var table_name='fiscal_quarter_list';
            truncate_table(table_name);
            execute(
                "insert into fiscal_quarter_list (fiscal_quarter_id,fiscal_quarter_name,fiscal_year,start_date,end_date,last_mod_date_stamp) ("
                +"    select fiscal_quarter_id,fiscal_quarter_name,fiscal_year,min(start_date) start_date,max(end_date) end_date,current_date from cdw_fiscal_periods cfp"
                +"    where (cfp.start_date >= add_months(sysdate(),-24) and  sysdate() >= cfp.end_date ) or "
                +"          (cfp.start_date <= sysdate() and sysdate() < cfp.end_date)  "
                +"    group by fiscal_quarter_id,fiscal_quarter_name,fiscal_year "
                +"    order by fiscal_quarter_id desc "
                +"    fetch first 8 rows only "
                +")"
            );
            log_message('I','End fql()');
        


        /*
        Function to increment a Date object by one standard day
        Sourced from https://***.com/questions/563406/add-days-to-javascript-date
        */
        function addDaysInJs(dateIn, days) 
            var result = new Date(dateIn);
            result.setDate(result.getDate() + days);
            return result;
        


        function dtfq()  
            messages.push('@@@'+"dtfq()");
            tableName = 'date_to_fiscal_quarter';
            var firstDate;
            var runningDate;

            log_message('I','Begin dtfq');
            truncate_table(tableName);
            var result = null;
            var resultSet = execute_with_log(" SELECT FISCAL_QUARTER_ID, FISCAL_QUARTER_NAME,try_to_date(START_DATE) as START_DATE, try_to_date(END_DATE)  as END_DATE"
                                                    + " FROM FISCAL_QUARTER_LIST "
                                                    + " ORDER BY START_DATE  ");

            log_message('D','resultSet ='+resultSet);
            log_message('D','resultSet typeof='+typeof resultSet);
            while(resultSet.next()) 
                messages.push('@@@'+"bp1 dtfq() loop start_date="+resultSet.getColumnValue("START_DATE")+" end_date="+resultSet.getColumnValue("END_DATE"));
                firstDate = resultSet.getColumnValue("START_DATE");
                lastDate = resultSet.getColumnValue("END_DATE");
                runningDate=new Date(firstDate);
                lastDate = new Date(lastDate);
                log_message('D','Start date='+firstDate);
                while (runningDate <= lastDate) 
                    var fiscalQuarterId=resultSet.getColumnValue("FISCAL_QUARTER_ID")
                    var fiscalQuarterName=resultSet.getColumnValue("FISCAL_QUARTER_NAME")
                    messages.push('@@@'+"bp2 dtfq() runningDate="+runningDate+' fiscalQuarterId='+fiscalQuarterId+' fiscalQuarterName='+fiscalQuarterName);
                    log_message('D','Fiscal quarter id='+fiscalQuarterId);
                    /*
                    execute_with_log(" insert into sc_hub_date_to_fiscal_quarter(date_stamp,) "
                                            +" values(try_to_date(?)) "
                                            ,runningDate.toISOString());
                                            */
                    execute_with_log(" insert into sc_hub_date_to_fiscal_quarter(date_stamp,fiscal_quarter_id,fiscal_quarter_name) "
                                            +" values(?,?,?)"
                                            ,runningDate.toISOString()
                                            ,fiscalQuarterId
                                            ,fiscalQuarterName);
                                         
                    runningDate = addDaysInJs(runningDate, 1);
                

            

            log_message('I','End dtfq Success');
            return result;
        

        /*
        Execute Snowflake SQL or simulate the execution thereof
        @parmam  sqlTextIn,binds...
        sqlTextIn: String of the sql command to run.
        binds: zero or more parameters to bind to the execution of the command.
        */
        function execute() 
            messages.push('@@@'+"execute():");
       
            var result = null;
            var argumentsArray = Array.prototype.slice.apply(arguments); 
            var sqlTextIn = argumentsArray[0];
            if(!IS_SNOWFLAKE)  
                console.log('SKIPPING SNOWFLAKE SQL: '+sqlTextIn);
                messages.push('@@@'+'SKIPPING SNOWFLAKE SQL: '+sqlTextIn);
             else 
                messages.push('@@@'+'USING SNOWFLAKE SQL: '+sqlTextIn);
                var statementResult;
                if(argumentsArray.length>2) 
                    messages.push('@@@'+'Has bind arguments: ');
                    var bindsIn = argumentsArray.slice(2,argumentsArray.length);
                    statement = snowflake.createStatement(
                        
                        sqlText: sqlTextIn,
                        binds: bindsIn
                        
                    );
                 else 
                    messages.push('@@@'+'Has no bind arguments: ');
                    messages.push('@@@'+'###sqlText='+sqlTextIn+'###');
                    statement = snowflake.createStatement(  sqlText: sqlTextIn );
                
                result = statement.execute();
                messages.push('@@@'+'statement.execute succeeded');
                log_message('I',sqlTextIn);
            
            return result;
        
        

        String.prototype.replaceAll = function(target, replacement) 
          return this.split(target).join(replacement);
        ;

        Object.prototype.getName = function()  
           var funcNameRegex = /function (.1,)\(/;
           var results = (funcNameRegex).exec((this).constructor.toString());
           return (results && results.length > 1) ? results[1] : "";
        ;

        dtfq(); 
     catch(error) 
        messages.push('@@@'+error);
     finally 
        result = messages.join("\n");
    

    

    return result;
    $$
    ;


call periodic_load()




【问题讨论】:

看起来好像您正在日志中或其他地方进行逐行插入。无论您是否使用 begin 和 commit 进行包装,性能都会很慢。如果您将它们缓存在一个变量中并将所有插入放在一个事务中,那么性能会更快。如果一次插入的行太多,您可以一次将它们捆绑 1000 或 100 个,具体取决于行的大小。 【参考方案1】:

此处并未完全说明用例,但您的存储过程似乎仅针对源表输入行中遇到的每个日期范围生成(分解)并将一系列日期插入表中。

这可以直接使用 SQL (with recursive CTEs) 来实现,这将比线性存储过程迭代更有效地运行:

create table destination_table (fiscal_quarter_id integer, fiscal_quarter_name string, date_stamp date);

insert into destination_table
with source_table(fiscal_quarter_id, fiscal_quarter_name, start_date, end_date) as (

  select 1, 'Q1', '2020-01-01'::date, '2020-03-31'::date union all
  select 2, 'Q2', '2020-04-01'::date, '2020-06-30'::date union all
  select 3, 'Q3', '2020-07-01'::date, '2020-09-30'::date union all
  select 4, 'Q4', '2020-10-01'::date, '2020-12-31'::date

), recursive_expand as (

  select
    fiscal_quarter_id, fiscal_quarter_name, start_date, end_date,
    start_date as date_stamp
  from source_table
  
  union all
  
  select 
    fiscal_quarter_id, fiscal_quarter_name, start_date, end_date,
    dateadd(day, 1, date_stamp)::date date_stamp
  from recursive_expand
  where date_stamp < end_date

)

select fiscal_quarter_id, fiscal_quarter_name, date_stamp
from recursive_expand
order by date_stamp asc;

该示例将 366 行插入到涵盖所有四个季度的日期的 destination_table2020 是闰年)中。

@Greg Pavlik's comment 解释了为什么存储过程由于执行整个语句而变慢(从雪花查询处理服务中独立提交、编译、计划、执行和返回的每个语句都会增加很多开销)。如果您仍想为您的用例继续使用存储过程 API,一个想法是进行两项特定更改:

    将所有生成的数据行存储到一个数组中,而不是像这样直接插入它们(由于内存限制,这只适用于几百行,而不是超过):
function dtfq() 
  var all_rows = [];

  // … iteration and other logic here …
      all_rows.push([fiscalQuarterId, fiscalQuarterName, runningDate]);
  // … iteration and other logic ends here (minus inserts) …

  return all_rows;

    插入使用带有n 值容器的单个生成的INSERT 语句生成的n 行列表。此类代码的示例can be seen in this answer。

【讨论】:

以上是关于使基于雪花 Javascript 的过程查询更快的主要内容,如果未能解决你的问题,请参考以下文章

C# 开源一个新的雪花算法

外部表的雪花外部阶段文件推荐

具有高基数的雪花性能调优列

使这个查询更快

需要从 JAVA UDF 连接雪花表

雪花中的 UDF JavaScript 实现