如何在表列中访问 SQL 中的 varray 元素

Posted

技术标签:

【中文标题】如何在表列中访问 SQL 中的 varray 元素【英文标题】:How to access varray element in SQL in the table column 【发布时间】:2016-04-27 19:50:22 【问题描述】:

我正在尝试找到一种简单的方法,在 SQL 查询中访问表列里的 varray 元素。该列是 varray 类型,但需要将其中每个元素作为单独的列呈现给数据库客户端,类似于 column_name(1).x、column_name(1).y、…、column_name(20).y。我们目前使用一个函数将数组元素作为列返回:

create or replace function get_point_x(
  p_graph in graph_t,
  p_point in PLS_INTEGER
)
return number
is
  -- Returns the X coordinate of the p_point-th point of p_graph,
  -- or NULL when the graph has no such point.
  --
  -- Parameters:
  --   p_graph  the varray of points to read from
  --   p_point  1-based position of the wanted point
begin
  -- An out-of-range subscript on a varray raises SUBSCRIPT_BEYOND_COUNT
  -- (and a null collection raises COLLECTION_IS_NULL), not NO_DATA_FOUND,
  -- so a NO_DATA_FOUND handler never turns a missing point into NULL.
  -- EXISTS can be called safely in both situations.
  if p_graph.exists(p_point) then
    return p_graph(p_point).x;
  end if;

  return null;
end get_point_x;
/

但是,每行调用此函数 40 次大约需要查询已用时间的 40%。所以我想知道是否有一种简单有效的 SQL 替代方法来访问 SQL 查询 中的元素。我尝试了下面的方法,但它并不比 PLSQL 函数更有效!?

-- A graph point: a pair of integer coordinates, up to 6 digits each
CREATE TYPE point_t AS object(
x number(6,0),
y number(6,0)
);
/

-- Graph can contain up to 20 points, no more
CREATE TYPE graph_t AS VARRAY(20) OF point_t;
/

-- Customer graphs: one varray of points per customer
create table customer_graphs (customer_id number(9,0), graph graph_t);

-- Sample rows; the column list is spelled out so the inserts keep working
-- if columns are ever added to or reordered in the table.
insert into customer_graphs (customer_id, graph)
values (1, graph_t(point_t(10,10), point_t(20,20), point_t(30,30)));

insert into customer_graphs (customer_id, graph)
values (2, graph_t(point_t(5,5), point_t(10,10), point_t(30,30), point_t(40,31)));

-- Unnesting the varray works, but each graph point comes back as its own row,
-- whereas the client needs the columns x1, y1, .. x20, y20
select
  cg.customer_id,
  pt.x,
  pt.y
from
  customer_graphs cg,
  TABLE(cg.graph) pt;

-- Of course I can pivot, but it impacts performance with 40 columns.
-- Produces one row per customer with the points spread over x_1, y_1, .. x_20, y_20;
-- positions the graph does not have come back as NULL.
select
  customer_id,
  max(case rn when 1 then x end) x_1,
  max(case rn when 1 then y end) y_1,
  max(case rn when 2 then x end) x_2,
  max(case rn when 2 then y end) y_2,
  -- ..
  max(case rn when 20 then x end) x_20,
  max(case rn when 20 then y end) y_20
from (
  -- Number the points in the order the varray is unnested rather than by
  -- their X value: ordering by g.x would put the smallest X into x_1 and
  -- pick arbitrarily between points that share the same X.
  -- NOTE(review): assumes TABLE() returns varray elements in subscript
  -- order - confirm on the target Oracle version.
  select
    cg.customer_id,
    g.x,
    g.y,
    row_number() over(partition by cg.customer_id order by rownum) rn
  from
    customer_graphs cg,
    TABLE(cg.graph) g
)
group by customer_id
;

-- Is there an easy way to access a column's varray element in SQL?
-- Something like the query below, which is what I would like to write.
select 
  cg.customer_id, 
  cg.graph,
-- From this line on it does not work: the subscripted references are
-- rejected with the ORA-00904 error quoted after this query.
 cg.graph(1).x x_1,
 cg.graph(1).y y_1,
-- .. (points 2 to 19 omitted)
 cg.graph(20).x x_20,
 cg.graph(20).y y_20
from customer_graphs cg;

ORA-00904: "CG"."GRAPH": invalid identifier
00904. 00000 -  "%s: invalid identifier"
*Cause:    
*Action:
Error at Line: 258 Column: 2

是否有一些我缺少的高效 SQL 解决方案?

提前谢谢你

【问题讨论】:

【参考方案1】:

最好的解决方案是删除类型和 VARRAY 并将所有内容存储在普通表中。

如果这不是一个选项,您可以通过将 VARRAY 包装在对象类型中并通过成员函数访问元素来显著提高性能。这种方法比对 VARRAY 的展开结果做行转列(pivot)快几倍。

下面的代码有点冗长,但它是针对 20 列和 100,000 个样本行的完整功能测试。

使用 VARRAY 的示例架构

-- A graph point: a pair of integer coordinates, up to 6 digits each.
-- Each CREATE TYPE is followed by a "/" line because SQL*Plus only runs a
-- CREATE TYPE statement when it reaches the slash.
CREATE TYPE point_t AS object(
x number(6,0),
y number(6,0)
);
/

-- Graph can contain up to 20 points, no more
CREATE TYPE graph_t AS VARRAY(20) OF point_t;
/

-- Customer graphs
create table customer_graphs (customer_id number(9,0), graph graph_t);

-- Loads 100K customers that all carry the same full 20-point graph.
-- Timing reported by the author: 100K rows, 5.2 seconds.
begin
    for customer_no in 1 .. 100000 loop
        insert into customer_graphs
        values (
            customer_no,
            graph_t(
                point_t(1,1),   point_t(2,2),   point_t(3,3),   point_t(4,4),   point_t(5,5),
                point_t(6,6),   point_t(7,7),   point_t(8,8),   point_t(9,9),   point_t(10,10),
                point_t(11,11), point_t(12,12), point_t(13,13), point_t(14,14), point_t(15,15),
                point_t(16,16), point_t(17,17), point_t(18,18), point_t(19,19), point_t(20,20)
            )
        );
    end loop;

    commit;
end;
/

-- Gather optimizer statistics on the freshly loaded table (current schema).
begin
    dbms_stats.gather_table_stats(
        ownname => user,
        tabname => 'CUSTOMER_GRAPHS'
    );
end;
/

对象包含 VARRAY 的示例架构

-- Object type that wraps a graph_t varray and exposes the X and Y coordinate
-- of a point by position, so that SQL can write cg.graph.x(1), cg.graph.y(1), ..
create or replace type graph_obj as object
(
    graph graph_t,
    -- X coordinate of point p_index, or NULL when the graph has no such point
    member function x(p_index number) return number,
    -- Y coordinate of point p_index, or NULL when the graph has no such point
    member function y(p_index number) return number
);
/

create or replace type body graph_obj is
    member function x(p_index number) return number is
    begin
        -- A graph may hold fewer than 20 points; without this guard a missing
        -- position raises SUBSCRIPT_BEYOND_COUNT and fails the whole query.
        if graph.exists(p_index) then
            return graph(p_index).x;
        end if;

        return null;
    end;

    member function y(p_index number) return number is
    begin
        -- Same guard as in x(): a missing position yields NULL, not an error.
        if graph.exists(p_index) then
            return graph(p_index).y;
        end if;

        return null;
    end;
end;
/

-- Customer graphs 2: same data, but the varray is wrapped in graph_obj
create table customer_graphs2 (
    customer_id number(9,0),
    graph       graph_obj
);

-- Loads 100K customers that all carry the same full 20-point graph.
-- Timing reported by the author: 100K rows, 5.54 seconds.
begin
    for customer_no in 1 .. 100000 loop
        insert into customer_graphs2
        values (
            customer_no,
            graph_obj(
                graph_t(
                    point_t(1,1),   point_t(2,2),   point_t(3,3),   point_t(4,4),   point_t(5,5),
                    point_t(6,6),   point_t(7,7),   point_t(8,8),   point_t(9,9),   point_t(10,10),
                    point_t(11,11), point_t(12,12), point_t(13,13), point_t(14,14), point_t(15,15),
                    point_t(16,16), point_t(17,17), point_t(18,18), point_t(19,19), point_t(20,20)
                )
            )
        );
    end loop;

    commit;
end;
/

-- Gather optimizer statistics on the freshly loaded table (current schema).
begin
    dbms_stats.gather_table_stats(
        ownname => user,
        tabname => 'CUSTOMER_GRAPHS2'
    );
end;
/

VARRAY PIVOT 性能

前 N 行 - 4.5 秒。

select customer_id,
    max(CASE rn WHEN 1 THEN x END) x_1, max(CASE rn WHEN 1 THEN y END) y_1, max(CASE rn WHEN 2 THEN x END) x_2, max(CASE rn WHEN 2 THEN y END) y_2, max(CASE rn WHEN 3 THEN x END) x_3, max(CASE rn WHEN 3 THEN y END) y_3, max(CASE rn WHEN 4 THEN x END) x_4, max(CASE rn WHEN 4 THEN y END) y_4, max(CASE rn WHEN 5 THEN x END) x_5, max(CASE rn WHEN 5 THEN y END) y_5, max(CASE rn WHEN 6 THEN x END) x_6, max(CASE rn WHEN 6 THEN y END) y_6, max(CASE rn WHEN 7 THEN x END) x_7, max(CASE rn WHEN 7 THEN y END) y_7, max(CASE rn WHEN 8 THEN x END) x_8, max(CASE rn WHEN 8 THEN y END) y_8, max(CASE rn WHEN 9 THEN x END) x_9, max(CASE rn WHEN 9 THEN y END) y_9, max(CASE rn WHEN 10 THEN x END) x_10, max(CASE rn WHEN 10 THEN y END) y_10, max(CASE rn WHEN 11 THEN x END) x_11, max(CASE rn WHEN 11 THEN y END) y_11, max(CASE rn WHEN 12 THEN x END) x_12, max(CASE rn WHEN 12 THEN y END) y_12, max(CASE rn WHEN 13 THEN x END) x_13, max(CASE rn WHEN 13 THEN y END) y_13, max(CASE rn WHEN 14 THEN x END) x_14, max(CASE rn WHEN 14 THEN y END) y_14, max(CASE rn WHEN 15 THEN x END) x_15, max(CASE rn WHEN 15 THEN y END) y_15, max(CASE rn WHEN 16 THEN x END) x_16, max(CASE rn WHEN 16 THEN y END) y_16, max(CASE rn WHEN 17 THEN x END) x_17, max(CASE rn WHEN 17 THEN y END) y_17, max(CASE rn WHEN 18 THEN x END) x_18, max(CASE rn WHEN 18 THEN y END) y_18, max(CASE rn WHEN 19 THEN x END) x_19, max(CASE rn WHEN 19 THEN y END) y_19, max(CASE rn WHEN 20 THEN x END) x_20, max(CASE rn WHEN 20 THEN y END) y_20
from (
  select cg.customer_id, g.*, row_number() over(partition by cg.customer_id order by g.x) rn 
  from 
    customer_graphs cg, 
    TABLE(cg.graph) g
)
group by customer_id;

所有行 - 17 秒

select sum(x_1) x
from
(
    select customer_id,
        max(CASE rn WHEN 1 THEN x END) x_1, max(CASE rn WHEN 1 THEN y END) y_1, max(CASE rn WHEN 2 THEN x END) x_2, max(CASE rn WHEN 2 THEN y END) y_2, max(CASE rn WHEN 3 THEN x END) x_3, max(CASE rn WHEN 3 THEN y END) y_3, max(CASE rn WHEN 4 THEN x END) x_4, max(CASE rn WHEN 4 THEN y END) y_4, max(CASE rn WHEN 5 THEN x END) x_5, max(CASE rn WHEN 5 THEN y END) y_5, max(CASE rn WHEN 6 THEN x END) x_6, max(CASE rn WHEN 6 THEN y END) y_6, max(CASE rn WHEN 7 THEN x END) x_7, max(CASE rn WHEN 7 THEN y END) y_7, max(CASE rn WHEN 8 THEN x END) x_8, max(CASE rn WHEN 8 THEN y END) y_8, max(CASE rn WHEN 9 THEN x END) x_9, max(CASE rn WHEN 9 THEN y END) y_9, max(CASE rn WHEN 10 THEN x END) x_10, max(CASE rn WHEN 10 THEN y END) y_10, max(CASE rn WHEN 11 THEN x END) x_11, max(CASE rn WHEN 11 THEN y END) y_11, max(CASE rn WHEN 12 THEN x END) x_12, max(CASE rn WHEN 12 THEN y END) y_12, max(CASE rn WHEN 13 THEN x END) x_13, max(CASE rn WHEN 13 THEN y END) y_13, max(CASE rn WHEN 14 THEN x END) x_14, max(CASE rn WHEN 14 THEN y END) y_14, max(CASE rn WHEN 15 THEN x END) x_15, max(CASE rn WHEN 15 THEN y END) y_15, max(CASE rn WHEN 16 THEN x END) x_16, max(CASE rn WHEN 16 THEN y END) y_16, max(CASE rn WHEN 17 THEN x END) x_17, max(CASE rn WHEN 17 THEN y END) y_17, max(CASE rn WHEN 18 THEN x END) x_18, max(CASE rn WHEN 18 THEN y END) y_18, max(CASE rn WHEN 19 THEN x END) x_19, max(CASE rn WHEN 19 THEN y END) y_19, max(CASE rn WHEN 20 THEN x END) x_20, max(CASE rn WHEN 20 THEN y END) y_20
    from (
      select cg.customer_id, g.*, row_number() over(partition by cg.customer_id order by g.x) rn 
      from 
        customer_graphs cg, 
        TABLE(cg.graph) g
    )
    group by customer_id
);

对象性能

前 N 行 - 0.4 秒

select cg.customer_id, cg.graph.x(1) x_1, cg.graph.y(1) y_1, cg.graph.x(2) x_2, cg.graph.y(2) y_2, cg.graph.x(3) x_3, cg.graph.y(3) y_3, cg.graph.x(4) x_4, cg.graph.y(4) y_4, cg.graph.x(5) x_5, cg.graph.y(5) y_5, cg.graph.x(6) x_6, cg.graph.y(6) y_6, cg.graph.x(7) x_7, cg.graph.y(7) y_7, cg.graph.x(8) x_8, cg.graph.y(8) y_8, cg.graph.x(9) x_9, cg.graph.y(9) y_9, cg.graph.x(10) x_10, cg.graph.y(10) y_10, cg.graph.x(11) x_11, cg.graph.y(11) y_11, cg.graph.x(12) x_12, cg.graph.y(12) y_12, cg.graph.x(13) x_13, cg.graph.y(13) y_13, cg.graph.x(14) x_14, cg.graph.y(14) y_14, cg.graph.x(15) x_15, cg.graph.y(15) y_15, cg.graph.x(16) x_16, cg.graph.y(16) y_16, cg.graph.x(17) x_17, cg.graph.y(17) y_17, cg.graph.x(18) x_18, cg.graph.y(18) y_18, cg.graph.x(19) x_19, cg.graph.y(19) y_19, cg.graph.x(20) x_20, cg.graph.y(20) y_20
from customer_graphs2 cg;

所有行 - 2.5 秒

select sum(x_1)
from
(
    select cg.customer_id, cg.graph.x(1) x_1, cg.graph.y(1) y_1, cg.graph.x(2) x_2, cg.graph.y(2) y_2, cg.graph.x(3) x_3, cg.graph.y(3) y_3, cg.graph.x(4) x_4, cg.graph.y(4) y_4, cg.graph.x(5) x_5, cg.graph.y(5) y_5, cg.graph.x(6) x_6, cg.graph.y(6) y_6, cg.graph.x(7) x_7, cg.graph.y(7) y_7, cg.graph.x(8) x_8, cg.graph.y(8) y_8, cg.graph.x(9) x_9, cg.graph.y(9) y_9, cg.graph.x(10) x_10, cg.graph.y(10) y_10, cg.graph.x(11) x_11, cg.graph.y(11) y_11, cg.graph.x(12) x_12, cg.graph.y(12) y_12, cg.graph.x(13) x_13, cg.graph.y(13) y_13, cg.graph.x(14) x_14, cg.graph.y(14) y_14, cg.graph.x(15) x_15, cg.graph.y(15) y_15, cg.graph.x(16) x_16, cg.graph.y(16) y_16, cg.graph.x(17) x_17, cg.graph.y(17) y_17, cg.graph.x(18) x_18, cg.graph.y(18) y_18, cg.graph.x(19) x_19, cg.graph.y(19) y_19, cg.graph.x(20) x_20, cg.graph.y(20) y_20
    from customer_graphs2 cg
);

【讨论】:

感谢您的宝贵时间和有趣的想法。您的回答帮助我找到了性能问题的真正罪魁祸首:原来是其他 pl/sql 函数消耗了 35% 的查询运行时间。我查看了 graph_obj 成员函数,意识到它们与原始 pl/sql 函数基本相同,而您已经证明了您的方法是有效的。非常感谢您的帮助!在我修复了其他 pl/sql 函数之后,可以明显看出 pivot(行转列)方法比 pl/sql 函数或对象方法都更快。附:我另外发布了一个答案,因为我的发现无法全部放进这条评论里。【参考方案2】:

设置

-- A graph point: a pair of integer coordinates, up to 6 digits each
create type point_t as object (
    x number(6,0),
    y number(6,0)
);
/
-- A graph is a varray of at most 20 points
create type graph_t as varray(20) of point_t;
/
-- One graph per customer
create table customer_graphs (
    customer_id number(9,0),
    graph       graph_t
);

-- Loads 100K customers whose graphs hold between 1 and 20 points:
-- customer n gets mod(n, 20) + 1 points, each with the coordinates (n, n).
DECLARE
  l_points graph_t;
BEGIN
  FOR cust IN 1 .. 100000 LOOP
    -- start every customer with an empty graph
    l_points := graph_t();

    FOR pos IN 1 .. mod(cust, 20) + 1 LOOP
      l_points.extend;
      l_points(l_points.last) := point_t(cust, cust);
    END LOOP;

    INSERT INTO customer_graphs VALUES (cust, l_points);
  END LOOP;

  COMMIT;
END;
/

-- Gather optimizer statistics on the freshly loaded table (current schema).
begin
    dbms_stats.gather_table_stats(
        ownname => user,
        tabname => 'CUSTOMER_GRAPHS'
    );
end;
/

原始 PL/SQL 函数

create or replace function x(p_graph graph_t, p_index number) return number is
    -- X coordinate of point p_index in p_graph; NULL when EXISTS does not
    -- report that position as present.
begin
    return case
               when p_graph.exists(p_index) then p_graph(p_index).x
               else to_number(null)
           end;
end;
/

create or replace function y(p_graph graph_t, p_index number) return number is
    -- Y coordinate of point p_index in p_graph; NULL when EXISTS does not
    -- report that position as present.
begin
    return case
               when p_graph.exists(p_index) then p_graph(p_index).y
               else to_number(null)
           end;
end;
/

Jon Heller 建议(稍作修改)

我添加了对 NULL 值的测试,因为图表并不总是包含所有 20 个点。

-- Object wrapper around a graph_t varray; x() and y() return the coordinate
-- of the point at a given position, or NULL when that position is not present.
create or replace type graph_obj as object
(
    graph graph_t,
    member function x(p_index number) return number,
    member function y(p_index number) return number
);
/

create or replace type body graph_obj is
    member function x(p_index number) return number is
    begin
        return case
                   when graph.exists(p_index) then graph(p_index).x
                   else to_number(null)
               end;
    end;

    member function y(p_index number) return number is
    begin
        return case
                   when graph.exists(p_index) then graph(p_index).y
                   else to_number(null)
               end;
    end;
end;
/

这个应用程序非常老旧,并且被许多其他应用程序使用。我正在为另一个应用程序编写接口。我可以看出这个应用程序是如何随着时间的推移逐步演变的,现在它用的是传统的关系表。我同意最好使用普通的关系表。

我无法修改应用程序,所以我在查询中动态创建 graph_obj 对象。我需要取回所有行。我使用以下设置在 SQL*Plus 中运行查询:
set timing on
set arraysize 5000
set autotrace TRACEONLY statistics

我没有使用 sum(x_1) 技巧,因为 Oracle 足够聪明:外层只用到 x_1 时,它不会再执行其余 39 次调用去计算 y_1、…、x_20、y_20。这就是为什么在之前的测试中对象方法看起来比 pivot 方法更快。

VARRAY Pivot 性能

-- Pivot 13 secs
select customer_id,
        max(CASE rn WHEN 1 THEN x END) x_1, max(CASE rn WHEN 1 THEN y END) y_1, max(CASE rn WHEN 2 THEN x END) x_2, max(CASE rn WHEN 2 THEN y END) y_2, max(CASE rn WHEN 3 THEN x END) x_3, max(CASE rn WHEN 3 THEN y END) y_3, max(CASE rn WHEN 4 THEN x END) x_4, max(CASE rn WHEN 4 THEN y END) y_4, max(CASE rn WHEN 5 THEN x END) x_5, max(CASE rn WHEN 5 THEN y END) y_5, max(CASE rn WHEN 6 THEN x END) x_6, max(CASE rn WHEN 6 THEN y END) y_6, max(CASE rn WHEN 7 THEN x END) x_7, max(CASE rn WHEN 7 THEN y END) y_7, max(CASE rn WHEN 8 THEN x END) x_8, max(CASE rn WHEN 8 THEN y END) y_8, max(CASE rn WHEN 9 THEN x END) x_9, max(CASE rn WHEN 9 THEN y END) y_9, max(CASE rn WHEN 10 THEN x END) x_10, max(CASE rn WHEN 10 THEN y END) y_10, max(CASE rn WHEN 11 THEN x END) x_11, max(CASE rn WHEN 11 THEN y END) y_11, max(CASE rn WHEN 12 THEN x END) x_12, max(CASE rn WHEN 12 THEN y END) y_12, max(CASE rn WHEN 13 THEN x END) x_13, max(CASE rn WHEN 13 THEN y END) y_13, max(CASE rn WHEN 14 THEN x END) x_14, max(CASE rn WHEN 14 THEN y END) y_14, max(CASE rn WHEN 15 THEN x END) x_15, max(CASE rn WHEN 15 THEN y END) y_15, max(CASE rn WHEN 16 THEN x END) x_16, max(CASE rn WHEN 16 THEN y END) y_16, max(CASE rn WHEN 17 THEN x END) x_17, max(CASE rn WHEN 17 THEN y END) y_17, max(CASE rn WHEN 18 THEN x END) x_18, max(CASE rn WHEN 18 THEN y END) y_18, max(CASE rn WHEN 19 THEN x END) x_19, max(CASE rn WHEN 19 THEN y END) y_19, max(CASE rn WHEN 20 THEN x END) x_20, max(CASE rn WHEN 20 THEN y END) y_20
    from (
      select cg.customer_id, g.*, row_number() over(partition by cg.customer_id order by g.x) rn 
      from 
        customer_graphs cg, 
        TABLE(cg.graph) g
    )
    group by customer_id;

原始 PLSQL 函数性能

-- PLSQL 75 secs. 
select cg.customer_id, x(cg.graph,1) x_1, y(cg.graph,1) y_1, x(cg.graph,2) x_2, y(cg.graph,2) y_2, x(cg.graph,3) x_3, y(cg.graph,3) y_3, x(cg.graph,4) x_4, y(cg.graph,4) y_4, x(cg.graph,5) x_5, y(cg.graph,5) y_5, x(cg.graph,6) x_6, y(cg.graph,6) y_6, x(cg.graph,7) x_7, y(cg.graph,7) y_7, x(cg.graph,8) x_8, y(cg.graph,8) y_8, x(cg.graph,9) x_9, y(cg.graph,9) y_9, x(cg.graph,10) x_10, y(cg.graph,10) y_10, x(cg.graph,11) x_11, y(cg.graph,11) y_11, x(cg.graph,12) x_12, y(cg.graph,12) y_12, x(cg.graph,13) x_13, y(cg.graph,13) y_13, x(cg.graph,14) x_14, y(cg.graph,14) y_14, x(cg.graph,15) x_15, y(cg.graph,15) y_15, x(cg.graph,16) x_16, y(cg.graph,16) y_16, x(cg.graph,17) x_17, y(cg.graph,17) y_17, x(cg.graph,18) x_18, y(cg.graph,18) y_18, x(cg.graph,19) x_19, y(cg.graph,19) y_19, x(cg.graph,20) x_20, y(cg.graph,20) y_20
    from customer_graphs cg;

对象性能

-- Object 83 secs, 6 times slower than pivot query
-- I assume that additional 8 secs were caused by creating objects on the fly

    select cg.customer_id, cg.graph.x(1) x_1, cg.graph.y(1) y_1, cg.graph.x(2) x_2, cg.graph.y(2) y_2, cg.graph.x(3) x_3, cg.graph.y(3) y_3, cg.graph.x(4) x_4, cg.graph.y(4) y_4, cg.graph.x(5) x_5, cg.graph.y(5) y_5, cg.graph.x(6) x_6, cg.graph.y(6) y_6, cg.graph.x(7) x_7, cg.graph.y(7) y_7, cg.graph.x(8) x_8, cg.graph.y(8) y_8, cg.graph.x(9) x_9, cg.graph.y(9) y_9, cg.graph.x(10) x_10, cg.graph.y(10) y_10, cg.graph.x(11) x_11, cg.graph.y(11) y_11, cg.graph.x(12) x_12, cg.graph.y(12) y_12, cg.graph.x(13) x_13, cg.graph.y(13) y_13, cg.graph.x(14) x_14, cg.graph.y(14) y_14, cg.graph.x(15) x_15, cg.graph.y(15) y_15, cg.graph.x(16) x_16, cg.graph.y(16) y_16, cg.graph.x(17) x_17, cg.graph.y(17) y_17, cg.graph.x(18) x_18, cg.graph.y(18) y_18, cg.graph.x(19) x_19, cg.graph.y(19) y_19, cg.graph.x(20) x_20, cg.graph.y(20) y_20
    from (
    select cgi.customer_id, graph_obj(cgi.graph) graph
    from customer_graphs cgi
    ) cg
;

【讨论】:

以上是关于如何在表列中访问 SQL 中的 varray 元素的主要内容,如果未能解决你的问题,请参考以下文章

如何在代码中的sql表列中插入默认值

如何存储和调用表列中的 sql 查询?

另一个表列中多个值条件的SQL命令错误

访问可变数组列中的第二个元素

在表列中查找值并返回表行号 - VBA

如何通过 sql loader 设置要加载到表列中的固定长度