使用 3 个 FOR 循环优化 SQL 查询
Posted
技术标签:
【中文标题】使用 3 个 FOR 循环优化 SQL 查询【英文标题】:Optimize SQL query with 3 FOR loops 【发布时间】:2016-01-05 20:12:27 【问题描述】:我有一个可以正常运行的 SQL 查询，但是它非常非常慢。我正在寻找优化它的方法。
-- Stores a LineString geometry (SRID 4326) together with the trajectory_id
-- and user_id it belongs to. id is an auto-generated surrogate key.
CREATE TABLE trajectory_geom (
    id            SERIAL PRIMARY KEY,
    trajectory_id BIGINT,
    user_id       BIGINT,
    geom          GEOMETRY(LineString, 4326)
);
-- Builds one linestring per (trajectory_id, user_id) from the rows of point.
--
-- * user_id is listed in GROUP BY: PostgreSQL rejects a selected column that
--   is neither aggregated nor grouped, and trajectory_id is not a key of point.
--   If one trajectory_id ever carries several user_ids, this yields one row
--   per combination.
-- * The ORDER BY inside ST_MakeLine fixes the vertex order; without it the
--   aggregate receives the points in whatever order the scan produces them.
--   NOTE(review): assumes (date, "time") is the capture order of the points,
--   with id as a tie-breaker -- confirm against how point is loaded.
-- * ST_Transform(..., 4326) is kept from the original; it is a no-op when
--   point.geom already carries SRID 4326.
INSERT INTO trajectory_geom (trajectory_id, user_id, geom)
SELECT
    p.trajectory_id,
    p.user_id,
    ST_Transform(ST_MakeLine(p.geom ORDER BY p.date, p."time", p.id), 4326)
FROM point AS p
GROUP BY
    p.trajectory_id,
    p.user_id
;
-- Anonymous PL/pgSQL block: for every user, compares each of the user's
-- trajectories (as a line from trajectory_geom) with the start polygons of
-- that user's rows in trajectory_start_end_geom, and records every
-- intersecting pair of *different* trajectories.
DO $$
DECLARE
    usr  record;  -- current user_id taken from point
    traj record;  -- current trajectory_id of that user (supplies the line geometry)
    hit  record;  -- trajectory_start_end_geom row whose start_geom intersects that line
BEGIN
    -- Outer loop: every distinct user_id present in point.
    FOR usr IN
        SELECT DISTINCT p.user_id
        FROM point AS p
    LOOP
        RAISE NOTICE 'User id: %', usr.user_id;

        -- Middle loop: every distinct trajectory_id of the current user.
        FOR traj IN
            SELECT DISTINCT p.trajectory_id
            FROM point AS p
            WHERE p.user_id = usr.user_id
        LOOP
            -- Inner loop: the current user's start polygons that intersect the
            -- line of the current trajectory. The scalar subquery must return
            -- at most one row per trajectory_id, otherwise the statement fails.
            FOR hit IN
                SELECT tse.*
                FROM trajectory_start_end_geom AS tse
                WHERE tse.user_id = usr.user_id
                  AND ST_Intersects(
                          tse.start_geom,
                          (
                              SELECT g.geom
                              FROM trajectory_geom AS g
                              WHERE g.trajectory_id = traj.trajectory_id
                          )
                      )
            LOOP
                -- A trajectory is never recorded as intersecting itself.
                -- (Column name mathced_trajectory_id is spelled as in the target table.)
                IF hit.trajectory_id <> traj.trajectory_id THEN
                    INSERT INTO trajectories_intercepting_with_starting_point
                        (initial_trajectory_id, mathced_trajectory_id, user_id)
                    VALUES
                        (traj.trajectory_id, hit.trajectory_id, usr.user_id);
                END IF;
            END LOOP;
        END LOOP;
    END LOOP;
END;
$$;
它有 3 个循环...我怎样才能避免它们?
基本上，我遍历所有用户 ID，对每个用户遍历其所有轨迹，并检查该轨迹是否与同一用户的任何其他轨迹相交。
架构:
-- Stores a start polygon and an end polygon (both SRID 4326) alongside a
-- trajectory_id and a user_id. id defaults to the next value of
-- trajectory_start_end_geom_id_seq, which must already exist.
CREATE TABLE public.trajectory_start_end_geom (
    id            integer NOT NULL DEFAULT nextval('trajectory_start_end_geom_id_seq'::regclass),
    trajectory_id bigint,
    user_id       bigint,
    start_geom    geometry(Polygon, 4326),
    end_geom      geometry(Polygon, 4326),
    CONSTRAINT trajectory_start_end_geom_pkey PRIMARY KEY (id)
)
WITH (OIDS = FALSE);
-- Stores a LineString geometry (SRID 4326) alongside a trajectory_id and a
-- user_id. id defaults to the next value of trajectory_geom_id_seq, which
-- must already exist.
CREATE TABLE public.trajectory_geom (
    id            integer NOT NULL DEFAULT nextval('trajectory_geom_id_seq'::regclass),
    trajectory_id bigint,
    user_id       bigint,
    geom          geometry(LineString, 4326),
    CONSTRAINT trajectory_geom_pkey PRIMARY KEY (id)
)
WITH (OIDS = FALSE);
-- Raw point rows: a user_id, a date and time of day, lat/lon as doubles, the
-- trajectory_id the row belongs to, and a geometry value (SRID 4326).
-- id defaults to the next value of point_id_seq, which must already exist.
CREATE TABLE public.point (
    id            integer NOT NULL DEFAULT nextval('point_id_seq'::regclass),
    user_id       bigint,
    date          date,
    "time"        time without time zone,
    lat           double precision,
    lon           double precision,
    -- NOTE(review): integer here, while trajectory_geom and
    -- trajectory_start_end_geom declare trajectory_id as bigint.
    trajectory_id integer,
    geom          geometry(Geometry, 4326),
    CONSTRAINT point_pkey PRIMARY KEY (id)
)
WITH (OIDS = FALSE);
【问题讨论】:
为什么不用单个查询来完成，而要用过程呢？ 其实我想要的就是单个查询，只是我目前只想出了过程式的写法。 您不需要任何循环，只需连接（JOIN）所需的表即可。 您可以在问题中补充一下简单的数据库架构吗？我认为使用 JOIN 运算符可以轻松解决这个任务。 【参考方案1】:试试这个 SQL 查询。希望这会有所帮助。
-- Set-based replacement for the three nested loops: for each user, record
-- every pair of different trajectories where the start polygon of one (TG/TSE)
-- intersects the line of the other (TG2).
--
-- * The pair filter is <> rather than <: the test "start polygon of A
--   intersects line of B" is not symmetric, so both orderings of a pair must
--   be examined, as the original loop does.
-- * Column order follows the original loop: initial_trajectory_id is the
--   trajectory that supplies the line, mathced_trajectory_id the trajectory
--   that supplies the start polygon.
-- * ST_Intersects already returns a boolean, so "= TRUE" is not needed.
INSERT INTO trajectories_intercepting_with_starting_point
    (initial_trajectory_id, mathced_trajectory_id, user_id)
SELECT
    TG2.trajectory_id AS initial_trajectory_id,
    TG.trajectory_id  AS mathced_trajectory_id,
    TG.user_id
FROM trajectory_geom AS TG
INNER JOIN trajectory_geom AS TG2
    ON TG.user_id = TG2.user_id
    AND TG.trajectory_id <> TG2.trajectory_id
INNER JOIN trajectory_start_end_geom AS TSE
    ON TSE.trajectory_id = TG.trajectory_id
WHERE ST_Intersects(TSE.start_geom, TG2.geom)
;
【讨论】:
为什么需要JOIN Trajectory_geom AS TG2 ON TG.user_id = TG2.user_id AND TG.trajectory_id < TG2.trajectory_id
?
您需要把每个用户的每条轨迹（trajectory_id）与该用户的其他轨迹逐一比对来找到匹配，所以我用别名 TG2 把同一张表又连接了一次，当作临时表使用。【参考方案2】:
这应该可以解决问题:
-- Single statement: the data-modifying CTE fills trajectory_geom and passes
-- the rows it inserted (via RETURNING) to the outer INSERT, which records
-- every pair of different trajectories of the same user where the start
-- polygon of one (urow) intersects the freshly built line of the other (vrow).
--
-- * user_id is listed in GROUP BY: PostgreSQL rejects a selected column that
--   is neither aggregated nor grouped, and trajectory_id is not a key of point.
-- * The ORDER BY inside ST_MakeLine fixes the vertex order; without it the
--   aggregate receives the points in whatever order the scan produces them.
--   NOTE(review): assumes (date, "time") is the capture order of the points,
--   with id as a tie-breaker -- confirm against how point is loaded.
-- * RETURNING names the target table's columns, so no "p." qualifier is used.
WITH vrow AS (
    INSERT INTO trajectory_geom (trajectory_id, user_id, geom)
    SELECT
        p.trajectory_id,
        p.user_id,
        ST_Transform(ST_MakeLine(p.geom ORDER BY p.date, p."time", p.id), 4326) AS geom
    FROM point AS p
    GROUP BY
        p.trajectory_id,
        p.user_id
    RETURNING trajectory_id, user_id, geom
)
INSERT INTO trajectories_intercepting_with_starting_point
    (initial_trajectory_id, mathced_trajectory_id, user_id)
SELECT
    vrow.trajectory_id,
    urow.trajectory_id,
    vrow.user_id
FROM trajectory_start_end_geom AS urow
INNER JOIN vrow
    ON urow.user_id = vrow.user_id
    AND urow.trajectory_id <> vrow.trajectory_id
    AND ST_Intersects(urow.start_geom, vrow.geom)
;
如果您不需要向 trajectory_geom 中插入数据，去掉这一步（以及 CTE）会更快。
【讨论】:
我不熟悉 RETURNING 语法并收到错误:ERROR: missing FROM-clause entry for table "p" LINE 9: RETURNING p.trajectory_id, p.user_id, geom
。
哦，只需把 RETURNING 中的 p. 前缀去掉即可。
以上是关于使用 3 个 FOR 循环优化 SQL 查询的主要内容，如果未能解决你的问题，请参考以下文章