SQL Server 图形数据库 - 使用多种边类型的最短路径

Posted

技术标签:

【中文标题】SQL Server 图形数据库 - 使用多种边类型的最短路径【英文标题】:SQL Server Graph Database - shortest path using multiple edge types 【发布时间】:2021-05-09 16:36:25 【问题描述】:

我已经对 SQL Server GraphDB 进行了研究,但到目前为止我发现的所有人为示例都只使用了一个边表。例如,它总是Person-friend_of->Person。就我而言,我已经在我们的数据中心创建了一个已部署软件组件的图表,并且存在不同的边缘/关系。 Application-connects_to->SqlDatabaseServer-hosts->Application 之类的东西。

我想编写一个查询,它将显示任意两个节点之间的最短路径,而不管使用的边是什么。我想如果我使用的是 Neo4j,我会将MATCH 写为:

Server-*->SqlDatabase 注意星号。

在 SQL Server 中是否有惯用的方法来执行此操作?

【问题讨论】:

【参考方案1】:

自 SQL Server 2019 起,您可以使用派生表或视图完全做到这一点。我找不到任何有关此功能的官方文档,但我在video about Bill of Materials 中找到了一个小注释。

编辑:他们在该视频中有一些链接,但我们只需要关注这个Github example。

关键在于:在 MATCH 运算符中,把多个 EDGE(或 NODE)表的 UNION ALL 当作一个 EDGE(或 NODE)表来使用。

- 您应该使用视图(而不是子选择),不过它并不总是按预期工作(见下文)。
- 您可以使用子选择(SUBSELECT),但您将无法在聚合函数中使用子选择的列(这也许可以做到,但并不易用,而且完全没有文档记录)。
- 您可以使用公用表表达式(CTE)(虽然我没有特别努力去尝试,但我没能让它工作)。

示例

本例使用异构(heterogeneous)节点视图和异构边视图。它还展示了一个特殊之处(我会称之为错误,但它也可能是一个特性,这需要由微软(M$)来回答):如果您要查找两个异构节点之间的最短路径,那么起点和终点都必须来自异构视图。如果您从某个 特定 的节点表出发,然后继续遍历异构节点,那么不知出于何种原因,该算法只能遍历到距离起始节点一条边远的位置。

-- Demo setup. Everything runs inside a single transaction so that the
-- ROLLBACK at the end of the script removes every object created here.
BEGIN TRANSACTION;
GO

-- All demo objects live in the "graph" schema. Create it when it is missing so
-- the CREATE TABLE statements below do not fail on a fresh database.
-- CREATE SCHEMA has to be the first statement of its batch, hence dynamic SQL.
IF SCHEMA_ID(N'graph') IS NULL
  EXEC (N'CREATE SCHEMA graph');
GO

-- Node tables: three different kinds of residential areas.
-- NOTE: the *_UTF8 collation requires SQL Server 2019 or later.
CREATE TABLE graph.SmallCities (
  Name         varchar(1000) COLLATE Czech_100_CI_AI_SC_UTF8,
  SmallCity_ID INTEGER IDENTITY(666,666) PRIMARY KEY
) AS NODE;

CREATE TABLE graph.LargeCities (
  Name         varchar(1000) COLLATE Czech_100_CI_AI_SC_UTF8,
  LargeCity_ID INTEGER IDENTITY(666,666) PRIMARY KEY
) AS NODE;

CREATE TABLE graph.Villages (
  Name       varchar(1000) COLLATE Czech_100_CI_AI_SC_UTF8,
  Village_ID INTEGER IDENTITY(666,666) PRIMARY KEY
) AS NODE;

-- Edge tables: three different kinds of connections. The unique index allows
-- at most one edge of a given kind per ordered (from, to) node pair.
CREATE TABLE graph.Footpaths (
  INDEX UQ UNIQUE NONCLUSTERED ($from_id, $to_id)
) AS EDGE;

CREATE TABLE graph.Roads (
  INDEX UQ UNIQUE NONCLUSTERED ($from_id, $to_id)
) AS EDGE;

CREATE TABLE graph.Railways (
  INDEX UQ UNIQUE NONCLUSTERED ($from_id, $to_id)
) AS EDGE;

-- Seed the node tables: one small city, two large cities and one village.
INSERT INTO graph.SmallCities (Name)
VALUES
  (N'SmallCityOnRoad');

INSERT INTO graph.LargeCities (Name)
VALUES
  (N'BigCityOnRailway'),
  (N'BiggishCityOnRailway');

INSERT INTO graph.Villages (Name)
VALUES
  (N'VillageInMountains');

-- Railway edge: BigCityOnRailway -> BiggishCityOnRailway (both large cities).
-- The two name filters pick the endpoint rows out of the cross product.
INSERT INTO graph.Railways ($from_id, $to_id)
SELECT
  origin.$node_id,
  destination.$node_id
FROM graph.LargeCities AS origin
CROSS JOIN graph.LargeCities AS destination
WHERE origin.Name = N'BigCityOnRailway'
  AND destination.Name = N'BiggishCityOnRailway'
OPTION (RECOMPILE);

-- Road edge: BiggishCityOnRailway (large city) -> SmallCityOnRoad (small city).
-- The two name filters pick the endpoint rows out of the cross product.
INSERT INTO graph.Roads ($from_id, $to_id)
SELECT
  large_city.$node_id,
  small_city.$node_id
FROM graph.LargeCities AS large_city
CROSS JOIN graph.SmallCities AS small_city
WHERE large_city.Name = N'BiggishCityOnRailway'
  AND small_city.Name = N'SmallCityOnRoad'
OPTION (RECOMPILE);

-- Footpath edge: SmallCityOnRoad (small city) -> VillageInMountains (village).
-- The two name filters pick the endpoint rows out of the cross product.
INSERT INTO graph.Footpaths ($from_id, $to_id)
SELECT
  small_city.$node_id,
  village.$node_id
FROM graph.SmallCities AS small_city
CROSS JOIN graph.Villages AS village
WHERE small_city.Name = N'SmallCityOnRoad'
  AND village.Name = N'VillageInMountains'
OPTION (RECOMPILE);

GO

-- Heterogeneous node view: stacks the three node tables into one row set.
-- Each branch exposes the table's $node_id pseudo-column, the area name,
-- the table's own identity value (as Area_ID) and a literal tag telling
-- which node table the row came from.
CREATE VIEW graph.AllResidentialAreas
AS
SELECT
  large_city.$node_id     AS node_id,
  large_city.Name         AS Name,
  large_city.LargeCity_ID AS Area_ID,
  'Large city'            AS AreaType
FROM graph.LargeCities AS large_city
UNION ALL
SELECT
  small_city.$node_id     AS node_id,
  small_city.Name         AS Name,
  small_city.SmallCity_ID AS Area_ID,
  'Small city'            AS AreaType
FROM graph.SmallCities AS small_city
UNION ALL
SELECT
  village.$node_id   AS node_id,
  village.Name       AS Name,
  village.Village_ID AS Area_ID,
  'Village'          AS AreaType
FROM graph.Villages AS village;

GO

-- Heterogeneous edge view: stacks the three edge tables into one row set.
-- Each branch exposes the table's $edge_id pseudo-column and a literal tag
-- naming the kind of connection.
-- The table names are spelled exactly as in their CREATE TABLE statements
-- (graph.Railways, not graph.RailWays) so the view also resolves on a
-- database with a case-sensitive collation.
CREATE VIEW graph.AllPaths
AS
SELECT
  railway.$edge_id AS edge_id,
  'Railway'        AS PathType
FROM graph.Railways AS railway
UNION ALL
SELECT
  road.$edge_id AS edge_id,
  'Road'        AS PathType
FROM graph.Roads AS road
UNION ALL
SELECT
  footpath.$edge_id AS edge_id,
  'Footpath'        AS PathType
FROM graph.Footpaths AS footpath;

GO

/*
PROBLEM CASE (the statement runs, but the author reports an incomplete result).

The start node is taken from the concrete node table graph.LargeCities, while
the traversed nodes come from the heterogeneous view graph.AllResidentialAreas
and the traversed edges from the heterogeneous view graph.AllPaths.
Using graph.AllResidentialAreas for the start node as well is the variant the
author reports as returning every reachable area.
*/
SELECT
  origin.Name                                              AS FromArea,
  LAST_VALUE(area.Name) WITHIN GROUP (GRAPH PATH)          AS ToArea,
  STRING_AGG(hop.PathType, '->') WITHIN GROUP (GRAPH PATH) AS Way
FROM
  graph.LargeCities                  AS origin, -- concrete node table: this is the problem
  graph.AllPaths            FOR PATH AS hop,
  graph.AllResidentialAreas FOR PATH AS area
WHERE
  -- Arbitrary-length pattern; the single-hop form would be origin-(hop)->area.
  MATCH(SHORTEST_PATH(origin(-(hop)->area)+))
  AND origin.Name = 'BigCityOnRailway';

/*
Output reported by the author (no row for VillageInMountains):
FromArea         ToArea               Way
BigCityOnRailway BiggishCityOnRailway Railway
BigCityOnRailway SmallCityOnRoad      Railway->Road
*/

/*
WORKING CASE.

Start node, traversed nodes and traversed edges all come from heterogeneous
views (graph.AllResidentialAreas / graph.AllPaths). The start node is narrowed
to the large city 'BigCityOnRailway' through the view's Name and AreaType
columns instead of querying graph.LargeCities directly.
*/
SELECT
  origin.Name                                              AS FromArea,
  LAST_VALUE(area.Name) WITHIN GROUP (GRAPH PATH)          AS ToArea,
  STRING_AGG(hop.PathType, '->') WITHIN GROUP (GRAPH PATH) AS Way
FROM
  graph.AllResidentialAreas          AS origin, -- heterogeneous view, not graph.LargeCities
  graph.AllPaths            FOR PATH AS hop,
  graph.AllResidentialAreas FOR PATH AS area
WHERE
  -- Arbitrary-length pattern; the single-hop form would be origin-(hop)->area.
  MATCH(SHORTEST_PATH(origin(-(hop)->area)+))
  AND origin.Name = 'BigCityOnRailway'
  AND origin.AreaType = 'Large city';

/*
Output reported by the author:
FromArea         ToArea               Way
BigCityOnRailway BiggishCityOnRailway Railway
BigCityOnRailway SmallCityOnRoad      Railway->Road
BigCityOnRailway VillageInMountains   Railway->Road->Footpath
*/

GO
-- Clean up: undo everything done since BEGIN TRANSACTION, but only when a
-- transaction is still open.
IF @@TRANCOUNT > 0
BEGIN
  ROLLBACK TRANSACTION;
END;
GO

【讨论】:

以上是关于SQL Server 图形数据库 - 使用多种边类型的最短路径的主要内容,如果未能解决你的问题,请参考以下文章

SQL Server 图形数据库

实体框架查询 SQL Server 2017 图形数据库的语法

获取 SQL Server 图形数据库中的所有好友

如何在 SQL Server 2017 中将“生成脚本”与图形数据库对象一起使用?

schemacrawler 未在 MS sql server db 的图形选项中显示关系/边

sql Server 查询方法的优化