在 SQLAlchemy 中加入后加入
Posted
技术标签:
【中文标题】在 SQLAlchemy 中加入后加入【英文标题】:Join after outerjoin in SQLAlchemy 【发布时间】:2015-10-01 01:04:51 【问题描述】:假设我有一个一对多的关系,父母和孩子被一些group_id
分组。
注意:这个例子是我的代码的精简版,它实际上是一个多对多的关系。可能有一些与问题无关的错误。
class Node(Base):
__tablename__ = 'node'
id = Column(GUID, default=uuid.uuid4, primary_key=True)
group_id = Column(GUID, nullable=False, primary_key=True)
parent_id = Column(GUID)
title = Column(Text, nullable=False)
class Leaf(Base):
__tablename__ = 'leaf'
id = Column(GUID, nullable=False, primary_key=True)
group_id = Column(GUID, nullable=False, primary_key=True)
parent_id = Column(GUID, nullable=False)
group_id
用于创建新版本 - 因此具有相同 id
的节点和叶子可以存在于多个组中。
我要做的是比较两组,找出所有父母发生变化的叶子。我正在尝试使用外连接进行比较,然后使用两个连接来过滤父节点:
def find_changed_leaves(group_id_a, group_id_b, session):
NodeA = model.Node
NodeB = aliased(model.Node, name='node_b')
LeafA = model.Leaf
LeafB = aliased(model.Leaf, name='leaf_b')
query = (session.query(LeafA, LeafB)
.outerjoin(LeafB, LeafA.id == LeafB.id)
.join(NodeA, (LeafA.group_id == NodeA.group_id) &
(LeafA.parent_id == NodeA.id))
.join(NodeB, (LeafB.group_id == NodeB.group_id) &
(LeafB.parent_id == NodeB.id))
# Group membership
.filter(LeafA.group_id == group_id_a,
LeafB.group_id == group_id_b)
# Filter for modified parents
.filter(NodeA.title != NodeB.title)
)
return query.all()
这可行,但它不显示仅在其中一个组中的叶子(例如,如果叶子被添加到新组中的节点)。如何显示所有叶子,返回 None
以获取其中一个组中缺少的叶子?
编辑:我看到有perils mixing join with outer join。我天真地尝试将其更改为.outerjoin(NodeA, ...
,但没有帮助。
【问题讨论】:
输入和所需输出的扩展(类似单元测试)示例案例将有助于理解您想要实现的比较范围。 【参考方案1】:正如评论中提到的,尚不完全清楚需要实现什么。尽管如此,下面的代码至少应该给你一些指导。
首先,我不会尝试将它们全部组合到一个查询中(可能使用完全连接和子查询),而是将其拆分为 3 个单独的查询:
-
获取
LeafA, LeafB
谁的父母已经改变
获取没有对应LeafB
的LaefA
获取没有对应LeafA
的LaefB
下面是应该在sqlite
和postgresql
中运行的代码。请注意,我已添加关系并在查询中使用它们。但是您可以使用代码 sn-p 中的显式连接条件执行相同的操作。
import uuid
from sqlalchemy import (
create_engine, Column, Integer, String, ForeignKey, Text, and_,
ForeignKeyConstraint, UniqueConstraint, exists
)
from sqlalchemy.orm import sessionmaker, relationship, eagerload, aliased
from sqlalchemy.ext.declarative import declarative_base, declared_attr
from sqlalchemy.dialects.postgresql import UUID as GUID
_db_uri = 'sqlite:///:memory:'; GUID = String
# _db_uri = "postgresql://aaa:bbb@localhost/mytestdb"
engine = create_engine(_db_uri, echo=True)
Session = sessionmaker(bind=engine)
Base = declarative_base(engine)
newid = lambda: str(uuid.uuid4())
# define object model
class Node(Base):
__tablename__ = 'node'
id = Column(GUID, default=newid, primary_key=True)
group_id = Column(GUID, nullable=False, primary_key=True)
# parent_id = Column(GUID)
title = Column(Text, nullable=False)
class Leaf(Base):
__tablename__ = 'leaf'
id = Column(GUID, nullable=False, primary_key=True)
group_id = Column(GUID, nullable=False, primary_key=True)
parent_id = Column(GUID, nullable=False)
title = Column(Text, nullable=False)
# define relationships - easier test data creation and querying
parent = relationship(
Node,
primaryjoin=and_(Node.id == parent_id, Node.group_id == group_id),
backref="children",
)
__table_args__ = (
ForeignKeyConstraint(
['parent_id', 'group_id'], ['node.id', 'node.group_id']
),
)
Base.metadata.drop_all(engine)
Base.metadata.create_all(engine)
session = Session()
g1, g2, l1, l2, l3 = [newid() for _ in range(5)]
# Create test data
def _add_test_data():
n11 = Node(
title="node1", group_id=g1,
children=[
Leaf(id=l1, title="g1 only"),
Leaf(id=l3, title="both groups"),
]
)
n21 = Node(
title="node1 changed", group_id=g2,
children=[
Leaf(id=l2, title="g2 only"),
Leaf(id=l3, title="both groups"),
]
)
session.add_all([n11, n21])
session.commit()
def find_changed_leaves(group_id_a, group_id_b):
"""
Leaves which are in both versions, but a `title` for their parents is changed.
"""
NodeA = aliased(Node, name='node_a')
NodeB = aliased(Node, name='node_b')
LeafA = aliased(Leaf, name='leaf_a')
LeafB = aliased(Leaf, name='leaf_b')
query = (
session.query(LeafA, LeafB)
.filter(LeafA.group_id == group_id_a)
# @note: group membership for LeafB is part of join now
.join(LeafB, (LeafA.id == LeafB.id) & (LeafB.group_id == group_id_b))
.join(NodeA, LeafA.parent)
.join(NodeB, LeafB.parent)
# Filter for modified parents
.filter(NodeA.title != NodeB.title)
)
return query.all()
def find_orphaned_leaves(group_id_a, group_id_b):
"""
Leaves found in group A, but not in group B.
"""
LeafA = aliased(Leaf, name='leaf_a')
LeafB = aliased(Leaf, name='leaf_b')
query = (
session.query(LeafA)
.filter(~(
session.query(LeafB)
.filter(LeafA.id == LeafB.id)
.filter(group_id_b == LeafB.group_id)
.exists()
))
# Group membership
.filter(LeafA.group_id == group_id_a)
)
return query.all()
def find_deleted_leaves(group_id_a, group_id_b):
a_s = find_orphaned_leaves(group_id_a, group_id_b)
return tuple((a, None) for a in a_s)
def find_added_leaves(group_id_a, group_id_b):
b_s = find_orphaned_leaves(group_id_b, group_id_a)
return tuple((None, b) for b in b_s)
# add test data
_add_test_data()
# check the results
changed = find_changed_leaves(g1, g2)
assert 1 == len(changed)
le, ri = changed[0]
assert le.id == ri.id == l3
added = find_added_leaves(g1, g2)
assert 1 == len(added)
le, ri = added[0]
assert le is None
assert ri.id == l2
deleted = find_deleted_leaves(g1, g2)
assert 1 == len(deleted)
le, ri = deleted[0]
assert le.id == l1
assert ri is None
【讨论】:
谢谢!我得出了同样的结论;很高兴得到验证。并感谢您为制作测试用例所做的努力。我应该让赏金更大:) 我没有意识到你可以加入关系,比如.join(NodeA, LeafA.parent)
。这真的很酷。以上是关于在 SQLAlchemy 中加入后加入的主要内容,如果未能解决你的问题,请参考以下文章