树形结构存储方案对比分析
一、邻接表
关系只由一个parent_id维护

1、优点:结构简单,易插入
2、缺点:不易查询
二、前缀编码(物化路径)
存储节点的完整层级路径,借助了unix文件目录的思想。


1、优点:查询方便(效率较高,得看数据分布的选择性程度)
2、缺点:层级太深有可能会超过PATH字段的长度,占用空间大
3、查询示例:
// 查询某一节点下的所有子节点:(以Fruit为例)
SET @path = (SELECT path FROM pathTree WHERE node_name = 'Fruit');
SELECT * FROM pathTree WHERE path like CONCAT(@path,'/%');
// 如何查询直属子节点?需要采用MySQL的正则表达式查询:
SET @path = (SELECT path FROM pathTree WHERE node_name = 'Fruit');
SELECT * FROM pathTree WHERE path REGEXP CONCAT(@path,'/','[0-9]$');
// 查询任意节点的所有上级:(以Yellow为例):
SET @path = (SELECT path FROM pathTree WHERE node_name = 'Yellow');
SELECT * FROM pathTree WHERE @path LIKE CONCAT(path, '%') AND path <> @path;
// 插入新增数据:
SET @parent_path = ( SELECT path FROM pathTree WHERE node_name = 'Fruit');
INSERT INTO pathtree (path,node_name) VALUES (CONCAT(@parent_path,'/',LAST_INSERT_ID()+1),'White')
三、左右值编码
利用二叉树的前序遍历(根、左、右)思想编码,存储节点的左右子节点。


1、优点:查询方便、效率高
2、缺点:插入、更新、删除涉及到更新内容太多,需要配套算法,计算复杂且耗时。
3、查询示例:
//查询Fruit的子节点
select * from pathtree where LFT > 2 and Rgt < 11
四、Closure Table
通过两张表,主表、关系表来维护:
// 主表
CREATE TABLE nodeInfo (
node_id INT NOT NULL AUTO_INCREMENT,
node_name VARCHAR (255),
PRIMARY KEY (`node_id`)
) DEFAULT CHARSET = utf8;
// 关系表
CREATE TABLE nodeRelationship (
ancestor INT NOT NULL COMMENT '祖先节点',
descendant INT NOT NULL COMMENT '后代节点',
distance INT NOT NULL COMMENT '祖先距离后代的距离',
PRIMARY KEY (ancestor, descendant)
) DEFAULT CHARSET = utf8;


1、优点:查询方便、效率高、更新较方便
2、缺点:查询比较费空间
3、示例
① 创建数据的存储过程
CREATE DEFINER = `root`@`localhost` PROCEDURE `AddNode`(`_parent_name` varchar(255),`_node_name` varchar(255))
BEGIN
DECLARE _ancestor INT;
DECLARE _descendant INT;
DECLARE _parent INT;
IF NOT EXISTS(SELECT node_id From nodeinfo WHERE node_name = _node_name)
THEN
INSERT INTO nodeinfo (node_name) VALUES(_node_name);
SET _descendant = (SELECT node_id FROM nodeinfo WHERE node_name = _node_name);
INSERT INTO noderelationship (ancestor,descendant,distance) VALUES(_descendant,_descendant,0);
IF EXISTS (SELECT node_id FROM nodeinfo WHERE node_name = _parent_name)
THEN
SET _parent = (SELECT node_id FROM nodeinfo WHERE node_name = _parent_name);
INSERT INTO noderelationship (ancestor,descendant,distance) SELECT ancestor,_descendant,distance+1 from noderelationship where descendant = _parent;
END IF;
END IF;
END;
② 查询
// 查询Fruit下所有的子节点
SELECT
n3.node_name
FROM
nodeinfo n1
INNER JOIN noderelationship n2 ON n1.node_id = n2.ancestor
INNER JOIN nodeinfo n3 ON n2.descendant = n3.node_id
WHERE
n1.node_name = 'Fruit'
AND n2.distance != 0
// 查询Fruit下直属子节点:
SELECT
n3.node_name
FROM
nodeinfo n1
INNER JOIN noderelationship n2 ON n1.node_id = n2.ancestor
INNER JOIN nodeinfo n3 ON n2.descendant = n3.node_id
WHERE
n1.node_name = 'Fruit'
AND n2.distance = 1
// 查询Fruit所处的层级:
SELECT
n2.*, n3.node_name
FROM
nodeinfo n1
INNER JOIN noderelationship n2 ON n1.node_id = n2.descendant
INNER JOIN nodeinfo n3 ON n2.ancestor = n3.node_id
WHERE
n1.node_name = 'Fruit'
ORDER BY
n2.distance DESC
③ 新增
首先,把自己新增进去,SQL:insert into releation values(‘E’,‘E’,0);,然后找到E的祖先,那么现在E的祖先是B的祖先加上B自己,然后告诉这些祖先们,他们新增了一个后代。通过找祖先的SQL,我们找到了B的祖先,A,那么E的祖先就是B和A:insert into releation values(‘A’,‘E’,2);insert into releation values(‘B’,‘E’,1);那么我们可以看出,新增子节点,除了新增自己以外,还需要通知祖先,并让祖先保存自己,下面提供一个伪码,实现该功能
public insert(Node a,Node b){
//1.将自己记录下来
conn.excuteSql(insert into releation values(a,a,0));
//2.查找a的祖先和自己,并告知他们,他们新增的子孙b
List<Releation> ancestors = conn.excuteSql(select r.ancestor from releation r where r.descendant=a order by distacne);
for(Releation r : ancestors){
conn.excuteSql(insert into releation values(r.ancestor,b,r.distacne+1))
}
}