SQL 数据处理流程
RelNode
RelNode 是关系表达式,主要描述处理数据的关系,比如何sort ,join,投影,过滤,扫描
LogicalTableScan,扫描整表
// SQL 表示
select *
from
test_user
// 关系表达式生成
RelNode root = builder.scan("test_user").build();
//实际生成的逻辑计划
LogicalTableScan(table=[[test_user]])
LogicalSort 排序
// SQL 表示
select id,name
from test_user
group by id
// 关系表达式生成
RelNode root =
builder
.scan("test_user").sort(0)
.build();
//实际生成的逻辑计划
LogicalSort(sort0=[$0], dir0=[ASC])
LogicalTableScan(table=[[test_user]])
LogicalProject 投影
// SQL 表示
select id
from test_user
// 关系表达式生成
RelNode root = builder.scan("test_user").project(builder.field("id")).build();
//实际生成的逻辑计划
LogicalProject(id=[$0])
LogicalTableScan(table=[[test_user]])
LogicalFilter 过滤
// SQL 表示
select * from test_user where id=10
// 关系表达式生成
RelNode root = builder.scan("test_user").filter(builder.equals(builder.field(0),builder.literal(10))).build();
//实际生成的逻辑计划
LogicalFilter(condition=[=($0, 10)])
LogicalTableScan(table=[[test_user]])
LogicalJoin 双表join
// SQL 表示
select * from test_user join test_address
on test_user.id=test_address.id
// 关系表达式生成
RelNode root = builder.scan("test_user")
.scan("test_address").join(JoinRelType.INNER, builder.field("id"))
.build();
//实际生成的逻辑计划
LogicalJoin(condition=[$0], joinType=[inner])
LogicalTableScan(table=[[test_user]])
LogicalTableScan(table=[[test_address]])
LogicalMinus 差集
// 关系表达式生成
RelNode root = builder.scan("test_user")
.scan("test_address").minus(true)
.build();
//实际生成的逻辑计划
LogicalMinus(all=[true])
LogicalTableScan(table=[[test_user]])
LogicalTableScan(table=[[test_address]])
LogicalMinus 交集
// 关系表达式生成
RelNode root = builder.scan("test_user")
.scan("test_address").intersect(true)
.build();
//实际生成的逻辑计划
LogicalIntersect(all=[true])
LogicalTableScan(table=[[test_user]])
LogicalTableScan(table=[[test_address]])
LogicalUnion union 并集
// SQL 表示
select id,name from test_user
union all
select id,address as name from
test_address
// 关系表达式生成
RelNode root = builder.scan("test_user")
.scan("test_address").union(true)
.build();
//实际生成的逻辑计划
LogicalUnion(all=[true])
LogicalTableScan(table=[[test_user]])
LogicalTableScan(table=[[test_address]])
LogicalAggregate 分组操作
// SQL 表示
select
id,max(name)
from
test_user
group by id
// 关系表达式生成
RelNode root = builder.scan("test_user").aggregate(builder.groupKey("id"), builder.max(builder.field("name")))
.build();
//实际生成的逻辑计划
LogicalAggregate(group=[{0}], agg
LogicalTableScan(table=[[test_user]])
ReXNode
RexNode 主要表示的对数据行的处理
RexLiteral 常量表达式
//SQL 表示
select * from user where id=1
1 就是常量
builder.literal(10)
RelNode root = builder.scan("test_user").filter(builder.equals(builder.field("id"),builder.literal(10)))
.build();
LogicalFilter(condition=[=($0, 10)])
LogicalTableScan(table=[[test_user]])
RexInputRef 输入引用符
//SQL 表示
select id from test_user
RelNode root = builder.scan("test_user").project(builder.field(0))
.build();
//这里builder.filed(0) 返回的就是第一个元素
//逻辑计划
LogicalProject(id=[$0])
LogicalTableScan(table=[[test_user]])
RexCall 多个操作符构建的表达式
select id,case name='ming' then null else name
end from test_user
比如这里的
case name='ming' then null else name
end
RexSubQuery 子查询
select * from test_user where id existes (10,40)
RelNode root = builder.scan("test_user").filter(builder.call(SqlStdOperatorTable.EXISTS, builder.field(0), builder.literal(10), builder.literal(40)))
.build();
LogicalFilter(condition=[EXISTS($0, 10, 40)])
LogicalTableScan(table=[[test_user]])
参考代码
public static Frameworks.ConfigBuilder config() {
SchemaPlus schemaPlus = Frameworks.createRootSchema(true);
schemaPlus.add("test_user", new AbstractTable() {
@Override
public RelDataType getRowType(RelDataTypeFactory typeFactory) {
List<RelDataType> dataTypeList = new ArrayList<>();
RelDataType integerDataType = typeFactory.createTypeWithNullability(typeFactory.createSqlType(SqlTypeName.BIGINT), true);
RelDataType stringDataType = typeFactory.createTypeWithNullability(typeFactory.createSqlType(SqlTypeName.VARCHAR), true);
dataTypeList.add(integerDataType);
dataTypeList.add(stringDataType);
List<String> fieldNames = new ArrayList<>();
fieldNames.add("id");
fieldNames.add("name");
return typeFactory.createStructType(dataTypeList, fieldNames);
}
});
schemaPlus.add("test_address", new AbstractTable() {
@Override
public RelDataType getRowType(RelDataTypeFactory typeFactory) {
List<RelDataType> dataTypeList = new ArrayList<>();
RelDataType integerDataType = typeFactory.createTypeWithNullability(typeFactory.createSqlType(SqlTypeName.BIGINT), true);
RelDataType stringDataType = typeFactory.createTypeWithNullability(typeFactory.createSqlType(SqlTypeName.VARCHAR), true);
dataTypeList.add(integerDataType);
dataTypeList.add(stringDataType);
List<String> fieldNames = new ArrayList<>();
fieldNames.add("id");
fieldNames.add("address");
return typeFactory.createStructType(dataTypeList, fieldNames);
}
});
return Frameworks.newConfigBuilder()
.parserConfig(SqlParser.Config.DEFAULT)
.defaultSchema(schemaPlus)
.traitDefs((List<RelTraitDef>) null)
.programs(Programs.heuristicJoinOrder(Programs.RULE_SET, true, 2));
}
public static void main(String[] args) {
RelBuilder builder = RelBuilder.create(config().build());
RelNode root = builder.scan("test_user")
.build();
System.out.println(RelOptUtil.toString(root));
}
|