package com.atguigu.HUDF;
import com.atguigu.Zbeans.SensorReading;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.java.StreamTableEnvironment;
import org.apache.flink.table.functions.AggregateFunction;
import org.apache.flink.table.functions.ScalarFunction;
import org.apache.flink.table.functions.TableFunction;
import org.apache.flink.types.Row;
/**
 * This example demonstrates how to define and use custom scalar, table, and aggregate functions
 * with both the Table API and Flink SQL.
 */
public class AMyUDFTest {
public static void main(String[] args) throws Exception {
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
EnvironmentSettings settings = EnvironmentSettings.newInstance()
.useBlinkPlanner()
.inStreamingMode()
.build();
StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env, settings);
DataStreamSource<String> inputStream = env.readTextFile("G:\\SoftwareInstall\\idea\\project\\UserBehaviorAnalysis\\BasicKnowledge\\src\\main\\resources\\sensor.txt");
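// Each line of sensor.txt is assumed to look like "sensor_1,1547718199,35.8" (id, epoch timestamp, temperature), matching the parsing below.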
SingleOutputStreamOperator<SensorReading> mapStream = inputStream.map(line -> {
String[] fields = line.split(",");
return new SensorReading(fields[0], Long.parseLong(fields[1]), Double.parseDouble(fields[2]));
});
Table inputTable = tableEnv.fromDataStream(mapStream, "id,timestamp as ts,temperature as temp");// convert the stream to a table, renaming timestamp -> ts and temperature -> temp
tableEnv.createTemporaryView("sensor",inputTable);// register a temporary view so it can be queried with SQL
tableEnv.registerFunction("hashCode",new UDF_Scalar(13));//注册标量函数
tableEnv.registerFunction("split",new UDF_table("_"));//注册表函数
tableEnv.registerFunction("avgTemp",new UDF_aggregate());//注册聚合函数
//Table API
Table result1 = inputTable.select("id,ts,hashCode(id)");// test the scalar function
Table result2 = inputTable.joinLateral("split(id) as (word,length)")// lateral join: append the split results to each input row
.select("id,ts,word,length");// test the table function
Table result3 = inputTable.groupBy("id")
.aggregate("avgTemp(temp) as avg_temp")// apply the registered aggregate function
.select("id,avg_temp");// test the aggregate function
//Flink SQL
Table result11 = tableEnv.sqlQuery("select id,ts,hashCode(id) from sensor");// test the scalar function
Table result22 = tableEnv.sqlQuery("select id,ts,word,length from " +
" sensor,lateral table(split(id)) as t_split(word,length)");// test the table function
Table result33 = tableEnv.sqlQuery("select id,avgTemp(temp) avg_temp from sensor group by id");// test the aggregate function
// print the results
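// toAppendStream only works for insert-only results; the grouped aggregations (result3/result33) continuously update their output,
// so they are converted with toRetractStream, which emits (Boolean, Row) pairs where false marks a retraction of a previous result.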
tableEnv.toAppendStream(result1, Row.class).print("scalar function in table api");
tableEnv.toAppendStream(result11, Row.class).print("scalar function in flink sql");
tableEnv.toAppendStream(result2, Row.class).print("table function in table api");
tableEnv.toAppendStream(result22, Row.class).print("table function in flink sql");
tableEnv.toRetractStream(result3, Row.class).print("aggregate function in table api");// non-windowed aggregation, so toRetractStream is required
tableEnv.toRetractStream(result33, Row.class).print("aggregate function in flink sql");// non-windowed aggregation, so toRetractStream is required
// execute the job
env.execute("table api & flink sql UDF test");
}
// Custom scalar function: one input row -> one output value. Extend ScalarFunction and define an eval method.
// eval may have any return type and any parameters, but it must be declared public.
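// e.g. registered above as "hashCode" with factor 13, eval("sensor_1") returns "sensor_1".hashCode() * 13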
public static class UDF_Scalar extends ScalarFunction{
private Integer factor;
public UDF_Scalar(Integer factor) {
this.factor = factor;
}
// the method must be named eval and be public; return type and parameter types are unrestricted
public int eval(String str){
return str.hashCode()*factor;
}
}
// Custom table function: one input row -> N output rows. Extend TableFunction (generic parameter = output row type) and define an eval method.
// eval parameters are unrestricted, but the method must be public; it returns void and emits rows via collect.
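// e.g. with separator "_", eval("sensor_1") emits (sensor, 6) and (1, 1)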
public static class UDF_table extends TableFunction<Tuple2<String,Integer>>{
private String separator=",";
public UDF_table(String separator) {
this.separator = separator;
}
public void eval(String str){
for (String s:str.split(separator)) {
collect(new Tuple2<>(s,s.length()));
}
}
}
// Custom aggregate function: N input rows -> one output value. Extend AggregateFunction (generic parameters = output type and accumulator type)
// and implement createAccumulator, getValue and accumulate. accumulate must be public void; its first parameter is the accumulator, the second is the input value.
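// Accumulator layout here: f0 = running sum of temperatures, f1 = number of readings; getValue returns f0 / f1, i.e. the average.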
public static class UDF_aggregate extends AggregateFunction<Double, Tuple2<Double,Integer>>{
@Override
public Double getValue(Tuple2<Double, Integer> acc) {// compute the final result (the average temperature) from the accumulator
return acc.f0/acc.f1;
}
@Override
public Tuple2<Double, Integer> createAccumulator() {// create the accumulator and give it its initial value
return new Tuple2<Double,Integer>(0.0,0);
}
public void accumulate(Tuple2<Double, Integer> acc,Double temp){// update the accumulator with each incoming temperature
acc.f0 += temp;
acc.f1 += 1;
}
}
}