1. 什么是xml ?
xml快速入门
2. 导入依赖
本方案使用 dom4j + XPath 解析 XML 文件。dom4j 的 XPath 查询(selectNodes / selectSingleNode)依赖 jaxen,因此需要同时引入下面两个依赖:
<!-- dom4j: provides the SAXReader, Document and Node types used by the sample code. -->
<dependency>
<groupId>org.dom4j</groupId>
<artifactId>dom4j</artifactId>
<version>2.1.3</version>
</dependency>
<!-- jaxen: XPath engine that dom4j relies on to evaluate selectNodes / selectSingleNode expressions. -->
<dependency>
<groupId>jaxen</groupId>
<artifactId>jaxen</artifactId>
<version>1.2.0</version>
</dependency>
3. 案例文件(文件名为 testxml.xml,需放在 classpath 下,例如 src/main/resources 目录)
<?xml version="1.0" encoding="UTF-8"?>
<!-- Sample job description parsed by the example code; each element below is read with an XPath query. -->
<map-reduce>
<!-- Read with "//jobName"; its "type" attribute is read with the relative XPath "@type". -->
<jobName type="MR">测试MR参数使用</jobName>
<!-- Read with "//jobPath". -->
<jobPath>/opt/soft/cdp_job_jar/测试MR参数使用_20220505112618.jar</jobPath>
<!-- Empty element; the example code does not query it. -->
<configuration/>
<!-- Every direct child of <job> is selected with "//job/*" and its text printed in document order. -->
<job>
<setMapperClass>xx.xxx.xxxxxx.mr.TestMR</setMapperClass>
<setOutputFormatClass>org.apache.hadoop.mapreduce.lib.output.TextOutputFormat</setOutputFormatClass>
<setMapOutputValueClass>org.apache.hadoop.io.LongWritable</setMapOutputValueClass>
<setMapOutputKeyClass>org.apache.hadoop.io.Text</setMapOutputKeyClass>
<setInputFormatClass>org.apache.hadoop.mapreduce.lib.input.TextInputFormat</setInputFormatClass>
</job>
<!-- Read with "//input" and "//output" respectively. -->
<input>/user/hdfs/rawdata/gn-20220411.log</input>
<output>/user/hdfs/rawdata/test-mr-1</output>
</map-reduce>
4. 案例代码
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Node;
import org.dom4j.io.SAXReader;
import java.io.File;
import java.util.List;
/**
 * Example that parses the classpath resource {@code testxml.xml} with dom4j and
 * prints the values selected by a handful of XPath expressions.
 */
public class 解析xml {

    /** Name of the classpath resource holding the job description. */
    private static final String RESOURCE_NAME = "testxml.xml";

    /**
     * Parses {@code testxml.xml} and prints the job name, job type, jar path,
     * the text of every child of {@code <job>}, and the input/output paths.
     *
     * @param args ignored
     * @throws DocumentException if the resource is not well-formed XML or cannot be read
     * @throws IllegalStateException if the resource is not on the classpath or a
     *         required node is missing from the document
     */
    public static void main(String[] args) throws DocumentException {
        java.net.URL resource = 解析xml.class.getClassLoader().getResource(RESOURCE_NAME);
        if (resource == null) {
            throw new IllegalStateException("classpath resource not found: " + RESOURCE_NAME);
        }

        // createDefault() returns a reader with DTDs and external entities disabled,
        // which protects against XXE if the XML ever comes from an untrusted source.
        SAXReader saxReader = SAXReader.createDefault();

        // Read straight from the URL. URL.getPath() is percent-encoded, so wrapping it in
        // new File(...) fails when the classpath location contains spaces or non-ASCII
        // characters (e.g. a Chinese directory name).
        Document document = saxReader.read(resource);

        Node jobNameNode = requireNode(document, "//jobName");
        System.out.println("任务名称 : " + jobNameNode.getText());

        // "@type" is evaluated relative to the <jobName> element.
        System.out.println("任务类型 : " + requireNode(jobNameNode, "@type").getText());

        System.out.println("任务jar包位置: " + requireNode(document, "//jobPath").getText());

        // All direct children of <job>, in document order.
        List<Node> jobNodes = document.selectNodes("//job/*");
        for (Node jobNode : jobNodes) {
            System.out.println(jobNode.getText());
        }

        System.out.println("输入文件路径 : " + requireNode(document, "//input").getText());
        System.out.println("输出文件路径 : " + requireNode(document, "//output").getText());
    }

    /**
     * Selects a single node and fails with a descriptive message instead of a bare
     * {@link NullPointerException} when nothing matches.
     *
     * @param context node the XPath expression is evaluated against
     * @param xpath XPath expression expected to match at least one node
     * @return the first matching node, never {@code null}
     * @throws IllegalStateException if the expression matches nothing
     */
    private static Node requireNode(Node context, String xpath) {
        Node match = context.selectSingleNode(xpath);
        if (match == null) {
            throw new IllegalStateException("no node matches XPath: " + xpath);
        }
        return match;
    }
}
5. 解析结果
任务名称 : 测试MR参数使用
任务类型 : MR
任务jar包位置: /opt/soft/cdp_job_jar/测试MR参数使用_20220505112618.jar
xx.xxx.xxxxxx.mr.TestMR
org.apache.hadoop.mapreduce.lib.output.TextOutputFormat
org.apache.hadoop.io.LongWritable
org.apache.hadoop.io.Text
org.apache.hadoop.mapreduce.lib.input.TextInputFormat
输入文件路径 : /user/hdfs/rawdata/gn-20220411.log
输出文件路径 : /user/hdfs/rawdata/test-mr-1
欢迎大家留言一起讨论学习!