MapReduce前置程序设计
1.先用java程序单机版进行模拟统计操作:
例子:全国有很多手机店,每天都会产生手机的销量数据;假设我们把这些数据当作大数据,对全国的销量进行统计;
1.我们先用程序随机生成一个文件来存储手机品牌数据:这里模拟300万条数据;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.util.Random;
/**
 * Generates a sample data file of randomly chosen phone model names.
 *
 * <p>Every line written to {@code e:/hfc.txt} holds four comma-separated
 * fields: a Huawei model, another brand, a Huawei model, another brand.
 * The file is written as UTF-8.
 */
public class Test02 {
    /** Total number of lines to write (the tutorial simulates 3,000,000 records). */
    private static final int LINE_COUNT = 3000000;

    /**
     * Writes {@link #LINE_COUNT} random lines to {@code e:/hfc.txt}.
     *
     * @param args unused
     * @throws IOException if the file cannot be created or written
     */
    public static void main(String[] args) throws IOException {
        // FileOutputStream creates the file when it does not exist yet,
        // so no explicit createNewFile() call is needed.
        File file = new File("e:/hfc.txt");

        String[] huaweiModels = {
            "华为P40",
            "华为P30",
            "HUAWEI P50 Pro",
            "HUAWEI nova 8 SE",
            "HUAWEI nova 8 SE 活力版",
            "HUAWEI Mate 30",
            "HUAWEI nova 7 SE",
            "华为畅享20 Pro 5G",
            "华为畅享 10e",
            "华为畅享Z 5G"
        };
        // NOTE(review): "联系手机" may be a typo for "联想手机" - confirm before changing the data.
        String[] otherBrands = {
            "小米5",
            "红米20",
            "联系手机",
            "vivo手机",
            "努比亚手机",
            "苹果手机",
            "一加手机",
            "oppo手机",
            "锤子手机",
            "魅族手机"
        };

        // One generator for the whole run instead of a new one per field.
        Random random = new Random();

        BufferedWriter buf = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(file), "utf-8"));
        try {
            // Exactly one line per iteration, so the file ends up with LINE_COUNT lines.
            for (int i = 0; i < LINE_COUNT; i++) {
                // Each array is indexed by its own length so the two lists may differ in size.
                buf.write(huaweiModels[random.nextInt(huaweiModels.length)]
                        + "," + otherBrands[random.nextInt(otherBrands.length)]
                        + "," + huaweiModels[random.nextInt(huaweiModels.length)]
                        + "," + otherBrands[random.nextInt(otherBrands.length)]);
                buf.newLine();
            }
            buf.flush();
        } finally {
            // Closing the outermost writer also closes the wrapped streams.
            buf.close();
        }
    }
}
这样就可以非常简单地生成文件;但是,txt文件太大时可能无法用记事本打开;大家可以改成.doc文件(注意:只修改扩展名不会改变文件的内容和大小),也可以减少生成的行数,或者使用支持大文件的编辑器来查看
2.java 统计 品牌销量;
通过程序读取这个文件,逐行取出内容,并以逗号(,)拆分;某个品牌每出现一次,计数就加1;最后统计出每个品牌的销售情况;
代码:
package com.mr.java;
import java.awt.image.Kernel;
import java.io.*;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
/**
 * Reads {@code e:/hfc.txt}, splits every line on commas and counts how many
 * times each token (phone brand/model name) occurs, then prints the totals.
 *
 * <p>Project: BigData, package {@code com.mr.java}, created 2021/8/22 9:16.
 *
 * @author TeacherHuang
 * @version 1.0
 */
public class WordCount {
    /**
     * Counts the comma-separated tokens of {@code e:/hfc.txt} and prints each
     * token with its count, first via the key set and then via the entry set.
     *
     * @param args unused
     * @throws IOException if the input file cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        // token -> number of occurrences
        HashMap<String, Integer> map = new HashMap<String, Integer>();

        // The data file is written as UTF-8, so it must be decoded as UTF-8 as well;
        // relying on the platform default charset garbles the Chinese names.
        BufferedReader bufferedReader = new BufferedReader(
                new InputStreamReader(new FileInputStream("e:/hfc.txt"), "utf-8"));
        try {
            String line;
            while ((line = bufferedReader.readLine()) != null) {
                String[] words = line.split(",");
                for (String word : words) {
                    // A blank line splits into a single empty token; do not count it as a brand.
                    if (word.length() == 0) {
                        continue;
                    }
                    // A single lookup: null means this is the first occurrence.
                    Integer value = map.get(word);
                    map.put(word, value == null ? 1 : value + 1);
                }
            }
        } finally {
            // Release the file handle even when reading fails.
            bufferedReader.close();
        }

        // Variant 1: iterate over the keys and look up each value.
        Set<String> set = map.keySet();
        for (String string : set) {
            Integer value = map.get(string);
            System.out.println("手机品牌:" + string + "\t\t月销量:" + value);
        }
        System.out.println("------------------------------------------------------------------");

        // Variant 2: iterate over the entries directly.
        Set<Map.Entry<String, Integer>> entries = map.entrySet();
        for (Map.Entry<String, Integer> entry : entries) {
            System.out.println("手机品牌:" + entry.getKey() + "\t\t\t月销量:" + entry.getValue());
        }
    }
}
3.把计算出来的结果写入到文件上
把上面的处理完毕的结果写出到一个文件上;方便以后使用;
// Print every total to the console and also persist it to e:/count.txt.
Set<Map.Entry<String, Integer>> entries = map.entrySet();
// Write with an explicit charset so the Chinese text does not depend on the platform default.
BufferedWriter bufferedWriter =
        new BufferedWriter(new OutputStreamWriter(new FileOutputStream("e:/count.txt"), "utf-8"));
try {
    for (Map.Entry<String, Integer> entry : entries) {
        System.out.println("手机品牌:" + entry.getKey() + "\t\t\t月销量:" + entry.getValue());
        // One result per line in the output file.
        bufferedWriter.write("手机品牌:" + entry.getKey() + "\t\t\t\t月销量:" + entry.getValue());
        bufferedWriter.newLine();
    }
    bufferedWriter.flush();
} finally {
    // Always release the file handle, even if a write fails.
    bufferedWriter.close();
}
4.排序出来的结果显示;写入到文件
对上面的数据进行排序操作:(通过java的比较器进行)
// Sort the entries by count, largest first.
// The copy is fully parameterized (no raw ArrayList) so no unchecked conversion is needed.
List<Map.Entry<String, Integer>> list = new ArrayList<Map.Entry<String, Integer>>(entries);
Collections.sort(list, new Comparator<Map.Entry<String, Integer>>() {
    @Override
    public int compare(Map.Entry<String, Integer> o1, Map.Entry<String, Integer> o2) {
        // Descending order; compareTo avoids the overflow risk of subtracting the values.
        return o2.getValue().compareTo(o1.getValue());
    }
});
System.out.println("排序操作------------------------------------------------");
for (Map.Entry<String, Integer> stringIntegerEntry : list) {
    System.out.println(stringIntegerEntry.getKey() + "\t\t\t\t\t\t"
            + stringIntegerEntry.getValue());
}
5.java集合中的比较器使用:
内部比较器和外部比较器的使用:
1.内部比较器——适合在bean文件可以自由修改的情况下使用:
package com.mr.entity;
/**
* @Program BigData
* @Package com.mr.entity
* @Auther TeacherHuang
* @Date 2021/8/22 21:13
* @Version 1.0
*/
public class User implements Comparable<User>{
private int id;
private String name;
private int age;
private int score;
public User(int id, String name, int age, int score) {
this.id = id;
this.name = name;
this.age = age;
this.score = score;
}
public int getId() {
return id;
}
public void setId(int id) {
this.id = id;
}
public String getName() {
return name;
}
public void setName(String name) {
this.name = name;
}
public int getAge() {
return age;
}
public void setAge(int age) {
this.age = age;
}
public int getScore() {
return score;
}
public void setScore(int score) {
this.score = score;
}
@Override
public String toString() {
return "User{" +
"id=" + id +
", name='" + name + '\'' +
", age=" + age +
", score=" + score +
'}';
}
// 在bean的这个实体类中可以进行实现接口的方式;内部进行比较
public int compareTo(User o) {
if (this.score==o.score){
// 成绩相同了就根据age进行排序
return this.age-o.age;
}
return this.score-o.score;
}
}
测试代码:
// Demo: sorts a handful of users by their natural ordering (User.compareTo) and prints them.
public static void main(String[] args) {
    User[] samples = {
        new User(1231, "张三", 23, 56),
        new User(1232, "李四", 21, 66),
        new User(1234, "王五", 12, 16),
        new User(1235, "赵柳", 30, 46),
        new User(1261, "王沟", 3, 86),
        new User(1131, "王麻子", 53, 90)
    };

    // Copy the samples into a list, keeping their order.
    List<User> users = new ArrayList<User>();
    for (int i = 0; i < samples.length; i++) {
        users.add(samples[i]);
    }

    // No comparator is passed: the elements order themselves.
    Collections.sort(users);

    for (int i = 0; i < users.size(); i++) {
        System.out.println(users.get(i));
    }
}
2.外部比较器——适合在bean文件不能修改的情况下使用:
package com.mr.entity;
/**
* @Program BigData
* @Package com.mr.entity
* @Auther TeacherHuang
* @Date 2021/8/22 21:13
* @Version 1.0
*/
public class User implements Comparable<User>{
private int id;
private String name;
private int age;
private int score;
public User(int id, String name, int age, int score) {
this.id = id;
this.name = name;
this.age = age;
this.score = score;
}
public int getId() {
return id;
}
public void setId(int id) {
this.id = id;
}
public String getName() {
return name;
}
public void setName(String name) {
this.name = name;
}
public int getAge() {
return age;
}
public void setAge(int age) {
this.age = age;
}
public int getScore() {
return score;
}
public void setScore(int score) {
this.score = score;
}
// 在bean的这个实体类中可以进行实现接口的方式;内部进行比较
public int compareTo(User o) {
if (this.score==o.score){
// 成绩相同了就根据age进行排序
return this.age-o.age;
}
return this.score-o.score;
}
@Override
public String toString() {
return "User{" +
"id=" + id +
", name='" + name + '\'' +
", age=" + age +
", score=" + score +
'}';
}
}
测试:
package com.mr.java;
import com.mr.entity.Student;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
/**
 * Demonstrates an external comparator: the list is sorted with a
 * {@link Comparator} supplied at the call site instead of relying on an
 * ordering built into the element class.
 *
 * <p>NOTE(review): {@code Student} is imported from {@code com.mr.entity} but
 * its source is not shown alongside this class; presumably it has an
 * {@code (int, String, int, int)} constructor and an int {@code getId()} -
 * confirm against the bean.
 *
 * <p>Project: BigData, package {@code com.mr.java}, created 2021/8/22 22:05.
 *
 * @author TeacherHuang
 * @version 1.0
 */
public class ComparatorTest {
    /**
     * Builds a small list of students, sorts it by id ascending and prints it.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        List<Student> list = new ArrayList<Student>();
        list.add(new Student(1231, "张三", 23, 56));
        list.add(new Student(1232, "李四", 21, 66));
        list.add(new Student(1234, "王五", 12, 16));
        list.add(new Student(1235, "赵柳", 30, 46));
        list.add(new Student(1261, "王沟", 3, 86));
        list.add(new Student(1131, "王麻子", 53, 90));

        // Custom comparator: order by id, ascending.
        Collections.sort(list, new Comparator<Student>() {
            @Override
            public int compare(Student o1, Student o2) {
                // Compare explicitly; subtracting the ids can overflow and flip the sign.
                int left = o1.getId();
                int right = o2.getId();
                if (left == right) {
                    return 0;
                }
                return left < right ? -1 : 1;
            }
        });

        for (Student student : list) {
            System.out.println(student);
        }
    }
}
|