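// 1. Count the number of students in each class
// 2. Compute each student's total score
// 1. List the per-subject scores of the ten top-ranked students in the grade [student id, student name, class, subject name, score]
// 2. Find the students whose total score is above the grade average [student id, name, class, total score]
// 3. Find the students who passed every subject [student id, name, class, subject, score]
// 4. Find the top three students of each class [student id, name, class, score]
// 5. Find the 100 students with the most unbalanced subject scores [student id, name, class, subject, score]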
package com.shujia
import org.junit.{Before, Test}
import scala.collection.mutable.ListBuffer
import scala.io.{BufferedSource, Source}
class Demo30propect {
// Parsed rows of students.txt; assigned by readFile() (annotated @Before).
var students: List[Students] = _
// Parsed rows of score.txt; assigned by readFile().
var score: List[Score] = _
// Parsed rows of subject.txt; assigned by readFile().
var subject: List[Subject] = _
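// 1. Count the number of students in each class
// 2. Compute each student's total score
// 1. List the per-subject scores of the ten top-ranked students in the grade [student id, student name, class, subject name, score]
// 2. Find the students whose total score is above the grade average [student id, name, class, total score]
// 3. Find the students who passed every subject [student id, name, class, subject, score]
// 4. Find the top three students of each class [student id, name, class, score]
// 5. Find the 100 students with the most unbalanced subject scores [student id, name, class, subject, score]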
/**
 * Loads the three data files into the `students`, `score` and `subject` fields.
 *
 * Annotated with `@Before`, so JUnit runs it ahead of every test method.
 * Every file holds one comma-separated record per line:
 *   - students.txt: id, name, age, gender, class
 *   - score.txt:    student id, subject id, score
 *   - subject.txt:  subject id, subject name, subject score
 *
 * Blank lines are skipped. A line with too few fields or a non-numeric value
 * in a numeric column still fails with the underlying exception.
 */
@Before
def readFile(): Unit = {
  students = readRecords("D:\\Program Files\\IDEA\\IdeaProject\\shujiabigdata\\data\\students.txt") { fields =>
    Students(fields(0).toInt, fields(1), fields(2).toInt, fields(3), fields(4))
  }
  score = readRecords("D:\\Program Files\\IDEA\\IdeaProject\\shujiabigdata\\data\\score.txt") { fields =>
    Score(fields(0).toInt, fields(1).toInt, fields(2).toInt)
  }
  subject = readRecords("D:\\Program Files\\IDEA\\IdeaProject\\shujiabigdata\\data\\subject.txt") { fields =>
    Subject(fields(0).toInt, fields(1), fields(2).toInt)
  }
}

/**
 * Reads the file at `path`, splits each non-blank line on commas and converts
 * the fields with `parse`. The file is closed even when parsing throws.
 */
private def readRecords[T](path: String)(parse: Array[String] => T): List[T] = {
  val source: BufferedSource = Source.fromFile(path)
  try {
    // getLines() is lazy: materialise the list before the source is closed.
    source.getLines()
      .filter(line => line.trim.nonEmpty)
      .map(line => parse(line.split(",")))
      .toList
  } finally {
    source.close()
  }
}
/**
 * Prints the first ten records of the student, score and subject lists,
 * in that order, one record per line.
 */
@Test
def printAll() = {
  // take(10) yields at most the first ten elements of each list.
  for (row <- students.take(10)) println(row)
  for (row <- score.take(10)) println(row)
  for (row <- subject.take(10)) println(row)
}
/**
 * Task 1: prints the number of students in each class as (class, count) pairs.
 */
@Test
def clazz_sum() = {
  // Bucket the students by their class name.
  val membersByClass = students.groupBy(_.clazz)
  val headCounts = membersByClass.map { case (className, members) => (className, members.size) }
  headCounts.foreach(println)
}
/**
 * Task 2: prints every student's total score as (student id, total) pairs.
 */
@Test
def stu_cnt() = {
  val totals = score
    .groupBy(_.id)
    .map { case (studentId, rows) =>
      // Add up the scores of all rows belonging to this student.
      (studentId, rows.map(_.score).sum)
    }
  totals.foreach(println)
}
/**
 * Advanced task 1: prints the per-subject scores of the ten students with the
 * highest total score.
 *
 * Output rows: (student id, student name, class, subject name, score).
 * Rows are emitted in the order the students appear in `students`, not in
 * ranking order. Only the result rows are printed.
 */
@Test
def stu_score_top10(): Unit = {
  // Total score per student, highest first, reduced to the ids of the first ten.
  // groupBy returns a Map, so it is turned into a List before sorting.
  val top10Ids: Set[Int] = score
    .groupBy(_.id)
    .map { case (id, rows) => (id, rows.map(_.score).sum) }
    .toList
    .sortBy(-_._2)
    .take(10)
    .map(_._1)
    .toSet

  // Lookup tables: student id -> that student's score rows, subject id -> subject name.
  val scoresById: Map[Int, List[Score]] = score.groupBy(_.id)
  val subjectNames: Map[Int, String] =
    subject.map(sub => (sub.subject_id, sub.subject_name)).toMap

  students
    .filter(stu => top10Ids.contains(stu.id))
    .flatMap { stu =>
      // One output row per score row of the student.
      scoresById(stu.id).map { sco =>
        (stu.id, stu.name, stu.clazz, subjectNames(sco.subject_id), sco.score)
      }
    }
    .foreach(println)
}
/**
 * Advanced task 2: prints the students whose total score is greater than the
 * average total score.
 *
 * Output rows: (student id, name, class, total score).
 */
@Test
def score_sum_gather_avg() = {
  // Total score of each student, keyed by student id.
  val totalById: Map[Int, Int] = score
    .groupBy(_.id)
    .map { case (studentId, rows) => (studentId, rows.map(_.score).sum) }

  // Average of the per-student totals.
  val studentCount: Double = totalById.size.toDouble
  val gradeAverage: Double = totalById.values.sum / studentCount

  // Student id -> student record, used to attach name and class.
  val studentById: Map[Int, Students] = students.map(stu => (stu.id, stu)).toMap

  val aboveAverage = totalById.filter { case (_, total) => total > gradeAverage }
  val rows = aboveAverage.map { case (studentId, total) =>
    val stu: Students = studentById(studentId)
    (studentId, stu.name, stu.clazz, total)
  }
  rows.foreach(println)
}
/**
 * Advanced task 3: prints all scores of the students who passed every subject.
 *
 * A score passes when it is at least 60% of the subject's `subject_score`.
 * A student qualifies when the number of distinct subjects passed equals the
 * number of subjects in `subject` (derived from the data rather than assumed
 * to be six).
 *
 * Output rows: (student id, name, class, subject name, score), in the order
 * the students appear in `students`.
 */
@Test
def any_score_gather_avg(): Unit = {
  // Subject id -> (subject name, pass mark).
  val passInfo: Map[Int, (String, Double)] = subject
    .map(sub => (sub.subject_id, (sub.subject_name, sub.subject_score * 0.6)))
    .toMap
  val subjectCount: Int = passInfo.size

  // Ids of students whose passing rows cover every subject.
  val passedAllIds: Set[Int] = score
    .filter(sco => sco.score >= passInfo(sco.subject_id)._2)
    .groupBy(_.id)
    .collect {
      case (id, passed) if passed.map(_.subject_id).toSet.size == subjectCount => id
    }
    .toSet

  // Student id -> all score rows of that student.
  val scoresById: Map[Int, List[Score]] = score.groupBy(_.id)

  students
    .filter(stu => passedAllIds.contains(stu.id))
    .flatMap { stu =>
      scoresById(stu.id).map { sco =>
        (stu.id, stu.name, stu.clazz, passInfo(sco.subject_id)._1, sco.score)
      }
    }
    .foreach(println)
}
/**
 * Advanced task 4: prints the three students with the highest total score in
 * each class.
 *
 * Output rows: (student id, name, class, total score).
 * A student without any score row is counted with a total of 0 instead of
 * aborting the whole computation.
 */
@Test
def clazz_top3(): Unit = {
  // Total score of each student, keyed by student id.
  val totalById: Map[Int, Int] = score
    .groupBy(_.id)
    .map { case (id, rows) => (id, rows.map(_.score).sum) }

  students
    .map(stu => (stu.id, stu.name, stu.clazz, totalById.getOrElse(stu.id, 0)))
    .groupBy(_._3)
    .flatMap { case (_, classmates) =>
      // Highest total first, keep the first three of the class.
      classmates.sortBy(-_._4).take(3)
    }
    .foreach(println)
}
/**
 * Advanced task 5: prints all scores of the 100 students whose subject scores
 * are the most unbalanced.
 *
 * Every score is first normalised to a percentage of the subject's
 * `subject_score`, because subjects have different full marks
 * (e.g. 90 out of 150 becomes 90 / 150 * 100 = 60). Imbalance is the variance
 * of a student's normalised scores; the 100 largest variances are selected.
 *
 * Output rows: (student id, name, class, subject name, raw score), in the
 * order the students appear in `students`, not in ranking order.
 */
@Test
def unbalanceTop100Stu(): Unit = {
  // Subject id -> subject_score, used as the full mark for normalisation.
  val fullMarkBySubject: Map[Int, Int] =
    subject.map(sub => (sub.subject_id, sub.subject_score)).toMap

  // (student id, score as a percentage of the subject's full mark).
  val percentScores: List[(Int, Float)] = score.map { sco =>
    (sco.id, sco.score * 100 / fullMarkBySubject(sco.subject_id).toFloat)
  }

  // Variance of the normalised scores per student, largest first, first 100 ids.
  val mostUnbalancedIds: Set[Int] = percentScores
    .groupBy(_._1)
    .map { case (id, rows) =>
      val values: List[Float] = rows.map(_._2)
      val mean: Float = values.sum / values.size
      val variance: Double = values.map(v => Math.pow(v - mean, 2)).sum / values.size
      (id, variance)
    }
    .toList
    .sortBy(-_._2)
    .take(100)
    .map(_._1)
    .toSet

  // Lookup tables: student id -> score rows, subject id -> subject name.
  val scoresById: Map[Int, List[Score]] = score.groupBy(_.id)
  val subjectNames: Map[Int, String] =
    subject.map(sub => (sub.subject_id, sub.subject_name)).toMap

  students
    .filter(stu => mostUnbalancedIds.contains(stu.id))
    .flatMap { stu =>
      scoresById(stu.id).map { sco =>
        (stu.id, stu.name, stu.clazz, subjectNames(sco.subject_id), sco.score)
      }
    }
    .foreach(println)
}
// The three case classes, one per data file.
// A student record: id, name, age, gender and class name.
case class Students(id: Int, name: String, age: Int, gender: String, clazz: String)
// One score row: `id` is the student id, followed by the subject id and the score obtained.
case class Score(id: Int, subject_id: Int, score: Int)
// A subject: id, name and `subject_score` (used elsewhere in this file as the subject's full mark).
case class Subject(subject_id: Int, subject_name: String, subject_score: Int)
}
|