// Dimensionality reduction (降维) is an important feature-processing technique in machine learning.
package dimensionalityreduction
import org.apache.log4j.{Level, Logger}
import org.apache.spark.SparkContext
import org.apache.spark.mllib.linalg
import org.apache.spark.mllib.linalg.{Matrix, SingularValueDecomposition, Vectors}
import org.apache.spark.mllib.linalg.distributed.RowMatrix
import org.apache.spark.rdd.RDD
/**
 * Demo of singular value decomposition (SVD) on a Spark MLlib [[RowMatrix]].
 *
 * The input is a text file in which every non-blank line is one matrix row,
 * written as whitespace-separated decimal numbers. The program prints the
 * singular values and the right singular vectors, first from a decomposition
 * computed without U and then from one computed with `computeU = true`,
 * followed by the rows of U.
 */
object SingularValue {

  /** Input file used when no path is supplied on the command line. */
  private val DefaultInputPath: String =
    "/home/rjxy/IdeaProjects/spark/spark_mllib_course/src/main/resources/data/a.mat"

  /** Number of leading singular values / vectors requested from `computeSVD`. */
  private val TopK: Int = 3

  /**
   * Program entry point.
   *
   * @param args optional; `args(0)` overrides the input matrix file path.
   *             With no arguments the original hard-coded path is used.
   */
  def main(args: Array[String]): Unit = {
    Logger.getLogger("org").setLevel(Level.OFF)

    val inputPath: String = args.headOption.getOrElse(DefaultInputPath)
    val sc = new SparkContext("local[*]", "li")
    try {
      // Trim each line, drop blank ones and split on any run of whitespace so
      // that stray spaces or an empty trailing line do not make toDouble throw.
      // The RDD is cached because both decompositions below traverse it
      // several times.
      val data: RDD[linalg.Vector] = sc.textFile(inputPath)
        .map(_.trim)
        .filter(_.nonEmpty)
        .map(line => Vectors.dense(line.split("\\s+").map(_.toDouble)))
        .cache()
      val matrix: RowMatrix = new RowMatrix(data)

      // First decomposition: computeU is left at its default, so only s and V
      // are produced; U is null here and is printed as such.
      val withoutU: SingularValueDecomposition[RowMatrix, Matrix] = matrix.computeSVD(TopK)
      println(withoutU.s)
      println(withoutU.V)
      println(withoutU.U)

      // Second decomposition: explicitly request U as well.
      val withU: SingularValueDecomposition[RowMatrix, Matrix] =
        matrix.computeSVD(TopK, computeU = true)
      println(withU.s)
      println(withU.V)

      // Bring the rows of U to the driver before printing. The previous
      // println(rows.foreach(println)) printed from executor tasks and then
      // printed the Unit result "()".
      withU.U.rows.collect().foreach(println)

      // NOTE(review): bare reference to an object defined outside this file,
      // kept in its original position (while the SparkContext is still
      // active) in case it is relied on for initialization side effects;
      // confirm whether it is needed at all.
      PrincipalComponentAnalysis
    } finally {
      // Always release the local Spark resources, even if parsing or the
      // decomposition fails.
      sc.stop()
    }
  }
}
|