Several ways to read a file:
1. Spark reads the HTTP data directly and produces a Dataset
2. InputStreamReader
3. Source.fromInputStream
package com.ku.test
import java.io.{BufferedReader, InputStreamReader}
import java.net.{URL, URLConnection}
import org.apache.http.client.methods.HttpGet
import org.apache.http.impl.client.DefaultHttpClient
import scala.io.Source
import org.apache.spark.SparkFiles
import org.apache.spark.sql.SparkSession
object TokenTest {

  def main(args: Array[String]): Unit = {
    // Method 1: SparkContext.addFile accepts http/https/ftp URIs and downloads
    // the file to a local temp directory; SparkFiles.get resolves that local
    // path, which spark.read.textFile then loads as a Dataset[String]
    val spark = SparkSession.builder()
      .appName("tokenTest")
      .master("local")
      .getOrCreate()
    spark.sparkContext.setLogLevel("WARN")

    val fileName = "test.txt"
    spark.sparkContext.addFile("http://127.0.0.1/test/" + fileName)
    spark.read
      .textFile("file:///" + SparkFiles.get(fileName))
      .filter(s => !s.isEmpty)
      .show(truncate = true, numRows = 20000)
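
    // Variant of method 1 (a sketch, not in the original post): for a small
    // file the lines can be fetched on the driver with scala.io.Source and
    // turned into a Dataset directly, skipping addFile/SparkFiles;
    // spark.implicits._ supplies the required Encoder[String]
    import spark.implicits._
    val fetched = Source.fromURL("http://127.0.0.1/test/" + fileName).getLines().toSeq
    spark.createDataset(fetched)
      .filter(s => !s.isEmpty)
      .show()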
    // Method 2: java.net.URLConnection with a BufferedReader;
    // readLine() returns null once the stream is exhausted
    val url2: URL = new URL("http://127.0.0.1/test/test.txt")
    val urlConn: URLConnection = url2.openConnection()
    val in = new BufferedReader(new InputStreamReader(urlConn.getInputStream))
    var line: String = in.readLine()
    while (line != null) {
      println(line)
      line = in.readLine()
    }
    in.close()
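
    // Variant of method 2 (a sketch, not in the original post): scala.io.Source
    // can open the URL directly and iterate its lines lazily, which replaces
    // the manual BufferedReader loop
    val viaSource = Source.fromURL("http://127.0.0.1/test/test.txt")
    try viaSource.getLines().foreach(println)
    finally viaSource.close()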
    // Method 3: Apache HttpClient + Source.fromInputStream
    // (DefaultHttpClient is deprecated since HttpClient 4.3; see the sketch
    // after this listing for the replacement API)
    val url3 = "http://127.0.0.1/test/test.txt"
    val httpclient = new DefaultHttpClient()
    try {
      val response = httpclient.execute(new HttpGet(url3))
      val entity = response.getEntity
      val file = Source.fromInputStream(entity.getContent) // or .getLines().mkString
      for (line <- file.getLines()) {
        println(line)
      }
      file.close()
    } finally {
      httpclient.getConnectionManager.shutdown()
    }

    spark.stop()
  }
}
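DefaultHttpClient was deprecated in HttpClient 4.3 in favor of the builder-based HttpClients / CloseableHttpClient API. Below is a minimal sketch of method 3 against that newer API, assuming the same test URL; the object name is a placeholder:

package com.ku.test

import org.apache.http.client.methods.HttpGet
import org.apache.http.impl.client.HttpClients
import scala.io.Source

object CloseableClientSketch {
  def main(args: Array[String]): Unit = {
    val client = HttpClients.createDefault()
    try {
      // execute returns a CloseableHttpResponse; closing it releases the
      // underlying connection back to the pool
      val response = client.execute(new HttpGet("http://127.0.0.1/test/test.txt"))
      try {
        val body = Source.fromInputStream(response.getEntity.getContent)
        try body.getLines().foreach(println)
        finally body.close()
      } finally response.close()
    } finally client.close()
  }
}

The nested try/finally blocks make each resource's lifetime explicit, which the old DefaultHttpClient API left easy to get wrong.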
Just noting this down for now; a more detailed write-up will follow.