working feature parsing

This commit is contained in:
Joe Darby
2016-12-14 00:12:49 +00:00
parent 36ee19d7d9
commit a832da7a77
7 changed files with 32 additions and 23 deletions
+3 -8
View File
@@ -81,17 +81,12 @@
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-library</artifactId>
<version>2.11.8</version>
<version>2.10.4</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.11</artifactId>
<version>2.0.2</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_2.11</artifactId>
<version>2.0.2</version>
<artifactId>spark-core_2.10</artifactId>
<version>1.0.0-cdh5.1.0</version>
</dependency>
</dependencies>
</project>
+28 -14
View File
@@ -1,5 +1,7 @@
package bdp.spark.KMeans
import scala.collection.mutable.ListBuffer
import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._
import org.apache.spark.SparkConf
@@ -7,28 +9,40 @@ import org.apache.spark.SparkConf
object SparkKMeans {
/**
 * Entry point of the job.
 *
 * Reads the text file named by the first command-line argument, turns every
 * line into a feature list with `getFeatures`, joins each list into a single
 * comma-separated string with `makeListPrintable`, and prints one string per
 * input line.
 *
 * @param args command-line arguments; `args(0)` is the path handed to
 *             `SparkContext.textFile`
 * @throws IllegalArgumentException if no input path is supplied
 */
def main(args: Array[String]): Unit = {
  // Fail with a usage message instead of an ArrayIndexOutOfBoundsException.
  require(args.nonEmpty, "usage: SparkKMeans <input path>")

  val sc = new SparkContext(new SparkConf().setAppName("Spark KMeans"))
  try {
    val lines = sc.textFile(args(0))
    val featureSet = lines.map(getFeatures)
    val printableFeatureSet = featureSet.map(makeListPrintable)
    // Only the comma-joined form is printed; printing the raw List(...) form
    // as well duplicated every record in the output.
    // NOTE(review): RDD.foreach runs where the partitions are processed, so on
    // a cluster this output lands in the executors' stdout, not the driver's.
    printableFeatureSet.foreach(println)
  } finally {
    // Always release the context, even when the job fails.
    sc.stop()
  }
}
/**
 * Extracts a fixed-size list of feature strings from one line of input.
 *
 * The line is split on double quotes, so for attribute-style text such as
 * `<row Id="7" Reputation="101" ... />` the fragments alternate between
 * ` Name=` markers and the quoted values that follow them.
 *
 * The result always has eight elements, in this order:
 *   1. the first quoted value on the line (presumably the row id — confirm
 *      against the input format),
 *   2. Reputation, 3. CreationDate, 4. LastAccessDate, 5. Views,
 *   6. UpVotes, 7. DownVotes, 8. Age.
 *
 * CreationDate and LastAccessDate are cut to their first ten characters
 * (the `yyyy-MM-dd` part of a timestamp such as `2014-05-13T21:29:22.820`).
 * Any value that is missing from the line is represented by an empty string,
 * so lines without attributes no longer make the function throw.
 *
 * @param line one raw line of the input file
 * @return the eight feature values described above, `""` where absent
 */
def getFeatures(line: String): List[String] = {
  val fragments = line.split("\"")

  // Markers keep their leading space and trailing '=' because that is exactly
  // what remains between two quoted values after splitting on '"'.
  val featureIDs = List(" Reputation=", " CreationDate=", " LastAccessDate=",
    " Views=", " UpVotes=", " DownVotes=", " Age=")
  // Attributes whose values are truncated to the date portion.
  val dateIDs = Set(" CreationDate=", " LastAccessDate=")

  // Value that follows `marker`, or "" when the marker is absent or is the
  // last fragment on the line.
  def valueAfter(marker: String): String = {
    val index = fragments.indexOf(marker)
    if (index >= 0 && index + 1 < fragments.length) fragments(index + 1) else ""
  }

  // A line without any quotes yields a single fragment; fall back to "".
  val rowId = if (fragments.length > 1) fragments(1) else ""

  val values = featureIDs.map { marker =>
    val value = valueAfter(marker)
    // `take` is safe on strings shorter than ten characters, unlike substring.
    if (dateIDs.contains(marker)) value.take(10) else value
  }

  rowId :: values
}
/**
 * Renders a feature list as a single line of text.
 *
 * @param featureList the values to join
 * @return the values separated by a comma followed by a space; an empty
 *         string for an empty list
 */
def makeListPrintable(featureList: List[String]): String =
  featureList.mkString(", ")
}
Binary file not shown.
Binary file not shown.
Binary file not shown.
+1 -1
View File
@@ -1,5 +1,5 @@
#Generated by Maven
#Tue Dec 13 19:39:54 GMT 2016
#Wed Dec 14 00:04:22 GMT 2016
version=0.0.1
groupId=uk.ac.qmul.spark
artifactId=KMeans