Try converting Row to List[Float]
This commit is contained in:
Generated
+42
-33
@@ -2,7 +2,7 @@
|
||||
<project version="4">
|
||||
<component name="ChangeListManager">
|
||||
<list default="true" id="74fa95ce-dfd4-40da-a7a1-b336badfaea8" name="Default" comment="">
|
||||
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/src/main/scala/XMLParser.scala" afterPath="$PROJECT_DIR$/src/main/scala/XMLParser.scala" />
|
||||
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/src/main/scala/KMeans.scala" afterPath="$PROJECT_DIR$/src/main/scala/KMeans.scala" />
|
||||
</list>
|
||||
<ignored path="$PROJECT_DIR$/target/" />
|
||||
<option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
|
||||
@@ -18,11 +18,11 @@
|
||||
<component name="ExecutionTargetManager" SELECTED_TARGET="default_target" />
|
||||
<component name="FileEditorManager">
|
||||
<leaf SIDE_TABS_SIZE_LIMIT_KEY="300">
|
||||
<file leaf-file-name="KMeans.scala" pinned="false" current-in-tab="false">
|
||||
<file leaf-file-name="KMeans.scala" pinned="false" current-in-tab="true">
|
||||
<entry file="file://$PROJECT_DIR$/src/main/scala/KMeans.scala">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="509">
|
||||
<caret line="39" column="8" lean-forward="true" selection-start-line="39" selection-start-column="8" selection-end-line="39" selection-end-column="8" />
|
||||
<state relative-caret-position="414">
|
||||
<caret line="23" column="17" lean-forward="true" selection-start-line="23" selection-start-column="17" selection-end-line="23" selection-end-column="17" />
|
||||
<folding>
|
||||
<element signature="e#23#54#0" expanded="true" />
|
||||
</folding>
|
||||
@@ -35,12 +35,15 @@
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="324">
|
||||
<caret line="38" column="34" lean-forward="false" selection-start-line="38" selection-start-column="34" selection-end-line="38" selection-end-column="34" />
|
||||
<folding />
|
||||
<folding>
|
||||
<element signature="n#!!doc" expanded="false" />
|
||||
<element signature="e#832#872#0" expanded="false" />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
</file>
|
||||
<file leaf-file-name="XMLParser.scala" pinned="false" current-in-tab="true">
|
||||
<file leaf-file-name="XMLParser.scala" pinned="false" current-in-tab="false">
|
||||
<entry file="file://$PROJECT_DIR$/src/main/scala/XMLParser.scala">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="441">
|
||||
@@ -55,8 +58,8 @@
|
||||
<file leaf-file-name="Main.scala" pinned="false" current-in-tab="false">
|
||||
<entry file="file://$PROJECT_DIR$/src/main/scala/Main.scala">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="401">
|
||||
<caret line="38" column="19" lean-forward="true" selection-start-line="38" selection-start-column="19" selection-end-line="38" selection-end-column="19" />
|
||||
<state relative-caret-position="419">
|
||||
<caret line="39" column="28" lean-forward="true" selection-start-line="39" selection-start-column="28" selection-end-line="39" selection-end-column="28" />
|
||||
<folding>
|
||||
<element signature="e#22#58#0" expanded="true" />
|
||||
</folding>
|
||||
@@ -78,8 +81,8 @@
|
||||
<option name="CHANGED_PATHS">
|
||||
<list>
|
||||
<option value="$PROJECT_DIR$/src/main/scala/Main.scala" />
|
||||
<option value="$PROJECT_DIR$/src/main/scala/KMeans.scala" />
|
||||
<option value="$PROJECT_DIR$/src/main/scala/XMLParser.scala" />
|
||||
<option value="$PROJECT_DIR$/src/main/scala/KMeans.scala" />
|
||||
</list>
|
||||
</option>
|
||||
</component>
|
||||
@@ -601,12 +604,12 @@
|
||||
<updated>1481830590764</updated>
|
||||
<workItem from="1481830593703" duration="700000" />
|
||||
<workItem from="1481831304788" duration="5133000" />
|
||||
<workItem from="1481837779668" duration="2057000" />
|
||||
<workItem from="1481837779668" duration="4028000" />
|
||||
</task>
|
||||
<servers />
|
||||
</component>
|
||||
<component name="TimeTrackingManager">
|
||||
<option name="totallyTimeSpent" value="7890000" />
|
||||
<option name="totallyTimeSpent" value="9861000" />
|
||||
</component>
|
||||
<component name="ToolWindowManager">
|
||||
<frame x="77" y="122" width="1400" height="893" extended-state="0" />
|
||||
@@ -669,7 +672,10 @@
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="324">
|
||||
<caret line="38" column="34" lean-forward="false" selection-start-line="38" selection-start-column="34" selection-end-line="38" selection-end-column="34" />
|
||||
<folding />
|
||||
<folding>
|
||||
<element signature="n#!!doc" expanded="false" />
|
||||
<element signature="e#832#872#0" expanded="false" />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
@@ -759,7 +765,10 @@
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="324">
|
||||
<caret line="38" column="34" lean-forward="false" selection-start-line="38" selection-start-column="34" selection-end-line="38" selection-end-column="34" />
|
||||
<folding />
|
||||
<folding>
|
||||
<element signature="n#!!doc" expanded="false" />
|
||||
<element signature="e#832#872#0" expanded="false" />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
@@ -785,26 +794,6 @@
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/src/main/scala/Main.scala">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="401">
|
||||
<caret line="38" column="19" lean-forward="true" selection-start-line="38" selection-start-column="19" selection-end-line="38" selection-end-column="19" />
|
||||
<folding>
|
||||
<element signature="e#22#58#0" expanded="true" />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/src/main/scala/KMeans.scala">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="509">
|
||||
<caret line="39" column="8" lean-forward="true" selection-start-line="39" selection-start-column="8" selection-end-line="39" selection-end-column="8" />
|
||||
<folding>
|
||||
<element signature="e#23#54#0" expanded="true" />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="jar:///usr/lib/jvm/java-8-openjdk-amd64/jre/lib/rt.jar!/java/util/Date.class">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="262">
|
||||
@@ -816,6 +805,16 @@
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/src/main/scala/Main.scala">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="419">
|
||||
<caret line="39" column="28" lean-forward="true" selection-start-line="39" selection-start-column="28" selection-end-line="39" selection-end-column="28" />
|
||||
<folding>
|
||||
<element signature="e#22#58#0" expanded="true" />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/src/main/scala/XMLParser.scala">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="441">
|
||||
@@ -826,5 +825,15 @@
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/src/main/scala/KMeans.scala">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="414">
|
||||
<caret line="23" column="17" lean-forward="true" selection-start-line="23" selection-start-column="17" selection-end-line="23" selection-end-column="17" />
|
||||
<folding>
|
||||
<element signature="e#23#54#0" expanded="true" />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
</component>
|
||||
</project>
|
||||
+15
-14
@@ -2,6 +2,7 @@ package ClusterSOData
|
||||
|
||||
import org.apache.spark.rdd.RDD
|
||||
import org.apache.spark.sql._
|
||||
|
||||
import scala.collection.mutable.ArrayBuffer
|
||||
|
||||
object KMeans {
|
||||
@@ -10,10 +11,11 @@ object KMeans {
|
||||
*/
|
||||
//Create a map to store each data row with its closest cluster index as key
|
||||
|
||||
def train(dataset : DataFrame) : RDD[(Int,Row)] = {
|
||||
def train(dataset : DataFrame) : RDD[(Int,List[Float])] = {
|
||||
val rows = dataset.rdd
|
||||
val rowsAsArray = dataset.map(row => List(row.getInt(0).toFloat, row.getInt(1).toFloat, row.getInt(2).toFloat) )
|
||||
val K = 5 //number of intended clusters
|
||||
val n = rows.count() //number of datapoints
|
||||
//val n = rows.count() //number of datapoints
|
||||
val m = 3 //number of features
|
||||
//var centres = new ArrayBuffer[Row]
|
||||
|
||||
@@ -24,23 +26,23 @@ object KMeans {
|
||||
for (a <- 0 until K) {
|
||||
centres(a) = rows(r.ne
|
||||
}*/
|
||||
val centres = rows.takeSample(false, K, System.nanoTime().toInt)
|
||||
val clusterMap :RDD[(Int,Row)]= rows.map(row => (assignCluster(row,centres,m,K),row))
|
||||
val centres = rowsAsArray.takeSample(false, K, System.nanoTime().toInt)
|
||||
val clusterMap :RDD[(Int,List[Float])]= rowsAsArray.map(row => (assignCluster(row,centres,m,K),row))
|
||||
val newCentres = calculateNewCentres(clusterMap)
|
||||
newCentres
|
||||
|
||||
}
|
||||
|
||||
/** Euclidean (L2) distance between a data point and a cluster centre.
  *
  * Only the first `m` coordinates of each list take part in the distance.
  *
  * @param datapoint feature values of the point
  * @param centre    feature values of the centre
  * @param m         number of leading features to compare
  * @return square root of the summed squared coordinate differences
  *         (0.0 when `m` is not positive)
  * @throws IndexOutOfBoundsException if either list has fewer than `m` elements
  */
def calculateNorm(datapoint : List[Float], centre : List[Float], m: Int): Double = {
  // `until` keeps the indices at 0 .. m-1; an inclusive `to` would read one
  // element past the m features.
  val sumOfSquares = (0 until m).foldLeft(0.0) { (acc, i) =>
    val diff = datapoint(i).toDouble - centre(i)
    acc + diff * diff
  }
  math.sqrt(sumOfSquares)
}
|
||||
|
||||
def assignCluster(row : Row, centres: Array[Row], m : Int, K :Int): Int = {
|
||||
def assignCluster(row : List[Float], centres: Array[List[Float]], m : Int, K :Int): Int = {
|
||||
var smallestNorm = 99999999999.0
|
||||
var closestCentre = 0
|
||||
for (centreNumber <- 0 until K) {
|
||||
@@ -53,7 +55,7 @@ object KMeans {
|
||||
closestCentre
|
||||
}
|
||||
|
||||
def calculateNewCentres(clusterMap : RDD[(Int,Row)]): RDD[(Int,Row)] = {
|
||||
def calculateNewCentres(clusterMap : RDD[(Int,List[Float])]): RDD[(Int,List[Float])] = {
|
||||
//val data = clusterMap.map(x => (x._1, x._2.asInstanceOf[ArrayBuffer[Double]]))
|
||||
val newCentres = clusterMap.reduceByKey((a, b) => averageRow(a, b))
|
||||
//val singleCluster = clusterMap.filter(x => x._1 == 0)
|
||||
@@ -85,14 +87,13 @@ object KMeans {
|
||||
newRow
|
||||
}*/
|
||||
|
||||
/** Element-wise mean of two feature vectors.
  *
  * @param a first feature vector
  * @param b second feature vector
  * @return a list whose i-th element is `(a(i) + b(i)) / 2`; its length is that
  *         of the shorter input (empty when either input is empty)
  */
def averageRow(a:List[Float], b:List[Float]) : List[Float] = {
  // Built with zip/map instead of index-assigning into a freshly created (empty)
  // ArrayBuffer, which throws IndexOutOfBoundsException on the first write.
  //
  // NOTE(review): calculateNewCentres passes this function to reduceByKey. A
  // pairwise mean is not associative, so reducing more than two rows does not
  // give the arithmetic mean of the cluster — confirm this is intended.
  a.zip(b).map { case (x, y) => (x + y) / 2.0f }
}
|
||||
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user