first iteration

This commit is contained in:
joedarby
2016-12-15 16:39:43 +00:00
parent e90318a9a7
commit 3d39ad082c
2 changed files with 66 additions and 29 deletions
+53 -21
View File
@@ -2,8 +2,8 @@
<project version="4">
<component name="ChangeListManager">
<list default="true" id="b41a9788-25b3-4e04-923f-17cde259631b" name="Default" comment="">
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/.idea/workspace.xml" afterPath="$PROJECT_DIR$/.idea/workspace.xml" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/src/main/scala/KMeans.scala" afterPath="$PROJECT_DIR$/src/main/scala/KMeans.scala" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/src/main/scala/Main.scala" afterPath="$PROJECT_DIR$/src/main/scala/Main.scala" />
</list>
<ignored path="$PROJECT_DIR$/target/" />
<option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
@@ -22,8 +22,8 @@
<file leaf-file-name="KMeans.scala" pinned="false" current-in-tab="true">
<entry file="file://$PROJECT_DIR$/src/main/scala/KMeans.scala">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="-450">
<caret line="5" column="0" lean-forward="true" selection-start-line="5" selection-start-column="0" selection-end-line="5" selection-end-column="0" />
<state relative-caret-position="353">
<caret line="20" column="31" lean-forward="true" selection-start-line="20" selection-start-column="31" selection-end-line="20" selection-end-column="31" />
<folding>
<element signature="e#23#54#0" expanded="true" />
</folding>
@@ -34,8 +34,8 @@
<file leaf-file-name="Main.scala" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/src/main/scala/Main.scala">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="360">
<caret line="33" column="1" lean-forward="true" selection-start-line="33" selection-start-column="1" selection-end-line="33" selection-end-column="1" />
<state relative-caret-position="209">
<caret line="25" column="0" lean-forward="true" selection-start-line="25" selection-start-column="0" selection-end-line="25" selection-end-column="0" />
<folding />
</state>
</provider>
@@ -44,9 +44,11 @@
<file leaf-file-name="XMLParser.scala" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/src/main/scala/XMLParser.scala">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="144">
<caret line="15" column="7" lean-forward="false" selection-start-line="15" selection-start-column="7" selection-end-line="15" selection-end-column="7" />
<folding />
<state relative-caret-position="396">
<caret line="52" column="3" lean-forward="true" selection-start-line="52" selection-start-column="3" selection-end-line="52" selection-end-column="3" />
<folding>
<element signature="e#23#59#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
@@ -110,6 +112,32 @@
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
</PATH_ELEMENT>
</PATH>
<PATH>
<PATH_ELEMENT>
<option name="myItemId" value="KMeans" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" />
</PATH_ELEMENT>
<PATH_ELEMENT>
<option name="myItemId" value="Big_Data_Assignment_2" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
</PATH_ELEMENT>
<PATH_ELEMENT>
<option name="myItemId" value="src" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
</PATH_ELEMENT>
<PATH_ELEMENT>
<option name="myItemId" value="main" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
</PATH_ELEMENT>
<PATH_ELEMENT>
<option name="myItemId" value="scala" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
</PATH_ELEMENT>
<PATH_ELEMENT>
<option name="myItemId" value="XMLParser.scala" />
<option name="myItemType" value="org.jetbrains.plugins.scala.components.ScalaDefsProjectViewProvider$ScalaFileTreeNode" />
</PATH_ELEMENT>
</PATH>
<PATH>
<PATH_ELEMENT>
<option name="myItemId" value="KMeans" />
@@ -462,15 +490,9 @@
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="270">
<caret line="15" column="7" lean-forward="false" selection-start-line="15" selection-start-column="7" selection-end-line="15" selection-end-column="7" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/src/main/scala/XMLParser.scala">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="144">
<caret line="15" column="7" lean-forward="false" selection-start-line="15" selection-start-column="7" selection-end-line="15" selection-end-column="7" />
<folding />
<folding>
<element signature="e#23#59#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
@@ -482,18 +504,28 @@
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/src/main/scala/XMLParser.scala">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="396">
<caret line="52" column="3" lean-forward="true" selection-start-line="52" selection-start-column="3" selection-end-line="52" selection-end-column="3" />
<folding>
<element signature="e#23#59#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/src/main/scala/Main.scala">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="360">
<caret line="33" column="1" lean-forward="true" selection-start-line="33" selection-start-column="1" selection-end-line="33" selection-end-column="1" />
<state relative-caret-position="209">
<caret line="25" column="0" lean-forward="true" selection-start-line="25" selection-start-column="0" selection-end-line="25" selection-end-column="0" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/src/main/scala/KMeans.scala">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="-450">
<caret line="5" column="0" lean-forward="true" selection-start-line="5" selection-start-column="0" selection-end-line="5" selection-end-column="0" />
<state relative-caret-position="353">
<caret line="20" column="31" lean-forward="true" selection-start-line="20" selection-start-column="31" selection-end-line="20" selection-end-column="31" />
<folding>
<element signature="e#23#54#0" expanded="true" />
</folding>
+13 -8
View File
@@ -10,7 +10,7 @@ object KMeans {
*/
//Create a map to store each data row with its closest cluster index as key
def train(dataset : DataFrame) : RDD[(Int,Row)] = {
def train(dataset : DataFrame) : RDD[(Int,ArrayBuffer[Float])] = {
val rows = dataset.rdd
val K = 5 //number of intended clusters
val n = rows.count() //number of datapoints
@@ -37,6 +37,7 @@ object KMeans {
norm = norm + Math.pow(datapoint.getFloat(a) - centre.getFloat(a), 2.0)
}
norm = Math.pow(norm, 0.5)
norm
}
def assignCluster(row : Row, centres: Array[Row], m : Int, K :Int): Int = {
@@ -52,8 +53,11 @@ object KMeans {
closestCentre
}
def calculateNewCentres(clusterMap : RDD[(Int,Row)]): RDD[(Int,Row)] = {
val newCentres = clusterMap.reduceByKey((a,b) => averageRow(a,b))
/** Computes a new centre vector for every cluster index.
  *
  * Each row is first converted to an `ArrayBuffer[Float]` by reading every
  * field with `getFloat`; the buffers that share a cluster index are then
  * combined with `averageRow`.
  *
  * The previous version cast the `Row` itself with
  * `asInstanceOf[ArrayBuffer[Float]]`. A `Row` is not an `ArrayBuffer`, so
  * that cast fails with a `ClassCastException` as soon as the job runs.
  *
  * @param clusterMap pairs of (cluster index, data row); every column is read
  *                   as a Float, matching the `getFloat` calls used elsewhere
  *                   in this object
  * @return pairs of (cluster index, combined feature vector)
  */
def calculateNewCentres(clusterMap : RDD[(Int,Row)]): RDD[(Int,ArrayBuffer[Float])] = {
  // Copy the row's fields into a buffer instead of casting the Row.
  val data = clusterMap.map { case (clusterIndex, row) =>
    (clusterIndex, ArrayBuffer.tabulate(row.length)(i => row.getFloat(i)))
  }
  // NOTE(review): reduceByKey averages rows pairwise, which is not the same
  // as the arithmetic mean of the whole cluster once a cluster has more than
  // two points — confirm whether a sum-and-count reduction is intended.
  data.reduceByKey((a, b) => averageRow(a, b))
}
@@ -61,7 +65,7 @@ object KMeans {
var cluster = clusterMap.filter{case (a,_) => a == 0}
var data = cluster.map((_,a) => a :Row)*/
}
/*def getCentre(cluster : RDD[(Int,Row)], oldCentre : Row, clusterIndex :Int) : Row = {
val unWrappedData :RDD[Row] = cluster.map(x => x._2)
@@ -69,11 +73,12 @@ object KMeans {
return features
}*/
def averageRow(a :Row, b:Row) : Row = {
val newRow = new ArrayBuffer[Float]
for (i <- a.size) {
val avgI = (a.getFloat(i) + b.getFloat(i)) /2
/** Element-wise average of two feature vectors.
  *
  * The previous version wrote `newRow(i) = avgI` into a freshly created,
  * empty buffer. Index assignment on an `ArrayBuffer` only updates existing
  * elements, so it threw `IndexOutOfBoundsException` on the first iteration.
  * The result is now built to the right length directly.
  *
  * @param a first vector; its length determines the length of the result
  * @param b second vector; must have at least `a.length` elements, otherwise
  *          indexing into it throws `IndexOutOfBoundsException`
  * @return a new buffer whose i-th element is `(a(i) + b(i)) / 2`
  */
def averageRow(a :ArrayBuffer[Float], b:ArrayBuffer[Float]) : ArrayBuffer[Float] = {
  ArrayBuffer.tabulate(a.length)(i => (a(i) + b(i)) / 2)
}
}