first iteration
This commit is contained in:
Generated
+53
-21
@@ -2,8 +2,8 @@
|
||||
<project version="4">
|
||||
<component name="ChangeListManager">
|
||||
<list default="true" id="b41a9788-25b3-4e04-923f-17cde259631b" name="Default" comment="">
|
||||
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/.idea/workspace.xml" afterPath="$PROJECT_DIR$/.idea/workspace.xml" />
|
||||
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/src/main/scala/KMeans.scala" afterPath="$PROJECT_DIR$/src/main/scala/KMeans.scala" />
|
||||
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/src/main/scala/Main.scala" afterPath="$PROJECT_DIR$/src/main/scala/Main.scala" />
|
||||
</list>
|
||||
<ignored path="$PROJECT_DIR$/target/" />
|
||||
<option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
|
||||
@@ -22,8 +22,8 @@
|
||||
<file leaf-file-name="KMeans.scala" pinned="false" current-in-tab="true">
|
||||
<entry file="file://$PROJECT_DIR$/src/main/scala/KMeans.scala">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="-450">
|
||||
<caret line="5" column="0" lean-forward="true" selection-start-line="5" selection-start-column="0" selection-end-line="5" selection-end-column="0" />
|
||||
<state relative-caret-position="353">
|
||||
<caret line="20" column="31" lean-forward="true" selection-start-line="20" selection-start-column="31" selection-end-line="20" selection-end-column="31" />
|
||||
<folding>
|
||||
<element signature="e#23#54#0" expanded="true" />
|
||||
</folding>
|
||||
@@ -34,8 +34,8 @@
|
||||
<file leaf-file-name="Main.scala" pinned="false" current-in-tab="false">
|
||||
<entry file="file://$PROJECT_DIR$/src/main/scala/Main.scala">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="360">
|
||||
<caret line="33" column="1" lean-forward="true" selection-start-line="33" selection-start-column="1" selection-end-line="33" selection-end-column="1" />
|
||||
<state relative-caret-position="209">
|
||||
<caret line="25" column="0" lean-forward="true" selection-start-line="25" selection-start-column="0" selection-end-line="25" selection-end-column="0" />
|
||||
<folding />
|
||||
</state>
|
||||
</provider>
|
||||
@@ -44,9 +44,11 @@
|
||||
<file leaf-file-name="XMLParser.scala" pinned="false" current-in-tab="false">
|
||||
<entry file="file://$PROJECT_DIR$/src/main/scala/XMLParser.scala">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="144">
|
||||
<caret line="15" column="7" lean-forward="false" selection-start-line="15" selection-start-column="7" selection-end-line="15" selection-end-column="7" />
|
||||
<folding />
|
||||
<state relative-caret-position="396">
|
||||
<caret line="52" column="3" lean-forward="true" selection-start-line="52" selection-start-column="3" selection-end-line="52" selection-end-column="3" />
|
||||
<folding>
|
||||
<element signature="e#23#59#0" expanded="true" />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
@@ -110,6 +112,32 @@
|
||||
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
|
||||
</PATH_ELEMENT>
|
||||
</PATH>
|
||||
<PATH>
|
||||
<PATH_ELEMENT>
|
||||
<option name="myItemId" value="KMeans" />
|
||||
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" />
|
||||
</PATH_ELEMENT>
|
||||
<PATH_ELEMENT>
|
||||
<option name="myItemId" value="Big_Data_Assignment_2" />
|
||||
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
|
||||
</PATH_ELEMENT>
|
||||
<PATH_ELEMENT>
|
||||
<option name="myItemId" value="src" />
|
||||
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
|
||||
</PATH_ELEMENT>
|
||||
<PATH_ELEMENT>
|
||||
<option name="myItemId" value="main" />
|
||||
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
|
||||
</PATH_ELEMENT>
|
||||
<PATH_ELEMENT>
|
||||
<option name="myItemId" value="scala" />
|
||||
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
|
||||
</PATH_ELEMENT>
|
||||
<PATH_ELEMENT>
|
||||
<option name="myItemId" value="XMLParser.scala" />
|
||||
<option name="myItemType" value="org.jetbrains.plugins.scala.components.ScalaDefsProjectViewProvider$ScalaFileTreeNode" />
|
||||
</PATH_ELEMENT>
|
||||
</PATH>
|
||||
<PATH>
|
||||
<PATH_ELEMENT>
|
||||
<option name="myItemId" value="KMeans" />
|
||||
@@ -462,15 +490,9 @@
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="270">
|
||||
<caret line="15" column="7" lean-forward="false" selection-start-line="15" selection-start-column="7" selection-end-line="15" selection-end-column="7" />
|
||||
<folding />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/src/main/scala/XMLParser.scala">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="144">
|
||||
<caret line="15" column="7" lean-forward="false" selection-start-line="15" selection-start-column="7" selection-end-line="15" selection-end-column="7" />
|
||||
<folding />
|
||||
<folding>
|
||||
<element signature="e#23#59#0" expanded="true" />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
@@ -482,18 +504,28 @@
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/src/main/scala/XMLParser.scala">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="396">
|
||||
<caret line="52" column="3" lean-forward="true" selection-start-line="52" selection-start-column="3" selection-end-line="52" selection-end-column="3" />
|
||||
<folding>
|
||||
<element signature="e#23#59#0" expanded="true" />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/src/main/scala/Main.scala">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="360">
|
||||
<caret line="33" column="1" lean-forward="true" selection-start-line="33" selection-start-column="1" selection-end-line="33" selection-end-column="1" />
|
||||
<state relative-caret-position="209">
|
||||
<caret line="25" column="0" lean-forward="true" selection-start-line="25" selection-start-column="0" selection-end-line="25" selection-end-column="0" />
|
||||
<folding />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/src/main/scala/KMeans.scala">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="-450">
|
||||
<caret line="5" column="0" lean-forward="true" selection-start-line="5" selection-start-column="0" selection-end-line="5" selection-end-column="0" />
|
||||
<state relative-caret-position="353">
|
||||
<caret line="20" column="31" lean-forward="true" selection-start-line="20" selection-start-column="31" selection-end-line="20" selection-end-column="31" />
|
||||
<folding>
|
||||
<element signature="e#23#54#0" expanded="true" />
|
||||
</folding>
|
||||
|
||||
@@ -10,7 +10,7 @@ object KMeans {
|
||||
*/
|
||||
//Create a map to store each data row with its closest cluster index as key
|
||||
|
||||
def train(dataset : DataFrame) : RDD[(Int,Row)] = {
|
||||
def train(dataset : DataFrame) : RDD[(Int,ArrayBuffer[Float])] = {
|
||||
val rows = dataset.rdd
|
||||
val K = 5 //number of intended clusters
|
||||
val n = rows.count() //number of datapoints
|
||||
@@ -37,6 +37,7 @@ object KMeans {
|
||||
norm = norm + Math.pow(datapoint.getFloat(a) - centre.getFloat(a), 2.0)
|
||||
}
|
||||
norm = Math.pow(norm, 0.5)
|
||||
norm
|
||||
}
|
||||
|
||||
def assignCluster(row : Row, centres: Array[Row], m : Int, K :Int): Int = {
|
||||
@@ -52,8 +53,11 @@ object KMeans {
|
||||
closestCentre
|
||||
}
|
||||
|
||||
def calculateNewCentres(clusterMap : RDD[(Int,Row)]): RDD[(Int,Row)] = {
|
||||
val newCentres = clusterMap.reduceByKey((a,b) => averageRow(a,b))
|
||||
def calculateNewCentres(clusterMap : RDD[(Int,Row)]): RDD[(Int,ArrayBuffer[Float])] = {
|
||||
val data = clusterMap.map(x => (x._1, x._2.asInstanceOf[ArrayBuffer[Float]]))
|
||||
val newCentres = data.reduceByKey((a, b) => averageRow(a, b))
|
||||
newCentres
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -61,7 +65,7 @@ object KMeans {
|
||||
var cluster = clusterMap.filter{case (a,_) => a == 0}
|
||||
var data = cluster.map((_,a) => a :Row)*/
|
||||
|
||||
}
|
||||
|
||||
|
||||
/*def getCentre(cluster : RDD[(Int,Row)], oldCentre : Row, clusterIndex :Int) : Row = {
|
||||
val unWrappedData :RDD[Row] = cluster.map(x => x._2)
|
||||
@@ -69,11 +73,12 @@ object KMeans {
|
||||
return features
|
||||
}*/
|
||||
|
||||
def averageRow(a :Row, b:Row) : Row = {
|
||||
val newRow = new ArrayBuffer[Float]
|
||||
for (i <- a.size) {
|
||||
val avgI = (a.getFloat(i) + b.getFloat(i)) /2
|
||||
def averageRow(a :ArrayBuffer[Float], b:ArrayBuffer[Float]) : ArrayBuffer[Float] = {
|
||||
val newRow = new ArrayBuffer[Float]()
|
||||
for (i <- 0 until a.length) {
|
||||
val avgI = (a(i) + b(i)) /2
|
||||
newRow(i) = avgI
|
||||
}
|
||||
newRow
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user