add counts function
This commit is contained in:
Generated
+105
-61
@@ -2,7 +2,9 @@
|
||||
<project version="4">
|
||||
<component name="ChangeListManager">
|
||||
<list default="true" id="a45ae16c-c18b-46fd-bdd5-74c3ba5fabef" name="Default" comment="">
|
||||
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/.idea/workspace.xml" afterPath="$PROJECT_DIR$/.idea/workspace.xml" />
|
||||
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/src/main/scala/KMeans.scala" afterPath="$PROJECT_DIR$/src/main/scala/KMeans.scala" />
|
||||
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/src/main/scala/XMLParser.scala" afterPath="$PROJECT_DIR$/src/main/scala/XMLParser.scala" />
|
||||
</list>
|
||||
<ignored path="$PROJECT_DIR$/target/" />
|
||||
<option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
|
||||
@@ -21,8 +23,8 @@
|
||||
<file leaf-file-name="KMeans.scala" pinned="false" current-in-tab="true">
|
||||
<entry file="file://$PROJECT_DIR$/src/main/scala/KMeans.scala">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="1152">
|
||||
<caret line="64" column="0" lean-forward="true" selection-start-line="64" selection-start-column="0" selection-end-line="64" selection-end-column="0" />
|
||||
<state relative-caret-position="491">
|
||||
<caret line="52" column="0" lean-forward="true" selection-start-line="52" selection-start-column="0" selection-end-line="52" selection-end-column="0" />
|
||||
<folding>
|
||||
<element signature="e#23#54#0" expanded="true" />
|
||||
</folding>
|
||||
@@ -30,23 +32,11 @@
|
||||
</provider>
|
||||
</entry>
|
||||
</file>
|
||||
<file leaf-file-name="ArrayOps.scala" pinned="false" current-in-tab="false">
|
||||
<entry file="jar://$MAVEN_REPOSITORY$/org/scala-lang/scala-library/2.10.5/scala-library-2.10.5-sources.jar!/scala/collection/mutable/ArrayOps.scala">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="-2214">
|
||||
<caret line="42" column="58" lean-forward="true" selection-start-line="42" selection-start-column="58" selection-end-line="42" selection-end-column="58" />
|
||||
<folding>
|
||||
<element signature="e#576#614#0" expanded="false" />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
</file>
|
||||
<file leaf-file-name="Main.scala" pinned="false" current-in-tab="false">
|
||||
<entry file="file://$PROJECT_DIR$/src/main/scala/Main.scala">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="144">
|
||||
<caret line="16" column="51" lean-forward="false" selection-start-line="16" selection-start-column="51" selection-end-line="16" selection-end-column="51" />
|
||||
<state relative-caret-position="360">
|
||||
<caret line="27" column="34" lean-forward="false" selection-start-line="27" selection-start-column="34" selection-end-line="27" selection-end-column="34" />
|
||||
<folding />
|
||||
</state>
|
||||
</provider>
|
||||
@@ -55,8 +45,8 @@
|
||||
<file leaf-file-name="XMLParser.scala" pinned="false" current-in-tab="false">
|
||||
<entry file="file://$PROJECT_DIR$/src/main/scala/XMLParser.scala">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="612">
|
||||
<caret line="41" column="21" lean-forward="false" selection-start-line="41" selection-start-column="21" selection-end-line="41" selection-end-column="21" />
|
||||
<state relative-caret-position="-144">
|
||||
<caret line="44" column="62" lean-forward="false" selection-start-line="44" selection-start-column="62" selection-end-line="44" selection-end-column="62" />
|
||||
<folding />
|
||||
</state>
|
||||
</provider>
|
||||
@@ -75,6 +65,7 @@
|
||||
<component name="IdeDocumentHistory">
|
||||
<option name="CHANGED_PATHS">
|
||||
<list>
|
||||
<option value="$PROJECT_DIR$/src/main/scala/XMLParser.scala" />
|
||||
<option value="$PROJECT_DIR$/src/main/scala/KMeans.scala" />
|
||||
</list>
|
||||
</option>
|
||||
@@ -86,10 +77,10 @@
|
||||
<sorting>DEFINITION_ORDER</sorting>
|
||||
</component>
|
||||
<component name="ProjectFrameBounds">
|
||||
<option name="x" value="49" />
|
||||
<option name="y" value="63" />
|
||||
<option name="x" value="75" />
|
||||
<option name="y" value="52" />
|
||||
<option name="width" value="1605" />
|
||||
<option name="height" value="968" />
|
||||
<option name="height" value="893" />
|
||||
</component>
|
||||
<component name="ProjectLevelVcsManager" settingsEditedManually="true" />
|
||||
<component name="ProjectView">
|
||||
@@ -107,6 +98,7 @@
|
||||
<foldersAlwaysOnTop value="true" />
|
||||
</navigator>
|
||||
<panes>
|
||||
<pane id="Scratches" />
|
||||
<pane id="ProjectPane">
|
||||
<subPane>
|
||||
<PATH>
|
||||
@@ -143,9 +135,8 @@
|
||||
</PATH>
|
||||
</subPane>
|
||||
</pane>
|
||||
<pane id="PackagesPane" />
|
||||
<pane id="Scratches" />
|
||||
<pane id="Scope" />
|
||||
<pane id="PackagesPane" />
|
||||
</panes>
|
||||
</component>
|
||||
<component name="PropertiesComponent">
|
||||
@@ -263,15 +254,17 @@
|
||||
<updated>1481992380230</updated>
|
||||
<workItem from="1481992381575" duration="8284000" />
|
||||
<workItem from="1482003919097" duration="1224000" />
|
||||
<workItem from="1482071638456" duration="1188000" />
|
||||
<workItem from="1482071638456" duration="2869000" />
|
||||
<workItem from="1482150691487" duration="1272000" />
|
||||
<workItem from="1482152647060" duration="2108000" />
|
||||
</task>
|
||||
<servers />
|
||||
</component>
|
||||
<component name="TimeTrackingManager">
|
||||
<option name="totallyTimeSpent" value="10696000" />
|
||||
<option name="totallyTimeSpent" value="15757000" />
|
||||
</component>
|
||||
<component name="ToolWindowManager">
|
||||
<frame x="49" y="63" width="1605" height="968" extended-state="0" />
|
||||
<frame x="75" y="52" width="1605" height="893" extended-state="0" />
|
||||
<editor active="true" />
|
||||
<layout>
|
||||
<window_info id="Palette" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" />
|
||||
@@ -283,8 +276,8 @@
|
||||
<window_info id="Capture Analysis" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Event Log" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.32948583" sideWeight="0.5" order="7" side_tool="true" content_ui="tabs" />
|
||||
<window_info id="Maven Projects" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Run" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Version Control" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="7" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Run" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Properties" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Terminal" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="7" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Capture Tool" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" />
|
||||
@@ -295,8 +288,8 @@
|
||||
<window_info id="Ant Build" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="UI Designer" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Theme Preview" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Debug" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.4" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Favorites" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="2" side_tool="true" content_ui="tabs" />
|
||||
<window_info id="Debug" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.4" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Cvs" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="4" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Message" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="0" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Commander" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.4" sideWeight="0.5" order="0" side_tool="false" content_ui="tabs" />
|
||||
@@ -320,6 +313,58 @@
|
||||
<option name="FILTER_TARGETS" value="false" />
|
||||
</component>
|
||||
<component name="editorHistoryManager">
|
||||
<entry file="file://$PROJECT_DIR$/src/main/scala/KMeans.scala">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="1818">
|
||||
<caret line="104" column="13" lean-forward="false" selection-start-line="104" selection-start-column="13" selection-end-line="104" selection-end-column="13" />
|
||||
<folding>
|
||||
<element signature="e#23#54#0" expanded="true" />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/src/main/scala/Main.scala">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="360">
|
||||
<caret line="27" column="34" lean-forward="true" selection-start-line="27" selection-start-column="34" selection-end-line="27" selection-end-column="34" />
|
||||
<folding />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/src/main/scala/XMLParser.scala">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="666">
|
||||
<caret line="44" column="62" lean-forward="false" selection-start-line="44" selection-start-column="62" selection-end-line="44" selection-end-column="62" />
|
||||
<folding />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/src/main/scala/KMeans.scala">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="0">
|
||||
<caret line="0" column="0" lean-forward="false" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" />
|
||||
<folding>
|
||||
<element signature="e#23#54#0" expanded="true" />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/src/main/scala/Main.scala">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="468">
|
||||
<caret line="26" column="0" lean-forward="true" selection-start-line="26" selection-start-column="0" selection-end-line="26" selection-end-column="0" />
|
||||
<folding />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/src/main/scala/XMLParser.scala">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="324">
|
||||
<caret line="25" column="8" lean-forward="true" selection-start-line="25" selection-start-column="8" selection-end-line="25" selection-end-column="8" />
|
||||
<folding />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/src/main/scala/KMeans.scala">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="936">
|
||||
@@ -334,7 +379,6 @@
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="16632">
|
||||
<caret line="924" column="5" lean-forward="true" selection-start-line="924" selection-start-column="5" selection-end-line="924" selection-end-column="5" />
|
||||
<folding />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
@@ -342,7 +386,6 @@
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="30096">
|
||||
<caret line="1672" column="22" lean-forward="false" selection-start-line="1672" selection-start-column="22" selection-end-line="1672" selection-end-column="22" />
|
||||
<folding />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
@@ -376,7 +419,6 @@
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="30096">
|
||||
<caret line="1672" column="22" lean-forward="true" selection-start-line="1672" selection-start-column="22" selection-end-line="1672" selection-end-column="22" />
|
||||
<folding />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
@@ -410,35 +452,10 @@
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/src/main/scala/XMLParser.scala">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="612">
|
||||
<caret line="41" column="21" lean-forward="false" selection-start-line="41" selection-start-column="21" selection-end-line="41" selection-end-column="21" />
|
||||
<folding />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="jar://$MAVEN_REPOSITORY$/org/apache/spark/spark-core_2.10/1.6.0/spark-core_2.10-1.6.0-sources.jar!/org/apache/spark/rdd/RDD.scala">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="18">
|
||||
<caret line="924" column="5" lean-forward="false" selection-start-line="924" selection-start-column="5" selection-end-line="924" selection-end-column="5" />
|
||||
<folding />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="jar://$MAVEN_REPOSITORY$/org/apache/spark/spark-sql_2.10/1.6.0/spark-sql_2.10-1.6.0-sources.jar!/org/apache/spark/sql/DataFrame.scala">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="18">
|
||||
<caret line="1672" column="22" lean-forward="false" selection-start-line="1672" selection-start-column="22" selection-end-line="1672" selection-end-column="22" />
|
||||
<folding />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/src/main/scala/Main.scala">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="144">
|
||||
<caret line="16" column="51" lean-forward="false" selection-start-line="16" selection-start-column="51" selection-end-line="16" selection-end-column="51" />
|
||||
<folding />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
@@ -446,16 +463,43 @@
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="-2214">
|
||||
<caret line="42" column="58" lean-forward="true" selection-start-line="42" selection-start-column="58" selection-end-line="42" selection-end-column="58" />
|
||||
<folding>
|
||||
<element signature="e#576#614#0" expanded="false" />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="jar://$MAVEN_REPOSITORY$/org/apache/spark/spark-sql_2.10/1.6.0/spark-sql_2.10-1.6.0-sources.jar!/org/apache/spark/sql/DataFrame.scala">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="287">
|
||||
<caret line="328" column="6" lean-forward="false" selection-start-line="328" selection-start-column="6" selection-end-line="328" selection-end-column="6" />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="jar://$MAVEN_REPOSITORY$/org/apache/spark/spark-sql_2.10/1.6.0/spark-sql_2.10-1.6.0-sources.jar!/org/apache/spark/sql/SQLContext.scala">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="253">
|
||||
<caret line="486" column="6" lean-forward="false" selection-start-line="486" selection-start-column="6" selection-end-line="486" selection-end-column="6" />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/src/main/scala/Main.scala">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="360">
|
||||
<caret line="27" column="34" lean-forward="false" selection-start-line="27" selection-start-column="34" selection-end-line="27" selection-end-column="34" />
|
||||
<folding />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/src/main/scala/XMLParser.scala">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="-144">
|
||||
<caret line="44" column="62" lean-forward="false" selection-start-line="44" selection-start-column="62" selection-end-line="44" selection-end-column="62" />
|
||||
<folding />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/src/main/scala/KMeans.scala">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="1152">
|
||||
<caret line="64" column="0" lean-forward="true" selection-start-line="64" selection-start-column="0" selection-end-line="64" selection-end-column="0" />
|
||||
<state relative-caret-position="491">
|
||||
<caret line="52" column="0" lean-forward="true" selection-start-line="52" selection-start-column="0" selection-end-line="52" selection-end-column="0" />
|
||||
<folding>
|
||||
<element signature="e#23#54#0" expanded="true" />
|
||||
</folding>
|
||||
|
||||
+77
-39
@@ -12,9 +12,9 @@ object KMeans {
|
||||
|
||||
|
||||
def train(dataset : DataFrame, iterations:Int) : Unit = {
|
||||
val K = 10
|
||||
val m = 4
|
||||
val K = 10 // Number of desired clusters
|
||||
val relevantData = dataset.select("Reputation", "Views", "UpVotes", "DownVotes")
|
||||
val m = relevantData.columns.length //number of features
|
||||
val rows = relevantData.rdd
|
||||
val rowsAsArray = rows.map(row => convertRow(row, m)).persist()
|
||||
|
||||
@@ -30,23 +30,53 @@ object KMeans {
|
||||
}
|
||||
|
||||
|
||||
var counts = Array[Int](K)
|
||||
|
||||
|
||||
|
||||
for (i <- 0 until iterations) {
|
||||
centres = clustering(centres, rowsAsArray, m, K)
|
||||
if (centres == null) {
|
||||
val clusterMap = clustering(centres, rowsAsArray, m, K).persist()
|
||||
centres = getCentres(clusterMap, m, K)
|
||||
counts = getCounts(clusterMap, K)
|
||||
clusterMap.unpersist()
|
||||
if (centres == null || counts == null) {
|
||||
println("Error, starting again")
|
||||
train(dataset, iterations)
|
||||
return
|
||||
}
|
||||
println("\niteration " + i + " :")
|
||||
for (j <- 0 until K) {
|
||||
println("centre " + j + " = " + centres(j).mkString("[",",","]") )
|
||||
println("centre " + j + " = " + centres(j).mkString("[",",","]") + " count = " + counts(j) )
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
def clustering(centres :Array[Array[Float]], rowsAsArray : RDD[Array[Float]], m : Int, K : Int) : Array[Array[Float]] = {
|
||||
/*def clustering(centres :Array[Array[Float]], rowsAsArray : RDD[Array[Float]], m : Int, K : Int) : Array[Array[Float]] = {
|
||||
val clusterMap :RDD[(Int,Array[Float])]= rowsAsArray.map(row => (assignCluster(row,centres,m,K),row)).persist()
|
||||
val newCentres = clusterMap.reduceByKey((a,b) => getMeanVector(a,b,m))
|
||||
val arrayNewCentres = newCentres.collect()
|
||||
|
||||
var results = new Array[Array[Float]](K)
|
||||
for ((i,x) <- arrayNewCentres) {
|
||||
results(i) = x
|
||||
}
|
||||
//Check all results are valid (no null)
|
||||
for (i <- 0 until K) {
|
||||
if (results(i) == null) {
|
||||
return null
|
||||
}
|
||||
}
|
||||
return results
|
||||
}*/
|
||||
|
||||
/**
 * Pairs every data point with the cluster index that assignCluster picks for it.
 *
 * @param centres     centre vectors, one per cluster, passed through to assignCluster
 * @param rowsAsArray data points, each a feature vector
 * @param m           number of features, passed through to assignCluster
 * @param K           number of clusters, passed through to assignCluster
 * @return an RDD of (cluster index, original data point) pairs
 */
def clustering(centres :Array[Array[Float]], rowsAsArray : RDD[Array[Float]], m : Int, K : Int) : RDD[(Int,Array[Float])] =
  rowsAsArray.map { point =>
    val clusterIndex = assignCluster(point, centres, m, K)
    (clusterIndex, point)
  }
|
||||
|
||||
def getCentres(clusterMap: RDD[(Int,Array[Float])], m: Int, K: Int) : Array[Array[Float]] = {
|
||||
val newCentres = clusterMap.reduceByKey((a,b) => getMeanVector(a,b,m))
|
||||
val arrayNewCentres = newCentres.collect()
|
||||
|
||||
@@ -63,6 +93,23 @@ object KMeans {
|
||||
return results
|
||||
}
|
||||
|
||||
/**
 * Counts how many data points were assigned to each cluster.
 *
 * @param clusterMap (cluster index, data point) pairs; only the index is used here
 * @param K          number of clusters; every index in clusterMap must lie in 0 until K,
 *                   otherwise writing into the result array throws
 * @return an array of length K whose element i is the number of pairs keyed by i,
 *         or null when at least one cluster received no points
 */
def getCounts(clusterMap: RDD[(Int,Array[Float])], K: Int) : Array[Int] = {
  val countsWithKeys = clusterMap
    .map(x => (x._1, 1))
    .reduceByKey((a, b) => a + b)
    .collect()

  val counts = new Array[Int](K)
  for ((i, x) <- countsWithKeys) {
    counts(i) = x
  }

  // Clusters missing from the reduce result keep the Int default of 0.
  // The previous `counts(i) == null` test could never be true for an Int,
  // so empty clusters were never reported; test for 0 instead.
  if (counts.contains(0)) null else counts
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
def calculateNorm(datapoint : Array[Float], centre : Array[Float], m: Int): Double = {
|
||||
@@ -74,44 +121,35 @@ object KMeans {
|
||||
norm
|
||||
}
|
||||
|
||||
def assignCluster(row : Array[Float], centres: Array[Array[Float]], m : Int, K :Int): Int = {
|
||||
var smallestNorm = 999999.0
|
||||
var closestCentre = 0
|
||||
for (centreNumber <- 0 until K) {
|
||||
//val norm = Math.abs(row - centres(centreNumber))
|
||||
val norm = calculateNorm(row, centres(centreNumber), m)
|
||||
if (norm < smallestNorm) {
|
||||
smallestNorm = norm
|
||||
closestCentre = centreNumber
|
||||
/**
 * Finds the centre with the smallest calculateNorm value for the given data point.
 *
 * @param row     feature vector of the data point
 * @param centres candidate centre vectors; indices 0 until K are read
 * @param m       number of features, passed through to calculateNorm
 * @param K       number of centres to consider
 * @return index of the first centre with the smallest norm; 0 when K is 0
 *         or when no norm compares below positive infinity
 */
def assignCluster(row : Array[Float], centres: Array[Array[Float]], m : Int, K :Int): Int = {
  // Start from +infinity rather than a fixed 999999.0 so that points lying
  // farther than that from every centre are still assigned to their nearest
  // centre instead of silently defaulting to cluster 0.
  var smallestNorm = Double.PositiveInfinity
  var closestCentre = 0
  for (centreNumber <- 0 until K) {
    val norm = calculateNorm(row, centres(centreNumber), m)
    // Strict comparison keeps the earliest centre on ties.
    if (norm < smallestNorm) {
      smallestNorm = norm
      closestCentre = centreNumber
    }
  }
  closestCentre
}
|
||||
closestCentre
|
||||
}
|
||||
|
||||
def averageRow(a:List[Float], b:List[Float]) : List[Float] = {
|
||||
val means = new ArrayBuffer[Float]
|
||||
for (i <- 0 until a.size) {
|
||||
val mean = (a(i) + b(i)) /2.0f
|
||||
means(i) = mean
|
||||
|
||||
/**
 * Element-wise midpoint of two feature vectors.
 *
 * @param a first vector; at least m elements are read
 * @param b second vector; at least m elements are read
 * @param m number of leading elements to average
 * @return a new array of length m where element i is (a(i) + b(i)) / 2
 *
 * NOTE(review): getCentres passes this to reduceByKey; folding more than two
 * vectors by repeated pairwise midpoints is not the same as their arithmetic
 * mean -- confirm this is intended.
 */
def getMeanVector(a: Array[Float], b: Array[Float], m: Int) : Array[Float] = {
  val averaged = new Array[Float](m)
  averaged.indices.foreach { idx =>
    averaged(idx) = (a(idx) + b(idx)) / 2
  }
  averaged
}
|
||||
return means.toList
|
||||
}
|
||||
|
||||
|
||||
def getMeanVector(a: Array[Float], b: Array[Float], m: Int) : Array[Float] = {
|
||||
var means = new Array[Float](m)
|
||||
for (i <- 0 until m) {
|
||||
means(i) = (a(i) + b(i)) / 2
|
||||
/**
 * Copies the first m columns of a row into a float array.
 *
 * @param row source row; columns 0 until m are read with getInt
 * @param m   number of columns to copy
 * @return a new array of length m holding each column value converted to Float
 */
def convertRow(row : Row, m: Int) : Array[Float] = {
  val features = new Array[Float](m)
  var col = 0
  while (col < m) {
    features(col) = row.getInt(col).toFloat
    col += 1
  }
  features
}
|
||||
means
|
||||
}
|
||||
|
||||
def convertRow(row : Row, m: Int) : Array[Float] = {
|
||||
var dataArray = new Array[Float](m)
|
||||
for (i <- 0 until m) {
|
||||
dataArray(i) = row.getInt(i).toFloat
|
||||
}
|
||||
dataArray
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -124,13 +124,13 @@ object XMLParser {
|
||||
// In this case, return a placeholder value of -1.
|
||||
case e: Exception => return -1
|
||||
}
|
||||
case DateType => return attribute
|
||||
/*// If the string is a date, convert from date string to long.
|
||||
case DateType =>
|
||||
// If the string is a date, convert from date string to long.
|
||||
var format = new java.text.SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS")
|
||||
var longTime = format.parse(attribute).getTime()
|
||||
// Then convert long to int representing days since epoch
|
||||
var longDays : Long = longTime / (1000*60*60*24)
|
||||
return longDays.toInt*/
|
||||
return longDays.toInt
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user