Merge branch 'joe-dev2' of https://github.com/Pezz89/Big_Data_Assignment_2 into joe-dev2

This commit is contained in:
Joe Darby
2016-12-16 12:34:07 +00:00
4 changed files with 230 additions and 4 deletions
+1 -1
View File
@@ -45,7 +45,7 @@
</list>
</option>
</component>
<component name="ProjectRootManager" version="2" languageLevel="JDK_1_8" default="true" project-jdk-name="1.8" project-jdk-type="JavaSDK">
<component name="ProjectRootManager" version="2" languageLevel="JDK_1_8" default="true" project-jdk-name="1.8 (1)" project-jdk-type="JavaSDK">
<output url="file://$PROJECT_DIR$/out" />
</component>
<component name="masterDetails">
+220 -1
View File
@@ -1,8 +1,16 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ChangeListManager">
<<<<<<< HEAD
<list default="true" id="b41a9788-25b3-4e04-923f-17cde259631b" name="Default" comment="">
<change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/run_project.sh" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/.idea/workspace.xml" afterPath="$PROJECT_DIR$/.idea/workspace.xml" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/src/main/scala/Main.scala" afterPath="$PROJECT_DIR$/src/main/scala/Main.scala" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/src/main/scala/XMLParser.scala" afterPath="$PROJECT_DIR$/src/main/scala/XMLParser.scala" />
=======
<list default="true" id="74fa95ce-dfd4-40da-a7a1-b336badfaea8" name="Default" comment="">
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/src/main/scala/KMeans.scala" afterPath="$PROJECT_DIR$/src/main/scala/KMeans.scala" />
>>>>>>> 23a3c3f3fde97ee499a7fbcbe16e1c28c3297e05
</list>
<ignored path="$PROJECT_DIR$/target/" />
<option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
@@ -18,11 +26,16 @@
<component name="ExecutionTargetManager" SELECTED_TARGET="default_target" />
<component name="FileEditorManager">
<leaf SIDE_TABS_SIZE_LIMIT_KEY="300">
<file leaf-file-name="KMeans.scala" pinned="false" current-in-tab="true">
<file leaf-file-name="KMeans.scala" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/src/main/scala/KMeans.scala">
<provider selected="true" editor-type-id="text-editor">
<<<<<<< HEAD
<state relative-caret-position="684">
<caret line="40" column="3" lean-forward="false" selection-start-line="40" selection-start-column="3" selection-end-line="40" selection-end-column="3" />
=======
<state relative-caret-position="135">
<caret line="36" column="86" lean-forward="true" selection-start-line="36" selection-start-column="86" selection-end-line="36" selection-end-column="86" />
>>>>>>> 23a3c3f3fde97ee499a7fbcbe16e1c28c3297e05
<folding>
<element signature="e#23#54#0" expanded="true" />
</folding>
@@ -30,6 +43,14 @@
</provider>
</entry>
</file>
<<<<<<< HEAD
<file leaf-file-name="Main.scala" pinned="false" current-in-tab="true">
<entry file="file://$PROJECT_DIR$/src/main/scala/Main.scala">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="258">
<caret line="38" column="38" lean-forward="false" selection-start-line="38" selection-start-column="38" selection-end-line="38" selection-end-column="38" />
<folding />
=======
<file leaf-file-name="Row.scala" pinned="false" current-in-tab="false">
<entry file="jar://$MAVEN_REPOSITORY$/org/apache/spark/spark-catalyst_2.10/1.6.0/spark-catalyst_2.10-1.6.0-sources.jar!/org/apache/spark/sql/Row.scala">
<provider selected="true" editor-type-id="text-editor">
@@ -39,6 +60,7 @@
<element signature="n#!!doc" expanded="false" />
<element signature="e#832#872#0" expanded="false" />
</folding>
>>>>>>> 23a3c3f3fde97ee499a7fbcbe16e1c28c3297e05
</state>
</provider>
</entry>
@@ -46,8 +68,13 @@
<file leaf-file-name="XMLParser.scala" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/src/main/scala/XMLParser.scala">
<provider selected="true" editor-type-id="text-editor">
<<<<<<< HEAD
<state relative-caret-position="1044">
<caret line="65" column="35" lean-forward="false" selection-start-line="65" selection-start-column="35" selection-end-line="65" selection-end-column="35" />
=======
<state relative-caret-position="441">
<caret line="135" column="0" lean-forward="true" selection-start-line="135" selection-start-column="0" selection-end-line="135" selection-end-column="0" />
>>>>>>> 23a3c3f3fde97ee499a7fbcbe16e1c28c3297e05
<folding>
<element signature="e#23#59#0" expanded="true" />
</folding>
@@ -80,9 +107,14 @@
<component name="IdeDocumentHistory">
<option name="CHANGED_PATHS">
<list>
<<<<<<< HEAD
=======
<option value="$PROJECT_DIR$/src/main/scala/Main.scala" />
<option value="$PROJECT_DIR$/src/main/scala/XMLParser.scala" />
>>>>>>> 23a3c3f3fde97ee499a7fbcbe16e1c28c3297e05
<option value="$PROJECT_DIR$/src/main/scala/KMeans.scala" />
<option value="$PROJECT_DIR$/src/main/scala/XMLParser.scala" />
<option value="$PROJECT_DIR$/src/main/scala/Main.scala" />
</list>
</option>
</component>
@@ -93,10 +125,17 @@
<sorting>DEFINITION_ORDER</sorting>
</component>
<component name="ProjectFrameBounds">
<<<<<<< HEAD
<option name="x" value="65" />
<option name="y" value="24" />
<option name="width" value="1295" />
<option name="height" value="744" />
=======
<option name="x" value="77" />
<option name="y" value="122" />
<option name="width" value="1400" />
<option name="height" value="893" />
>>>>>>> 23a3c3f3fde97ee499a7fbcbe16e1c28c3297e05
</component>
<component name="ProjectView">
<navigator currentView="ProjectPane" proportions="" version="1">
@@ -612,18 +651,31 @@
<option name="totallyTimeSpent" value="10296000" />
</component>
<component name="ToolWindowManager">
<<<<<<< HEAD
<frame x="65" y="24" width="1295" height="744" extended-state="6" />
=======
<frame x="77" y="122" width="1400" height="893" extended-state="0" />
>>>>>>> 23a3c3f3fde97ee499a7fbcbe16e1c28c3297e05
<editor active="true" />
<layout>
<window_info id="Palette" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" />
<window_info id="Nl-Palette" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" />
<window_info id="Event Log" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="7" side_tool="true" content_ui="tabs" />
<window_info id="Maven Projects" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" />
<<<<<<< HEAD
<window_info id="Run" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" />
<window_info id="Version Control" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="7" side_tool="false" content_ui="tabs" />
=======
>>>>>>> 23a3c3f3fde97ee499a7fbcbe16e1c28c3297e05
<window_info id="Properties" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" />
<window_info id="Capture Tool" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" />
<window_info id="Designer" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" />
<<<<<<< HEAD
<window_info id="Project" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="true" show_stripe_button="true" weight="0.24555984" sideWeight="0.5" order="0" side_tool="false" content_ui="combo" />
=======
<window_info id="Inspection Results" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.32885087" sideWeight="0.5" order="-1" side_tool="false" content_ui="tabs" />
<window_info id="Database" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" />
>>>>>>> 23a3c3f3fde97ee499a7fbcbe16e1c28c3297e05
<window_info id="Structure" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
<window_info id="Ant Build" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
<window_info id="UI Designer" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" />
@@ -639,6 +691,10 @@
<window_info id="Project" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="true" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="0" side_tool="false" content_ui="combo" />
<window_info id="Find" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
<window_info id="Theme Preview" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" />
<<<<<<< HEAD
<window_info id="Debug" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.4" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" />
=======
>>>>>>> 23a3c3f3fde97ee499a7fbcbe16e1c28c3297e05
<window_info id="Favorites" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="2" side_tool="true" content_ui="tabs" />
<window_info id="Cvs" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="4" side_tool="false" content_ui="tabs" />
<window_info id="Message" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="0" side_tool="false" content_ui="tabs" />
@@ -660,8 +716,41 @@
<component name="editorHistoryManager">
<entry file="file://$PROJECT_DIR$/src/main/scala/KMeans.scala">
<provider selected="true" editor-type-id="text-editor">
<<<<<<< HEAD
<state relative-caret-position="720">
<caret line="40" column="3" lean-forward="false" selection-start-line="40" selection-start-column="3" selection-end-line="40" selection-end-column="3" />
<folding>
<element signature="e#23#54#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/src/main/scala/Main.scala">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="684">
<caret line="38" column="38" lean-forward="true" selection-start-line="38" selection-start-column="38" selection-end-line="38" selection-end-column="38" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/src/main/scala/XMLParser.scala">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="1170">
<caret line="65" column="35" lean-forward="false" selection-start-line="65" selection-start-column="35" selection-end-line="65" selection-end-column="35" />
<folding>
<element signature="e#23#59#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/src/main/scala/KMeans.scala">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="0">
<caret line="0" column="0" lean-forward="false" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" />
=======
<state relative-caret-position="918">
<caret line="51" column="5" lean-forward="true" selection-start-line="51" selection-start-column="5" selection-end-line="51" selection-end-column="5" />
>>>>>>> 23a3c3f3fde97ee499a7fbcbe16e1c28c3297e05
<folding>
<element signature="e#23#54#0" expanded="true" />
</folding>
@@ -701,18 +790,83 @@
</entry>
<entry file="file://$PROJECT_DIR$/src/main/scala/KMeans.scala">
<provider selected="true" editor-type-id="text-editor">
<<<<<<< HEAD
<state relative-caret-position="168">
<caret line="15" column="6" lean-forward="false" selection-start-line="15" selection-start-column="6" selection-end-line="15" selection-end-column="6" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/src/main/scala/Main.scala">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="245">
<caret line="27" column="27" lean-forward="true" selection-start-line="27" selection-start-column="27" selection-end-line="27" selection-end-column="27" />
<folding />
=======
<state relative-caret-position="360">
<caret line="20" column="31" lean-forward="true" selection-start-line="20" selection-start-column="31" selection-end-line="20" selection-end-column="31" />
<folding>
<element signature="e#23#54#0" expanded="true" />
</folding>
>>>>>>> 23a3c3f3fde97ee499a7fbcbe16e1c28c3297e05
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/src/main/scala/KMeans.scala">
<provider selected="true" editor-type-id="text-editor">
<<<<<<< HEAD
<state relative-caret-position="396">
<caret line="40" column="3" lean-forward="true" selection-start-line="40" selection-start-column="3" selection-end-line="40" selection-end-column="3" />
<folding>
<element signature="e#23#54#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/src/main/scala/XMLParser.scala">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="612">
<caret line="52" column="3" lean-forward="true" selection-start-line="52" selection-start-column="3" selection-end-line="52" selection-end-column="3" />
=======
<state relative-caret-position="0">
<caret line="0" column="0" lean-forward="false" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" />
>>>>>>> 23a3c3f3fde97ee499a7fbcbe16e1c28c3297e05
<folding>
<element signature="e#23#59#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/src/main/scala/KMeans.scala">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="0">
<caret line="0" column="0" lean-forward="false" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" />
<folding>
<element signature="e#23#54#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/src/main/scala/XMLParser.scala">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="270">
<caret line="15" column="7" lean-forward="false" selection-start-line="15" selection-start-column="7" selection-end-line="15" selection-end-column="7" />
<folding>
<element signature="e#23#59#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="jar://$MAVEN_REPOSITORY$/org/scala-lang/scala-library/2.10.5/scala-library-2.10.5.jar!/scala/collection/TraversableLike.class">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="168">
<caret line="15" column="6" lean-forward="false" selection-start-line="15" selection-start-column="6" selection-end-line="15" selection-end-column="6" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/src/main/scala/XMLParser.scala">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="537">
<caret line="84" column="5" lean-forward="true" selection-start-line="84" selection-start-column="5" selection-end-line="84" selection-end-column="5" />
<folding>
<element signature="e#23#59#0" expanded="true" />
</folding>
@@ -721,6 +875,10 @@
</entry>
<entry file="file://$PROJECT_DIR$/src/main/scala/Main.scala">
<provider selected="true" editor-type-id="text-editor">
<<<<<<< HEAD
<state relative-caret-position="245">
<caret line="27" column="27" lean-forward="true" selection-start-line="27" selection-start-column="27" selection-end-line="27" selection-end-column="27" />
=======
<state relative-caret-position="108">
<caret line="13" column="7" lean-forward="false" selection-start-line="13" selection-start-column="7" selection-end-line="13" selection-end-column="7" />
<folding>
@@ -790,6 +948,7 @@
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="1260">
<caret line="112" column="3" lean-forward="false" selection-start-line="112" selection-start-column="3" selection-end-line="112" selection-end-column="3" />
>>>>>>> 23a3c3f3fde97ee499a7fbcbe16e1c28c3297e05
<folding />
</state>
</provider>
@@ -827,13 +986,73 @@
</entry>
<entry file="file://$PROJECT_DIR$/src/main/scala/KMeans.scala">
<provider selected="true" editor-type-id="text-editor">
<<<<<<< HEAD
<state relative-caret-position="396">
<caret line="40" column="3" lean-forward="false" selection-start-line="40" selection-start-column="3" selection-end-line="40" selection-end-column="3" />
=======
<state relative-caret-position="135">
<caret line="36" column="86" lean-forward="true" selection-start-line="36" selection-start-column="86" selection-end-line="36" selection-end-column="86" />
>>>>>>> 23a3c3f3fde97ee499a7fbcbe16e1c28c3297e05
<folding>
<element signature="e#23#54#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/src/main/scala/KMeans.scala">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="0">
<caret line="0" column="0" lean-forward="false" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" />
<folding>
<element signature="e#23#54#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/src/main/scala/XMLParser.scala">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="270">
<caret line="15" column="7" lean-forward="false" selection-start-line="15" selection-start-column="7" selection-end-line="15" selection-end-column="7" />
<folding>
<element signature="e#23#59#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="jar://$MAVEN_REPOSITORY$/org/scala-lang/scala-library/2.10.5/scala-library-2.10.5.jar!/scala/collection/TraversableLike.class">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="168">
<caret line="15" column="6" lean-forward="false" selection-start-line="15" selection-start-column="6" selection-end-line="15" selection-end-column="6" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/src/main/scala/KMeans.scala">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="684">
<caret line="40" column="3" lean-forward="false" selection-start-line="40" selection-start-column="3" selection-end-line="40" selection-end-column="3" />
<folding>
<element signature="e#23#54#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/src/main/scala/XMLParser.scala">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="1044">
<caret line="65" column="35" lean-forward="false" selection-start-line="65" selection-start-column="35" selection-end-line="65" selection-end-column="35" />
<folding>
<element signature="e#23#59#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/src/main/scala/Main.scala">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="258">
<caret line="38" column="38" lean-forward="false" selection-start-line="38" selection-start-column="38" selection-end-line="38" selection-end-column="38" />
<folding />
</state>
</provider>
</entry>
</component>
</project>
+3 -2
View File
@@ -12,8 +12,9 @@ object KMeans {
//Create a map to store each data row with its closest cluster index as key
def train(dataset : DataFrame) : RDD[(Int,List[Float])] = {
val rows = dataset.rdd
val rowsAsArray = dataset.map(row => List(row.getInt(0).toFloat, row.getInt(1).toFloat, row.getInt(2).toFloat) )
val relevantData = dataset.select("Reputation", "CreationDate", "LastAccessDate")
val rows = relevantData.rdd
val rowsAsArray = rows.map(row => List(row.getInt(0).toFloat, row.getInt(1).toFloat, row.getInt(2).toFloat) )
val K = 5 //number of intended clusters
//val n = rows.count() //number of datapoints
val m = 3 //number of features
+6
View File
@@ -26,12 +26,15 @@ object Main {
val df = XMLParser.ParseData()
// get the users XML file
val users = df("users")
val centres = KMeans.train(users)
val centresArray = centres.collect()
val unwrap = centresArray.map(x => x._2)
unwrap.foreach(println)
/*val users = dataFrames("users")
/*val dataFrames = DataParser.ParseData()
// get the users XML file
@@ -47,6 +50,9 @@ object Main {
users.select("CreationDate").show()
*/
// Info on using DataFrames here: https://www.mapr.com/blog/using-apache-spark-dataframes-processing-tabular-data
}
}