Merge branch 'joe-dev2' of https://github.com/Pezz89/Big_Data_Assignment_2 into joe-dev2
This commit is contained in:
+3
-1
@@ -1,6 +1,8 @@
|
||||
.DS_Store
|
||||
bin/
|
||||
/target
|
||||
target/
|
||||
.idea/
|
||||
stackoverflow_data/
|
||||
# Compiled Object files
|
||||
*.slo
|
||||
*.lo
|
||||
|
||||
Generated
+16
@@ -48,4 +48,20 @@
|
||||
<component name="ProjectRootManager" version="2" languageLevel="JDK_1_8" default="true" project-jdk-name="1.8 (1)" project-jdk-type="JavaSDK">
|
||||
<output url="file://$PROJECT_DIR$/out" />
|
||||
</component>
|
||||
<component name="masterDetails">
|
||||
<states>
|
||||
<state key="ProjectJDKs.UI">
|
||||
<settings>
|
||||
<last-edited>1.8</last-edited>
|
||||
<splitter-proportions>
|
||||
<option name="proportions">
|
||||
<list>
|
||||
<option value="0.2" />
|
||||
</list>
|
||||
</option>
|
||||
</splitter-proportions>
|
||||
</settings>
|
||||
</state>
|
||||
</states>
|
||||
</component>
|
||||
</project>
|
||||
Generated
+427
-35
@@ -1,11 +1,16 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ChangeListManager">
|
||||
<<<<<<< HEAD
|
||||
<list default="true" id="b41a9788-25b3-4e04-923f-17cde259631b" name="Default" comment="">
|
||||
<change type="NEW" beforePath="" afterPath="$PROJECT_DIR$/run_project.sh" />
|
||||
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/.idea/workspace.xml" afterPath="$PROJECT_DIR$/.idea/workspace.xml" />
|
||||
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/src/main/scala/Main.scala" afterPath="$PROJECT_DIR$/src/main/scala/Main.scala" />
|
||||
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/src/main/scala/XMLParser.scala" afterPath="$PROJECT_DIR$/src/main/scala/XMLParser.scala" />
|
||||
=======
|
||||
<list default="true" id="74fa95ce-dfd4-40da-a7a1-b336badfaea8" name="Default" comment="">
|
||||
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/src/main/scala/KMeans.scala" afterPath="$PROJECT_DIR$/src/main/scala/KMeans.scala" />
|
||||
>>>>>>> 23a3c3f3fde97ee499a7fbcbe16e1c28c3297e05
|
||||
</list>
|
||||
<ignored path="$PROJECT_DIR$/target/" />
|
||||
<option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
|
||||
@@ -24,8 +29,13 @@
|
||||
<file leaf-file-name="KMeans.scala" pinned="false" current-in-tab="false">
|
||||
<entry file="file://$PROJECT_DIR$/src/main/scala/KMeans.scala">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<<<<<<< HEAD
|
||||
<state relative-caret-position="684">
|
||||
<caret line="40" column="3" lean-forward="false" selection-start-line="40" selection-start-column="3" selection-end-line="40" selection-end-column="3" />
|
||||
=======
|
||||
<state relative-caret-position="135">
|
||||
<caret line="36" column="86" lean-forward="true" selection-start-line="36" selection-start-column="86" selection-end-line="36" selection-end-column="86" />
|
||||
>>>>>>> 23a3c3f3fde97ee499a7fbcbe16e1c28c3297e05
|
||||
<folding>
|
||||
<element signature="e#23#54#0" expanded="true" />
|
||||
</folding>
|
||||
@@ -33,12 +43,24 @@
|
||||
</provider>
|
||||
</entry>
|
||||
</file>
|
||||
<<<<<<< HEAD
|
||||
<file leaf-file-name="Main.scala" pinned="false" current-in-tab="true">
|
||||
<entry file="file://$PROJECT_DIR$/src/main/scala/Main.scala">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="258">
|
||||
<caret line="38" column="38" lean-forward="false" selection-start-line="38" selection-start-column="38" selection-end-line="38" selection-end-column="38" />
|
||||
<folding />
|
||||
=======
|
||||
<file leaf-file-name="Row.scala" pinned="false" current-in-tab="false">
|
||||
<entry file="jar://$MAVEN_REPOSITORY$/org/apache/spark/spark-catalyst_2.10/1.6.0/spark-catalyst_2.10-1.6.0-sources.jar!/org/apache/spark/sql/Row.scala">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="324">
|
||||
<caret line="38" column="34" lean-forward="false" selection-start-line="38" selection-start-column="34" selection-end-line="38" selection-end-column="34" />
|
||||
<folding>
|
||||
<element signature="n#!!doc" expanded="false" />
|
||||
<element signature="e#832#872#0" expanded="false" />
|
||||
</folding>
|
||||
>>>>>>> 23a3c3f3fde97ee499a7fbcbe16e1c28c3297e05
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
@@ -46,8 +68,13 @@
|
||||
<file leaf-file-name="XMLParser.scala" pinned="false" current-in-tab="false">
|
||||
<entry file="file://$PROJECT_DIR$/src/main/scala/XMLParser.scala">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<<<<<<< HEAD
|
||||
<state relative-caret-position="1044">
|
||||
<caret line="65" column="35" lean-forward="false" selection-start-line="65" selection-start-column="35" selection-end-line="65" selection-end-column="35" />
|
||||
=======
|
||||
<state relative-caret-position="441">
|
||||
<caret line="135" column="0" lean-forward="true" selection-start-line="135" selection-start-column="0" selection-end-line="135" selection-end-column="0" />
|
||||
>>>>>>> 23a3c3f3fde97ee499a7fbcbe16e1c28c3297e05
|
||||
<folding>
|
||||
<element signature="e#23#59#0" expanded="true" />
|
||||
</folding>
|
||||
@@ -55,6 +82,18 @@
|
||||
</provider>
|
||||
</entry>
|
||||
</file>
|
||||
<file leaf-file-name="Main.scala" pinned="false" current-in-tab="false">
|
||||
<entry file="file://$PROJECT_DIR$/src/main/scala/Main.scala">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="419">
|
||||
<caret line="39" column="28" lean-forward="true" selection-start-line="39" selection-start-column="28" selection-end-line="39" selection-end-column="28" />
|
||||
<folding>
|
||||
<element signature="e#22#58#0" expanded="true" />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
</file>
|
||||
</leaf>
|
||||
</component>
|
||||
<component name="Git.Settings">
|
||||
@@ -68,24 +107,35 @@
|
||||
<component name="IdeDocumentHistory">
|
||||
<option name="CHANGED_PATHS">
|
||||
<list>
|
||||
<<<<<<< HEAD
|
||||
=======
|
||||
<option value="$PROJECT_DIR$/src/main/scala/Main.scala" />
|
||||
<option value="$PROJECT_DIR$/src/main/scala/XMLParser.scala" />
|
||||
>>>>>>> 23a3c3f3fde97ee499a7fbcbe16e1c28c3297e05
|
||||
<option value="$PROJECT_DIR$/src/main/scala/KMeans.scala" />
|
||||
<option value="$PROJECT_DIR$/src/main/scala/XMLParser.scala" />
|
||||
<option value="$PROJECT_DIR$/src/main/scala/Main.scala" />
|
||||
</list>
|
||||
</option>
|
||||
</component>
|
||||
<component name="MavenImportPreferences">
|
||||
<option name="importingSettings">
|
||||
<MavenImportingSettings>
|
||||
<option name="importAutomatically" value="true" />
|
||||
</MavenImportingSettings>
|
||||
</option>
|
||||
<component name="JsBuildToolGruntFileManager" detection-done="true" sorting="DEFINITION_ORDER" />
|
||||
<component name="JsBuildToolPackageJson" detection-done="true" sorting="DEFINITION_ORDER" />
|
||||
<component name="JsGulpfileManager">
|
||||
<detection-done>true</detection-done>
|
||||
<sorting>DEFINITION_ORDER</sorting>
|
||||
</component>
|
||||
<component name="ProjectFrameBounds">
|
||||
<<<<<<< HEAD
|
||||
<option name="x" value="65" />
|
||||
<option name="y" value="24" />
|
||||
<option name="width" value="1295" />
|
||||
<option name="height" value="744" />
|
||||
=======
|
||||
<option name="x" value="77" />
|
||||
<option name="y" value="122" />
|
||||
<option name="width" value="1400" />
|
||||
<option name="height" value="893" />
|
||||
>>>>>>> 23a3c3f3fde97ee499a7fbcbe16e1c28c3297e05
|
||||
</component>
|
||||
<component name="ProjectView">
|
||||
<navigator currentView="ProjectPane" proportions="" version="1">
|
||||
@@ -143,6 +193,12 @@
|
||||
<pane id="Scope" />
|
||||
</panes>
|
||||
</component>
|
||||
<component name="PropertiesComponent">
|
||||
<property name="WebServerToolWindowFactoryState" value="false" />
|
||||
<property name="aspect.path.notification.shown" value="true" />
|
||||
<property name="js.eslint.eslintPackage" value="" />
|
||||
<property name="last_opened_file_path" value="$PROJECT_DIR$" />
|
||||
</component>
|
||||
<component name="RunManager">
|
||||
<configuration default="true" type="#org.jetbrains.idea.devkit.run.PluginConfigurationType" factoryName="Plugin">
|
||||
<module name="" />
|
||||
@@ -229,6 +285,136 @@
|
||||
<envs />
|
||||
<method />
|
||||
</configuration>
|
||||
<configuration default="true" type="ArquillianJUnit" factoryName="" nameIsGenerated="true">
|
||||
<extension name="coverage" enabled="false" merge="false" sample_coverage="true" runner="idea" />
|
||||
<module name="" />
|
||||
<option name="arquillianRunConfiguration">
|
||||
<value>
|
||||
<option name="containerStateName" value="" />
|
||||
</value>
|
||||
</option>
|
||||
<option name="ALTERNATIVE_JRE_PATH_ENABLED" value="false" />
|
||||
<option name="ALTERNATIVE_JRE_PATH" />
|
||||
<option name="PACKAGE_NAME" />
|
||||
<option name="MAIN_CLASS_NAME" />
|
||||
<option name="METHOD_NAME" />
|
||||
<option name="TEST_OBJECT" value="class" />
|
||||
<option name="VM_PARAMETERS" />
|
||||
<option name="PARAMETERS" />
|
||||
<option name="WORKING_DIRECTORY" />
|
||||
<option name="ENV_VARIABLES" />
|
||||
<option name="PASS_PARENT_ENVS" value="true" />
|
||||
<option name="TEST_SEARCH_SCOPE">
|
||||
<value defaultName="singleModule" />
|
||||
</option>
|
||||
<envs />
|
||||
<patterns />
|
||||
<method />
|
||||
</configuration>
|
||||
<configuration default="true" type="ArquillianTestNG" factoryName="">
|
||||
<extension name="coverage" enabled="false" merge="false" sample_coverage="true" runner="idea" />
|
||||
<module name="" />
|
||||
<option name="arquillianRunConfiguration">
|
||||
<value>
|
||||
<option name="containerStateName" value="" />
|
||||
</value>
|
||||
</option>
|
||||
<option name="ALTERNATIVE_JRE_PATH_ENABLED" value="false" />
|
||||
<option name="ALTERNATIVE_JRE_PATH" />
|
||||
<option name="SUITE_NAME" />
|
||||
<option name="PACKAGE_NAME" />
|
||||
<option name="MAIN_CLASS_NAME" />
|
||||
<option name="METHOD_NAME" />
|
||||
<option name="GROUP_NAME" />
|
||||
<option name="TEST_OBJECT" value="CLASS" />
|
||||
<option name="VM_PARAMETERS" />
|
||||
<option name="PARAMETERS" />
|
||||
<option name="WORKING_DIRECTORY" />
|
||||
<option name="OUTPUT_DIRECTORY" />
|
||||
<option name="ANNOTATION_TYPE" />
|
||||
<option name="ENV_VARIABLES" />
|
||||
<option name="PASS_PARENT_ENVS" value="true" />
|
||||
<option name="TEST_SEARCH_SCOPE">
|
||||
<value defaultName="singleModule" />
|
||||
</option>
|
||||
<option name="USE_DEFAULT_REPORTERS" value="false" />
|
||||
<option name="PROPERTIES_FILE" />
|
||||
<envs />
|
||||
<properties />
|
||||
<listeners />
|
||||
<method />
|
||||
</configuration>
|
||||
<configuration default="true" type="Cold Fusion runner description" factoryName="Cold Fusion" custom_browser="" web_path="">
|
||||
<method />
|
||||
</configuration>
|
||||
<configuration default="true" type="CucumberJavaRunConfigurationType" factoryName="Cucumber java">
|
||||
<extension name="coverage" enabled="false" merge="false" sample_coverage="true" runner="idea" />
|
||||
<option name="myFilePath" />
|
||||
<option name="GLUE" />
|
||||
<option name="myNameFilter" />
|
||||
<option name="myGeneratedName" />
|
||||
<option name="MAIN_CLASS_NAME" />
|
||||
<option name="VM_PARAMETERS" />
|
||||
<option name="PROGRAM_PARAMETERS" />
|
||||
<option name="WORKING_DIRECTORY" />
|
||||
<option name="ALTERNATIVE_JRE_PATH_ENABLED" value="false" />
|
||||
<option name="ALTERNATIVE_JRE_PATH" />
|
||||
<option name="ENABLE_SWING_INSPECTOR" value="false" />
|
||||
<option name="ENV_VARIABLES" />
|
||||
<option name="PASS_PARENT_ENVS" value="true" />
|
||||
<module name="" />
|
||||
<envs />
|
||||
<method />
|
||||
</configuration>
|
||||
<configuration default="true" type="FlashRunConfigurationType" factoryName="Flash App">
|
||||
<option name="BCName" value="" />
|
||||
<option name="IOSSimulatorSdkPath" value="" />
|
||||
<option name="adlOptions" value="" />
|
||||
<option name="airProgramParameters" value="" />
|
||||
<option name="appDescriptorForEmulator" value="Android" />
|
||||
<option name="debugTransport" value="USB" />
|
||||
<option name="debuggerSdkRaw" value="BC SDK" />
|
||||
<option name="emulator" value="NexusOne" />
|
||||
<option name="emulatorAdlOptions" value="" />
|
||||
<option name="fastPackaging" value="true" />
|
||||
<option name="fullScreenHeight" value="0" />
|
||||
<option name="fullScreenWidth" value="0" />
|
||||
<option name="launchUrl" value="false" />
|
||||
<option name="launcherParameters">
|
||||
<LauncherParameters>
|
||||
<option name="browser" value="a7bb68e0-33c0-4d6f-a81a-aac1fdb870c8" />
|
||||
<option name="launcherType" value="OSDefault" />
|
||||
<option name="newPlayerInstance" value="false" />
|
||||
<option name="playerPath" value="/usr/bin/flashplayerdebugger" />
|
||||
</LauncherParameters>
|
||||
</option>
|
||||
<option name="mobileRunTarget" value="Emulator" />
|
||||
<option name="moduleName" value="" />
|
||||
<option name="overriddenMainClass" value="" />
|
||||
<option name="overriddenOutputFileName" value="" />
|
||||
<option name="overrideMainClass" value="false" />
|
||||
<option name="runTrusted" value="true" />
|
||||
<option name="screenDpi" value="0" />
|
||||
<option name="screenHeight" value="0" />
|
||||
<option name="screenWidth" value="0" />
|
||||
<option name="url" value="http://" />
|
||||
<option name="usbDebugPort" value="7936" />
|
||||
<method />
|
||||
</configuration>
|
||||
<configuration default="true" type="FlexUnitRunConfigurationType" factoryName="FlexUnit" appDescriptorForEmulator="Android" class_name="" emulatorAdlOptions="" method_name="" package_name="" scope="Class">
|
||||
<option name="BCName" value="" />
|
||||
<option name="launcherParameters">
|
||||
<LauncherParameters>
|
||||
<option name="browser" value="a7bb68e0-33c0-4d6f-a81a-aac1fdb870c8" />
|
||||
<option name="launcherType" value="OSDefault" />
|
||||
<option name="newPlayerInstance" value="false" />
|
||||
<option name="playerPath" value="/usr/bin/flashplayerdebugger" />
|
||||
</LauncherParameters>
|
||||
</option>
|
||||
<option name="moduleName" value="" />
|
||||
<option name="trusted" value="true" />
|
||||
<method />
|
||||
</configuration>
|
||||
<configuration default="true" type="GradleRunConfiguration" factoryName="Gradle">
|
||||
<ExternalSystemSettings>
|
||||
<option name="executionName" />
|
||||
@@ -245,6 +431,15 @@
|
||||
</ExternalSystemSettings>
|
||||
<method />
|
||||
</configuration>
|
||||
<configuration default="true" type="GrailsRunConfigurationType" factoryName="Grails">
|
||||
<setting name="vmparams" value="" />
|
||||
<setting name="cmdLine" value="run-app" />
|
||||
<setting name="passParentEnv" value="true" />
|
||||
<setting name="launchBrowser" value="true" />
|
||||
<setting name="launchBrowserUrl" value="" />
|
||||
<setting name="depsClasspath" value="false" />
|
||||
<method />
|
||||
</configuration>
|
||||
<configuration default="true" type="JUnit" factoryName="JUnit">
|
||||
<extension name="coverage" enabled="false" merge="false" sample_coverage="true" runner="idea" />
|
||||
<module name="" />
|
||||
@@ -287,6 +482,15 @@
|
||||
<envs />
|
||||
<method />
|
||||
</configuration>
|
||||
<configuration default="true" type="JavaScriptTestRunnerProtractor" factoryName="Protractor">
|
||||
<config-file value="" />
|
||||
<node-interpreter value="project" />
|
||||
<envs />
|
||||
<method />
|
||||
</configuration>
|
||||
<configuration default="true" type="JavascriptDebugType" factoryName="JavaScript Debug">
|
||||
<method />
|
||||
</configuration>
|
||||
<configuration default="true" type="JetRunConfigurationType" factoryName="Kotlin">
|
||||
<extension name="coverage" enabled="false" merge="false" sample_coverage="true" runner="idea" />
|
||||
<option name="MAIN_CLASS_NAME" />
|
||||
@@ -350,6 +554,12 @@
|
||||
<envs />
|
||||
<method />
|
||||
</configuration>
|
||||
<configuration default="true" type="SpringBootApplicationConfigurationType" factoryName="Spring Boot">
|
||||
<extension name="coverage" enabled="false" merge="false" sample_coverage="true" runner="idea" />
|
||||
<module name="" />
|
||||
<envs />
|
||||
<method />
|
||||
</configuration>
|
||||
<configuration default="true" type="TestNG" factoryName="TestNG">
|
||||
<extension name="coverage" enabled="false" merge="false" sample_coverage="true" runner="idea" />
|
||||
<module name="" />
|
||||
@@ -378,6 +588,28 @@
|
||||
<listeners />
|
||||
<method />
|
||||
</configuration>
|
||||
<configuration default="true" type="js.build_tools.gulp" factoryName="Gulp.js">
|
||||
<node-interpreter>project</node-interpreter>
|
||||
<node-options />
|
||||
<gulpfile />
|
||||
<tasks />
|
||||
<arguments />
|
||||
<envs />
|
||||
<method />
|
||||
</configuration>
|
||||
<configuration default="true" type="js.build_tools.npm" factoryName="npm">
|
||||
<command value="run" />
|
||||
<scripts />
|
||||
<node-interpreter value="project" />
|
||||
<envs />
|
||||
<method />
|
||||
</configuration>
|
||||
<configuration default="true" type="osgi.bnd.run" factoryName="Run Launcher">
|
||||
<method />
|
||||
</configuration>
|
||||
<configuration default="true" type="osgi.bnd.run" factoryName="Test Launcher (JUnit)">
|
||||
<method />
|
||||
</configuration>
|
||||
<configuration default="true" type="uTestRunConfiguration" factoryName="utest">
|
||||
<extension name="coverage" enabled="false" merge="false" sample_coverage="true" runner="idea" />
|
||||
<module name="" />
|
||||
@@ -404,47 +636,76 @@
|
||||
</component>
|
||||
<component name="TaskManager">
|
||||
<task active="true" id="Default" summary="Default task">
|
||||
<changelist id="b41a9788-25b3-4e04-923f-17cde259631b" name="Default" comment="" />
|
||||
<created>1481799944130</created>
|
||||
<changelist id="74fa95ce-dfd4-40da-a7a1-b336badfaea8" name="Default" comment="" />
|
||||
<created>1481830590764</created>
|
||||
<option name="number" value="Default" />
|
||||
<option name="presentableId" value="Default" />
|
||||
<updated>1481799944130</updated>
|
||||
<updated>1481830590764</updated>
|
||||
<workItem from="1481830593703" duration="700000" />
|
||||
<workItem from="1481831304788" duration="5133000" />
|
||||
<workItem from="1481837779668" duration="4463000" />
|
||||
</task>
|
||||
<servers />
|
||||
</component>
|
||||
<component name="TimeTrackingManager">
|
||||
<option name="totallyTimeSpent" value="10296000" />
|
||||
</component>
|
||||
<component name="ToolWindowManager">
|
||||
<<<<<<< HEAD
|
||||
<frame x="65" y="24" width="1295" height="744" extended-state="6" />
|
||||
=======
|
||||
<frame x="77" y="122" width="1400" height="893" extended-state="0" />
|
||||
>>>>>>> 23a3c3f3fde97ee499a7fbcbe16e1c28c3297e05
|
||||
<editor active="true" />
|
||||
<layout>
|
||||
<window_info id="Palette" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="TODO" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="6" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Nl-Palette" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Palette	" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Image Layers" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Capture Analysis" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Event Log" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="7" side_tool="true" content_ui="tabs" />
|
||||
<window_info id="Maven Projects" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" />
|
||||
<<<<<<< HEAD
|
||||
<window_info id="Run" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Version Control" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="7" side_tool="false" content_ui="tabs" />
|
||||
=======
|
||||
>>>>>>> 23a3c3f3fde97ee499a7fbcbe16e1c28c3297e05
|
||||
<window_info id="Properties" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Terminal" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="7" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Capture Tool" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Designer" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" />
|
||||
<<<<<<< HEAD
|
||||
<window_info id="Project" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="true" show_stripe_button="true" weight="0.24555984" sideWeight="0.5" order="0" side_tool="false" content_ui="combo" />
|
||||
=======
|
||||
<window_info id="Inspection Results" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.32885087" sideWeight="0.5" order="-1" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Database" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" />
|
||||
>>>>>>> 23a3c3f3fde97ee499a7fbcbe16e1c28c3297e05
|
||||
<window_info id="Structure" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Ant Build" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="UI Designer" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Theme Preview" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Debug" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.4" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="TODO" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="6" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Palette	" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Image Layers" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Java Enterprise" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="7" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Capture Analysis" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Version Control" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="7" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Run" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Terminal" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="7" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Project" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="true" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="0" side_tool="false" content_ui="combo" />
|
||||
<window_info id="Find" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Theme Preview" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" />
|
||||
<<<<<<< HEAD
|
||||
<window_info id="Debug" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.4" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" />
|
||||
=======
|
||||
>>>>>>> 23a3c3f3fde97ee499a7fbcbe16e1c28c3297e05
|
||||
<window_info id="Favorites" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="2" side_tool="true" content_ui="tabs" />
|
||||
<window_info id="Cvs" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="4" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Message" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="0" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Commander" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.4" sideWeight="0.5" order="0" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Hierarchy" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="2" side_tool="false" content_ui="combo" />
|
||||
<window_info id="Inspection" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.4" sideWeight="0.5" order="5" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Find" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
|
||||
</layout>
|
||||
</component>
|
||||
<component name="TypeScriptGeneratedFilesManager">
|
||||
<option name="processedProjectFiles" value="true" />
|
||||
</component>
|
||||
<component name="VcsContentAnnotationSettings">
|
||||
<option name="myLimit" value="2678400000" />
|
||||
</component>
|
||||
@@ -455,6 +716,7 @@
|
||||
<component name="editorHistoryManager">
|
||||
<entry file="file://$PROJECT_DIR$/src/main/scala/KMeans.scala">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<<<<<<< HEAD
|
||||
<state relative-caret-position="720">
|
||||
<caret line="40" column="3" lean-forward="false" selection-start-line="40" selection-start-column="3" selection-end-line="40" selection-end-column="3" />
|
||||
<folding>
|
||||
@@ -485,24 +747,50 @@
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="0">
|
||||
<caret line="0" column="0" lean-forward="false" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" />
|
||||
=======
|
||||
<state relative-caret-position="918">
|
||||
<caret line="51" column="5" lean-forward="true" selection-start-line="51" selection-start-column="5" selection-end-line="51" selection-end-column="5" />
|
||||
>>>>>>> 23a3c3f3fde97ee499a7fbcbe16e1c28c3297e05
|
||||
<folding>
|
||||
<element signature="e#23#54#0" expanded="true" />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="jar://$MAVEN_REPOSITORY$/org/apache/spark/spark-catalyst_2.10/1.6.0/spark-catalyst_2.10-1.6.0-sources.jar!/org/apache/spark/sql/Row.scala">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="324">
|
||||
<caret line="38" column="34" lean-forward="false" selection-start-line="38" selection-start-column="34" selection-end-line="38" selection-end-column="34" />
|
||||
<folding>
|
||||
<element signature="n#!!doc" expanded="false" />
|
||||
<element signature="e#832#872#0" expanded="false" />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/src/main/scala/XMLParser.scala">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="270">
|
||||
<caret line="15" column="7" lean-forward="false" selection-start-line="15" selection-start-column="7" selection-end-line="15" selection-end-column="7" />
|
||||
<state relative-caret-position="0">
|
||||
<caret line="0" column="0" lean-forward="false" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" />
|
||||
<folding>
|
||||
<element signature="e#23#59#0" expanded="true" />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="jar://$MAVEN_REPOSITORY$/org/scala-lang/scala-library/2.10.5/scala-library-2.10.5.jar!/scala/collection/TraversableLike.class">
|
||||
<entry file="file://$PROJECT_DIR$/src/main/scala/Main.scala">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="180">
|
||||
<caret line="10" column="3" lean-forward="true" selection-start-line="10" selection-start-column="3" selection-end-line="10" selection-end-column="3" />
|
||||
<folding>
|
||||
<element signature="e#22#58#0" expanded="true" />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/src/main/scala/KMeans.scala">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<<<<<<< HEAD
|
||||
<state relative-caret-position="168">
|
||||
<caret line="15" column="6" lean-forward="false" selection-start-line="15" selection-start-column="6" selection-end-line="15" selection-end-column="6" />
|
||||
</state>
|
||||
@@ -513,11 +801,19 @@
|
||||
<state relative-caret-position="245">
|
||||
<caret line="27" column="27" lean-forward="true" selection-start-line="27" selection-start-column="27" selection-end-line="27" selection-end-column="27" />
|
||||
<folding />
|
||||
=======
|
||||
<state relative-caret-position="360">
|
||||
<caret line="20" column="31" lean-forward="true" selection-start-line="20" selection-start-column="31" selection-end-line="20" selection-end-column="31" />
|
||||
<folding>
|
||||
<element signature="e#23#54#0" expanded="true" />
|
||||
</folding>
|
||||
>>>>>>> 23a3c3f3fde97ee499a7fbcbe16e1c28c3297e05
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/src/main/scala/KMeans.scala">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<<<<<<< HEAD
|
||||
<state relative-caret-position="396">
|
||||
<caret line="40" column="3" lean-forward="true" selection-start-line="40" selection-start-column="3" selection-end-line="40" selection-end-column="3" />
|
||||
<folding>
|
||||
@@ -530,6 +826,10 @@
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="612">
|
||||
<caret line="52" column="3" lean-forward="true" selection-start-line="52" selection-start-column="3" selection-end-line="52" selection-end-column="3" />
|
||||
=======
|
||||
<state relative-caret-position="0">
|
||||
<caret line="0" column="0" lean-forward="false" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" />
|
||||
>>>>>>> 23a3c3f3fde97ee499a7fbcbe16e1c28c3297e05
|
||||
<folding>
|
||||
<element signature="e#23#59#0" expanded="true" />
|
||||
</folding>
|
||||
@@ -575,16 +875,124 @@
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/src/main/scala/Main.scala">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<<<<<<< HEAD
|
||||
<state relative-caret-position="245">
|
||||
<caret line="27" column="27" lean-forward="true" selection-start-line="27" selection-start-column="27" selection-end-line="27" selection-end-column="27" />
|
||||
=======
|
||||
<state relative-caret-position="108">
|
||||
<caret line="13" column="7" lean-forward="false" selection-start-line="13" selection-start-column="7" selection-end-line="13" selection-end-column="7" />
|
||||
<folding>
|
||||
<element signature="e#22#58#0" expanded="true" />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="jar://$MAVEN_REPOSITORY$/org/apache/spark/spark-sql_2.10/1.6.0/spark-sql_2.10-1.6.0.jar!/org/apache/spark/sql/DataFrame.class">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="36">
|
||||
<caret line="2" column="6" lean-forward="false" selection-start-line="2" selection-start-column="6" selection-end-line="2" selection-end-column="6" />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="jar://$MAVEN_REPOSITORY$/org/apache/spark/spark-catalyst_2.10/1.6.0/spark-catalyst_2.10-1.6.0.jar!/org/apache/spark/sql/Row.class">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="18">
|
||||
<caret line="1" column="6" lean-forward="false" selection-start-line="1" selection-start-column="6" selection-end-line="1" selection-end-column="6" />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="jar://$MAVEN_REPOSITORY$/org/apache/spark/spark-core_2.10/1.6.0/spark-core_2.10-1.6.0.jar!/org/apache/spark/rdd/PairRDDFunctions.class">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="162">
|
||||
<caret line="9" column="6" lean-forward="false" selection-start-line="9" selection-start-column="6" selection-end-line="9" selection-end-column="6" />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="jar://$MAVEN_REPOSITORY$/org/apache/spark/spark-core_2.10/1.6.0/spark-core_2.10-1.6.0-sources.jar!/org/apache/spark/rdd/PairRDDFunctions.scala">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="-2196">
|
||||
<caret line="57" column="19" lean-forward="false" selection-start-line="57" selection-start-column="19" selection-end-line="57" selection-end-column="19" />
|
||||
<folding>
|
||||
<element signature="n#!!doc" expanded="false" />
|
||||
<element signature="e#832#858#0" expanded="false" />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="jar://$MAVEN_REPOSITORY$/org/apache/spark/spark-catalyst_2.10/1.6.0/spark-catalyst_2.10-1.6.0-sources.jar!/org/apache/spark/sql/Row.scala">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="324">
|
||||
<caret line="38" column="34" lean-forward="false" selection-start-line="38" selection-start-column="34" selection-end-line="38" selection-end-column="34" />
|
||||
<folding>
|
||||
<element signature="n#!!doc" expanded="false" />
|
||||
<element signature="e#832#872#0" expanded="false" />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="jar://$MAVEN_REPOSITORY$/org/scala-lang/scala-library/2.10.5/scala-library-2.10.5.jar!/scala/package.class">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="262">
|
||||
<caret line="26" column="7" lean-forward="false" selection-start-line="26" selection-start-column="7" selection-end-line="26" selection-end-column="7" />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="jar://$MAVEN_REPOSITORY$/org/scala-lang/scala-library/2.10.5/scala-library-2.10.5-sources.jar!/scala/package.scala">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="468">
|
||||
<caret line="50" column="9" lean-forward="false" selection-start-line="50" selection-start-column="9" selection-end-line="50" selection-end-column="9" />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="jar://$MAVEN_REPOSITORY$/org/apache/spark/spark-sql_2.10/1.6.0/spark-sql_2.10-1.6.0-sources.jar!/org/apache/spark/sql/DataFrame.scala">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="1260">
|
||||
<caret line="112" column="3" lean-forward="false" selection-start-line="112" selection-start-column="3" selection-end-line="112" selection-end-column="3" />
|
||||
>>>>>>> 23a3c3f3fde97ee499a7fbcbe16e1c28c3297e05
|
||||
<folding />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="jar:///usr/lib/jvm/java-8-openjdk-amd64/jre/lib/rt.jar!/java/util/Date.class">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="262">
|
||||
<caret line="434" column="16" lean-forward="false" selection-start-line="434" selection-start-column="16" selection-end-line="434" selection-end-column="16" />
|
||||
<folding>
|
||||
<element signature="e#15761#15762#0" expanded="true" />
|
||||
<element signature="e#15802#15803#0" expanded="true" />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/src/main/scala/Main.scala">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="419">
|
||||
<caret line="39" column="28" lean-forward="true" selection-start-line="39" selection-start-column="28" selection-end-line="39" selection-end-column="28" />
|
||||
<folding>
|
||||
<element signature="e#22#58#0" expanded="true" />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/src/main/scala/XMLParser.scala">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="441">
|
||||
<caret line="135" column="0" lean-forward="true" selection-start-line="135" selection-start-column="0" selection-end-line="135" selection-end-column="0" />
|
||||
<folding>
|
||||
<element signature="e#23#59#0" expanded="true" />
|
||||
</folding>
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/src/main/scala/KMeans.scala">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<<<<<<< HEAD
|
||||
<state relative-caret-position="396">
|
||||
<caret line="40" column="3" lean-forward="false" selection-start-line="40" selection-start-column="3" selection-end-line="40" selection-end-column="3" />
|
||||
=======
|
||||
<state relative-caret-position="135">
|
||||
<caret line="36" column="86" lean-forward="true" selection-start-line="36" selection-start-column="86" selection-end-line="36" selection-end-column="86" />
|
||||
>>>>>>> 23a3c3f3fde97ee499a7fbcbe16e1c28c3297e05
|
||||
<folding>
|
||||
<element signature="e#23#54#0" expanded="true" />
|
||||
</folding>
|
||||
@@ -647,20 +1055,4 @@
|
||||
</provider>
|
||||
</entry>
|
||||
</component>
|
||||
<component name="masterDetails">
|
||||
<states>
|
||||
<state key="ProjectJDKs.UI">
|
||||
<settings>
|
||||
<last-edited>1.8 (1)</last-edited>
|
||||
<splitter-proportions>
|
||||
<option name="proportions">
|
||||
<list>
|
||||
<option value="0.2" />
|
||||
</list>
|
||||
</option>
|
||||
</splitter-proportions>
|
||||
</settings>
|
||||
</state>
|
||||
</states>
|
||||
</component>
|
||||
</project>
|
||||
+30
-14
@@ -2,6 +2,7 @@ package ClusterSOData
|
||||
|
||||
import org.apache.spark.rdd.RDD
|
||||
import org.apache.spark.sql._
|
||||
|
||||
import scala.collection.mutable.ArrayBuffer
|
||||
|
||||
object KMeans {
|
||||
@@ -10,10 +11,11 @@ object KMeans {
|
||||
*/
|
||||
//Create a map to store each data row with its closest cluster index as key
|
||||
|
||||
def train(dataset : DataFrame) : RDD[(Int,ArrayBuffer[Float])] = {
|
||||
def train(dataset : DataFrame) : RDD[(Int,List[Float])] = {
|
||||
val rows = dataset.rdd
|
||||
val rowsAsArray = dataset.map(row => List(row.getInt(0).toFloat, row.getInt(1).toFloat, row.getInt(2).toFloat) )
|
||||
val K = 5 //number of intended clusters
|
||||
val n = rows.count() //number of datapoints
|
||||
//val n = rows.count() //number of datapoints
|
||||
val m = 3 //number of features
|
||||
//var centres = new ArrayBuffer[Row]
|
||||
|
||||
@@ -24,23 +26,24 @@ object KMeans {
|
||||
for (a <- 0 until K) {
|
||||
centres(a) = rows(r.ne
|
||||
}*/
|
||||
val centres = rows.takeSample(false, K, System.nanoTime().toInt)
|
||||
val clusterMap :RDD[(Int,Row)]= rows.map(row => (assignCluster(row,centres,m,K),row))
|
||||
//val centres = rowsAsArray.takeSample(false, K, System.nanoTime().toInt)
|
||||
val centres : Array[List[Float]] = Array(List(0.0f, 0.0f, 0.0f), List(10.0f, 10.0f, 10.0f), List(20.0f, 20.0f, 20.0f))
|
||||
val clusterMap :RDD[(Int,List[Float])]= rowsAsArray.map(row => (assignCluster(row,centres,m,K),row))
|
||||
val newCentres = calculateNewCentres(clusterMap)
|
||||
newCentres
|
||||
|
||||
}
|
||||
|
||||
/**
  * Euclidean (L2) distance between a data point and a cluster centre.
  *
  * Only the first `m` coordinates of each vector take part. If either list
  * holds fewer than `m` values, the distance is computed over the
  * coordinates that both lists actually have.
  *
  * @param datapoint feature vector of the point being measured
  * @param centre    feature vector of the candidate centre
  * @param m         number of leading features to compare
  * @return the non-negative distance between the two vectors
  */
def calculateNorm(datapoint : List[Float], centre : List[Float], m: Int): Double = {
  // Walk both lists in lock-step instead of indexing: List(i) costs O(i),
  // and this can never touch index m (the old `0 to m` loop did).
  val sumOfSquares = datapoint.iterator
    .zip(centre.iterator)
    .take(m)
    .map { case (x, c) => Math.pow(x - c, 2.0) }
    .sum
  Math.sqrt(sumOfSquares)
}
|
||||
|
||||
def assignCluster(row : Row, centres: Array[Row], m : Int, K :Int): Int = {
|
||||
def assignCluster(row : List[Float], centres: Array[List[Float]], m : Int, K :Int): Int = {
|
||||
var smallestNorm = 99999999999.0
|
||||
var closestCentre = 0
|
||||
for (centreNumber <- 0 until K) {
|
||||
@@ -53,14 +56,17 @@ object KMeans {
|
||||
closestCentre
|
||||
}
|
||||
|
||||
/**
  * Compute the new centre of every cluster as the per-feature arithmetic
  * mean of the points assigned to it.
  *
  * The mean is built from a (sum, count) pair per cluster. Reducing with a
  * pairwise average instead would weight points unevenly and give results
  * that depend on the order in which Spark combines values, because
  * averaging two values at a time is not associative.
  *
  * @param clusterMap pairs of (cluster index, feature vector of one point)
  * @return one (cluster index, mean feature vector) pair per cluster that
  *         has at least one point
  */
def calculateNewCentres(clusterMap : RDD[(Int,List[Float])]): RDD[(Int,List[Float])] = {
  // Element-wise vector sum plus number of points; both parts are
  // associative and commutative, as reduceByKey requires.
  val sumsAndCounts = clusterMap
    .mapValues(point => (point, 1L))
    .reduceByKey { case ((sumA, countA), (sumB, countB)) =>
      (sumA.zip(sumB).map { case (x, y) => x + y }, countA + countB)
    }
  sumsAndCounts.mapValues { case (sum, count) => sum.map(_ / count) }
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*for (a <- 0 until K) {
|
||||
var cluster = clusterMap.filter{case (a,_) => a == 0}
|
||||
var data = cluster.map((_,a) => a :Row)*/
|
||||
@@ -73,12 +79,22 @@ object KMeans {
|
||||
return features
|
||||
}*/
|
||||
|
||||
def averageRow(a :ArrayBuffer[Float], b:ArrayBuffer[Float]) : ArrayBuffer[Float] = {
|
||||
val newRow = new ArrayBuffer[Float]()
|
||||
for (i <- 0 until a.length) {
|
||||
/*def averageRow(a :ArrayBuffer[Float], b:ArrayBuffer[Float]) : ArrayBuffer[Float] = {
|
||||
val newRow = Row.apply()
|
||||
for (i <- a.indices) {
|
||||
val avgI = (a(i) + b(i)) /2
|
||||
newRow(i) = avgI
|
||||
}
|
||||
newRow
|
||||
}*/
|
||||
|
||||
/**
  * Element-wise mean of two feature vectors.
  *
  * Position i of the result is (a(i) + b(i)) / 2. If the lists differ in
  * length, the result has the length of the shorter one.
  *
  * @param a first feature vector
  * @param b second feature vector
  * @return list of per-position means
  */
def averageRow(a:List[Float], b:List[Float]) : List[Float] =
  // Build the result directly: assigning by index into an empty ArrayBuffer
  // (as the previous version did) throws IndexOutOfBoundsException.
  a.zip(b).map { case (x, y) => (x + y) / 2.0f }
|
||||
|
||||
}
|
||||
|
||||
@@ -22,17 +22,23 @@ object Main {
|
||||
def main(args: Array[String]) {
|
||||
// Retrieve data from StackOverflow dataset XMLs. Format into DataFrames
|
||||
// for easy access to data elements.
|
||||
val dataFrames = DataParser.ParseData()
|
||||
|
||||
val df = XMLParser.ParseData()
|
||||
|
||||
// get the users XML file
|
||||
|
||||
val users = dataFrames("users")
|
||||
val users = df("users")
|
||||
val centres = KMeans.train(users)
|
||||
val centresArray = centres.collect()
|
||||
val unwrap = centresArray.map(x => x._2)
|
||||
unwrap.foreach(println)
|
||||
|
||||
/*val users = dataFrames("users")
|
||||
|
||||
/*val dataFrames = DataParser.ParseData()
|
||||
|
||||
// get the users XML file
|
||||
val users = dataFrames("users")
|
||||
users.persist()
|
||||
// Show 20 entries from the user dataset
|
||||
users.show()
|
||||
@@ -42,9 +48,10 @@ object Main {
|
||||
|
||||
// create new dataframe with only the reputation of the users
|
||||
users.select("CreationDate").show()
|
||||
|
||||
// Info on using DataFrames here: https://www.mapr.com/blog/using-apache-spark-dataframes-processing-tabular-data
|
||||
*/
|
||||
// Info on using DataFrames here: https://www.mapr.com/blog/using-apache-spark-dataframes-processing-tabular-data
|
||||
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,7 +13,7 @@ import org.apache.spark.sql.types._
|
||||
* Format and parse XML data to datasets, ready for further processing using
|
||||
* spark
|
||||
*/
|
||||
object DataParser {
|
||||
object XMLParser {
|
||||
|
||||
/*
|
||||
* Generate array of DataFrames from XML content
|
||||
@@ -23,14 +23,18 @@ object DataParser {
|
||||
// Define XML file locations and a string of attribute tags to retrieve
|
||||
// from each xml element.
|
||||
val xmlInfos = Array(
|
||||
("badges", "../stackoverflow_dataset/badges.txt", "Id UserId Name Date", Array[DataType](IntegerType, IntegerType, StringType, DateType)),
|
||||
("comments", "../stackoverflow_dataset/comments.txt", "Id PostId Score Text CreationDate UserId", Array[DataType](IntegerType, IntegerType, IntegerType, StringType, DateType, IntegerType)),
|
||||
("posts", "../stackoverflow_dataset/posts.txt", "Id PostTypeId ParentID AcceptedAnswerId CreationDate Score ViewCount Body OwnerUserId LastEditorUserId LastEditorDisplayName LastEditDate LastActivityDate CommunityOwnedDate ClosedDate Title Tags AnswerCount CommentCount FavoriteCount", Array[DataType](IntegerType, IntegerType, IntegerType, IntegerType, DateType, IntegerType, IntegerType, StringType, IntegerType, IntegerType, StringType, DateType, DateType, DateType, DateType, StringType, StringType, IntegerType, IntegerType, IntegerType)),
|
||||
("postHistory", "../stackoverflow_dataset/postHistory.txt","Id PostHistoryTypeId PostId RevisionGUID CreationDate UserId UserDisplayName Comment Text CloseReasonId", Array[DataType](IntegerType, IntegerType, IntegerType,IntegerType, DateType, IntegerType, StringType, StringType, StringType, IntegerType)),
|
||||
("postLinks", "../stackoverflow_dataset/postLinks.txt", "Id CreationDate PostId RelatedPostId PostLinkTypeId", Array[DataType](IntegerType, DateType, IntegerType, IntegerType, IntegerType)),
|
||||
("users", "../stackoverflow_dataset/users.txt", "Reputation CreationDate DisplayName EmailHash LastAccessDate WebsiteUrl Location Age AboutMe Views UpVotes DownVotes", Array[DataType](IntegerType, DateType, StringType, StringType, DateType, StringType, StringType, IntegerType, StringType, IntegerType, IntegerType, IntegerType)),
|
||||
("votes", "../stackoverflow_dataset/votes.txt", "Id PostId VoteTypeId UserId CreationDate", Array[DataType](IntegerType, IntegerType, IntegerType, IntegerType, DateType))
|
||||
)
|
||||
/*
|
||||
("badges", "/data/stackoverflow/Badges", "Id UserId Name Date", Array[DataType](IntegerType, IntegerType, StringType, DateType)),
|
||||
("comments", "/data/stackoverflow/Comments", "Id PostId Score Text CreationDate UserId", Array[DataType](IntegerType, IntegerType, IntegerType, StringType, DateType, IntegerType)),
|
||||
("posts", "data/stackoverflow/Posts", "Id PostTypeId ParentID AcceptedAnswerId CreationDate Score ViewCount Body OwnerUserId LastEditorUserId LastEditorDisplayName LastEditDate LastActivityDate CommunityOwnedDate ClosedDate Title Tags AnswerCount CommentCount FavoriteCount", Array[DataType](IntegerType, IntegerType, IntegerType, IntegerType, DateType, IntegerType, IntegerType, StringType, IntegerType, IntegerType, StringType, DateType, DateType, DateType, DateType, StringType, StringType, IntegerType, IntegerType, IntegerType)),
|
||||
("postHistory", "/data/stackoverflow/PostHistory","Id PostHistoryTypeId PostId RevisionGUID CreationDate UserId UserDisplayName Comment Text CloseReasonId", Array[DataType](IntegerType, IntegerType, IntegerType,IntegerType, DateType, IntegerType, StringType, StringType, StringType, IntegerType)),
|
||||
("postLinks", "data/stackoverflow/PostLinks", "Id CreationDate PostId RelatedPostId PostLinkTypeId", Array[DataType](IntegerType, DateType, IntegerType, IntegerType, IntegerType)),
|
||||
*/
|
||||
("users", "stackoverflow_dataset/users.txt", "Reputation CreationDate DisplayName EmailHash LastAccessDate WebsiteUrl Location Age AboutMe Views UpVotes DownVotes", Array[DataType](IntegerType, DateType, StringType, StringType, DateType, StringType, StringType, IntegerType, StringType, IntegerType, IntegerType, IntegerType))
|
||||
/*
|
||||
("votes", "/data/stackoverflow/Votes", "Id PostId VoteTypeId UserId CreationDate", Array[DataType](IntegerType, IntegerType, IntegerType, IntegerType, DateType))
|
||||
*/
|
||||
)
|
||||
|
||||
// Store each file's DataFrame in an array of DataFrames.
|
||||
val parsedData = xmlInfos.map(x => (x._1, ParseXMLInfo((x._2, x._3, x._4)))).toMap
|
||||
@@ -98,18 +102,12 @@ object DataParser {
|
||||
|
||||
/*
 * Convert one line of XML into a Row.
 *
 * line:         a single XML element, as text
 * schemaString: space-separated attribute names to read from the element
 * schemaType:   the DataType for each name in schemaString, in the same order
 *
 * Each attribute is fetched with getXMLAttribute and converted with
 * castToDType. Nothing is caught here, so a failure while parsing or
 * casting propagates to the caller.
 */
private def ParsingFunc(line: String, schemaString: String, schemaType: Array[DataType]) : Row = {
  // Parse line of XML using Scala's built in XML library
  val xmlLine = scala.xml.XML.loadString(line)
  // Pair every attribute name with the type it should be cast to
  val schemaPairs = schemaString.split(" ") zip schemaType
  // Create array of values with element for each attribute in schemaString
  val lineData = schemaPairs.map { case (fieldName: String, dType: DataType) => castToDType(getXMLAttribute(xmlLine, fieldName), dType) }

  Row.fromSeq(lineData)
}
|
||||
|
||||
/*
|
||||
@@ -129,7 +127,10 @@ object DataParser {
|
||||
case DateType =>
|
||||
// If the string is a date, convert from date string to long.
|
||||
var format = new java.text.SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS")
|
||||
return format.parse(attribute).getTime()
|
||||
var longTime = format.parse(attribute).getTime()
|
||||
// Then convert long to int representing days since epoch
|
||||
var longDays : Long = longTime / (1000*60*60*24)
|
||||
return longDays.toInt
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user