one feature, one cluster

This commit is contained in:
joedarby
2016-12-16 13:15:23 +00:00
parent a9231d4329
commit ab921cb298
2 changed files with 18 additions and 12 deletions
+15 -9
View File
@@ -11,12 +11,12 @@ object KMeans {
*/
//Create a map to store each data row with its closest cluster index as key
def train(dataset : DataFrame) : RDD[(Int,List[Float])] = {
def train(dataset : DataFrame) : Unit = {
val relevantData = dataset.select("Reputation")
val rows = relevantData.rdd
//val rowsAsArray = rows.map(row => List(row.getInt(0).toFloat, row.getInt(1).toFloat, row.getInt(2).toFloat) )
val rowsAsArray = rows.map(row => List(row.getInt(0).toFloat) )
val K = 5 //number of intended clusters
val rowsAsArray = rows.map(row => row.getInt(0).toFloat )
val K = 1 //number of intended clusters
//val n = rows.count() //number of datapoints
val m = 1 //number of features
//var centres = new ArrayBuffer[Row]
@@ -30,13 +30,15 @@ object KMeans {
}*/
//val centres = rowsAsArray.takeSample(false, K, System.nanoTime().toInt)
//val centres : Array[List[Float]] = Array(List(0.0f, 0.0f, 0.0f), List(10.0f, 10.0f, 10.0f), List(20.0f, 20.0f, 20.0f))
val centres : Array[List[Float]] = Array(List(0.0f), List(0.0f), List(0.0f), List(0.0f), List(0.0f))
val clusterMap :RDD[(Int,List[Float])]= rowsAsArray.map(row => (assignCluster(row,centres,m,K),row))
val newCentres = calculateNewCentres(clusterMap)
newCentres
//val centres : Array[List[Float]] = Array(List(0.0f), List(0.0f), List(0.0f), List(0.0f), List(0.0f))
val centre = 0.0f
//val clusterMap :RDD[(Int,List[Float])]= rowsAsArray.map(row => (assignCluster(row,centres,m,K),row))
//val newCentres = calculateNewCentres(clusterMap)
val newCentre = rowsAsArray.reduce((a,b) => getAverage(a,b))
println(newCentre)
}
/*
def calculateNorm(datapoint : List[Float], centre : List[Float], m: Int): Double = {
var norm : Double = 0.0
for (a <- 0 until m) {
@@ -65,7 +67,7 @@ object KMeans {
//val singleCluster = clusterMap.filter(x => x._1 == 0)
//val singleClusterAsArray = singleCluster.reduce()
newCentres
}
}*/
@@ -100,4 +102,8 @@ object KMeans {
return means.toList
}
/** Returns the midpoint of two values, computed as `(a + b) / 2`.
  *
  * Note: this operation is commutative but NOT associative:
  * `getAverage(getAverage(x, y), z)` weights `z` twice as heavily as `x` and `y`.
  * Folding a collection with it (for example through `reduce`) therefore does
  * not produce the arithmetic mean of all elements; use sum / count for that.
  *
  * @param a first value
  * @param b second value
  * @return the arithmetic mean of `a` and `b`
  */
def getAverage(a: Float, b: Float): Float = (a + b) / 2
}
+3 -3
View File
@@ -30,9 +30,9 @@ object Main {
val users = df("users")
val centres = KMeans.train(users)
val centresArray = centres.collect()
val unwrap = centresArray.map(x => x._2)
unwrap.foreach(println)
//val centresArray = centres.collect()
//val unwrap = centresArray.map(x => x._2)
//unwrap.foreach(println)
}
}