diff --git a/fast_kmeans/Makefile b/fast_kmeans/Makefile index f648746..b44fa33 100644 --- a/fast_kmeans/Makefile +++ b/fast_kmeans/Makefile @@ -7,13 +7,15 @@ CPPFLAGS += -g CPPFLAGS += -O3 CPPFLAGS += -Wno-long-long +CPPFLAGS += -std=c++0x + # Verify algorithm correctness while debugging #CPPFLAGS += -DDEBUG #CPPFLAGS += -DVERIFY_ASSIGNMENTS # To use pthreads, uncomment both lines below. #CPPFLAGS += -DUSE_THREADS -#LDFLAGS += -lpthread +#CPPFLAGS += -lpthread # Monitor internal algorithm effectiveness #CPPFLAGS += -DCOUNT_DISTANCES diff --git a/fast_kmeans/annulus_kmeans_modified.cpp b/fast_kmeans/annulus_kmeans_modified.cpp new file mode 100644 index 0000000..9abf73e --- /dev/null +++ b/fast_kmeans/annulus_kmeans_modified.cpp @@ -0,0 +1,136 @@ +/* Authors: Greg Hamerly and Jonathan Drake and Petr Ryšavý + * Feedback: hamerly@cs.baylor.edu + * See: http://cs.baylor.edu/~hamerly/software/kmeans.php + * Copyright 2015 + */ + +#include "annulus_kmeans_modified.h" +#include "general_functions.h" +#include +#include +#include + +void AnnulusKmeansModified::free() { + HamerlyKmeansModified::free(); + + delete [] guard; + delete [] xNorm; + delete [] cOrder; + guard = NULL; + xNorm = NULL; + cOrder = NULL; +} + +int AnnulusKmeansModified::runThread(int threadId, int maxIterations) { + int iterations = 0; + + int startNdx = start(threadId); + int endNdx = end(threadId); + + // here we need to calculate s & the centroid-centroid distances before the first iteration + // the remaining calls to this method are hidden by move_centers + update_s(threadId); + synchronizeAllThreads(); + + while ((iterations < maxIterations) && !converged) { + ++iterations; + + if (threadId == 0) { + sort_means_by_norm(); + } + synchronizeAllThreads(); + + for (int i = startNdx; i < endNdx; ++i) { + unsigned short closest = assignment[i]; + + double upper_comparison_bound = std::max(s[closest], lower[i]); + + if (upper[i] <= upper_comparison_bound) { + continue; + } + + double u2 = pointCenterDist2(i, closest); + upper[i] = sqrt(u2); + + if (upper[i] <= upper_comparison_bound) { + continue; + } + + double l2 = pointCenterDist2(i, guard[i]); + lower[i] = sqrt(l2); + + double beta = std::max(lower[i], upper[i]); + + std::pair* begin = std::lower_bound(cOrder, cOrder + k, std::make_pair(xNorm[i] - beta, k)); + std::pair* end = std::lower_bound(begin, cOrder + k, std::make_pair(xNorm[i] + beta, k)); + + for (std::pair* jp = begin; jp != end; ++jp) { + if (jp->second == closest) continue; + + double dist2 = pointCenterDist2(i, jp->second); + if (dist2 <= u2) { + if (dist2 == u2) { + if (jp->second < closest) closest = jp->second; + } else { + l2 = u2; + u2 = dist2; + guard[i] = closest; + closest = jp->second; + } + } else if (dist2 < l2) { + l2 = dist2; + guard[i] = jp->second; + } + } + + lower[i] = sqrt(l2); + + if (assignment[i] != closest) { + upper[i] = sqrt(u2); + changeAssignment(i, closest, threadId); + } + } + + verifyAssignment(iterations, startNdx, endNdx); + + synchronizeAllThreads(); + move_centers(threadId); + + synchronizeAllThreads(); + if (!converged) { + update_bounds(startNdx, endNdx); + } + + synchronizeAllThreads(); + } + + return iterations; +} + +void AnnulusKmeansModified::initialize(Dataset const *aX, unsigned short aK, unsigned short *initialAssignment, int aNumThreads) { + HamerlyKmeansModified::initialize(aX, aK, initialAssignment, aNumThreads); + + guard = new unsigned short[n]; + xNorm = new double[n]; + cOrder = new std::pair[k]; + + for (int i = 0; i < k; ++i) { + cOrder[i].first = 0.0; + cOrder[i].second = i; + } + + std::fill(guard, guard + n, 1); + for (int i = 0; i < n; ++i) { + xNorm[i] = sqrt(pointPointInnerProduct(i, i)); + } +} + +void AnnulusKmeansModified::sort_means_by_norm() { + for (int c1 = 0; c1 < k; ++c1) { + cOrder[c1].first = sqrt(centerCenterInnerProduct(c1, c1)); + cOrder[c1].second = c1; + } + std::sort(cOrder, cOrder + k); +} + + diff --git a/fast_kmeans/annulus_kmeans_modified.h b/fast_kmeans/annulus_kmeans_modified.h new file mode 100644 index 0000000..3fb62da --- /dev/null +++ b/fast_kmeans/annulus_kmeans_modified.h @@ -0,0 +1,39 @@ +#ifndef ANNULUS_KMEANS_MODIFIED_H +#define ANNULUS_KMEANS_MODIFIED_H + +/* Authors: Greg Hamerly and Jonathan Drake and Petr Ryšavý + * Feedback: hamerly@cs.baylor.edu + * See: http://cs.baylor.edu/~hamerly/software/kmeans.php + * Copyright 2015 + * + * This version of Annulus algorithm implements the tighter update. Most of the + * code is copied from the default version of Annulus algorithm, only the parent + * class is HamerlyKmeansModified. + */ + +#include "hamerly_kmeans_modified.h" +#include + +class AnnulusKmeansModified; + +class AnnulusKmeansModified : public HamerlyKmeansModified { + public: + AnnulusKmeansModified() : xNorm(NULL), cOrder(NULL), guard(NULL) {} + virtual ~AnnulusKmeansModified() { free(); } + virtual void free(); + virtual void initialize(Dataset const *aX, unsigned short aK, unsigned short *initialAssignment, int aNumThreads); + virtual std::string getName() const { return "annulusmodified"; } + + protected: + virtual int runThread(int threadId, int maxIterations); + void sort_means_by_norm(); + + double *xNorm; + + std::pair *cOrder; + + unsigned short *guard; +}; + +#endif + diff --git a/fast_kmeans/dataset.h b/fast_kmeans/dataset.h index f7a16a7..70e1d97 100644 --- a/fast_kmeans/dataset.h +++ b/fast_kmeans/dataset.h @@ -37,10 +37,9 @@ class Dataset { // destroys the dataset safely ~Dataset() { n = d = nd = 0; - double *dp = data, *sdsp = sumDataSquared; + delete [] data; + delete [] sumDataSquared; data = sumDataSquared = NULL; - delete [] dp; - delete [] sdsp; } // operator= is the standard deep-copy assignment operator, which diff --git a/fast_kmeans/driver.cpp b/fast_kmeans/driver.cpp index 6b321c4..16fb5f8 100644 --- a/fast_kmeans/driver.cpp +++ b/fast_kmeans/driver.cpp @@ -38,16 +38,28 @@ #include "dataset.h" #include "general_functions.h" +#include "stats_functions.h" #include "hamerly_kmeans.h" +#include "hamerly_kmeans_modified.h" +#include "hamerly_kmeans_neighbors.h" +#include "hamerly_kmeans_neighbors1st.h" +#include "hamerly_kmeans_neighbors_only.h" #include "annulus_kmeans.h" +#include "annulus_kmeans_modified.h" #include "drake_kmeans.h" #include "naive_kmeans.h" #include "elkan_kmeans.h" #include "compare_kmeans.h" #include "sort_kmeans.h" #include "heap_kmeans.h" +#include "heap_kmeans_modified.h" +#include "heap_kmeans_ubarr.h" +#include "heap_kmeans_ubarr_neighbors.h" #include "naive_kernel_kmeans.h" #include "elkan_kernel_kmeans.h" +#include "elkan_kmeans_modified.h" +#include "elkan_kmeans_neighbors.h" +#include "elkan_kmeans_neighbors_rel.h" #include #include #include @@ -59,7 +71,7 @@ #include #include -void execute(std::string command, Kmeans *algorithm, Dataset const *x, unsigned short k, unsigned short const *assignment, +eval_result* execute(std::string command, Kmeans *algorithm, Dataset const *x, unsigned short k, unsigned short const *assignment, int xcNdx, int numThreads, int maxIterations, std::vector *numItersHistory #ifdef MONITOR_ACCURACY @@ -84,21 +96,10 @@ int main(int argc, char **argv) { int numThreads = 1; int maxIterations = std::numeric_limits::max(); + int repeats = 1; // Print header row - std::cout << std::setw(35) << "algorithm" << "\t" - << std::setw(5) << "iters" << "\t" - << std::setw(10) << "numThreads" << "\t" - << std::setw(10) << "cpu_secs" << "\t" - << std::setw(10) << "wall_secs" << "\t" - << std::setw(8) << "MB" - #ifdef MONITOR_ACCURACY - << "\t" << std::setw(8) << "sse" - #endif - #ifdef COUNT_DISTANCES - << "\t" << std::setw(11) << "#distances" - #endif - << std::endl; + print_header_row(); // Read the command file for (std::string command; std::cin >> command; ) { @@ -115,6 +116,8 @@ int main(int argc, char **argv) { if (maxIterations < 0) { maxIterations = std::numeric_limits::max(); } + } else if (command == "repeats") { + std::cin >> repeats; } else if (command == "dataset" || command == "data") { xcNdx++; @@ -161,7 +164,7 @@ int main(int argc, char **argv) { if (method == "kpp" || method == "kmeansplusplus") { // Perform k-means++ initialization std::cout << "initializing with kmeans++: k = " << k << std::endl; - c = init_centers_kmeanspp_v2(*x, k); + c = init_centers_kmeanspp(*x, k); } else if (method == "random") { // Perform random initialization std::cout << "initializing with random: k = " << k << std::endl; @@ -184,10 +187,26 @@ int main(int argc, char **argv) { algorithm = new NaiveKmeans(); } else if (command == "hamerly") { algorithm = new HamerlyKmeans(); + } else if (command == "hamerlymodified") { + algorithm = new HamerlyKmeansModified(); + } else if (command == "hamerlyneighbors") { + algorithm = new HamerlyKmeansNeighbors(); + } else if (command == "hamerlyneighbors1st") { + algorithm = new HamerlyKmeansNeighbors1st(); + } else if (command == "hamerlyneighborsonly") { + algorithm = new HamerlyKmeansNeighborsOnly(); } else if (command == "annulus" || command == "norm") { algorithm = new AnnulusKmeans(); + } else if (command == "annulusmodified") { + algorithm = new AnnulusKmeansModified(); } else if (command == "elkan") { algorithm = new ElkanKmeans(); + } else if (command == "elkanmodified") { + algorithm = new ElkanKmeansModified(); + } else if (command == "elkanneighbors") { + algorithm = new ElkanKmeansNeighbors(); + } else if (command == "elkanneighborsrel") { + algorithm = new ElkanKmeansNeighborsRel(); } else if (command == "drake") { // Read the number of bounds int b; @@ -215,6 +234,12 @@ int main(int argc, char **argv) { algorithm = new SortKmeans(); } else if (command == "heap") { algorithm = new HeapKmeans(); + } else if (command == "heapmodified") { + algorithm = new HeapKmeansModified(); + } else if (command == "heapubarr") { + algorithm = new HeapKmeansUBarr(); + } else if (command == "heapubarrneighbors") { + algorithm = new HeapKmeansUBarrNeighbors(); } else if (command == "kernel" || command == "elkan_kernel") { std::string kernelType; std::cin >> kernelType; @@ -252,11 +277,33 @@ int main(int argc, char **argv) { } if (algorithm) { - execute(command, algorithm, x, k, assignment, xcNdx, numThreads, maxIterations, &numItersHistory - #ifdef MONITOR_ACCURACY - , &sseHistory - #endif - ); + if (repeats <= 1) { + execute(command, algorithm, x, k, assignment, xcNdx, numThreads, maxIterations, &numItersHistory + #ifdef MONITOR_ACCURACY + , &sseHistory + #endif + ); + } else { + std::vector results; + bool valid = true; + for (int i = 0; i < repeats; ++i) { + eval_result* result = execute(command, algorithm, x, k, assignment, xcNdx, numThreads, maxIterations, &numItersHistory + #ifdef MONITOR_ACCURACY + , &sseHistory + #endif + ); + if (result == NULL) { + valid = false; + break; + } + results.push_back(result); + } + if(valid) { + print_summary(&results); + } + for(unsigned i = 0; i < results.size(); ++i) + delete results.at(i); + } delete algorithm; algorithm = NULL; } @@ -316,11 +363,11 @@ double elapsed_time(rusage *start) { return (double)diff.tv_sec + (double)diff.tv_usec / 1e6; } -void execute(std::string command, Kmeans *algorithm, Dataset const *x, unsigned short k, unsigned short const *assignment, - int xcNdx, - int numThreads, - int maxIterations, - std::vector *numItersHistory +eval_result* execute(std::string command, Kmeans *algorithm, Dataset const *x, unsigned short k, unsigned short const *assignment, + int xcNdx, + int numThreads, + int maxIterations, + std::vector *numItersHistory #ifdef MONITOR_ACCURACY , std::vector *sseHistory #endif @@ -328,11 +375,11 @@ void execute(std::string command, Kmeans *algorithm, Dataset const *x, unsigned // Check for missing initialization if (assignment == NULL) { std::cerr << "initialize centers first!" << std::endl; - return; + return NULL; } if (x == NULL) { std::cerr << "load a dataset first!\n" << std::endl; - return; + return NULL; } #ifdef COUNT_DISTANCES @@ -350,30 +397,31 @@ void execute(std::string command, Kmeans *algorithm, Dataset const *x, unsigned unsigned short *workingAssignment = new unsigned short[x->n]; std::copy(assignment, assignment + x->n, workingAssignment); + // here we will store the result of the run + eval_result* run_result = new eval_result(); + run_result->num_threads = numThreads; + // Time the execution and get the number of iterations rusage start_clustering_time = get_time(); double start_clustering_wall_time = get_wall_time(); algorithm->initialize(x, k, workingAssignment, numThreads); - int iterations = algorithm->run(maxIterations); + run_result->iterations = algorithm->run(maxIterations); #ifdef COUNT_DISTANCES - long long numDistances = algorithm->numDistances; + run_result->num_distances = algorithm->numDistances; + run_result->inner_products = algorithm->numInnerProducts; + run_result->assignment_changes = algorithm->assignmentChanges; + run_result->bounds_updates = algorithm->boundsUpdates; #endif - double cluster_time = elapsed_time(&start_clustering_time); - double cluster_wall_time = get_wall_time() - start_clustering_wall_time; - - // Report results - std::cout << std::setw(5) << iterations << "\t"; - std::cout << std::setw(10) << numThreads << "\t"; - std::cout << std::setw(10) << cluster_time << "\t"; - std::cout << std::setw(10) << cluster_wall_time << "\t"; - std::cout << std::setw(8) << (getMemoryUsage() / 1024.0); // from kilo to mega + run_result->cluster_time = elapsed_time(&start_clustering_time); + run_result->cluster_wall_time = get_wall_time() - start_clustering_wall_time; + run_result->memory_usage = getMemoryUsage() / 1024.0; + #ifdef MONITOR_ACCURACY { - double sse = algorithm->getSSE(); - std::cout << "\t" << std::setw(11) << sse; + run_result->sse = algorithm->getSSE(); // verification that we get the same distortion with different algorithms - while (sseHistory->size() <= (size_t)xcNdx) { + while (sseHistory->size() <= (size_t) xcNdx) { sseHistory->push_back(sse); } if (sse != sseHistory->back()) { @@ -381,22 +429,25 @@ void execute(std::string command, Kmeans *algorithm, Dataset const *x, unsigned } } #endif - #ifdef COUNT_DISTANCES - { - std::cout << "\t" << std::setw(11) << numDistances; - } - #endif + + // output the result + print_result(run_result); // verification that we get the same number of iterations with different algorithms - while (numItersHistory->size() <= (size_t)xcNdx) { - numItersHistory->push_back(iterations); + while (numItersHistory->size() <= (size_t) xcNdx) { + numItersHistory->push_back(run_result->iterations); } - if (iterations != numItersHistory->back()) { - std::cerr << "ERROR: iterations = " << iterations << " but last iterations was " << numItersHistory->back() << std::endl; + if (run_result->iterations != numItersHistory->back()) { + std::cerr << "ERROR: iterations = " << run_result->iterations << " but last iterations was " << numItersHistory->back() << std::endl; } std::cout << std::endl; + #ifdef ITERATION_STATS + print_iteration_stats(&(algorithm->stats)); + #endif + delete [] workingAssignment; + return run_result; } diff --git a/fast_kmeans/elkan_kmeans_modified.cpp b/fast_kmeans/elkan_kmeans_modified.cpp new file mode 100644 index 0000000..7e5392e --- /dev/null +++ b/fast_kmeans/elkan_kmeans_modified.cpp @@ -0,0 +1,122 @@ +/* Authors: Greg Hamerly and Jonathan Drake and Petr Ryšavý + * Feedback: hamerly@cs.baylor.edu + * See: http://cs.baylor.edu/~hamerly/software/kmeans.php + * Copyright 2015 + */ + +#include "elkan_kmeans_modified.h" +#include "general_functions.h" +#include + +void ElkanKmeansModified::calculate_lower_bound_update(int threadId) { + // big C is the point for that we calculate the update + for (int C = 0; C < k; ++C) + #ifdef USE_THREADS + if (C % numThreads == threadId) + #endif + // and small c is the other point that moved + for (int c = 0; c < k; ++c) + // ignore the diagonal of the matrix, the bound is not used + if (c != C) { + // calculate the update and store it on the place + double update = 0.0; + if (centerMovement[c] != 0.0) + update = calculate_update(C, c, true); + lowerBoundUpdate[C * k + c] = update; + } +} + +void ElkanKmeansModified::initialize(Dataset const *aX, unsigned short aK, unsigned short *initialAssignment, int aNumThreads) { + numLowerBounds = aK; + ModifiedUpdateTriangleBasedKmeans::initialize(aX, aK, initialAssignment, aNumThreads); + + std::fill(lowerBoundUpdate, lowerBoundUpdate + k * k, 0.0); +} + +int ElkanKmeansModified::runThread(int threadId, int maxIterations) { + int iterations = 0; + + int startNdx = start(threadId); + int endNdx = end(threadId); + + // here we need to calculate s & the centroid-centroid distances before the first iteration + // the remaining calls to this method are hidden by move_centers + update_s(threadId); + + while ((iterations < maxIterations) && !converged) { + ++iterations; + + synchronizeAllThreads(); + + for (int i = startNdx; i < endNdx; ++i) { + unsigned short closest = assignment[i]; + bool r = true; + + if (upper[i] <= s[closest]) { + continue; + } + + for (int j = 0; j < k; ++j) { + if (j == closest) { + continue; + } + if (upper[i] <= lower[i * k + j]) { + continue; + } + if (upper[i] <= centerCenterDistDiv2[closest * k + j]) { + continue; + } + + // ELKAN 3(a) + if (r) { + upper[i] = sqrt(pointCenterDist2(i, closest)); + lower[i * k + closest] = upper[i]; + r = false; + if ((upper[i] <= lower[i * k + j]) || (upper[i] <= centerCenterDistDiv2[closest * k + j])) { + continue; + } + } + + // ELKAN 3(b) + lower[i * k + j] = sqrt(pointCenterDist2(i, j)); + if (lower[i * k + j] < upper[i]) { + closest = j; + upper[i] = lower[i * k + j]; + } + } + if (assignment[i] != closest) { + changeAssignment(i, closest, threadId); + } + } + + verifyAssignment(iterations, startNdx, endNdx); + + // ELKAN 4, 5, AND 6 + synchronizeAllThreads(); + move_centers(threadId); + + synchronizeAllThreads(); + if (!converged) { + update_bounds(startNdx, endNdx); + } + synchronizeAllThreads(); + } + + return iterations; +} + +void ElkanKmeansModified::update_bounds(int startNdx, int endNdx) { + #ifdef COUNT_DISTANCES + for (int i = 0; i < k; ++i) + for (int j = 0; j < numLowerBounds; ++j) { + boundsUpdates += ((double) clusterSize[0][i]) * (lowerBoundUpdate[i * numLowerBounds + j]); + } + #endif + for (int i = startNdx; i < endNdx; ++i) { + upper[i] += centerMovement[assignment[i]]; + for (int j = 0; j < k; ++j) { + // each lower bound is updated by specified value, not by the center movement + lower[i * numLowerBounds + j] -= lowerBoundUpdate[assignment[i] * numLowerBounds + j]; + } + } +} \ No newline at end of file diff --git a/fast_kmeans/elkan_kmeans_modified.h b/fast_kmeans/elkan_kmeans_modified.h new file mode 100644 index 0000000..426ed05 --- /dev/null +++ b/fast_kmeans/elkan_kmeans_modified.h @@ -0,0 +1,45 @@ +#ifndef ELKAN_KMEANS_MODIFIED_H +#define ELKAN_KMEANS_MODIFIED_H + +/* Authors: Greg Hamerly, Jonathan Drake and Petr Ryšavý + * Feedback: hamerly@cs.baylor.edu + * See: http://cs.baylor.edu/~hamerly/software/kmeans.php + * Copyright 2015 + * + * Elkan's k-means algorithm that uses k lower bounds per point to prune + * distance calculations. + * + * This adds to the Elkan's algorithm tighter lower bound udpate. + */ + +#include "modified_update_triangle_based_kmeans.h" + +class ElkanKmeansModified : public ModifiedUpdateTriangleBasedKmeans { +public: + + ElkanKmeansModified() {}; + + virtual ~ElkanKmeansModified() { + free(); + } + + virtual void initialize(Dataset const *aX, unsigned short aK, unsigned short *initialAssignment, int aNumThreads); + + virtual std::string getName() const { + return "elkanmodified"; + } + +protected: + + // override this as we are calculating each bound explicitly + // this is different from the default implemntation + virtual void calculate_lower_bound_update(int threadId); + + virtual int runThread(int threadId, int maxIterations); + + // Update the upper and lower bounds for the range of points given. + void update_bounds(int startNdx, int endNdx); + +}; + +#endif diff --git a/fast_kmeans/elkan_kmeans_neighbors.cpp b/fast_kmeans/elkan_kmeans_neighbors.cpp new file mode 100644 index 0000000..0dfa297 --- /dev/null +++ b/fast_kmeans/elkan_kmeans_neighbors.cpp @@ -0,0 +1,105 @@ +/* Authors: Greg Hamerly and Jonathan Drake and Petr Ryšavý + * Feedback: hamerly@cs.baylor.edu + * See: http://cs.baylor.edu/~hamerly/software/kmeans.php + * Copyright 2015 + */ + +#include "elkan_kmeans_neighbors.h" +#include "general_functions.h" +#include + +int ElkanKmeansNeighbors::runThread(int threadId, int maxIterations) { + int iterations = 0; + + int startNdx = start(threadId); + int endNdx = end(threadId); + + // here we need to calculate s & the centroid-centroid distances before the first iteration + // the remaining calls to this method are hidden by move_centers + update_s(threadId); + + while ((iterations < maxIterations) && !converged) { + ++iterations; + + // now we need to filter neighbors so that we remain only with those + // that fulfil the stronger condition that is used for elkan_kmeans + if (iterations != 1) + filter_neighbors(threadId); + + synchronizeAllThreads(); + + for (int i = startNdx; i < endNdx; ++i) { + unsigned short closest = assignment[i]; + bool r = true; + + if (upper[i] <= s[closest]) { + continue; + } + + // iterate only over centroids that can be closest to some x in the dataset + for (int* ptr = neighbours[closest]; (*ptr) != -1; ++ptr) { + // now j has the same meaning as before + const int j = (*ptr); + if (upper[i] <= lower[i * k + j]) { + continue; + } + if (upper[i] <= centerCenterDistDiv2[closest * k + j]) { + continue; + } + + // ELKAN 3(a) + if (r) { + upper[i] = sqrt(pointCenterDist2(i, closest)); + lower[i * k + closest] = upper[i]; + r = false; + if ((upper[i] <= lower[i * k + j]) || (upper[i] <= centerCenterDistDiv2[closest * k + j])) { + continue; + } + } + + // ELKAN 3(b) + lower[i * k + j] = sqrt(pointCenterDist2(i, j)); + if (lower[i * k + j] < upper[i]) { + closest = j; + upper[i] = lower[i * k + j]; + } + } + if (assignment[i] != closest) { + changeAssignment(i, closest, threadId); + } + } + + verifyAssignment(iterations, startNdx, endNdx); + + // ELKAN 4, 5, AND 6 + synchronizeAllThreads(); + move_centers(threadId); + + synchronizeAllThreads(); + if (!converged) { + update_bounds(startNdx, endNdx); + } + synchronizeAllThreads(); + } + + return iterations; +} + +void ElkanKmeansNeighbors::filter_neighbors(int threadId) { + for (int c = 0; c < k; ++c) + #ifdef USE_THREADS + if (c % numThreads == threadId) + #endif + { + // use the stronger condition without s[c] on the left + double boundOnOtherDistance = maxUpperBound[c] + centerMovement[c]; + int neighboursPos = 0; + // fill the array in the similar manner as we did before + for (int C = 0; C < k; ++C) { + // select only centroids that fulfil the stronger condition + if (c != C && boundOnOtherDistance > centerCenterDistDiv2[c * k + C]) + neighbours[c][neighboursPos++] = C; + } + neighbours[c][neighboursPos] = -1; + } +} \ No newline at end of file diff --git a/fast_kmeans/elkan_kmeans_neighbors.h b/fast_kmeans/elkan_kmeans_neighbors.h new file mode 100644 index 0000000..6bd1c7a --- /dev/null +++ b/fast_kmeans/elkan_kmeans_neighbors.h @@ -0,0 +1,41 @@ +#ifndef ELKAN_KMEANS_NEIGHBORS_H +#define ELKAN_KMEANS_NEIGHBORS_H + +/* Authors: Greg Hamerly, Jonathan Drake and Petr Ryšavý + * Feedback: hamerly@cs.baylor.edu + * See: http://cs.baylor.edu/~hamerly/software/kmeans.php + * Copyright 2015 + * + * Elkan's k-means algorithm that uses k lower bounds per point to prune + * distance calculations. This adds to the Elkan's algorithm tighter + * lower bound udpate and iteration over neighbors. Note that for Elkan's + * algorithm this condition is more strong than for other algorithms. + */ + +#include "elkan_kmeans_modified.h" + +class ElkanKmeansNeighbors : public ElkanKmeansModified { +public: + + ElkanKmeansNeighbors() {}; + + virtual ~ElkanKmeansNeighbors() { free(); } + + virtual std::string getName() const { + return "elkanneighbors"; + } + +protected: + + /* + * This method filter neighbors so that they contain only points + * that fulfil the stronger condition for elkan kmeans, i.e. + * m(cj) > 1/2 \| ci - cj \|. + */ + void filter_neighbors(int threadId); + + virtual int runThread(int threadId, int maxIterations); + +}; + +#endif diff --git a/fast_kmeans/elkan_kmeans_neighbors_rel.cpp b/fast_kmeans/elkan_kmeans_neighbors_rel.cpp new file mode 100644 index 0000000..c0e1a80 --- /dev/null +++ b/fast_kmeans/elkan_kmeans_neighbors_rel.cpp @@ -0,0 +1,138 @@ +/* Authors: Greg Hamerly and Jonathan Drake and Petr Ryšavý + * Feedback: hamerly@cs.baylor.edu + * See: http://cs.baylor.edu/~hamerly/software/kmeans.php + * Copyright 2015 + */ + +#include "elkan_kmeans_neighbors_rel.h" +#include "general_functions.h" +#include + +void ElkanKmeansNeighborsRel::free() { + ElkanKmeansNeighbors::free(); + delete [] upperRel; + delete [] lowerRel; + upperRel = NULL; + lowerRel = NULL; +} + +/* This function initializes the the upper and lower bound arrays that + * are used for storing the sum of center movement. + */ +void ElkanKmeansNeighborsRel::initialize(Dataset const *aX, unsigned short aK, unsigned short *initialAssignment, int aNumThreads) { + ElkanKmeansNeighbors::initialize(aX, aK, initialAssignment, aNumThreads); + upperRel = new double[k]; + std::fill(upperRel, upperRel + k, 0.0); + lowerRel = new double[k * k]; + std::fill(lowerRel, lowerRel + k*k, 0.0); +} + +void ElkanKmeansNeighborsRel::calculate_max_upper_bound(int threadId) { + ElkanKmeansNeighbors::calculate_max_upper_bound(threadId); + synchronizeAllThreads(); + // do not forget that the upper bound array is smaller by upperRel + addVectors(maxUpperBound, upperRel, k); +} + +int ElkanKmeansNeighborsRel::runThread(int threadId, int maxIterations) { + int iterations = 0; + + int startNdx = start(threadId); + int endNdx = end(threadId); + + // here we need to calculate s & the centroid-centroid distances before the first iteration + // the remaining calls to this method are hidden by move_centers + update_s(threadId); + + while ((iterations < maxIterations) && !converged) { + ++iterations; + + // now we need to filter neighbors so that we remain only with those + // that fulfil the stronger condition that is used for elkan_kmeans + if (iterations != 1) + filter_neighbors(threadId); + + synchronizeAllThreads(); + + for (int i = startNdx; i < endNdx; ++i) { + unsigned short closest = assignment[i]; + bool r = true; + // the true value of the upper bound + double upperI = upper[i] + upperRel[closest]; + + if (upperI <= s[closest]) { + continue; + } + + // iterate only over centroids that can be closest to some x in the dataset + for (int* ptr = neighbours[closest]; (*ptr) != -1; ++ptr) { + // now j has the same meaning as before + int j = (*ptr); + // here is the true value of the lower bound + double lowerIJ = lower[i * k + j] - lowerRel[assignment[i] * k + j]; + if (upperI <= lowerIJ) { + continue; + } + if (upperI <= centerCenterDistDiv2[closest * k + j]) { + continue; + } + + // ELKAN 3(a) + if (r) { + upperI = sqrt(pointCenterDist2(i, closest)); + // the lower bound has to be stored relative to the movement + lower[i * k + closest] = upperI + lowerRel[assignment[i] * k + closest]; + r = false; + if ((upperI <= lowerIJ) || (upperI <= centerCenterDistDiv2[closest * k + j])) { + continue; + } + } + + // ELKAN 3(b) + lowerIJ = sqrt(pointCenterDist2(i, j)); + if (lowerIJ < upperI) { + closest = j; + upperI = lowerIJ; + } + // agan, the lower bound needs to be stored relative to the movement + lower[i * k + j] = lowerIJ + lowerRel[assignment[i] * k + j]; + } + // and the upper as well + upper[i] = upperI - upperRel[assignment[i]]; + if (assignment[i] != closest) { + // update the upper bound so that it is relative to the new assignment + upper[i] += upperRel[assignment[i]] - upperRel[closest]; + // do the same for all k lower bounds + for (int j = 0; j < k; ++j) + lower[i * k + j] -= lowerRel[assignment[i] * k + j] - lowerRel[closest * k + j]; + changeAssignment(i, closest, threadId); + } + } + + verifyAssignment(iterations, startNdx, endNdx); + + // ELKAN 4, 5, AND 6 + synchronizeAllThreads(); + move_centers(threadId); + + synchronizeAllThreads(); + if (threadId == 0 && !converged) + update_bounds(startNdx, endNdx); + synchronizeAllThreads(); + } + + return iterations; +} + +void ElkanKmeansNeighborsRel::update_bounds(int startNdx, int endNdx) { + #ifdef COUNT_DISTANCES + for (int i = 0; i < k; ++i) + for (int j = 0; j < k; ++j) { + boundsUpdates += ((double) clusterSize[0][i]) * (lowerBoundUpdate[i * k + j]); + } + #endif + + // only add the center movement to the upperRel and lower bound update to the lowerRel + addVectors(upperRel, centerMovement, k); + addVectors(lowerRel, lowerBoundUpdate, k * k); +} \ No newline at end of file diff --git a/fast_kmeans/elkan_kmeans_neighbors_rel.h b/fast_kmeans/elkan_kmeans_neighbors_rel.h new file mode 100644 index 0000000..4eab219 --- /dev/null +++ b/fast_kmeans/elkan_kmeans_neighbors_rel.h @@ -0,0 +1,52 @@ +#ifndef ELKAN_KMEANS_NEIGHBORS_REL_H +#define ELKAN_KMEANS_NEIGHBORS_REL_H + +/* Authors: Greg Hamerly, Jonathan Drake and Petr Ryšavý + * Feedback: hamerly@cs.baylor.edu + * See: http://cs.baylor.edu/~hamerly/software/kmeans.php + * Copyright 2015 + * + * Elkan's k-means algorithm that uses k lower bounds per point to prune + * distance calculations. This adds to the Elkan's algorithm tighter + * lower bound udpate, iteration over neighbors and also stores the upper/lower + * bounds relative to the center movements. + */ + +#include "elkan_kmeans_neighbors.h" + +class ElkanKmeansNeighborsRel : public ElkanKmeansNeighbors { +public: + + ElkanKmeansNeighborsRel() : upperRel(NULL), lowerRel(NULL) {}; + + virtual ~ElkanKmeansNeighborsRel() { free(); } + + virtual std::string getName() const { + return "elkanneighborsrel"; + } + + virtual void free(); + virtual void initialize(Dataset const *aX, unsigned short aK, unsigned short *initialAssignment, int aNumThreads); + +protected: + + virtual int runThread(int threadId, int maxIterations); + + // now we have effectively to update k bounds, instead of n + virtual void update_bounds(int startNdx, int endNdx); + + // the maximum upper bound needs to be updated by the relative movement + virtual void calculate_max_upper_bound(int threadId); + + // in those two arrays we will store how much the upper/lower bound + // is changed so that we do not need to update them, but only the accumulated + // center movement + // the upper bound is relative to the movement of the assigned centroid + double * upperRel; + // and each lower bound is relative to the sum of all lower bound updates + // that were calculated for this bound + double * lowerRel; + +}; + +#endif diff --git a/fast_kmeans/general_functions.cpp b/fast_kmeans/general_functions.cpp index 63b6cd1..7fba838 100644 --- a/fast_kmeans/general_functions.cpp +++ b/fast_kmeans/general_functions.cpp @@ -39,7 +39,6 @@ double distance2silent(double const *a, double const *b, int d) { return d2; } - void centerDataset(Dataset *x) { double *xCentroid = new double[x->d]; @@ -55,7 +54,7 @@ void centerDataset(Dataset *x) { for (int d = 0; d < x->d; ++d) { xCentroid[d] /= x->n; } - + // re-center the dataset const double *xEnd = x->data + x->n * x->d; for (double *xp = x->data; xp != xEnd; xp += x->d) { @@ -79,7 +78,7 @@ Dataset *init_centers(Dataset const &x, unsigned short k) { break; } } - } while (! acceptable); + } while (!acceptable); double *cdp = c->data + i * x.d; memcpy(cdp, x.data + chosen_pts[i] * x.d, sizeof(double) * x.d); if (c->sumDataSquared) { @@ -92,89 +91,17 @@ Dataset *init_centers(Dataset const &x, unsigned short k) { return c; } - Dataset *init_centers_kmeanspp(Dataset const &x, unsigned short k) { int *chosen_pts = new int[k]; - std::pair *dist2 = new std::pair[x.n]; - double *distribution = new double[x.n]; - - // initialize dist2 - for (int i = 0; i < x.n; ++i) { - dist2[i].first = std::numeric_limits::max(); - dist2[i].second = i; - } - - // choose the first point randomly - int ndx = 1; - chosen_pts[ndx - 1] = rand() % x.n; - - while (ndx < k) { - double sum_distribution = 0.0; - // look for the point that is furthest from any center - for (int i = 0; i < x.n; ++i) { - int example = dist2[i].second; - double d2 = 0.0, diff; - for (int j = 0; j < x.d; ++j) { - diff = x(example,j) - x(chosen_pts[ndx - 1],j); - d2 += diff * diff; - } - if (d2 < dist2[i].first) { - dist2[i].first = d2; - } - - sum_distribution += dist2[i].first; - } - - // sort the examples by their distance from centers - sort(dist2, dist2 + x.n); - - // turn distribution into a CDF - distribution[0] = dist2[0].first / sum_distribution; - for (int i = 1; i < x.n; ++i) { - distribution[i] = distribution[i - 1] + dist2[i].first / sum_distribution; - } - - // choose a random interval according to the new distribution - double r = (double)rand() / (double)RAND_MAX; - double *new_center_ptr = std::lower_bound(distribution, distribution + x.n, r); - int distribution_ndx = (int)(new_center_ptr - distribution); - chosen_pts[ndx] = dist2[distribution_ndx].second; - /* - cout << "chose " << distribution_ndx << " which is actually " - << chosen_pts[ndx] << " with distance " - << dist2[distribution_ndx].first << std::endl; - */ - - ++ndx; - } - - Dataset *c = new Dataset(k, x.d); - - for (int i = 0; i < k; ++i) { - double *cdp = c->data + i * x.d; - memcpy(cdp, x.data + chosen_pts[i] * x.d, sizeof(double) * x.d); - if (c->sumDataSquared) { - c->sumDataSquared[i] = std::inner_product(cdp, cdp + x.d, cdp, 0.0); - } - } - - delete [] chosen_pts; - delete [] dist2; - delete [] distribution; - - return c; -} - - -Dataset *init_centers_kmeanspp_v2(Dataset const &x, unsigned short k) { - int *chosen_pts = new int[k]; - std::pair *dist2 = new std::pair[x.n]; + double *dist2 = new double[x.n]; + int *closest = new int[x.n]; // index of centroid that is closest to a point + // distances between new centroid and all others divided by 2 + double *centroid_dist2_div4 = new double[k - 1]; // initialize dist2 - for (int i = 0; i < x.n; ++i) { - dist2[i].first = std::numeric_limits::max(); - dist2[i].second = i; - } + std::fill(dist2, dist2 + x.n, std::numeric_limits::max()); + std::fill(closest, closest + x.n, 0); + std::fill(centroid_dist2_div4, centroid_dist2_div4 + k - 1, 0.0); // choose the first point randomly int ndx = 1; @@ -185,39 +112,44 @@ Dataset *init_centers_kmeanspp_v2(Dataset const &x, unsigned short k) { // look for the point that is furthest from any center double max_dist = 0.0; for (int i = 0; i < x.n; ++i) { - int example = dist2[i].second; - double d2 = 0.0, diff; - for (int j = 0; j < x.d; ++j) { - diff = x(example,j) - x(chosen_pts[ndx - 1],j); - d2 += diff * diff; - } - if (d2 < dist2[i].first) { - dist2[i].first = d2; + // we need to consider the new centroid (ndx) as closest to i + if (dist2[i] > centroid_dist2_div4[closest[i]]) { + double d2 = distance2silent(x.data + i * x.d, x.data + chosen_pts[ndx - 1] * x.d, x.d); + + if (d2 < dist2[i]) { + dist2[i] = d2; + closest[i] = ndx - 1; + } } - if (dist2[i].first > max_dist) { - max_dist = dist2[i].first; + if (dist2[i] > max_dist) { + max_dist = dist2[i]; } - sum_distribution += dist2[i].first; + sum_distribution += dist2[i]; } bool unique = true; do { // choose a random interval according to the new distribution - double r = sum_distribution * (double)rand() / (double)RAND_MAX; - double sum_cdf = dist2[0].first; + double r = sum_distribution * (double) rand() / (double) RAND_MAX; + double sum_cdf = dist2[0]; int cdf_ndx = 0; while (sum_cdf < r) { - sum_cdf += dist2[++cdf_ndx].first; + sum_cdf += dist2[++cdf_ndx]; } chosen_pts[ndx] = cdf_ndx; for (int i = 0; i < ndx; ++i) { unique = unique && (chosen_pts[ndx] != chosen_pts[i]); } - } while (! unique); + } while (!unique); + + // calculate the squared distance between the new point and all others div 4 + for (int i = 0; i < ndx; ++i) { + centroid_dist2_div4[i] = distance2silent(x.data + chosen_pts[i] * x.d, x.data + chosen_pts[ndx] * x.d, x.d) / 4.0; + } ++ndx; } @@ -237,13 +169,12 @@ Dataset *init_centers_kmeanspp_v2(Dataset const &x, unsigned short k) { return c; } - /** * in MB */ double getMemoryUsage() { char buf[30]; - snprintf(buf, 30, "/proc/%u/statm", (unsigned)getpid()); + snprintf(buf, 30, "/proc/%u/statm", (unsigned) getpid()); FILE* pf = fopen(buf, "r"); unsigned int totalProgramSizeInPages = 0; unsigned int residentSetSizeInPages = 0; @@ -253,17 +184,16 @@ double getMemoryUsage() { return 0.0; } } - + fclose(pf); pf = NULL; - + double sizeInKilobytes = residentSetSizeInPages * 4.0; // assumes 4096 byte page // getconf PAGESIZE - + return sizeInKilobytes; } - void assign(Dataset const &x, Dataset const &c, unsigned short *assignment) { for (int i = 0; i < x.n; ++i) { double shortestDist2 = std::numeric_limits::max(); diff --git a/fast_kmeans/hamerly_kmeans_modified.cpp b/fast_kmeans/hamerly_kmeans_modified.cpp new file mode 100644 index 0000000..f7ea2f9 --- /dev/null +++ b/fast_kmeans/hamerly_kmeans_modified.cpp @@ -0,0 +1,103 @@ +/* Authors: Greg Hamerly and Jonathan Drake and Petr Ryšavý + * Feedback: hamerly@cs.baylor.edu + * See: http://cs.baylor.edu/~hamerly/software/kmeans.php + * Copyright 2015 + */ + +#include "hamerly_kmeans_modified.h" +#include "general_functions.h" +#include + +/* This class is extension to Hamerly's algorithm with tighter lower bound update. + * - For information about Hamerly's algorithm see hamerly_kmeans.cpp + * - For information about tighter lower bound update see + * modified_update_triangle_based_kmeans.h. + */ +int HamerlyKmeansModified::runThread(int threadId, int maxIterations) { + int iterations = 0; + + int startNdx = start(threadId); + int endNdx = end(threadId); + + // here we need to calculate s & the centroid-centroid distances before the first iteration + // the remaining calls to this method are hidden by move_centers + update_s(threadId); + synchronizeAllThreads(); + + while ((iterations < maxIterations) && !converged) { + ++iterations; + + for (int i = startNdx; i < endNdx; ++i) { + unsigned short closest = assignment[i]; + + double upper_comparison_bound = std::max(s[closest], lower[i]); + + if (upper[i] <= upper_comparison_bound) { + continue; + } + + double u2 = pointCenterDist2(i, closest); + upper[i] = sqrt(u2); + + if (upper[i] <= upper_comparison_bound) { + continue; + } + + double l2 = std::numeric_limits::max(); // the squared lower bound + for (int j = 0; j < k; ++j) { + if (j == closest) { + continue; + } + double dist2 = pointCenterDist2(i, j); + + if (dist2 < u2) { + l2 = u2; + u2 = dist2; + closest = j; + } else if (dist2 < l2) { + l2 = dist2; + } + } + + lower[i] = sqrt(l2); + + if (assignment[i] != closest) { + upper[i] = sqrt(u2); + changeAssignment(i, closest, threadId); + } + } + + verifyAssignment(iterations, startNdx, endNdx); + + synchronizeAllThreads(); + move_centers(threadId); + + synchronizeAllThreads(); + + if (!converged) { + update_bounds(startNdx, endNdx); + } + + synchronizeAllThreads(); + } + + return iterations; +} + +void HamerlyKmeansModified::update_bounds(int startNdx, int endNdx) { + #ifdef COUNT_DISTANCES + for (int i = 0; i < k; ++i) + boundsUpdates += ((double) clusterSize[0][i]) * (lowerBoundUpdate[i]); + #endif + + // update upper/lower bounds + for (int i = startNdx; i < endNdx; ++i) { + // the upper bound increases by the amount that its center moved + upper[i] += centerMovement[assignment[i]]; + + // The lower bound decreases by the update that was calculated by + // the superclass + lower[i] -= lowerBoundUpdate[assignment[i]]; + } +} + diff --git a/fast_kmeans/hamerly_kmeans_modified.h b/fast_kmeans/hamerly_kmeans_modified.h new file mode 100644 index 0000000..0579dec --- /dev/null +++ b/fast_kmeans/hamerly_kmeans_modified.h @@ -0,0 +1,30 @@ +#ifndef HAMERLY_KMEANS_MODIFIED_H +#define HAMERLY_KMEANS_MODIFIED_H + +/* Authors: Greg Hamerly, Jonathan Drake and Petr Ryšavý + * Feedback: hamerly@cs.baylor.edu + * See: http://cs.baylor.edu/~hamerly/software/kmeans.php + * Copyright 2015 + * + * HamerlyKmeans implements Hamerly's k-means algorithm that uses one lower + * bound per point. This implementation adds to Hamerly's algorithm tighter + * lower bound update. + */ + +#include "modified_update_triangle_based_kmeans.h" + +class HamerlyKmeansModified : public ModifiedUpdateTriangleBasedKmeans { + public: + HamerlyKmeansModified() { numLowerBounds = 1; } + virtual ~HamerlyKmeansModified() { free(); } + virtual std::string getName() const { return "hamerlymodified"; } + + protected: + // Update the upper and lower bounds for the given range of points. + void update_bounds(int startNdx, int endNdx); + + virtual int runThread(int threadId, int maxIterations); +}; + +#endif + diff --git a/fast_kmeans/hamerly_kmeans_neighbors.cpp b/fast_kmeans/hamerly_kmeans_neighbors.cpp new file mode 100644 index 0000000..a1ae390 --- /dev/null +++ b/fast_kmeans/hamerly_kmeans_neighbors.cpp @@ -0,0 +1,84 @@ +/* Authors: Greg Hamerly and Jonathan Drake and Petr Ryšavý + * Feedback: hamerly@cs.baylor.edu + * See: http://cs.baylor.edu/~hamerly/software/kmeans.php + * Copyright 2015 + */ + +#include "hamerly_kmeans_neighbors.h" +#include "general_functions.h" +#include + +/* This class implements Hamerly's algorithm together with tighter lower bound + * update and iteration over neighbors. + * - For information about Hamerly's algorithm see hamerly_kmeans.cpp + * - For information about tighter lower bound update and neighbors see + * modified_update_triangle_based_kmeans.h. + */ +int HamerlyKmeansNeighbors::runThread(int threadId, int maxIterations) { + int iterations = 0; + + int startNdx = start(threadId); + int endNdx = end(threadId); + + // here we need to calculate s & the centroid-centroid distances before the first iteration + // the remaining calls to this method are hidden by move_centers + update_s(threadId); + synchronizeAllThreads(); + + while ((iterations < maxIterations) && !converged) { + ++iterations; + + for (int i = startNdx; i < endNdx; ++i) { + unsigned short closest = assignment[i]; + + double upper_comparison_bound = std::max(s[closest], lower[i]); + + if (upper[i] <= upper_comparison_bound) { + continue; + } + + double u2 = pointCenterDist2(i, closest); + upper[i] = sqrt(u2); + + if (upper[i] <= upper_comparison_bound) { + continue; + } + + double l2 = std::numeric_limits::max(); // the squared lower bound + // iterate only over centroids that can be closest or second closest to some x in the dataset + for (int* ptr = neighbours[closest]; (*ptr) != -1; ++ptr) { + double dist2 = pointCenterDist2(i, (*ptr)); + + if (dist2 < u2) { + l2 = u2; + u2 = dist2; + closest = (*ptr); + } else if (dist2 < l2) { + l2 = dist2; + } + } + + lower[i] = sqrt(l2); + + if (assignment[i] != closest) { + upper[i] = sqrt(u2); + changeAssignment(i, closest, threadId); + } + } + + verifyAssignment(iterations, startNdx, endNdx); + + synchronizeAllThreads(); + move_centers(threadId); + + synchronizeAllThreads(); + + if (!converged) { + update_bounds(startNdx, endNdx); + } + + synchronizeAllThreads(); + } + + return iterations; +} diff --git a/fast_kmeans/hamerly_kmeans_neighbors.h b/fast_kmeans/hamerly_kmeans_neighbors.h new file mode 100644 index 0000000..5acdd4c --- /dev/null +++ b/fast_kmeans/hamerly_kmeans_neighbors.h @@ -0,0 +1,29 @@ +#ifndef HAMERLY_KMEANS_NEIGHBORS_H +#define HAMERLY_KMEANS_NEIGHBORS_H + +/* Authors: Greg Hamerly, Jonathan Drake and Petr Ryšavý + * Feedback: hamerly@cs.baylor.edu + * See: http://cs.baylor.edu/~hamerly/software/kmeans.php + * Copyright 2015 + * + * HamerlyKmeans implements Hamerly's k-means algorithm that uses one lower + * bound per point. + * + * This adds to the Hamerly's algorith tighter lower bound update and + * iteration over neighbours. + */ + +#include "hamerly_kmeans_modified.h" + +class HamerlyKmeansNeighbors : public HamerlyKmeansModified { + public: + HamerlyKmeansNeighbors() {} + virtual ~HamerlyKmeansNeighbors() { free(); } + virtual std::string getName() const { return "hamerlyneighbors"; } + + protected: + virtual int runThread(int threadId, int maxIterations); +}; + +#endif + diff --git a/fast_kmeans/hamerly_kmeans_neighbors1st.cpp b/fast_kmeans/hamerly_kmeans_neighbors1st.cpp new file mode 100644 index 0000000..27042fe --- /dev/null +++ b/fast_kmeans/hamerly_kmeans_neighbors1st.cpp @@ -0,0 +1,135 @@ +/* Authors: Greg Hamerly and Jonathan Drake and Petr Ryšavý + * Feedback: hamerly@cs.baylor.edu + * See: http://cs.baylor.edu/~hamerly/software/kmeans.php + * Copyright 2015 + */ + +#include "hamerly_kmeans_neighbors1st.h" +#include "general_functions.h" +#include +#include +#include + +/* This class implements Hamerly's algorithm together with tighter lower bound + * update and iteration over neighbors (including the first iteration). + * - For information about Hamerly's algorithm see hamerly_kmeans.cpp + * - For information about tighter lower bound update and neighbors see + * modified_update_triangle_based_kmeans.h. + * The iteration over neighbors is based on the following: + * - If the initial assignment is good and tight, we can reuse its results. + * - We use the neighbors condition in the first iteration. + * - To use it we evaluate the maximum upper bound. + * - Therefore we calculate the upper bound separately. + */ +int HamerlyKmeansNeighbors1st::runThread(int threadId, int maxIterations) { + int iterations = 0; + + int startNdx = start(threadId); + int endNdx = end(threadId); + + // here we need to calculate s & the centroid-centroid distances before the first iteration + // the remaining calls to this method are hidden by move_centers + update_s(threadId); + synchronizeAllThreads(); + + ++iterations; + + // calculate the distance between each point and its assigned centroid + // we would have to calculate this anyway + if (threadId == 0) + std::fill(maxUpperBound, maxUpperBound + k, 0.0); + #ifdef USE_THREADS + if (threadId == 0) + std::fill(maxUpperBoundAgg, maxUpperBoundAgg + k * numThreads, 0.0); + #endif + for (int i = startNdx; i < endNdx; ++i) { + unsigned short closest = assignment[i]; + upper[i] = sqrt(pointCenterDist2(i, closest)); + + // also evaluate the maximum upper bound + #ifdef USE_THREADS + if (maxUpperBoundAgg[threadId * k + closest] < upper[i]) + maxUpperBoundAgg[threadId * k + closest] = upper[i]; + #else + if (upper[i] > maxUpperBound[closest]) + maxUpperBound[closest] = upper[i]; + #endif + } + #ifdef USE_THREADS + synchronizeAllThreads(); + aggregate_maximum_upper_bound(threadId); + #endif + synchronizeAllThreads(); + + // now get the neighbours, but we cannot use the superclass as centroids have + // not moved yet, therefore there is no centerMovement + for (int C = 0; C < k; ++C) + #ifdef USE_THREADS + if (C % numThreads == threadId) + #endif + { + double boundOnOtherDistance = maxUpperBound[C] + s[C]; + int neighboursPos = 0; + for (int c = 0; c < k; ++c) { + if (c != C && boundOnOtherDistance >= centerCenterDistDiv2[C * k + c]) + neighbours[C][neighboursPos++] = c; + } + neighbours[C][neighboursPos] = -1; + } + synchronizeAllThreads(); + + for (int i = startNdx; i < endNdx; ++i) { + unsigned short closest = assignment[i]; + // check that we can skip the distance calculations + if (s[closest] >= upper[i]) { + // if so, initialize the lower bound + lower[i] = 2 * s[closest] - upper[i]; + continue; + } + + // we cannot tighten the upper bound - it is already tight, skip it therefore + + double u2 = upper[i] * upper[i]; + double l2 = std::numeric_limits::max(); // the squared lower bound + // now iterate over the neighbors + for (int* ptr = neighbours[closest]; (*ptr) != -1; ++ptr) { + double dist2 = pointCenterDist2(i, (*ptr)); + + if (dist2 < u2) { + l2 = u2; + u2 = dist2; + closest = (*ptr); + } else if (dist2 < l2) { + l2 = dist2; + } + } + + lower[i] = sqrt(l2); + + if (assignment[i] != closest) { + upper[i] = sqrt(u2); + changeAssignment(i, closest, threadId); + } + } + + verifyAssignment(iterations, startNdx, endNdx); + + synchronizeAllThreads(); + move_centers(threadId); + + synchronizeAllThreads(); + + if (!converged) { + update_bounds(startNdx, endNdx); + } + + synchronizeAllThreads(); + + // end of the first iteration + + // ... now do everything in the same way as we did before + // therefore call the superclass + // note that this implementation costs us unnecessary k*k-k distance calculations + // on update_s() function - this k*(k-1) distances can be eliminated by the change + return HamerlyKmeansNeighbors::runThread(threadId, maxIterations - 1) + 1; +} diff --git a/fast_kmeans/hamerly_kmeans_neighbors1st.h b/fast_kmeans/hamerly_kmeans_neighbors1st.h new file mode 100644 index 0000000..82189a2 --- /dev/null +++ b/fast_kmeans/hamerly_kmeans_neighbors1st.h @@ -0,0 +1,31 @@ +#ifndef HAMERLY_KMEANS_NEIGHBORS_1ST_H +#define HAMERLY_KMEANS_NEIGHBORS_1ST_H + +/* Authors: Greg Hamerly, Jonathan Drake and Petr Ryšavý + * Feedback: hamerly@cs.baylor.edu + * See: http://cs.baylor.edu/~hamerly/software/kmeans.php + * Copyright 2015 + * + * HamerlyKmeans implements Hamerly's k-means algorithm that uses one lower + * bound per point. + * + * This adds to the Hamerly's algorith tighter lower bound update and + * iteration over neighbours. This iteration over neighbors is implemented + * also for the first iteration, which means some additional modifications + * to the runThread method. + */ + +#include "hamerly_kmeans_neighbors.h" + +class HamerlyKmeansNeighbors1st : public HamerlyKmeansNeighbors { + public: + HamerlyKmeansNeighbors1st() {} + virtual ~HamerlyKmeansNeighbors1st() { free(); } + virtual std::string getName() const { return "hamerlyneighbors1st"; } + + protected: + virtual int runThread(int threadId, int maxIterations); +}; + +#endif + diff --git a/fast_kmeans/hamerly_kmeans_neighbors_only.cpp b/fast_kmeans/hamerly_kmeans_neighbors_only.cpp new file mode 100644 index 0000000..47e04a0 --- /dev/null +++ b/fast_kmeans/hamerly_kmeans_neighbors_only.cpp @@ -0,0 +1,55 @@ +/* Authors: Greg Hamerly and Jonathan Drake and Petr Ryšavý + * Feedback: hamerly@cs.baylor.edu + * See: http://cs.baylor.edu/~hamerly/software/kmeans.php + * Copyright 2015 + */ + +#include "hamerly_kmeans_neighbors_only.h" +#include "general_functions.h" +#include + +void HamerlyKmeansNeighborsOnly::move_centers(int threadId) { + if (threadId == 0) { + // move the centers + int furthestMovingCenter = TriangleInequalityBaseKmeans::move_centers(); + converged = (0.0 == centerMovement[furthestMovingCenter]); + } + synchronizeAllThreads(); + + if (!converged) { + // ... calculate the neighbors and the lower bound update given by the triangle inequality + update_s(threadId); + calculate_max_upper_bound(threadId); + synchronizeAllThreads(); + for (int c = 0; c < k; ++c) + #ifdef USE_THREADS + if (c % numThreads == threadId) + #endif + calculate_neighbors(c); + synchronizeAllThreads(); + if (threadId == 0) + calculate_lower_bound_update(threadId); + } +} + +/* This method fills the lower bound update array in the same manner as it + * would be filled for the default Hamerly's kmeans implementation. The + * code is copied from hamerly_kmeans.cpp with small modification. */ +void HamerlyKmeansNeighborsOnly::calculate_lower_bound_update(int threadId) { + int furthestMovingCenter = 0; + double longest = centerMovement[furthestMovingCenter]; + double secondLongest = 0.0; + for (int j = 0; j < k; ++j) { + if (longest < centerMovement[j]) { + secondLongest = longest; + longest = centerMovement[j]; + furthestMovingCenter = j; + } else if (secondLongest < centerMovement[j]) { + secondLongest = centerMovement[j]; + } + } + + // store the update instead of updating the bound directly + for (int i = 0; i < k; ++i) + lowerBoundUpdate[i] = (i == furthestMovingCenter) ? secondLongest : longest; +} \ No newline at end of file diff --git a/fast_kmeans/hamerly_kmeans_neighbors_only.h b/fast_kmeans/hamerly_kmeans_neighbors_only.h new file mode 100644 index 0000000..5a8a391 --- /dev/null +++ b/fast_kmeans/hamerly_kmeans_neighbors_only.h @@ -0,0 +1,37 @@ +#ifndef HAMERLY_KMEANS_NEIGHBORS_ONLY_H +#define HAMERLY_KMEANS_NEIGHBORS_ONLY_H + +/* Authors: Greg Hamerly and Jonathan Drake + * Feedback: hamerly@cs.baylor.edu + * See: http://cs.baylor.edu/~hamerly/software/kmeans.php + * Copyright 2014 + * + * HamerlyKmeans implements Hamerly's k-means algorithm that uses one lower + * bound per point. + * + * In this implementation the iteration over neighbors is implemented. + * - For information about neighbors see + * modified_update_triangle_based_kmeans.h + * note the the tighter lower bound update is not used. + */ + +#include "hamerly_kmeans_neighbors.h" + +class HamerlyKmeansNeighborsOnly : public HamerlyKmeansNeighbors { + public: + HamerlyKmeansNeighborsOnly() {} + virtual ~HamerlyKmeansNeighborsOnly() { free(); } + virtual std::string getName() const { return "hamerlyneighborsonly"; } + + protected: + // we have to override this method in order to skip the tighter lower + // bound update calculation + virtual void move_centers(int threadId); + + // here we need to consider only the centroid movement + // therefore we get the default update given by the triangle inequality + virtual void calculate_lower_bound_update(int threadId); +}; + +#endif + diff --git a/fast_kmeans/heap_kmeans_modified.cpp b/fast_kmeans/heap_kmeans_modified.cpp new file mode 100644 index 0000000..ed7a39f --- /dev/null +++ b/fast_kmeans/heap_kmeans_modified.cpp @@ -0,0 +1,138 @@ +/* Authors: Greg Hamerly and Jonathan Drake and Petr Ryšavý + * Feedback: hamerly@cs.baylor.edu + * See: http://cs.baylor.edu/~hamerly/software/kmeans.php + * Copyright 2015 + */ + +#include "modified_update_triangle_based_kmeans.h" +#include "general_functions.h" +#include "heap_kmeans_modified.h" +#include +#include + +void HeapKmeansModified::free() { + ModifiedUpdateTriangleBasedKmeans::free(); + for (int t = 0; t < numThreads; ++t) { + delete [] heaps[t]; + } + delete [] heaps; + delete [] heapBounds; + heaps = NULL; + heapBounds = NULL; +} + +void HeapKmeansModified::initialize(Dataset const *aX, unsigned short aK, unsigned short *initialAssignment, int aNumThreads) { + ModifiedUpdateTriangleBasedKmeans::initialize(aX, aK, initialAssignment, aNumThreads); + + heaps = new Heap*[numThreads]; + heapBounds = new double[k]; + + for (int t = 0; t < numThreads; ++t) { + heaps[t] = new Heap[k]; + int startNdx = start(t); + int endNdx = end(t); + heaps[t][0].resize(endNdx - startNdx, std::make_pair(-1.0, 0)); + for (int i = 0; i < endNdx - startNdx; ++i) { + heaps[t][0][i].second = i + startNdx; + } + } + + std::fill(heapBounds, heapBounds + k, 0.0); + // start with zeros here + std::fill(maxUpperBound, maxUpperBound + k, 0.0); +} + +int HeapKmeansModified::runThread(int threadId, int maxIterations) { + int iterations = 0; + + std::greater> heapComp; + + while ((iterations < maxIterations) && !converged) { + ++iterations; + + for (int h = 0; h < k; ++h) { + Heap &heap = heaps[threadId][h]; + + while (heap.size() > 0) { + if (heapBounds[h] <= heap[0].first) + break; + + int i = heap[0].second; + + std::pop_heap(heap.begin(), heap.end(), heapComp); + heap.pop_back(); + + unsigned short closest = assignment[i]; + unsigned short nextClosest = 0; + + double u2 = pointCenterDist2(i, closest); + double l2 = std::numeric_limits::max(); + + for (unsigned short j = 0; j < k; ++j) { + if (j == closest) continue; + + double dist2 = pointCenterDist2(i, j); + if (dist2 < u2) { + l2 = u2; + u2 = dist2; + nextClosest = closest; + closest = j; + } else if (dist2 < l2) { + l2 = dist2; + nextClosest = j; + } + } + + const double u = sqrt(u2); + const double bound = sqrt(l2) - u; + + if ((bound == 0.0) && (nextClosest < closest)) { + closest = nextClosest; + } + + // save the maximum upper bound, should be active only in the first iteration and when assignment changes + if (u > maxUpperBound[closest]) + maxUpperBound[closest] = u; + + if (closest != assignment[i]) { + changeAssignment(i, closest, threadId); + } + + Heap &newHeap = heaps[threadId][closest]; + newHeap.push_back(std::make_pair(heapBounds[closest] + bound, i)); + std::push_heap(newHeap.begin(), newHeap.end(), heapComp); + } + } + + verifyAssignment(iterations, start(threadId), end(threadId)); + + synchronizeAllThreads(); + move_centers(threadId); + + synchronizeAllThreads(); + if (threadId == 0 && !converged) + update_bounds(); + + synchronizeAllThreads(); + } + return iterations; +} + +void HeapKmeansModified::update_bounds() { + #ifdef COUNT_DISTANCES + for (int i = 0; i < k; ++i) + boundsUpdates += ((double) clusterSize[0][i]) * (lowerBoundUpdate[i]); + #endif + for (int j = 0; j < k; ++j) { + // this is the worst case by that the maximum upper bound can grow + maxUpperBound[j] += centerMovement[j]; + heapBounds[j] += centerMovement[j]; + // update the lower bound by the calculated tighter update + heapBounds[j] += lowerBoundUpdate[j]; + } +} + +// do not do anything, we will update maximum upper bound in update_bounds + +void HeapKmeansModified::calculate_max_upper_bound(int threadId) { +} diff --git a/fast_kmeans/heap_kmeans_modified.h b/fast_kmeans/heap_kmeans_modified.h new file mode 100644 index 0000000..c5fe98b --- /dev/null +++ b/fast_kmeans/heap_kmeans_modified.h @@ -0,0 +1,53 @@ +#ifndef HEAP_KMEANS_MODIFIED_H +#define HEAP_KMEANS_MODIFIED_H + +/* Authors: Greg Hamerly, Jonathan Drake and Petr Ryšavý + * Feedback: hamerly@cs.baylor.edu + * See: http://cs.baylor.edu/~hamerly/software/kmeans.php + * Copyright 2015 + * + * This class is extension to the HeapKmeans. It implement the tigher upper bound update + * that uses only estimate of m(c_i). To get more information about heap kmeans see + * heap_kmeans.cpp/h. To get information about the tighter update see + * modified_update_triangle_based_kmeans.h. The most of the code is inherited from the + * parent class or copied from the HeapKmeans.h. + */ + +#include "modified_update_triangle_based_kmeans.h" +#include + +typedef std::vector > Heap; + +class HeapKmeansModified : public ModifiedUpdateTriangleBasedKmeans { +public: + + HeapKmeansModified() : heaps(NULL), heapBounds(NULL) { + } + + virtual ~HeapKmeansModified() { free(); } + + virtual void initialize(Dataset const *aX, unsigned short aK, unsigned short *initialAssignment, int aNumThreads); + virtual void free(); + + virtual std::string getName() const { + return "heapmodified"; + } + + +protected: + + virtual int runThread(int threadId, int maxIterations); + + void update_bounds(); + + // this method is overridden, takes as maximum upper bound the + // maximum upper bound from the first iteration increased in + // each iteration by the center movement + virtual void calculate_max_upper_bound(int threadId); + + Heap **heaps; + + double *heapBounds; +}; + +#endif \ No newline at end of file diff --git a/fast_kmeans/heap_kmeans_ubarr.cpp b/fast_kmeans/heap_kmeans_ubarr.cpp new file mode 100644 index 0000000..0f41862 --- /dev/null +++ b/fast_kmeans/heap_kmeans_ubarr.cpp @@ -0,0 +1,216 @@ +/* Authors: Greg Hamerly and Jonathan Drake and Petr Ryšavý + * Feedback: hamerly@cs.baylor.edu + * See: http://cs.baylor.edu/~hamerly/software/kmeans.php + * Copyright 2015 + */ + +#include "heap_kmeans_ubarr.h" +#include "general_functions.h" +#include +#include + +void HeapKmeansUBarr::free() { + HeapKmeansModified::free(); + for (int t = 0; t < numThreads; ++t) { + delete [] maxUBHeap[t]; + } + delete[] maxUBHeap; + delete[] ubHeapBounds; + maxUBHeap = NULL; + ubHeapBounds = NULL; +} + +/* Calculates the maximum upper bound over each cluster. This is achieved by + * looking onto the top of the heaps that contain the upper bound. + */ +void HeapKmeansUBarr::calculate_max_upper_bound(int threadId) { + for (int c = 0; c < k; ++c) { + Heap &heap = maxUBHeap[threadId][c]; + while (heap.size() > 1) { + // look onto the top of the heap + // check that the point is still assigned to this cluter + if (c == assignment[heap[0].second]) { + // also the upper bound may be invalid - it may have been tigtened + if (upper[heap[0].second] == heap[0].first) + break; + + // if the entry is invalid, push a new one with the correct upper bound + heap.push_back(std::make_pair(upper[heap[0].second], heap[0].second)); + std::push_heap(heap.begin(), heap.end()); + } + + // drop the outdated entries + std::pop_heap(heap.begin(), heap.end()); + heap.pop_back(); + } + + // and do not forget that the upper bound is relative to the center movement history + #ifdef USE_THREADS + // remember that in multithreaded version the size of the heap may be zero + maxUpperBoundAgg[threadId * k + c] = heap.empty() ? -1.0 : heap[0].first + ubHeapBounds[c]; + #else + maxUpperBound[c] = heap[0].first + ubHeapBounds[c]; + #endif + } + + #ifdef USE_THREADS + if (threadId == 0) + std::fill(maxUpperBound, maxUpperBound + k, 0.0); + synchronizeAllThreads(); + aggregate_maximum_upper_bound(threadId); + #endif +} + +void HeapKmeansUBarr::initialize(Dataset const *aX, unsigned short aK, unsigned short *initialAssignment, int aNumThreads) { + HeapKmeansModified::initialize(aX, aK, initialAssignment, aNumThreads); + + maxUBHeap = new Heap*[numThreads]; + ubHeapBounds = new double[k]; + + for (int t = 0; t < numThreads; ++t) { + maxUBHeap[t] = new Heap[k]; + } + std::fill(ubHeapBounds, ubHeapBounds + k, 0.0); +} + +int HeapKmeansUBarr::runThread(int threadId, int maxIterations) { + int iterations = 0; + + std::greater> heapComp; + + update_s(threadId); + synchronizeAllThreads(); + + while ((iterations < maxIterations) && !converged) { + ++iterations; + + for (int h = 0; h < k; ++h) { + Heap &heap = heaps[threadId][h]; + + while (heap.size() > 0) { + if (heapBounds[h] <= heap[0].first) { + break; + } + + int i = heap[0].second; + unsigned short closest = assignment[i]; + unsigned short nextClosest = 0; + double bound = heap[0].first - heapBounds[closest]; + + std::pop_heap(heap.begin(), heap.end(), heapComp); + heap.pop_back(); + + // calculate the real value of the upper bound + double u = upper[i] + ubHeapBounds[closest]; + const double originalLower = bound + u; + + // if the upper bound is less than s, closest, push the point to the heap + // with new key - using s[closest] to estimate the lower bound + // this is equivalent to tightening the upper bound in Hamerly's aglorithm + if (u <= s[closest]) { // note that this cannot happen in the 1st iteration (u = inf) + const double newLower = heapBounds[closest] + 2 * (s[closest] - u); + heap.push_back(std::make_pair(newLower, i)); + std::push_heap(heap.begin(), heap.end(), heapComp); + continue; + } + + double u2 = pointCenterDist2(i, closest); + u = sqrt(u2); + + // the same condition as in the case of Hamerly's algorithm + if (u <= std::max(s[closest], originalLower) && iterations != 1) { + upper[i] = u - ubHeapBounds[closest]; + const double newLowerUpper = heapBounds[closest] + std::max(originalLower, 2 * s[closest] - u) - u; + heap.push_back(std::make_pair(newLowerUpper, i)); + std::push_heap(heap.begin(), heap.end(), heapComp); + continue; + } // in the case of first iteration, store the upper bound in the heap + // using s[closest] as estimate, if possible + else if (iterations == 1 && u < s[closest]) { + upper[i] = u - ubHeapBounds[closest]; + Heap &newHeap = heaps[threadId][closest]; + const double newLower = heapBounds[closest] + 2 * (s[closest] - u); + newHeap.push_back(std::make_pair(newLower, i)); + std::push_heap(newHeap.begin(), newHeap.end(), heapComp); + + // we have to store into the maxUBHeap as we need to ensure that + // *all* points are in this heap with at least some upper bound + Heap &ubHeap = maxUBHeap[threadId][closest]; + ubHeap.push_back(std::make_pair(u - ubHeapBounds[closest], i)); + std::push_heap(ubHeap.begin(), ubHeap.end()); + continue; + } + + double l2 = std::numeric_limits::max(); + + for (unsigned short j = 0; j < k; ++j) { + if (j == closest) continue; + + double dist2 = pointCenterDist2(i, j); + if (dist2 < u2) { + l2 = u2; + u2 = dist2; + nextClosest = closest; + closest = j; + } else if (dist2 < l2) { + l2 = dist2; + nextClosest = j; + } + } + + u = sqrt(u2); + bound = sqrt(l2) - u; + + if ((bound == 0.0) && (nextClosest < closest)) { + closest = nextClosest; + } + + if (closest != assignment[i] || iterations == 1) // iterations == 1 needed for points that are assigned correctly + { + // we need to make sure that all points are in ubHeap after 1st iteration + // or after the assignment change + Heap &ubHeap = maxUBHeap[threadId][closest]; + ubHeap.push_back(std::make_pair(u - ubHeapBounds[closest], i)); + std::push_heap(ubHeap.begin(), ubHeap.end()); + } + + if (closest != assignment[i]) { + changeAssignment(i, closest, threadId); + } + + upper[i] = u - ubHeapBounds[closest]; + Heap &newHeap = heaps[threadId][closest]; + newHeap.push_back(std::make_pair(heapBounds[closest] + bound, i)); + std::push_heap(newHeap.begin(), newHeap.end(), heapComp); + } + } + + verifyAssignment(iterations, start(threadId), end(threadId)); + + synchronizeAllThreads(); + move_centers(threadId); + + synchronizeAllThreads(); + if (threadId == 0 && !converged) + update_bounds(); + + synchronizeAllThreads(); + } + + return iterations; +} + +void HeapKmeansUBarr::update_bounds() { + #ifdef COUNT_DISTANCES + for (int i = 0; i < k; ++i) + boundsUpdates += ((double) clusterSize[0][i]) * (lowerBoundUpdate[i]); + #endif + for (int j = 0; j < k; ++j) { + // this is by what grow the upper bounds + ubHeapBounds[j] += centerMovement[j]; + heapBounds[j] += centerMovement[j]; + heapBounds[j] += lowerBoundUpdate[j]; + } +} + + diff --git a/fast_kmeans/heap_kmeans_ubarr.h b/fast_kmeans/heap_kmeans_ubarr.h new file mode 100644 index 0000000..d462bdc --- /dev/null +++ b/fast_kmeans/heap_kmeans_ubarr.h @@ -0,0 +1,59 @@ +#ifndef HEAP_KMEANS_UBARR_H +#define HEAP_KMEANS_UBARR_H + +/* Authors: Greg Hamerly, Jonathan Drake and Petr Ryšavý + * Feedback: hamerly@cs.baylor.edu + * See: http://cs.baylor.edu/~hamerly/software/kmeans.php + * Copyright 2015 + * + * HeapKmeansUBarr extends HeapKmeansModified. This algorithm enhances the + * heap algorithm by array with upper bound and also a heap for finding + * maximum over this array. As a result this algorithm can take advantage + * from tightening the upper bound and other advantages that Hamerly's algorithm + * have over the heap algorithm. + */ + +#include "heap_kmeans_modified.h" +#include + +class HeapKmeansUBarr : public HeapKmeansModified { +public: + + HeapKmeansUBarr() : maxUBHeap(NULL), ubHeapBounds(NULL) {} + + virtual ~HeapKmeansUBarr() { + free(); + } + + virtual void initialize(Dataset const *aX, unsigned short aK, unsigned short *initialAssignment, int aNumThreads); + virtual void free(); + + virtual std::string getName() const { + return "heapubarr"; + } + + +protected: + + virtual int runThread(int threadId, int maxIterations); + + void update_bounds(); + + // When we will calculate the maximum upper bound, we will use the values + // stored in the *maxUBHeap to find the maximum of the upper bound + virtual void calculate_max_upper_bound(int threadId); + + // this will be set of k heaps, one for each centroid + // each heap will store a pair of upper bound and point index + // this allows us to find maximum over upper bound + Heap **maxUBHeap; + + // this array relativizes the upper bound, similarly as the + // heap key, so that we do not have to update all the n + // upper bounds, but updating k is enough + // ... note that the upper bound array is in parent class, + // but it is not used there + double *ubHeapBounds; +}; + +#endif \ No newline at end of file diff --git a/fast_kmeans/heap_kmeans_ubarr_neighbors.cpp b/fast_kmeans/heap_kmeans_ubarr_neighbors.cpp new file mode 100644 index 0000000..df2b5b5 --- /dev/null +++ b/fast_kmeans/heap_kmeans_ubarr_neighbors.cpp @@ -0,0 +1,128 @@ +/* Authors: Greg Hamerly and Jonathan Drake and Petr Ryšavý + * Feedback: hamerly@cs.baylor.edu + * See: http://cs.baylor.edu/~hamerly/software/kmeans.php + * Copyright 2015 + */ + +#include "heap_kmeans_ubarr_neighbors.h" +#include "general_functions.h" +#include +#include + +int HeapKmeansUBarrNeighbors::runThread(int threadId, int maxIterations) { + int iterations = 0; + + std::greater> heapComp; + + update_s(threadId); + synchronizeAllThreads(); + + while ((iterations < maxIterations) && !converged) { + ++iterations; + + for (int h = 0; h < k; ++h) { + Heap &heap = heaps[threadId][h]; + + while (heap.size() > 0) { + if (heapBounds[h] <= heap[0].first) { + break; + } + + int i = heap[0].second; + unsigned short closest = assignment[i]; + unsigned short nextClosest = 0; + double bound = heap[0].first - heapBounds[closest]; + + std::pop_heap(heap.begin(), heap.end(), heapComp); + heap.pop_back(); + + double u = upper[i] + ubHeapBounds[closest]; + const double originalLower = bound + u; + + if (u <= s[closest]) { + const double newLower = heapBounds[closest] + 2 * (s[closest] - u); + heap.push_back(std::make_pair(newLower, i)); + std::push_heap(heap.begin(), heap.end(), heapComp); + continue; + } + + double u2 = pointCenterDist2(i, closest); + u = sqrt(u2); + + if (u <= std::max(s[closest], originalLower) && iterations != 1) { + upper[i] = u - ubHeapBounds[closest]; + const double newLowerUpper = heapBounds[closest] + std::max(originalLower, 2 * s[closest] - u) - u; + heap.push_back(std::make_pair(newLowerUpper, i)); + std::push_heap(heap.begin(), heap.end(), heapComp); + continue; + } else if (iterations == 1 && u < s[closest]) { + upper[i] = u - ubHeapBounds[closest]; + Heap &newHeap = heaps[threadId][closest]; + const double newLower = heapBounds[closest] + 2 * (s[closest] - u); + newHeap.push_back(std::make_pair(newLower, i)); + std::push_heap(newHeap.begin(), newHeap.end(), heapComp); + + Heap &ubHeap = maxUBHeap[threadId][closest]; + ubHeap.push_back(std::make_pair(u - ubHeapBounds[closest], i)); + std::push_heap(ubHeap.begin(), ubHeap.end()); + continue; + } + + double l2 = std::numeric_limits::max(); + + // this is the difference from the parent class, we iterate only over selected centroids here + for (int* ptr = neighbours[closest]; (*ptr) != -1; ++ptr) { + // now j has the same meaning as in the parent class + int j = (*ptr); + if (j == closest) continue; + + double dist2 = pointCenterDist2(i, j); + if (dist2 < u2) { + l2 = u2; + u2 = dist2; + nextClosest = closest; + closest = j; + } else if (dist2 < l2) { + l2 = dist2; + nextClosest = j; + } + } + + u = sqrt(u2); + bound = sqrt(l2) - u; + + if ((bound == 0.0) && (nextClosest < closest)) { + closest = nextClosest; + } + + if (closest != assignment[i] || iterations == 1) { + Heap &ubHeap = maxUBHeap[threadId][closest]; + ubHeap.push_back(std::make_pair(u - ubHeapBounds[closest], i)); + std::push_heap(ubHeap.begin(), ubHeap.end()); + } + + if (closest != assignment[i]) { + changeAssignment(i, closest, threadId); + } + + upper[i] = u - ubHeapBounds[closest]; + Heap &newHeap = heaps[threadId][closest]; + newHeap.push_back(std::make_pair(heapBounds[closest] + bound, i)); + std::push_heap(newHeap.begin(), newHeap.end(), heapComp); + } + } + + verifyAssignment(iterations, start(threadId), end(threadId)); + + synchronizeAllThreads(); + move_centers(threadId); + + synchronizeAllThreads(); + if (threadId == 0 && !converged) + update_bounds(); + + synchronizeAllThreads(); + } + + return iterations; +} diff --git a/fast_kmeans/heap_kmeans_ubarr_neighbors.h b/fast_kmeans/heap_kmeans_ubarr_neighbors.h new file mode 100644 index 0000000..3b1f93e --- /dev/null +++ b/fast_kmeans/heap_kmeans_ubarr_neighbors.h @@ -0,0 +1,39 @@ +#ifndef HEAP_KMEANS_UBARR_NEIGHBORS_H +#define HEAP_KMEANS_UBARR_NEIGHBORS_H + +/* Authors: Greg Hamerly, Jonathan Drake and Petr Ryšavý + * Feedback: hamerly@cs.baylor.edu + * See: http://cs.baylor.edu/~hamerly/software/kmeans.php + * Copyright 2015 + * + * HeapKmeansUBarrNeighbors extends HeapKmeansModifiedUBarr. This algorithm + * enhances the the previous algorithm by iteration over neighbors, which + * allows it to eliminate centroids from the innermost loop. Everything is + * exactly same as in the parent class. + */ + +#include "heap_kmeans_ubarr.h" +#include + +typedef std::vector > Heap; + +class HeapKmeansUBarrNeighbors : public HeapKmeansUBarr { +public: + + HeapKmeansUBarrNeighbors() {} + + virtual ~HeapKmeansUBarrNeighbors() { + free(); + } + + virtual std::string getName() const { + return "heapubarrneighbors"; + } + + +protected: + + virtual int runThread(int threadId, int maxIterations); +}; + +#endif \ No newline at end of file diff --git a/fast_kmeans/kmeans.cpp b/fast_kmeans/kmeans.cpp index dc7f719..6cc9157 100644 --- a/fast_kmeans/kmeans.cpp +++ b/fast_kmeans/kmeans.cpp @@ -10,9 +10,12 @@ #include Kmeans::Kmeans() : x(NULL), n(0), k(0), d(0), numThreads(0), converged(false), - clusterSize(NULL), centerMovement(NULL), assignment(NULL) { +clusterSize(NULL), centerMovement(NULL), assignment(NULL) { #ifdef COUNT_DISTANCES numDistances = 0; + numInnerProducts = 0; + assignmentChanges = 0; + boundsUpdates = 0.0; #endif } @@ -28,8 +31,6 @@ void Kmeans::free() { n = k = d = numThreads = 0; } - - void Kmeans::initialize(Dataset const *aX, unsigned short aK, unsigned short *initialAssignment, int aNumThreads) { free(); @@ -61,10 +62,16 @@ void Kmeans::initialize(Dataset const *aX, unsigned short aK, unsigned short *in #ifdef COUNT_DISTANCES numDistances = 0; + numInnerProducts = 0; + assignmentChanges = 0; + boundsUpdates = 0.0; #endif } void Kmeans::changeAssignment(int xIndex, int closestCluster, int threadId) { + #ifdef COUNT_DISTANCES + ++assignmentChanges; + #endif --clusterSize[threadId][assignment[xIndex]]; ++clusterSize[threadId][closestCluster]; assignment[xIndex] = closestCluster; @@ -72,19 +79,19 @@ void Kmeans::changeAssignment(int xIndex, int closestCluster, int threadId) { #ifdef USE_THREADS struct ThreadInfo { - public: - ThreadInfo() : km(NULL), threadId(0), pthread_id(0) {} - Kmeans *km; - int threadId; - pthread_t pthread_id; - int numIterations; - int maxIterations; +public: + ThreadInfo() : km(NULL), threadId(0), pthread_id(0) {} + Kmeans *km; + int threadId; + pthread_t pthread_id; + int numIterations; + int maxIterations; }; #endif void *Kmeans::runner(void *args) { #ifdef USE_THREADS - ThreadInfo *ti = (ThreadInfo *)args; + ThreadInfo *ti = (ThreadInfo *) args; ti->numIterations = ti->km->runThread(ti->threadId, ti->maxIterations); #endif return NULL; @@ -151,11 +158,11 @@ void Kmeans::verifyAssignment(int iteration, int startNdx, int endNdx) const { // the program if (closest != assignment[i]) { std::cerr << "assignment error:" << std::endl; - std::cerr << "iteration = " << iteration << std::endl; - std::cerr << "point index = " << i << std::endl; - std::cerr << "closest center = " << closest << std::endl; - std::cerr << "closest center dist2 = " << closest_dist2 << std::endl; - std::cerr << "assigned center = " << assignment[i] << std::endl; + std::cerr << "iteration = " << iteration << std::endl; + std::cerr << "point index = " << i << std::endl; + std::cerr << "closest center = " << closest << std::endl; + std::cerr << "closest center dist2 = " << closest_dist2 << std::endl; + std::cerr << "assigned center = " << assignment[i] << std::endl; std::cerr << "assigned center dist2 = " << original_closest_dist2 << std::endl; assert(false); } diff --git a/fast_kmeans/kmeans.h b/fast_kmeans/kmeans.h index 25643e1..48b32c8 100644 --- a/fast_kmeans/kmeans.h +++ b/fast_kmeans/kmeans.h @@ -86,6 +86,9 @@ class Kmeans { #error Counting distances and using multiple threads is not supported. #endif mutable long long numDistances; + mutable long long numInnerProducts; + mutable long long assignmentChanges; + mutable double boundsUpdates; #endif protected: diff --git a/fast_kmeans/modified_update_triangle_based_kmeans.cpp b/fast_kmeans/modified_update_triangle_based_kmeans.cpp new file mode 100644 index 0000000..1636c0a --- /dev/null +++ b/fast_kmeans/modified_update_triangle_based_kmeans.cpp @@ -0,0 +1,344 @@ +/* Authors: Greg Hamerly and Jonathan Drake and Petr Ryšavý + * Feedback: hamerly@cs.baylor.edu + * See: http://cs.baylor.edu/~hamerly/software/kmeans.php + * Copyright 2015 + */ + +#include "modified_update_triangle_based_kmeans.h" +#include "general_functions.h" +#include +#include +#include +#include + +void ModifiedUpdateTriangleBasedKmeans::free() { + TriangleInequalityBaseKmeans::free(); + centersByMovement.clear(); + delete oldCenters; + delete [] lowerBoundUpdate; + delete [] maxUpperBound; + #ifdef USE_THREADS + delete [] maxUpperBoundAgg; + #endif + delete [] oldCentroidsNorm2; + delete [] centroidsNorm2; + delete [] oldNewCentroidInnerProduct; + delete [] centerCenterDistDiv2; + delete [] neighbours; + oldCenters = NULL; + lowerBoundUpdate = NULL; + maxUpperBound = NULL; + maxUpperBoundAgg = NULL; + oldCentroidsNorm2 = NULL; + centroidsNorm2 = NULL; + oldNewCentroidInnerProduct = NULL; + centerCenterDistDiv2 = NULL; + neighbours = NULL; +} + +/* This method moves the newCenters to their new locations, based on the + * sufficient statistics in sumNewCenters. It also computes the centerMovement + * and the center that moved the furthest. + * + * Here the implementation adds the lower bound update. This includes copying + * the old centroid locations and calculating the update. After this method + * the center-center distances are known, the s array is filled and the tighter + * lower bound update is calculated. + * + * Parameters: none + * + * Return value: none + */ +void ModifiedUpdateTriangleBasedKmeans::move_centers(int threadId) { + // copy the location of centers into oldCenters so that we know it + if (threadId == 0) { + memcpy(oldCenters->data, centers->data, sizeof (double) * (d * k)); + + // move the centers as we do in the original algorithms + int furthestMovingCenter = TriangleInequalityBaseKmeans::move_centers(); + converged = (0.0 == centerMovement[furthestMovingCenter]); + } + synchronizeAllThreads(); + + // if not converged ... + if (!converged) { + // ... calculate the lower bound update + update_s(threadId); // first update center-center distances + calculate_max_upper_bound(threadId); // get m(c_i) + update_cached_inner_products(threadId); // update precalculated norms + synchronizeAllThreads(); + calculate_lower_bound_update(threadId); // and get the update + } +} + +void ModifiedUpdateTriangleBasedKmeans::update_cached_inner_products(int threadId) { + // copy the oldCentroids norms, we need to store this value + #ifdef USE_THREADS + if (threadId == 0) + #endif + memcpy(oldCentroidsNorm2, centroidsNorm2, sizeof (double) * k); + synchronizeAllThreads(); + + for (int c = 0; c < k; ++c) // for each centroid + #ifdef USE_THREADS + if (c % numThreads == threadId) + #endif + { + int offset = c*d; + // calculate norm of each centroid + centroidsNorm2[c] = inner_product(centers->data + offset, centers->data + offset); + // calculate the inner product of new and old location + oldNewCentroidInnerProduct[c] = inner_product(centers->data + offset, oldCenters->data + offset); + } + + // sort centers by movemenet, use function center_movement_comparator_function as comparator + #ifdef USE_THREADS + if (threadId == 0) + #endif + std::sort(centersByMovement.begin(), centersByMovement.end(), std::bind(&ModifiedUpdateTriangleBasedKmeans::center_movement_comparator_function, this, std::placeholders::_1, std::placeholders::_2)); +} + +/* This method does the aggregation of the updates so that we can + * use it in algorithms with single bound. It is overridden in modified + * versions of Elkan's algorithm. + */ +void ModifiedUpdateTriangleBasedKmeans::calculate_lower_bound_update(int threadId) { + // big C is the point for that we calculate the update + for (int C = 0; C < k; ++C) + #ifdef USE_THREADS + if (C % numThreads == threadId) + #endif + { + calculate_neighbors(C); + + double maxUpdate = 0; + + // iterate in decreasing order of centroid movement + // ... it is already stored this way in neighbors array + for (int* ptr = neighbours[C]; (*ptr) != -1; ++ptr) { + // and small c is the other point that moved + const int c = (*ptr); + + // if all remaining centroids moved less than the current update, we do not + // need to consider them - the case of Hamerly's & heap + if (centerMovement[c] <= maxUpdate) + break; + + // calculate update and overwrite if it is bigger than the current value + double update = calculate_update(C, c); + if (update > maxUpdate) + maxUpdate = update; + } + + // store the tighter update for this cluster + lowerBoundUpdate[C] = maxUpdate; + } +} + +// notion: C is the point c_i in the thesis c is the point c_j in the thesis + +double ModifiedUpdateTriangleBasedKmeans::calculate_update(const unsigned int C, const unsigned int c, bool consider_negative) { + // those values will be needed + const double cCInnerProduct = inner_product(oldCenters->data + c * d, oldCenters->data + C * d); + const double cPrimeCInnerProduct = inner_product(centers->data + c * d, oldCenters->data + C * d); + const double ccPrimeInnerProduct = oldNewCentroidInnerProduct[c]; + const double cNorm2 = oldCentroidsNorm2[c]; + const double cPrimeNorm2 = centroidsNorm2[c]; + const double CNorm2 = oldCentroidsNorm2[C]; + + double maxUpperBoundC = maxUpperBound[C]; + double cMovement = centerMovement[c]; + + // t, that specifies the projection of C onto c cPrime line (P(c_i) = c_j + t * (c_j' - c_j)) + double factor = (cNorm2 - cCInnerProduct + cPrimeCInnerProduct - ccPrimeInnerProduct) / cMovement / cMovement; + + // calculate the distance ||P(c_i)-c_i|| using the extended form, now only square + double distanceOfCFromLine = cNorm2 * (1 - factor) * (1 - factor) + + ccPrimeInnerProduct * 2 * factor * (1 - factor) + - cCInnerProduct * 2 * (1 - factor) + - 2 * factor * cPrimeCInnerProduct + + CNorm2 + + factor * factor * cPrimeNorm2; + // rounding errors make this sometimes negative if the distance is low + // then this sqrt causes NaN - do abs value therefore + // ... from the definition this should never happen + if (distanceOfCFromLine < 0) + distanceOfCFromLine = -distanceOfCFromLine; + // calculate the distance + distanceOfCFromLine = sqrt(distanceOfCFromLine); + + // project the y coordinate + double y = 1 - factor * 2; + // project the radius of the sphere around the cluster + double r = 2 * maxUpperBoundC / cMovement; + + // here we will store the update divided by cMovement + double update; + // the case when the sphere with radius r around C goes through c-c' line + if (distanceOfCFromLine < maxUpperBoundC) { + // take the bottommost point where the sphere can be = bound by hyperplane + // perpendicular to the line through c and c' + update = r - y; + if (update >= 1.0) // this is too bad, triangle inequality gives us better result + return cMovement; + // put there zero, because sphere can be curved less than the hyperbola and therefore + // condition that while circle is above the hyperbola may be invalid + // bound therefore by a hyperplane that goes throught the origin and is perpendicular to c-c' + else if (update < 0) + return 0.0; // we do not need to scale zero by multiplying, return here + return update * cMovement; + } + + // this is the case when the circle contains no point with a negative coordinate + if (y > r) { + if (!consider_negative) + return 0.0; + // Note that if the bottommost point of the sphere has y-coordinate + // from interval [0,1], we can use update 0 (update from line 213 would be >0) + if (y - r <= 1.0) + return 0.0; + + // handle the negative update ... it is the same as decreasing y by 1, + // i.e. moving sphere by half center movement down + y -= 1.0; + } + + // the case that fulfils conditions of Lemma 3.1 (or the case when y = y-1) + // encode the formula + const double x = 2 * distanceOfCFromLine / cMovement; + const double xSqPlusYSq = x * x + y*y; + const double aNorm = sqrt(xSqPlusYSq - r * r); + update = (x * r - y * aNorm) / xSqPlusYSq; + + // multiply back by the movement of c + return update * cMovement; +} + +void ModifiedUpdateTriangleBasedKmeans::calculate_max_upper_bound(int threadId) { + if (threadId == 0) + std::fill(maxUpperBound, maxUpperBound + k, 0.0); + #ifdef USE_THREADS + if (threadId == 0) + std::fill(maxUpperBoundAgg, maxUpperBoundAgg + k * numThreads, 0.0); + synchronizeAllThreads(); + for (int i = 0; i < n; ++i) + if (maxUpperBoundAgg[threadId * k + assignment[i]] < upper[i]) + maxUpperBoundAgg[threadId * k + assignment[i]] = upper[i]; + synchronizeAllThreads(); + aggregate_maximum_upper_bound(threadId); + #else + // calculate over the array of upper bound and find maximum for each cluster + for (int i = 0; i < n; ++i) + if (maxUpperBound[assignment[i]] < upper[i]) + maxUpperBound[assignment[i]] = upper[i]; + #endif +} + +#ifdef USE_THREADS +// for each cluster: get maximum upper bound for each thread and find maximum +void ModifiedUpdateTriangleBasedKmeans::aggregate_maximum_upper_bound(int threadId) { + for (int c = 0; c < k; ++c) + if (c % numThreads == threadId) + for (int tId = 0; tId < numThreads; tId++) + if (maxUpperBound[c] < maxUpperBoundAgg[tId * k + c]) + maxUpperBound[c] = maxUpperBoundAgg[tId * k + c]; +} +#endif + +bool ModifiedUpdateTriangleBasedKmeans::center_movement_comparator_function(int c1, int c2) { + return (centerMovement[c1] > centerMovement[c2]); // values must be decreaing +} + +// copied from elkan_kmeans.cpp +// finds the center-center distances + +void ModifiedUpdateTriangleBasedKmeans::update_s(int threadId) { + // find the inter-center distances + for (int c1 = 0; c1 < k; ++c1) + #ifdef USE_THREADS + if (c1 % numThreads == threadId) + #endif + { + s[c1] = std::numeric_limits::max(); + + for (int c2 = 0; c2 < k; ++c2) + if (c1 != c2) { + centerCenterDistDiv2[c1 * k + c2] = sqrt(centerCenterDist2(c1, c2)) / 2.0; + + if (centerCenterDistDiv2[c1 * k + c2] < s[c1]) + s[c1] = centerCenterDistDiv2[c1 * k + c2]; + } + } +} + +/* This function initializes the old centers, bound update, maxUpperBound, + * cached inner product, the vector of centroids sorted by movement, distances + * between centroids and also norms of centroids. + * + * Parameters: none + * + * Return value: none + */ +void ModifiedUpdateTriangleBasedKmeans::initialize(Dataset const *aX, unsigned short aK, unsigned short *initialAssignment, int aNumThreads) { + TriangleInequalityBaseKmeans::initialize(aX, aK, initialAssignment, aNumThreads); + + oldCenters = new Dataset(k, d); + + // set length k if heap (numLB = 0), otherwise k*numLowerBounds + lowerBoundUpdate = new double[k * (numLowerBounds == 0 ? 1 : numLowerBounds)]; + maxUpperBound = new double[k]; + #ifdef USE_THREADS + maxUpperBoundAgg = new double[k * aNumThreads]; + #endif + + // the cached inner products + oldCentroidsNorm2 = new double[k]; + centroidsNorm2 = new double[k]; + oldNewCentroidInnerProduct = new double[k]; + + centerCenterDistDiv2 = new double[k * k]; + std::fill(centerCenterDistDiv2, centerCenterDistDiv2 + k * k, 0.0); + + for (int c = 0; c < k; ++c) { + int offset = c*d; + // calcualte norms of initial centers + centroidsNorm2[c] = inner_product(centers->data + offset, centers->data + offset); + // let centers by movement contain initially 0-1-2-3-...-k, it will be sorted each iteration + centersByMovement.push_back(c); + } + + // there we will store neighbouring clusters to each cluster + // in first iteration we have to go through all neighbours as all the upper bounds are invalid + neighbours = new int*[k]; + for (int i = 0; i < k; ++i) { + neighbours[i] = new int[k]; + int pos = 0; + for (int j = 0; j < k; ++j) + if (i != j) + neighbours[i][pos++] = j; + // place the termination sign, iteration will stop there + neighbours[i][k - 1] = -1; + } +} + +/* Here we will calculate the neighbors of the cluster centered at C. */ +void ModifiedUpdateTriangleBasedKmeans::calculate_neighbors(const int C) { + // This is half of the bound on distance between center C and some + // other c. If || C-c || is greater than twice this, then c is not + // neighbor of C. + double boundOnOtherDistance = maxUpperBound[C] + s[C] + centerMovement[C]; + + // find out which clusters are neighbours of cluster C + int neighboursPos = 0; + for (int i = 0; i < k; ++i) { + // let them be sorted by movement, we need to go through in this order in the second loop + // so as to eliminate the updates calculations for Hamerly & heap + int c = centersByMovement[i]; + // exclude also C from negihbors and check the condition + if (c != C && boundOnOtherDistance >= centerCenterDistDiv2[C * k + c]) + neighbours[C][neighboursPos++] = c; + } + // place the stop mark + neighbours[C][neighboursPos] = -1; +} + diff --git a/fast_kmeans/modified_update_triangle_based_kmeans.h b/fast_kmeans/modified_update_triangle_based_kmeans.h new file mode 100644 index 0000000..c0f3b57 --- /dev/null +++ b/fast_kmeans/modified_update_triangle_based_kmeans.h @@ -0,0 +1,182 @@ +#ifndef MODIFIED_UPDATE_TRIANGLE_BASED_KMEANS_H +#define MODIFIED_UPDATE_TRIANGLE_BASED_KMEANS_H + +/* Authors: Greg Hamerly and Petr Ryšavý + * Feedback: hamerly@cs.baylor.edu + * See: http://cs.baylor.edu/~hamerly/software/kmeans.php + * Copyright 2015 + * + * ModifiedUpdateTriangleBasedKmeans is a base class for kmeans algorithms + * that want to benefit from the tigher bound update, iteration over neighbors + * and other improvements proposed in (Ryšavý, Hamerly 2015). Goal of this class + * is to provide infrastructure that allows efficient calculations of this + * tigher update and neighbors set. + * + * The idea about the tighter lower bound is the following: + * - If we update the lower bound using the triangle inequality, we are too + * pessimistic as the triangle inequality holds everywhere in the space. + * - Therefore we bound the cluster into a sphere. + * - And on the sphere we calculate the maximal update that will be needed + * for the lower bound considering each centroid move. + * If we consider the neighbors set, the idea is the following: + * - Again we bound the cluster in a sphere. + * - We evaluate for each centroid a condition, which must be fulfilled if + * the point is the closest or the second closest to some point in the + * cluster. + * - We can ignore the poitns that violate this condition from the innermost + * loop in Hamerly's algorithm and also in the tighter update calculation. + */ + +#include "triangle_inequality_base_kmeans.h" +#include +#include + +class ModifiedUpdateTriangleBasedKmeans : public TriangleInequalityBaseKmeans { +public: + + ModifiedUpdateTriangleBasedKmeans() : oldCenters(NULL), + lowerBoundUpdate(NULL), maxUpperBound(NULL), maxUpperBoundAgg(NULL), + oldCentroidsNorm2(NULL), centroidsNorm2(NULL), oldNewCentroidInnerProduct(NULL), + centerCenterDistDiv2(NULL), neighbours(NULL) { + } + + virtual ~ModifiedUpdateTriangleBasedKmeans() { + free(); + } + + virtual void initialize(Dataset const *aX, unsigned short aK, unsigned short *initialAssignment, int aNumThreads); + virtual void free(); + + /* This function claculated inner product of two vectors of dimension d. + * The number of inner products is counted in the superclass. + */ + inline double inner_product(const double *a, double const *b) const { +#ifdef COUNT_DISTANCES + ++numInnerProducts; +#endif + return std::inner_product(a, a + d, b, 0.0); + }; + + +protected: + /* This method is overloaded because we use it also for calculating the + * center-center distances. They need to be stored so that we can use them + * for the optimizations. + */ + virtual void update_s(int threadId); + + /* Override how the centers are moved. Before the movement, we need to + * backup the old location of centroids and after movement we need to + * calculate the tigher upper bound update and the neighbors set. + */ + virtual void move_centers(int threadId); + + /* Calculates the lower bound update assuming that all the cached values + * are properly calculated. This, default implementation assumes a single + * lower bound and therefore takes max update over centroids that fulfil + * the neighbor condition. This is true for Hamerly's algorithm, the heap + * algorithm and the annular algorithm. In the case of Elkan's algorithm + * this method is implemented differently. + */ + virtual void calculate_lower_bound_update(int threadId); + + /* Updates the cached inner products. We cache for each centroid its new + * norm, its old norm and also the inner product of the old centroid location + * and the new centroid location. + */ + void update_cached_inner_products(int threadId); + + /* Calculates the maximum upper bound by simple iteration over upper + * bound array. + */ + virtual void calculate_max_upper_bound(int threadId); + + #ifdef USE_THREADS + /* This method is used in multithreaded version to collect the maximum upper + * bound from the results that are produced by each thread. Each thread outputs + * the maximum upper bound per cluster for points that it manages into the + * maxUpperBoundAgg array. This method converts the result into maximumUpperBound + * array. All threads must be synchronized before calling this method. */ + void aggregate_maximum_upper_bound(int threadId); + #endif + + /* Comarator of two points by their movement. The points that have moved + * more will be first if the standard order is used. + * + * Parameters: The indices of centroids that have moved to compare. + */ + bool center_movement_comparator_function(int c1, int c2); + + /** + * Calculates the tigher update. + * + * Parameters: + * C the center whose update should be calculated + * c the center that moved + * consider_negative Should the calculation consider the negative value. + * This option should be true for Elkan kmeans. Negative update calculation + * costs a bit more, but gives tighter update. + * Returns: tigher update of the lower bound of points assigned to C if we + * consider only centroid c + */ + double calculate_update(const unsigned int C, const unsigned int c, bool consider_negative = false); + + /* + * Calculates the neighbors of centroid C and fills the corresponding row + * of the neighbors array. The neighbor array contains for each cluster + * a list of centroids that can be second closest to any point of the cluster. + * In this implementation we exclude from the list the cluster centroid. + * + * Parameters: + * C the centroid for that we need to know the neighbors + * Returns: nothing + */ + void calculate_neighbors(const int C); + + /* Backup here the old location of the centers. */ + Dataset *oldCenters; + + /* Array for the lower bound update. Here we will store the more tighter + * lower bound update. + * + * Size is set automatically using the numLowerBounds * k. + * When numLowerBounds is zero (the heap algorithm), then this has size of k. + */ + double *lowerBoundUpdate; + + /* Array for storing maximum upper bound per cluster. */ + double *maxUpperBound; + + /* Temporary array used for finding the maximal upper bound using multiple threads. */ + double *maxUpperBoundAgg; + + /* Norm of the centroids in the last iteration. */ + double *oldCentroidsNorm2; + + /* Norm of the current centroids. */ + double *centroidsNorm2; + + /* Inner product of each centroid's new location with old location. */ + double *oldNewCentroidInnerProduct; + + /* Keep track of the distance (divided by 2) between each pair of + * points. */ + double *centerCenterDistDiv2; + + /* Here we will store centroids sorted by how much they moved. This is used for + * more effective calculation of the tighter lower bound update so that we can + * skip some of the points from the iteration. */ + std::vector centersByMovement; + + /* + * Here we will store the set of neighbors. It is k array of k arrays, which + * contains the set of neighbors. For each centroid there is list of neighbors, + * that ends by -1, which means end. + * + * As this is needed for evaluating the tighter lower bound update, this array + * is filled every time and it is algorithm's responsibility to use it or not. + */ + int** neighbours; +}; + +#endif diff --git a/fast_kmeans/stats_functions.cpp b/fast_kmeans/stats_functions.cpp new file mode 100644 index 0000000..20028ba --- /dev/null +++ b/fast_kmeans/stats_functions.cpp @@ -0,0 +1,141 @@ +#include "stats_functions.h" +#include +#include +#include +#include + +void stdev(eval_result* target, eval_result* sum, eval_result* sum_of_squares, int n); +double stdev(int n, double sum_of_squares, double sum); +eval_result* get_min(std::vector *data); +eval_result* get_max(std::vector *data); +eval_result* get_average(std::vector *data); +eval_result* get_stdev(std::vector *data); + +void print_result(eval_result* result) { + // Report results + std::cout << std::setw(5) << result->iterations << "\t"; + std::cout << std::setw(10) << result->num_threads << "\t"; + std::cout << std::setw(10) << result->cluster_time << "\t"; + std::cout << std::setw(10) << result->cluster_wall_time << "\t"; + std::cout << std::setw(8) << result->memory_usage; // from kilo to mega + #ifdef MONITOR_ACCURACY + std::cout << "\t" << std::setw(8) << result->sse; + #endif + #ifdef COUNT_DISTANCES + std::cout << "\t" << std::setw(11) << result->num_distances; + std::cout << "\t" << std::setw(11) << result->inner_products; + std::cout << "\t" << std::setw(11) << result->assignment_changes; + std::cout << "\t" << std::setw(11) << result->bounds_updates; + #endif +} + +void print_summary(std::vector* results) { + std::cout << "---------------------------------------" << std::endl; + std::cout << std::setw(35) << "min\t"; + eval_result* min = get_min(results); + print_result(min); + delete min; + std::cout << std::endl << std::setw(35) << "max\t"; + eval_result* max = get_max(results); + print_result(max); + delete max; + std::cout << std::endl << std::setw(35) << "average\t"; + eval_result* avg = get_average(results); + print_result(avg); + delete avg; + std::cout << std::endl << std::setw(35) << "standard deviation\t"; + eval_result* stdev = get_stdev(results); + print_result(stdev); + delete stdev; + std::cout << std::endl << "---------------------------------------" << std::endl; +} + +void print_header_row() { + std::cout << std::setw(35) << "algorithm" << "\t" + << std::setw(5) << "iters" << "\t" + << std::setw(10) << "numThreads" << "\t" + << std::setw(10) << "cpu_secs" << "\t" + << std::setw(10) << "wall_secs" << "\t" + << std::setw(8) << "MB" + #ifdef MONITOR_ACCURACY + << "\t" << std::setw(8) << "sse" + #endif + #ifdef COUNT_DISTANCES + << "\t" << std::setw(11) << "#distances" + << "\t" << std::setw(11) << "#i. products" + << "\t" << std::setw(11) << "#assig. changes" + << "\t" << std::setw(11) << "#boundsUpdts." + #endif + << std::endl; +} + +void stdev(eval_result* target, eval_result* sum, eval_result* sum_of_squares, int n) { + target->iterations = stdev(n, sum_of_squares->iterations, sum->iterations); + target->num_threads = stdev(n, sum_of_squares->num_threads, sum->num_threads); + target->cluster_time = stdev(n, sum_of_squares->cluster_time, sum->cluster_time); + target->cluster_wall_time = stdev(n, sum_of_squares->cluster_wall_time, sum->cluster_wall_time); + target->memory_usage = stdev(n, sum_of_squares->memory_usage, sum->memory_usage); + #ifdef MONITOR_ACCURACY + target->sse = stdev(n, sum_of_squares->sse, sum->sse); + #endif + #ifdef COUNT_DISTANCES + target->num_distances = stdev(n, sum_of_squares->num_distances, sum->num_distances); + target->inner_products = stdev(n, sum_of_squares->inner_products, sum->inner_products); + target->assignment_changes = stdev(n, sum_of_squares->assignment_changes, sum->assignment_changes); + target->bounds_updates = stdev(n, sum_of_squares->bounds_updates, sum->bounds_updates); + #endif +} + +double stdev(int n, double sum_of_squares, double sum) { + return sqrt(1.0 / n * sum_of_squares - 1.0 / n / n * sum * sum); +} + +eval_result* get_min(std::vector *data) { + eval_result* retval = new eval_result(); + std::memcpy(retval, data->at(0), sizeof(eval_result)); + for (unsigned int i = 1; i < data->size(); ++i) { + eval_result* result = data->at(i); + retval->min(result); + } + return retval; +} + +eval_result* get_max(std::vector *data) { + eval_result* retval = new eval_result(); + std::memcpy(retval, data->at(0), sizeof(eval_result)); + for (unsigned int i = 1; i < data->size(); ++i) { + eval_result* result = data->at(i); + retval->max(result); + } + return retval; +} + +eval_result* get_average(std::vector *data) { + eval_result* retval = new eval_result(); + const unsigned int count = data->size(); + for (unsigned int i = 0; i < count; ++i) { + eval_result* result = data->at(i); + retval->add(result); + } + + retval->divide(count); + return retval; +} + +eval_result* get_stdev(std::vector *data) { + eval_result* retval = new eval_result(); + eval_result* sum = new eval_result(); + eval_result* sumSq = new eval_result(); + const unsigned int count = data->size(); + for (unsigned int i = 0; i < count; ++i) { + eval_result* result = data->at(i); + sum->add(result); + sumSq->add_square(result); + } + + stdev(retval, sum, sumSq, count); + + delete sum; + delete sumSq; + return retval; +} \ No newline at end of file diff --git a/fast_kmeans/stats_functions.h b/fast_kmeans/stats_functions.h new file mode 100644 index 0000000..0d7db56 --- /dev/null +++ b/fast_kmeans/stats_functions.h @@ -0,0 +1,130 @@ +/* + * File: stats_functions.h + * Author: Petr Rysavy + * + * Created on 16. August 2015, 12:57 + */ + +#ifndef STATS_FUNCTIONS_H +#define STATS_FUNCTIONS_H + +#include + +/* The result of execution of algorithm. */ +struct eval_result { + // number of iterations that were needed to reach the optimum + int iterations; + // number of threads used + int num_threads; + // CPU time needed for calculations + double cluster_time; + // wall time needed for calculations + double cluster_wall_time; + // memory usage + double memory_usage; + #ifdef MONITOR_ACCURACY + double sse; + #endif + #ifdef COUNT_DISTANCES + // number of distances evaluated by the algorithm + long long num_distances; + // number of inner products evaluated by the algorithm; does not include num_distances + long long inner_products; + // how many times did a point change its assignment + long long assignment_changes; + // total update of the bounds + double bounds_updates; + #endif + + void add(const eval_result* a) { + iterations += a->iterations; + num_threads += a->num_threads; + cluster_time += a->cluster_time; + cluster_wall_time += a->cluster_wall_time; + memory_usage += a->memory_usage; + #ifdef MONITOR_ACCURACY + sse += a->sse; + #endif + #ifdef COUNT_DISTANCES + num_distances += a->num_distances; + inner_products += a->inner_products; + assignment_changes += a->assignment_changes; + bounds_updates += a->bounds_updates; + #endif + } + + void divide(const int n) { + iterations = ((double) iterations) / n; + num_threads /= n; + cluster_time /= n; + cluster_wall_time /= n; + memory_usage /= n; + #ifdef MONITOR_ACCURACY + sse /= n; + #endif + #ifdef COUNT_DISTANCES + num_distances /= n; + inner_products /= n; + assignment_changes /= n; + bounds_updates /= n; + #endif + } + + void min(const eval_result* a) { + iterations = std::min(iterations, a->iterations); + num_threads = std::min(num_threads, a->num_threads); + cluster_time = std::min(cluster_time, a->cluster_time); + cluster_wall_time = std::min(cluster_wall_time, a->cluster_wall_time); + memory_usage = std::min(memory_usage, a->memory_usage); + #ifdef MONITOR_ACCURACY + sse = std::min(sse, a->sse); + #endif + #ifdef COUNT_DISTANCES + num_distances = std::min(num_distances, a->num_distances); + inner_products = std::min(inner_products, a->inner_products); + assignment_changes = std::min(assignment_changes, a->assignment_changes); + bounds_updates = std::min(bounds_updates, a->bounds_updates); + #endif + } + + void max(const eval_result* a) { + iterations = std::max(iterations, a->iterations); + num_threads = std::max(num_threads, a->num_threads); + cluster_time = std::max(cluster_time, a->cluster_time); + cluster_wall_time = std::max(cluster_wall_time, a->cluster_wall_time); + memory_usage = std::max(memory_usage, a->memory_usage); + #ifdef MONITOR_ACCURACY + sse = std::max(sse, a->sse); + #endif + #ifdef COUNT_DISTANCES + num_distances = std::max(num_distances, a->num_distances); + inner_products = std::max(inner_products, a->inner_products); + assignment_changes = std::max(assignment_changes, a->assignment_changes); + bounds_updates = std::max(bounds_updates, a->bounds_updates); + #endif + } + + void add_square(const eval_result* a) { + iterations += a->iterations * a->iterations; + num_threads += a->num_threads * a->num_threads; + cluster_time += a->cluster_time * a->cluster_time; + cluster_wall_time += a->cluster_wall_time * a->cluster_wall_time; + memory_usage += a->memory_usage * a->memory_usage; + #ifdef MONITOR_ACCURACY + sse += a->sse * a->sse; + #endif + #ifdef COUNT_DISTANCES + num_distances += a->num_distances * a->num_distances; + inner_products += a->inner_products * a->inner_products; + assignment_changes += a->assignment_changes * a->assignment_changes; + bounds_updates += a->bounds_updates * a->bounds_updates; + #endif + } +}; + +void print_result(eval_result* result); +void print_summary(std::vector* results); +void print_header_row(); + +#endif /* STATS_FUNCTIONS_H */ +