Skip to content

Commit

Permalink
important bugfix in clustering
Browse files Browse the repository at this point in the history
  • Loading branch information
derohde committed Aug 24, 2021
1 parent 74c8d4d commit fb8c547
Show file tree
Hide file tree
Showing 5 changed files with 34 additions and 72 deletions.
Binary file removed Fred/__pycache__/__init__.cpython-39.pyc
Binary file not shown.
6 changes: 4 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ By default, Fred will automatically determine the number of threads to use. If y

### Curve Simplification

All simplifications are vertex-restricted!

#### weak minimum error simplification
- graph approach from [**Polygonal Approximations of a Curve — Formulations and Algorithms**](https://www.sciencedirect.com/science/article/pii/B9780444704672500114)
- signature: `fred.weak_minimum_error_simplification(fred.Curve, int complexity)`
Expand Down Expand Up @@ -65,8 +67,8 @@ A `fred.Distance_Matrix()` can be used to speed up consecutive calls of `fred.di
- `k`: number of centers
- `l`: maximum complexity of the centers, only used when center_domain is default value
- `distances`: `fred.Distance_Matrix`, defaults to empty `fred.Distance_Matrix`
- `center_domain`: possible centers, defaults to empty `fred.Curves()`, in this case the input is simplified and used as center domain
- `random_first_center`: determines if first center is chosen uniformly at random or first curve is used as first center, optional, defaults to true
- `fast_simplification`: determines whether to use the weak minimum error simplification or the faster approximate weak minimum error simplification, defaults to false
- returns: `fred.Clustering_Result` with members
- `value`: objective value
- `time`: running-time
Expand All @@ -78,7 +80,7 @@ A `fred.Distance_Matrix()` can be used to speed up consecutive calls of `fred.di
- `k`: number of centers
- `l`: maximum complexity of the centers, only used when center_domain is default value
- `distances`: `fred.Distance_Matrix`, defaults to empty `fred.Distance_Matrix`
- `center_domain`: possible centers, optional parameter, if not given the input is simplified and used as center domain
- `fast_simplification`: determines whether to use the weak minimum error simplification or the faster approximate weak minimum error simplification, defaults to false
- returns: `fred.Clustering_Result` with members
- `value`: objective value
- `time`: running-time
Expand Down
62 changes: 27 additions & 35 deletions include/clustering.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -128,41 +128,35 @@ struct Clustering_Result {
}
};

Clustering_Result kl_center(const curve_number_t num_centers, const curve_size_t ell, const Curves &in, Distance_Matrix &distances, const bool local_search = false, const Curves &center_domain = Curves(), const bool random_start_center = true) {

Clustering_Result kl_cluster(const curve_number_t num_centers, const curve_size_t ell, const Curves &in, Distance_Matrix &distances, const bool local_search = false, const bool random_start_center = true, const bool fast_simplification = false) {
const auto start = std::chrono::high_resolution_clock::now();
Clustering_Result result;

if (in.empty()) return result;

std::vector<curve_number_t> centers;
Curves &simplified_in = const_cast<Curves&>(center_domain);
bool self_simplify = false;
Curves simplified_in(in.number(), ell, in.dimensions());

if (center_domain.empty()) {
self_simplify = true;
Curves simplified_in_self(in.number(), ell, in.dimensions());
simplified_in = simplified_in_self;
}

auto simplify = [&](const curve_number_t i) {
if (fast_simplification) {
auto simplified_curve = Simplification::approximate_weak_minimum_error_simplification(const_cast<Curve&>(in[i]), ell);
simplified_curve.set_name("Simplification of " + in[i].get_name());
return simplified_curve;
} else {
Simplification::Subcurve_Shortcut_Graph graph(const_cast<Curve&>(in[i]));
auto simplified_curve = graph.weak_minimum_error_simplification(ell);
simplified_curve.set_name("Simplification of " + in[i].get_name());
return simplified_curve;
}
};

if (random_start_center) {
Random::Uniform_Random_Generator<double> ugen;
const curve_number_t r = std::floor(simplified_in.size() * ugen.get());
if (self_simplify) {
Simplification::Subcurve_Shortcut_Graph graph(const_cast<Curve&>(in[r]));
auto simplified_curve = graph.weak_minimum_error_simplification(ell);
simplified_curve.set_name("Simplification of " + in[r].get_name());
simplified_in[r] = simplified_curve;
}
simplified_in[r] = simplify(r);
centers.push_back(r);

} else {
if (self_simplify) {
Simplification::Subcurve_Shortcut_Graph graph(const_cast<Curve&>(in[0]));
auto simplified_curve = graph.weak_minimum_error_simplification(ell);
simplified_curve.set_name("Simplification of " + in[0].get_name());
simplified_in[0] = simplified_curve;
}
simplified_in[0] = simplify(0);
centers.push_back(0);
}

Expand Down Expand Up @@ -195,11 +189,8 @@ Clustering_Result kl_center(const curve_number_t num_centers, const curve_size_t
std::cout << "found center no. " << i+1 << std::endl;
#endif

if (self_simplify and simplified_in[curr_maxcurve].empty()) {
Simplification::Subcurve_Shortcut_Graph graph(const_cast<Curve&>(in[curr_maxcurve]));
auto simplified_curve = graph.weak_minimum_error_simplification(ell);
simplified_curve.set_name("Simplification of " + in[curr_maxcurve].get_name());
simplified_in[curr_maxcurve] = simplified_curve;
if (simplified_in[curr_maxcurve].empty()) {
simplified_in[curr_maxcurve] = simplify(curr_maxcurve);
}
centers.push_back(curr_maxcurve);
}
Expand Down Expand Up @@ -229,11 +220,8 @@ Clustering_Result kl_center(const curve_number_t num_centers, const curve_size_t
if (std::find(curr_centers.begin(), curr_centers.end(), j) != curr_centers.end()) continue;

// swap
if (self_simplify and simplified_in[j].empty()) {
Simplification::Subcurve_Shortcut_Graph graph(const_cast<Curve&>(in[j]));
auto simplified_curve = graph.weak_minimum_error_simplification(ell);
simplified_curve.set_name("Simplification of " + in[j].get_name());
simplified_in[j] = simplified_curve;
if (simplified_in[j].empty()) {
simplified_in[j] = simplify(j);
}
curr_centers[i] = j;
// new cost
Expand All @@ -260,8 +248,12 @@ Clustering_Result kl_center(const curve_number_t num_centers, const curve_size_t
return result;
}

Clustering_Result kl_median(const curve_number_t num_centers, const curve_size_t ell, const Curves &in, Distance_Matrix &distances, const Curves &center_domain = Curves()) {
return kl_center(num_centers, ell, in, distances, true, center_domain, false);
/**
 * Discrete (k,l)-center clustering.
 *
 * Thin forwarder to kl_cluster with the local-search phase switched off.
 *
 * @param num_centers          number of centers to compute
 * @param ell                  complexity passed to the simplification of the chosen centers
 * @param in                   input curves
 * @param distances            distance matrix handed through to kl_cluster
 * @param random_start_center  forwarded unchanged; selects a random first center instead of curve 0
 * @param fast_simplification  forwarded unchanged; selects the approximate weak minimum error simplification
 * @return the Clustering_Result produced by kl_cluster
 */
Clustering_Result kl_center(const curve_number_t num_centers, const curve_size_t ell, const Curves &in, Distance_Matrix &distances, const bool random_start_center = true, const bool fast_simplification = false) {
    // Name the flag so the positional call below is self-explanatory.
    const bool use_local_search = false;
    return kl_cluster(num_centers, ell, in, distances, use_local_search, random_start_center, fast_simplification);
}

/**
 * Discrete (k,l)-median clustering.
 *
 * Thin forwarder to kl_cluster with the local-search phase switched on and the
 * first input curve used as the first center (random_start_center = false).
 *
 * @param num_centers          number of centers to compute
 * @param ell                  complexity passed to the simplification of the chosen centers
 * @param in                   input curves
 * @param distances            distance matrix handed through to kl_cluster
 * @param fast_simplification  selects the approximate weak minimum error simplification
 *                             instead of the shortcut-graph based one
 * @return the Clustering_Result produced by kl_cluster
 */
Clustering_Result kl_median(const curve_number_t num_centers, const curve_size_t ell, const Curves &in, Distance_Matrix &distances, const bool fast_simplification = false) {
    // kl_cluster's trailing parameters are (local_search, random_start_center,
    // fast_simplification). random_start_center has to be spelled out here:
    // if it is omitted, fast_simplification binds to random_start_center and
    // the simplification flag silently stays at its default of false.
    const bool use_local_search = true;
    const bool random_start_center = false;
    return kl_cluster(num_centers, ell, in, distances, use_local_search, random_start_center, fast_simplification);
}

Clustering_Result one_median_sampling(const curve_size_t ell, const Curves &in, const double epsilon, const Curves &center_domain = Curves()) {
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def build_extension(self, ext):

setup(
name='Fred-Frechet',
version='1.7.4',
version='1.7.5',
author='Dennis Rohde',
author_email='[email protected]',
description='A fast, scalable and light-weight C++ Fréchet distance library, exposed to python and focused on (k,l)-clustering of polygonal curves.',
Expand Down
36 changes: 2 additions & 34 deletions src/fred_python_wrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,38 +45,6 @@ bool get_frechet_rounding() {
return fc::round;
}

Clustering::Clustering_Result klcenter(const curve_number_t num_centers, const curve_size_t ell, const Curves &in, Clustering::Distance_Matrix &distances, const Curves &center_domain = Curves(), const bool random_start_center = true) {
auto result = Clustering::kl_center(num_centers, ell, in, distances, false, center_domain, random_start_center);
return result;
}

Clustering::Clustering_Result klmedian(const curve_number_t num_centers, const curve_size_t ell, const Curves &in, Clustering::Distance_Matrix distances, const Curves &center_domain = Curves()) {

auto result = Clustering::kl_median(num_centers, ell, in, distances, center_domain);

return result;
}

// Clustering::Clustering_Result onemedian_sampling(const curve_size_t ell, Curves &in, const double epsilon, const bool with_assignment = false, const Curves &center_domain = Curves()) {
//
// auto result = Clustering::one_median_sampling(ell, in, epsilon, with_assignment);
//
// return result;
// }
//
// Clustering::Clustering_Result onemedian_exhaustive(const curve_size_t ell, Curves &in, const bool with_assignment = false, const Curves &center_domain = Curves()) {
//
// auto result = Clustering::one_median_exhaustive(ell, in, with_assignment);
//
// return result;
// }
//
//
// Coreset::Onemedian_Coreset onemedian_coreset(const Curves &in, const curve_size_t ell, const double epsilon, const double constant = 1) {
// return Coreset::Onemedian_Coreset(ell, in, epsilon, constant);
// }
//

Curve weak_minimum_error_simplification(const Curve &curve, const curve_size_t l) {
Simplification::Subcurve_Shortcut_Graph graph(const_cast<Curve&>(curve));
auto scurve = graph.weak_minimum_error_simplification(l);
Expand Down Expand Up @@ -224,8 +192,8 @@ PYBIND11_MODULE(backend, m) {

m.def("dimension_reduction", &JLTransform::transform_naive, py::arg("in") = Curves(), py::arg("epsilon") = 0.5, py::arg("empirical_constant") = true);

m.def("discrete_klcenter", &klcenter, py::arg("num_centers") = 1, py::arg("ell") = 2, py::arg("in") = Curves(), py::arg("distances") = Clustering::Distance_Matrix(), py::arg("center_domain") = Curves(), py::arg("random_start_center") = true);
m.def("discrete_klmedian", &klmedian, py::arg("num_centers") = 1, py::arg("ell") = 2, py::arg("in") = Curves(), py::arg("distances") = Clustering::Distance_Matrix(), py::arg("center_domain") = Curves());
m.def("discrete_klcenter", &Clustering::kl_center, py::arg("num_centers") = 1, py::arg("ell") = 2, py::arg("in") = Curves(), py::arg("distances") = Clustering::Distance_Matrix(), py::arg("random_start_center") = true, py::arg("fast_simplification") = false);
m.def("discrete_klmedian", &Clustering::kl_median, py::arg("num_centers") = 1, py::arg("ell") = 2, py::arg("in") = Curves(), py::arg("distances") = Clustering::Distance_Matrix(), py::arg("fast_simplification") = false);

// these are experimental
//m.def("two_two_dtw_one_two_median", &Clustering::two_two_dtw_one_two_median);
Expand Down

0 comments on commit fb8c547

Please sign in to comment.