Skip to content

Commit 90605c0

Browse files
Parallelized GMM filtering on CPU
1 parent 5cec387 commit 90605c0

File tree

2 files changed

+38
-52
lines changed

2 files changed

+38
-52
lines changed

ed_sensor_integration/CMakeLists.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -267,3 +267,10 @@ if (CATKIN_ENABLE_TESTING)
267267
${catkin_LIBRARIES}
268268
)
269269
endif ()
270+
271+
find_package(OpenMP REQUIRED)
272+
if(OPENMP_FOUND)
273+
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
274+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
275+
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}")
276+
endif()

ed_sensor_integration/src/kinect/segmenter.cpp

Lines changed: 31 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -207,40 +207,36 @@ std::vector<cv::Mat> Segmenter::cluster(const cv::Mat& depth_image, const geo::D
207207

208208
std::vector<cv::Mat> masks = SegmentationPipeline(rgb_image.clone(), config_);
209209
ROS_DEBUG("Creating clusters");
210-
unsigned int size = width * height;
211210

211+
// Pre-allocate temporary storage (one per mask, avoid push_back races)
212+
std::vector<EntityUpdate> temp_clusters(masks.size());
213+
std::vector<bool> valid_cluster(masks.size(), false);
214+
215+
// Parallel loop - each iteration is independent
216+
#pragma omp parallel for schedule(dynamic)
212217
for (size_t i = 0; i < masks.size(); ++i)
213218
{
214-
cv::Mat mask = masks[i];
215-
clusters.push_back(EntityUpdate());
216-
EntityUpdate& cluster = clusters.back();
217-
218-
unsigned int num_points = 0;
219+
const cv::Mat& mask = masks[i];
220+
EntityUpdate cluster; // local to this thread
219221

220-
// Add to cluster
222+
// Extract points from mask
221223
for(int y = 0; y < mask.rows; ++y) {
222224
for(int x = 0; x < mask.cols; ++x) {
223225
if (mask.at<unsigned char>(y, x) > 0) {
224226
unsigned int pixel_idx = y * width + x;
225227
float d = depth_image.at<float>(pixel_idx);
226228

227229
if (d > 0 && std::isfinite(d)) {
228-
229-
// Add this point to the cluster
230230
cluster.pixel_indices.push_back(pixel_idx);
231231
cluster.points.push_back(cam_model.project2Dto3D(x, y) * d);
232-
num_points++;
233232
}
234233
}
235234
}
236235
}
237236

238-
239-
// Check if cluster has enough points. If not, remove it from the list
240-
if (cluster.pixel_indices.size() < 100) // TODO: magic number
241-
{
242-
clusters.pop_back();
243-
continue;
237+
// Skip small clusters (< 100 points)
238+
if (cluster.pixel_indices.size() < 100) {
239+
continue; // valid_cluster[i] remains false
244240
}
245241

246242
// BMM point cloud denoising
@@ -264,62 +260,45 @@ std::vector<cv::Mat> Segmenter::cluster(const cv::Mat& depth_image, const geo::D
264260
}
265261
}
266262
// Safety check
267-
if (filtered_points.size() > MIN_FILTERED_POINTS && filtered_points.size() > MIN_RETENTION_RATIO * cluster.points.size()) {
268-
ROS_INFO("MAP-GMM filtering: kept %zu of %zu points (%.1f%%)",
269-
filtered_points.size(), cluster.points.size(),
270-
100.0 * filtered_points.size() / cluster.points.size());
263+
if (filtered_points.size() > MIN_FILTERED_POINTS &&
264+
filtered_points.size() > MIN_RETENTION_RATIO * cluster.points.size()) {
265+
// Note: ROS_INFO is thread-safe in recent versions, but excessive parallel logging
266+
// can interleave output. Consider reducing log level or moving outside loop.
271267
cluster.points = filtered_points;
272-
} else {
273-
ROS_WARN("MAP-GMM filtering: too few points kept, using original points");
274268
}
275269

276-
// Calculate cluster convex hull
270+
// Calculate convex hull
277271
float z_min = 1e9;
278272
float z_max = -1e9;
279-
280-
// Calculate z_min and z_max of cluster
281-
ROS_DEBUG("Computing min and max of cluster");
282273
std::vector<geo::Vec2f> points_2d(cluster.points.size());
274+
283275
for(unsigned int j = 0; j < cluster.points.size(); ++j)
284276
{
285277
const geo::Vec3& p = cluster.points[j];
286278

287279
// Transform sensor point to map frame
288280
geo::Vector3 p_map = sensor_pose * p;
289-
290281
points_2d[j] = geo::Vec2f(p_map.x, p_map.y);
291-
292282
z_min = std::min<float>(z_min, p_map.z);
293283
z_max = std::max<float>(z_max, p_map.z);
294284
}
295285

296286
ed::convex_hull::create(points_2d, z_min, z_max, cluster.chull, cluster.pose_map);
297287
cluster.chull.complete = false;
298288

299-
// Simple statistical outlier removal (geometric based optional for some better results if needed)
300-
// After collecting all points, filter outliers
301-
// if (!cluster.points.empty()) {
302-
// // Calculate mean and standard deviation of distances to centroid
303-
// geo::Vec3 centroid(0, 0, 0);
304-
// for (const auto& p : cluster.points)
305-
// centroid += p;
306-
// centroid = centroid / cluster.points.size();
307-
308-
// // Calculate standard deviation
309-
// float sum_sq_dist = 0;
310-
// for (const auto& p : cluster.points)
311-
// sum_sq_dist += (p - centroid).length2();
312-
// float stddev = std::sqrt(sum_sq_dist / cluster.points.size());
313-
314-
// // Filter points that are too far from centroid
315-
// std::vector<geo::Vec3> filtered_points;
316-
// for (const auto& p : cluster.points) {
317-
// if ((p - centroid).length() < 2.5 * stddev)
318-
// filtered_points.push_back(p);
319-
// }
320-
321-
// cluster.points = filtered_points;
322-
// }
289+
// Store in thread-safe pre-allocated array
290+
temp_clusters[i] = cluster;
291+
valid_cluster[i] = true;
323292
}
293+
294+
// Sequential section: collect valid clusters
295+
clusters.clear();
296+
clusters.reserve(masks.size());
297+
for (size_t i = 0; i < temp_clusters.size(); ++i) {
298+
if (valid_cluster[i]) {
299+
clusters.push_back(std::move(temp_clusters[i]));
300+
}
301+
}
302+
324303
return masks;
325304
}

0 commit comments

Comments
 (0)