Add API to transform pixel from color image to depth image with searching on epipolar line (#696)

* Add API to transform pixel from color image to depth image with searching on epipolar line

* Fix linux build

* Fix clang build

* Addressing comments

* Remove extra header

* Fix clang

* Add C++ wrapper
This commit is contained in:
Andrew Duan 2019-08-30 11:16:40 -07:00 committed by GitHub
Parent f758de2557
Commit 6fdbcb0188
No key found matching this signature
GPG key ID: 4AEE18F83AFDEB23
6 changed files: 340 additions and 0 deletions

View file

@ -1932,6 +1932,59 @@ K4A_EXPORT k4a_result_t k4a_calibration_2d_to_2d(const k4a_calibration_t *calibr
k4a_float2_t *target_point2d,
int *valid);
/** Transform a 2D pixel coordinate from color camera into a 2D pixel coordinate of
 * the depth camera.
 *
 * \param calibration
 * Location to read the camera calibration obtained by k4a_device_get_calibration().
 *
 * \param source_point2d
 * The 2D pixel in color camera coordinates.
 *
 * \param depth_image
 * Handle to input depth image.
 *
 * \param target_point2d
 * The 2D pixel in depth camera coordinates.
 *
 * \param valid
 * The output parameter returns a value of 1 if the \p source_point2d is a valid coordinate in the depth camera
 * coordinate system, and will return 0 if the coordinate is not valid in the calibration model.
 *
 * \returns
 * ::K4A_RESULT_SUCCEEDED if \p target_point2d was successfully written. ::K4A_RESULT_FAILED if \p calibration
 * contained invalid transformation parameters. If the function returns ::K4A_RESULT_SUCCEEDED, but \p valid is 0,
 * the transformation was computed, but the results in \p target_point2d are outside of the range of valid calibration
 * and should be ignored.
 *
 * \remarks
 * This function represents an alternative to k4a_calibration_2d_to_2d() if the number of pixels that need to be
 * transformed is small. This function searches along an epipolar line in the depth image to find the corresponding
 * depth pixel. If a larger number of pixels need to be transformed, it might be computationally cheaper to call
 * k4a_transformation_depth_image_to_color_camera() to get correspondence depth values for these color pixels, then call
 * the function k4a_calibration_2d_to_2d().
 *
 * \remarks
 * If \p source_point2d does not map to a valid 2D coordinate in the depth camera coordinate system, \p valid is set
 * to 0. If it is valid, \p valid will be set to 1. The user should not use the value of \p target_point2d if \p valid
 * was set to 0.
 *
 * \relates k4a_calibration_t
 *
 * \xmlonly
 * <requirements>
 * <requirement name="Header">k4a.h (include k4a/k4a.h)</requirement>
 * <requirement name="Library">k4a.lib</requirement>
 * <requirement name="DLL">k4a.dll</requirement>
 * </requirements>
 * \endxmlonly
 */
K4A_EXPORT k4a_result_t k4a_calibration_color_2d_to_depth_2d(const k4a_calibration_t *calibration,
const k4a_float2_t *source_point2d,
const k4a_image_t depth_image,
k4a_float2_t *target_point2d,
int *valid);
/** Get handle to transformation handle.
*
* \param calibration

View file

@ -723,6 +723,28 @@ struct calibration : public k4a_calibration_t
return static_cast<bool>(valid);
}
/** Transform a 2D pixel coordinate from color camera into a 2D pixel coordinate of the depth camera. This function
* searches along an epipolar line in the depth image to find the corresponding depth pixel.
* Returns false if the point is invalid in the target coordinate system (and therefore target_point2d should not be
* used) Throws error if calibration contains invalid data.
*
* \sa k4a_calibration_color_2d_to_depth_2d
*/
bool convert_color_2d_to_depth_2d(const k4a_float2_t &source_point2d,
const image &depth_image,
k4a_float2_t *target_point2d) const
{
int valid = 0;
k4a_result_t result =
k4a_calibration_color_2d_to_depth_2d(this, &source_point2d, depth_image.handle(), target_point2d, &valid);
if (K4A_RESULT_SUCCEEDED != result)
{
throw error("Calibration contained invalid transformation parameters!");
}
return static_cast<bool>(valid);
}
/** Get the camera calibration for a device from a raw calibration blob.
* Throws error on failure.
*

View file

@ -36,6 +36,16 @@ typedef struct _k4a_transformation_xy_tables_t
int height; // height of x and y tables
} k4a_transformation_xy_tables_t;
/** Ideal (distortion-free) pinhole camera model with the principal point at the image center,
 * used to define the undistorted image plane for the epipolar line search. */
typedef struct _k4a_transformation_pinhole_t
{
    float px; // principal point x, in pixels (width / 2)
    float py; // principal point y, in pixels (height / 2)
    float fx; // focal length x, in pixels
    float fy; // focal length y, in pixels
    int width;  // image width, in pixels
    int height; // image height, in pixels
} k4a_transformation_pinhole_t;
typedef struct _k4a_transform_engine_calibration_t
{
k4a_calibration_camera_t depth_camera_calibration; // depth camera calibration
@ -82,6 +92,12 @@ k4a_result_t transformation_2d_to_2d(const k4a_calibration_t *calibration,
float target_point2d[2],
int *valid);
/** Transforms a color camera pixel \p source_point2d into depth camera pixel coordinates
 * \p target_point2d by searching along an epipolar line in \p depth_image. \p valid is set to 1
 * when \p target_point2d holds a usable result and 0 otherwise. */
k4a_result_t transformation_color_2d_to_depth_2d(const k4a_calibration_t *calibration,
const float source_point2d[2],
const k4a_image_t depth_image,
float target_point2d[2],
int *valid);
k4a_transformation_t transformation_create(const k4a_calibration_t *calibration, bool gpu_optimization);
void transformation_destroy(k4a_transformation_t transformation_handle);

View file

@ -1134,6 +1134,16 @@ k4a_result_t k4a_calibration_2d_to_2d(const k4a_calibration_t *calibration,
calibration, source_point2d->v, source_depth_mm, source_camera, target_camera, target_point2d->v, valid));
}
// Public API shim: forwards to the transformation module, unpacking the k4a_float2_t wrappers
// into raw float[2] arrays. TRACE_CALL records a failing result at this call site.
k4a_result_t k4a_calibration_color_2d_to_depth_2d(const k4a_calibration_t *calibration,
                                                  const k4a_float2_t *source_point2d,
                                                  const k4a_image_t depth_image,
                                                  k4a_float2_t *target_point2d,
                                                  int *valid)
{
    k4a_result_t result = TRACE_CALL(
        transformation_color_2d_to_depth_2d(calibration, source_point2d->v, depth_image, target_point2d->v, valid));
    return result;
}
k4a_transformation_t k4a_transformation_create(const k4a_calibration_t *calibration)
{
return transformation_create(calibration, TRANSFORM_ENABLE_GPU_OPTIMIZATION);

View file

@ -8,10 +8,12 @@
#include <k4ainternal/logging.h>
#include <k4ainternal/deloader.h>
#include <k4ainternal/tewrapper.h>
#include <k4ainternal/image.h>
// System dependencies
#include <stdlib.h>
#include <math.h>
#include <float.h>
k4a_result_t transformation_get_mode_specific_calibration(const k4a_calibration_camera_t *depth_camera_calibration,
const k4a_calibration_camera_t *color_camera_calibration,
@ -96,6 +98,56 @@ static k4a_result_t transformation_possible(const k4a_calibration_t *camera_cali
return K4A_RESULT_SUCCEEDED;
}
static bool transformation_is_pixel_within_line_segment(const float p[2], const float start[2], const float stop[2])
{
return (stop[0] >= start[0] ? stop[0] >= p[0] && p[0] >= start[0] : stop[0] <= p[0] && p[0] <= start[0]) &&
(stop[1] >= start[1] ? stop[1] >= p[1] && p[1] >= start[1] : stop[1] <= p[1] && p[1] <= start[1]);
}
// Returns true when pixel p lies inside a width x height image: 0 <= x < width, 0 <= y < height.
static bool transformation_is_pixel_within_image(const float p[2], const int width, const int height)
{
    const bool x_in_range = p[0] >= 0 && p[0] < width;
    const bool y_in_range = p[1] >= 0 && p[1] < height;
    return x_in_range && y_in_range;
}
// Builds an ideal pinhole model for the depth camera. Focal lengths are derived from the nominal
// field of view of the configured depth mode; the principal point is placed at the image center.
// Returns K4A_RESULT_FAILED for an unrecognized depth mode.
static k4a_result_t transformation_create_depth_camera_pinhole(const k4a_calibration_t *calibration,
                                                               k4a_transformation_pinhole_t *pinhole)
{
    float fov_degrees[2];
    if (calibration->depth_mode == K4A_DEPTH_MODE_NFOV_2X2BINNED ||
        calibration->depth_mode == K4A_DEPTH_MODE_NFOV_UNBINNED)
    {
        // Narrow field-of-view modes.
        fov_degrees[0] = 75;
        fov_degrees[1] = 65;
    }
    else if (calibration->depth_mode == K4A_DEPTH_MODE_WFOV_2X2BINNED ||
             calibration->depth_mode == K4A_DEPTH_MODE_WFOV_UNBINNED ||
             calibration->depth_mode == K4A_DEPTH_MODE_PASSIVE_IR)
    {
        // Wide field-of-view and passive IR modes.
        fov_degrees[0] = 120;
        fov_degrees[1] = 120;
    }
    else
    {
        LOG_ERROR("Invalid depth mode.", 0);
        return K4A_RESULT_FAILED;
    }

    // Normalized focal lengths from the FOV, then scaled to pixels by the image resolution.
    float radian_per_degree = 3.14159265f / 180.f;
    float fx = 0.5f / tanf(0.5f * fov_degrees[0] * radian_per_degree);
    float fy = 0.5f / tanf(0.5f * fov_degrees[1] * radian_per_degree);

    pinhole->width = calibration->depth_camera_calibration.resolution_width;
    pinhole->height = calibration->depth_camera_calibration.resolution_height;
    pinhole->px = pinhole->width / 2.f;
    pinhole->py = pinhole->height / 2.f;
    pinhole->fx = fx * pinhole->width;
    pinhole->fy = fy * pinhole->height;
    return K4A_RESULT_SUCCEEDED;
}
k4a_result_t transformation_3d_to_3d(const k4a_calibration_t *calibration,
const float source_point3d[3],
const k4a_calibration_type_t source_camera,
@ -258,6 +310,158 @@ k4a_result_t transformation_2d_to_2d(const k4a_calibration_t *calibration,
return K4A_RESULT_SUCCEEDED;
}
/**
 * Transforms a color camera pixel into depth camera pixel coordinates by searching along the
 * epipolar line in the depth image.
 *
 * \param calibration camera calibration (provides intrinsics/extrinsics for both cameras).
 * \param source_point2d pixel coordinates in the color image.
 * \param depth_image depth image whose buffer is read as uint16_t millimeter values.
 * \param target_point2d output pixel coordinates in the depth image; only meaningful when
 *                       \p valid is 1.
 * \param valid set to 1 when a usable result was found, 0 otherwise.
 *
 * \return ::K4A_RESULT_SUCCEEDED when the search ran; ::K4A_RESULT_FAILED on invalid calibration
 *         parameters or a degenerate (vertical) epipolar line.
 */
k4a_result_t transformation_color_2d_to_depth_2d(const k4a_calibration_t *calibration,
                                                 const float source_point2d[2],
                                                 const k4a_image_t depth_image,
                                                 float target_point2d[2],
                                                 int *valid)
{
    k4a_transformation_pinhole_t pinhole = { 0 };
    if (K4A_FAILED(TRACE_CALL(transformation_create_depth_camera_pinhole(calibration, &pinhole))))
    {
        return K4A_RESULT_FAILED;
    }

    // Compute the 3d points in depth camera space that the current color camera pixel can be
    // transformed to with the theoretical minimum and maximum depth values (mm).
    float depth_range_mm[2] = { 50.f, 14000.f };
    float start_point3d[3], stop_point3d[3];
    int start_valid = 0;
    if (K4A_FAILED(TRACE_CALL(transformation_2d_to_3d(calibration,
                                                      source_point2d,
                                                      depth_range_mm[0],
                                                      K4A_CALIBRATION_TYPE_COLOR,
                                                      K4A_CALIBRATION_TYPE_DEPTH,
                                                      start_point3d,
                                                      &start_valid))))
    {
        return K4A_RESULT_FAILED;
    }
    int stop_valid = 0;
    if (K4A_FAILED(TRACE_CALL(transformation_2d_to_3d(calibration,
                                                      source_point2d,
                                                      depth_range_mm[1],
                                                      K4A_CALIBRATION_TYPE_COLOR,
                                                      K4A_CALIBRATION_TYPE_DEPTH,
                                                      stop_point3d,
                                                      &stop_valid))))
    {
        return K4A_RESULT_FAILED;
    }
    *valid = start_valid && stop_valid;
    if (*valid == 0)
    {
        return K4A_RESULT_SUCCEEDED;
    }

    // Project the two 3d points into the undistorted depth image space with the pinhole model;
    // both projections are expected to lie on the epipolar line of the color pixel.
    float start_point2d[2], stop_point2d[2];
    start_point2d[0] = start_point3d[0] / start_point3d[2] * pinhole.fx + pinhole.px;
    start_point2d[1] = start_point3d[1] / start_point3d[2] * pinhole.fy + pinhole.py;
    stop_point2d[0] = stop_point3d[0] / stop_point3d[2] * pinhole.fx + pinhole.px;
    stop_point2d[1] = stop_point3d[1] / stop_point3d[2] * pinhole.fy + pinhole.py;

    // Search every pixel on the epipolar line so that its reprojected pixel coordinates in the
    // color image have minimum distance from the input color pixel coordinates.
    int depth_image_width_pixels = image_get_width_pixels(depth_image);
    int depth_image_height_pixels = image_get_height_pixels(depth_image);
    const uint16_t *depth_image_data = (const uint16_t *)(const void *)(image_get_buffer(depth_image));

    float best_error = FLT_MAX;
    float p[2];
    p[0] = start_point2d[0];
    p[1] = start_point2d[1];
    if (stop_point2d[0] - start_point2d[0] == 0.0f)
    {
        // Degenerate (vertical) epipolar line: the slope below would divide by zero.
        return K4A_RESULT_FAILED;
    }
    float epipolar_line_slope = (stop_point2d[1] - start_point2d[1]) / (stop_point2d[0] - start_point2d[0]);
    // Step by one pixel along the dominant axis so every pixel on the line is visited once.
    bool step_in_x = fabsf(epipolar_line_slope) < 1;
    bool stop_larger_than_start = step_in_x ? stop_point2d[0] > start_point2d[0] : stop_point2d[1] > start_point2d[1];
    while (transformation_is_pixel_within_line_segment(p, start_point2d, stop_point2d))
    {
        // Ray from the depth camera origin through the current searching pixel on the epipolar
        // line.
        float ray[3];
        ray[0] = (p[0] - pinhole.px) / pinhole.fx;
        ray[1] = (p[1] - pinhole.py) / pinhole.fy;
        ray[2] = 1.f;

        // Project the ray to the distorted depth image to read the depth value from the nearest
        // pixel.
        float depth_point2d[2];
        int p_valid = 0;
        if (K4A_FAILED(TRACE_CALL(transformation_3d_to_2d(
                calibration, ray, K4A_CALIBRATION_TYPE_DEPTH, K4A_CALIBRATION_TYPE_DEPTH, depth_point2d, &p_valid))))
        {
            return K4A_RESULT_FAILED;
        }
        if (p_valid == 1 &&
            transformation_is_pixel_within_image(depth_point2d, depth_image_width_pixels, depth_image_height_pixels))
        {
            // Round to the nearest depth pixel. Clamp because the +0.5f rounding can step one past
            // the last row/column (e.g. x = width - 0.4 passes the bounds check above but rounds
            // to column 'width'), which would read out of bounds.
            int u = (int)(floorf(depth_point2d[0] + 0.5f));
            int v = (int)(floorf(depth_point2d[1] + 0.5f));
            if (u >= depth_image_width_pixels)
            {
                u = depth_image_width_pixels - 1;
            }
            if (v >= depth_image_height_pixels)
            {
                v = depth_image_height_pixels - 1;
            }
            uint16_t d = depth_image_data[v * depth_image_width_pixels + u];

            // Reproject the candidate depth pixel into the color image and keep the candidate
            // whose reprojection lands closest to the requested color pixel.
            float reprojected_point2d[2];
            if (K4A_FAILED(TRACE_CALL(transformation_2d_to_2d(calibration,
                                                              depth_point2d,
                                                              d,
                                                              K4A_CALIBRATION_TYPE_DEPTH,
                                                              K4A_CALIBRATION_TYPE_COLOR,
                                                              reprojected_point2d,
                                                              &p_valid))))
            {
                return K4A_RESULT_FAILED;
            }
            if (p_valid == 1 &&
                transformation_is_pixel_within_image(reprojected_point2d,
                                                     calibration->color_camera_calibration.resolution_width,
                                                     calibration->color_camera_calibration.resolution_height))
            {
                // Compute the 2d reprojection error and store the minimum one.
                float dx = reprojected_point2d[0] - source_point2d[0];
                float dy = reprojected_point2d[1] - source_point2d[1];
                float error = sqrtf(dx * dx + dy * dy);
                if (error < best_error)
                {
                    best_error = error;
                    target_point2d[0] = depth_point2d[0];
                    target_point2d[1] = depth_point2d[1];
                }
            }
        }

        // Advance one pixel along the dominant axis of the epipolar line.
        if (step_in_x)
        {
            p[0] = stop_larger_than_start ? p[0] + 1 : p[0] - 1;
            p[1] = stop_larger_than_start ? p[1] + epipolar_line_slope : p[1] - epipolar_line_slope;
        }
        else
        {
            p[1] = stop_larger_than_start ? p[1] + 1 : p[1] - 1;
            // Bug fix: x must advance from its own previous value p[0]; the original code advanced
            // it from p[1], which corrupted the walk whenever |slope| >= 1.
            p[0] = stop_larger_than_start ? p[0] + 1 / epipolar_line_slope : p[0] - 1 / epipolar_line_slope;
        }
    }

    // Reject matches whose reprojection lands more than 10 pixels from the requested color pixel.
    if (best_error > 10)
    {
        *valid = 0;
    }
    return K4A_RESULT_SUCCEEDED;
}
static k4a_buffer_result_t transformation_init_xy_tables(const k4a_calibration_t *calibration,
const k4a_calibration_type_t camera,
float *data,

View file

@ -311,6 +311,41 @@ TEST_F(transformation_ut, transformation_2d_to_2d)
ASSERT_EQ_FLT2(point2d, m_depth_point2d_reference);
}
// Validates the epipolar-line search in transformation_color_2d_to_depth_2d(): synthesizes a
// depth image filled with a constant 1000 mm, transforms the reference color pixel, and compares
// the result against the known reference depth pixel coordinates from the fixture.
TEST_F(transformation_ut, transformation_color_2d_to_depth_2d)
{
float point2d[2] = { 0.f, 0.f };
int valid = 0;
int width = m_calibration.depth_camera_calibration.resolution_width;
int height = m_calibration.depth_camera_calibration.resolution_height;
// Create a DEPTH16 image matching the calibration's depth resolution.
k4a_image_t depth_image = NULL;
ASSERT_EQ(image_create(K4A_IMAGE_FORMAT_DEPTH16,
width,
height,
width * (int)sizeof(uint16_t),
ALLOCATION_SOURCE_USER,
&depth_image),
K4A_RESULT_SUCCEEDED);
ASSERT_NE(depth_image, (k4a_image_t)NULL);
// Fill every pixel with 1000 mm so each candidate on the epipolar line has a valid depth value.
uint16_t *depth_image_buffer = (uint16_t *)(void *)image_get_buffer(depth_image);
for (int i = 0; i < width * height; i++)
{
depth_image_buffer[i] = (uint16_t)1000;
}
// NOTE(review): depth_image is never released in this test — confirm whether the image module's
// release function should be called at the end to avoid leaking the allocation.
k4a_result_t result =
transformation_color_2d_to_depth_2d(&m_calibration, m_color_point2d_reference, depth_image, point2d, &valid);
ASSERT_EQ(result, K4A_RESULT_SUCCEEDED);
ASSERT_EQ(valid, 1);
// Since the API is searching by step of 1 pixel on the epipolar line (better performance), we expect there is less
// than 1 pixel error for the computed depth point coordinates comparing to reference coordinates
ASSERT_LT(fabs(point2d[0] - m_depth_point2d_reference[0]), 1);
ASSERT_LT(fabs(point2d[1] - m_depth_point2d_reference[1]), 1);
}
TEST_F(transformation_ut, transformation_depth_image_to_point_cloud)
{
k4a_transformation_t transformation_handle = transformation_create(&m_calibration, false);