The open source OpenXR runtime
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

d/ht: split ht_algorithm into source and header

authored by

Simon Zeni and committed by
Moses Turner
49d2d819 39bc1295

+774 -768
+2 -2
src/xrt/drivers/CMakeLists.txt
··· 212 212 213 213 if(XRT_BUILD_DRIVER_HANDTRACKING) 214 214 add_library(drv_ht STATIC 215 + ht/ht_algorithm.cpp 215 216 ht/ht_driver.cpp 216 217 ht/ht_driver.hpp 217 218 ht/ht_interface.h 218 219 ht/ht_models.cpp 219 220 ht/ht_hand_math.cpp 220 221 ht/ht_image_math.cpp 221 - ht/ht_nms.cpp 222 - ht/templates/NaivePermutationSort.hpp) 222 + ht/ht_nms.cpp) 223 223 target_link_libraries(drv_ht PRIVATE xrt-interfaces aux_os aux_util aux_math aux_gstreamer ONNXRuntime::ONNXRuntime ${OpenCV_LIBRARIES}) 224 224 target_include_directories(drv_ht PRIVATE ${OpenCV_INCLUDE_DIRS} ${EIGEN3_INCLUDE_DIR}) 225 225 list(APPEND ENABLED_DRIVERS ht)
+768
src/xrt/drivers/ht/ht_algorithm.cpp
··· 1 + // Copyright 2021, Collabora, Ltd. 2 + // SPDX-License-Identifier: BSL-1.0 3 + /*! 4 + * @file 5 + * @brief Camera based hand tracking mainloop algorithm. 6 + * @author Moses Turner <moses@collabora.com> 7 + * @ingroup drv_ht 8 + */ 9 + 10 + #include "xrt/xrt_defines.h" 11 + 12 + #include "math/m_vec2.h" 13 + #include "util/u_frame.h" 14 + #include "util/u_trace_marker.h" 15 + 16 + #include "ht_algorithm.hpp" 17 + #include "ht_driver.hpp" 18 + #include "ht_hand_math.hpp" 19 + #include "ht_image_math.hpp" 20 + #include "templates/NaivePermutationSort.hpp" 21 + 22 + // Flags to tell state tracker that these are indeed valid joints 23 + static const enum xrt_space_relation_flags valid_flags_ht = (enum xrt_space_relation_flags)( 24 + XRT_SPACE_RELATION_ORIENTATION_VALID_BIT | XRT_SPACE_RELATION_ORIENTATION_TRACKED_BIT | 25 + XRT_SPACE_RELATION_POSITION_VALID_BIT | XRT_SPACE_RELATION_POSITION_TRACKED_BIT); 26 + 27 + 28 + static void 29 + htProcessJoint(struct ht_device *htd, 30 + struct xrt_vec3 model_out, 31 + struct xrt_hand_joint_set *hand, 32 + enum xrt_hand_joint idx) 33 + { 34 + hand->values.hand_joint_set_default[idx].relation.relation_flags = valid_flags_ht; 35 + hand->values.hand_joint_set_default[idx].relation.pose.position.x = model_out.x; 36 + hand->values.hand_joint_set_default[idx].relation.pose.position.y = model_out.y; 37 + hand->values.hand_joint_set_default[idx].relation.pose.position.z = model_out.z; 38 + } 39 + 40 + static float 41 + errHistory2D(HandHistory2DBBox *past, Palm7KP *present) 42 + { 43 + if (!past->htAlgorithm_approves) { 44 + // U_LOG_E("Returning big number because htAlgorithm told me to!"); 45 + return 100000000000000000000000000000.0f; 46 + } 47 + float sum_of_lengths = m_vec2_len(*past->wrist_unfiltered[0] - *past->middle_unfiltered[0]) + 48 + m_vec2_len(present->kps[WRIST_7KP] - present->kps[MIDDLE_7KP]); 49 + 50 + float sum_of_distances = (m_vec2_len(*past->wrist_unfiltered[0] - present->kps[WRIST_7KP]) + 51 + m_vec2_len(*past->middle_unfiltered[0] - present->kps[MIDDLE_7KP])); 52 + 53 + 54 + float final = sum_of_distances / sum_of_lengths; 55 + 56 + return final; 57 + } 58 + 59 + static std::vector<Hand2D> 60 + htImageToKeypoints(struct ht_view *htv) 61 + { 62 + int view = htv->view; 63 + struct ht_device *htd = htv->htd; 64 + 65 + 66 + cv::Mat raw_input = htv->run_model_on_this; 67 + 68 + // Get a list of palms - drop confidences and ssd bounding boxes, just keypoints. 69 + 70 + std::vector<Palm7KP> hand_detections = htv->run_detection_model(htv, raw_input); 71 + 72 + std::vector<bool> used_histories; 73 + std::vector<bool> used_detections; 74 + 75 + std::vector<size_t> history_indices; 76 + std::vector<size_t> detection_indices; 77 + std::vector<float> dontuse; 78 + 79 + 80 + // Strategy here is: We have a big list of palms. Match 'em up to previous palms. 81 + naive_sort_permutation_by_error<HandHistory2DBBox, Palm7KP>(htv->bbox_histories, hand_detections, 82 + 83 + // bools 84 + used_histories, used_detections, 85 + 86 + history_indices, detection_indices, dontuse, 87 + errHistory2D, 1.0f); 88 + 89 + // Here's the trick - we use the associated bbox_filter to get an output but *never commit* the noisy 128x128 90 + // detection; instead later on we commit the (hopefully) nicer palm and wrist from the 224x224 keypoint 91 + // estimation. 92 + 93 + // Add extra detections! 94 + for (size_t i = 0; i < used_detections.size(); i++) { 95 + if ((used_detections[i] == false) && hand_detections[i].confidence > 0.65) { 96 + // Confidence to get in the door is 0.65, confidence to stay in is 0.3 97 + HandHistory2DBBox hist_new = {}; 98 + m_filter_euro_vec2_init(&hist_new.m_filter_center, FCMIN_BBOX_POSITION, FCMIN_D_BB0X_POSITION, 99 + BETA_BB0X_POSITION); 100 + m_filter_euro_vec2_init(&hist_new.m_filter_direction, FCMIN_BBOX_ORIENTATION, 101 + FCMIN_D_BB0X_ORIENTATION, BETA_BB0X_ORIENTATION); 102 + 103 + htv->bbox_histories.push_back(hist_new); 104 + history_indices.push_back(htv->bbox_histories.size() - 1); 105 + detection_indices.push_back(i); 106 + } 107 + } 108 + 109 + // Do the things for each active bbox history! 110 + for (size_t i = 0; i < history_indices.size(); i++) { 111 + HandHistory2DBBox *hist_of_interest = &htv->bbox_histories[history_indices[i]]; 112 + hist_of_interest->wrist_unfiltered.push(hand_detections[detection_indices[i]].kps[WRIST_7KP]); 113 + hist_of_interest->index_unfiltered.push(hand_detections[detection_indices[i]].kps[INDEX_7KP]); 114 + hist_of_interest->middle_unfiltered.push(hand_detections[detection_indices[i]].kps[MIDDLE_7KP]); 115 + hist_of_interest->pinky_unfiltered.push(hand_detections[detection_indices[i]].kps[LITTLE_7KP]); 116 + // Eh do the rest later 117 + } 118 + 119 + // Prune stale detections! (After we don't need {history,detection}_indices to be correct) 120 + int bob = 0; 121 + for (size_t i = 0; i < used_histories.size(); i++) { 122 + if (used_histories[i] == false) { 123 + // history never got assigned a present hand to it. treat it as stale delete it. 124 + 125 + HT_TRACE(htv->htd, "Removing bbox from history!\n"); 126 + htv->bbox_histories.erase(htv->bbox_histories.begin() + i + bob); 127 + bob--; 128 + } 129 + } 130 + if (htv->bbox_histories.size() == 0) { 131 + return {}; // bail early 132 + } 133 + 134 + 135 + 136 + std::vector<Hand2D> list_of_hands_in_bbox( 137 + htv->bbox_histories.size()); // all of these are same size as htv->bbox_histories 138 + 139 + std::vector<std::future<Hand2D>> await_list_of_hand_in_bbox; //(htv->bbox_histories.size()); 140 + 141 + std::vector<DetectionModelOutput> blah(htv->bbox_histories.size()); 142 + 143 + std::vector<Hand2D> output; 144 + 145 + if (htv->bbox_histories.size() > 2) { 146 + HT_DEBUG(htd, "More than two hands (%zu) in 2D view %i", htv->bbox_histories.size(), htv->view); 147 + } 148 + 149 + 150 + for (size_t i = 0; i < htv->bbox_histories.size(); i++) { //(BBoxHistory * entry : htv->bbox_histories) { 151 + HandHistory2DBBox *entry = &htv->bbox_histories[i]; 152 + cv::Mat hand_rect = cv::Mat(224, 224, CV_8UC3); 153 + xrt_vec2 unfiltered_middle; 154 + xrt_vec2 unfiltered_direction; 155 + 156 + 157 + centerAndRotationFromJoints(htv, entry->wrist_unfiltered[0], entry->index_unfiltered[0], 158 + entry->middle_unfiltered[0], entry->pinky_unfiltered[0], &unfiltered_middle, 159 + &unfiltered_direction); 160 + 161 + xrt_vec2 filtered_middle; 162 + xrt_vec2 filtered_direction; 163 + 164 + m_filter_euro_vec2_run_no_commit(&entry->m_filter_center, htv->htd->current_frame_timestamp, 165 + &unfiltered_middle, &filtered_middle); 166 + m_filter_euro_vec2_run_no_commit(&entry->m_filter_direction, htv->htd->current_frame_timestamp, 167 + &unfiltered_direction, &filtered_direction); 168 + 169 + rotatedRectFromJoints(htv, filtered_middle, filtered_direction, &blah[i]); 170 + 171 + warpAffine(raw_input, hand_rect, blah[i].warp_there, hand_rect.size()); 172 + 173 + await_list_of_hand_in_bbox.push_back( 174 + std::async(std::launch::async, htd->views[view].run_keypoint_model, &htd->views[view], hand_rect)); 175 + } 176 + 177 + // cut here 178 + 179 + for (size_t i = 0; i < htv->bbox_histories.size(); i++) { 180 + 181 + Hand2D in_bbox = await_list_of_hand_in_bbox[i].get(); 182 + 183 + cv::Matx23f warp_back = blah[i].warp_back; 184 + 185 + Hand2D in_image_ray_coords; 186 + Hand2D in_image_px_coords; 187 + 188 + for (int i = 0; i < 21; i++) { 189 + struct xrt_vec3 vec = in_bbox.kps[i]; 190 + 191 + #if 1 192 + xrt_vec3 rr = transformVecBy2x3(vec, warp_back); 193 + rr.z = vec.z; 194 + #else 195 + xrt_vec3 rr; 196 + rr.x = (vec.x * warp_back(0, 0)) + (vec.y * warp_back(0, 1)) + warp_back(0, 2); 197 + rr.y = (vec.x * warp_back(1, 0)) + (vec.y * warp_back(1, 1)) + warp_back(1, 2); 198 + rr.z = vec.z; 199 + #endif 200 + in_image_px_coords.kps[i] = rr; 201 + 202 + in_image_ray_coords.kps[i] = raycoord(htv, rr); 203 + if (htd->debug_scribble && htd->dynamic_config.scribble_2d_keypoints) { 204 + handDot(htv->debug_out_to_this, {rr.x, rr.y}, fmax((-vec.z + 100 - 20) * .08, 2), 205 + ((float)i) / 21.0f, 0.95f, cv::FILLED); 206 + } 207 + } 208 + xrt_vec2 wrist_in_px_coords = {in_image_px_coords.kps[WRIST].x, in_image_px_coords.kps[WRIST].y}; 209 + xrt_vec2 index_in_px_coords = {in_image_px_coords.kps[INDX_PXM].x, in_image_px_coords.kps[INDX_PXM].y}; 210 + xrt_vec2 middle_in_px_coords = {in_image_px_coords.kps[MIDL_PXM].x, in_image_px_coords.kps[MIDL_PXM].y}; 211 + xrt_vec2 little_in_px_coords = {in_image_px_coords.kps[LITL_PXM].x, in_image_px_coords.kps[LITL_PXM].y}; 212 + xrt_vec2 dontuse; 213 + 214 + xrt_vec2 unfiltered_middle, unfiltered_direction; 215 + 216 + centerAndRotationFromJoints(htv, &wrist_in_px_coords, &index_in_px_coords, &middle_in_px_coords, 217 + &little_in_px_coords, &unfiltered_middle, &unfiltered_direction); 218 + 219 + m_filter_euro_vec2_run(&htv->bbox_histories[i].m_filter_center, htv->htd->current_frame_timestamp, 220 + &unfiltered_middle, &dontuse); 221 + 222 + m_filter_euro_vec2_run(&htv->bbox_histories[i].m_filter_direction, htv->htd->current_frame_timestamp, 223 + &unfiltered_direction, &dontuse); 224 + 225 + output.push_back(in_image_ray_coords); 226 + } 227 + return output; 228 + } 229 + 230 + #if defined(EXPERIMENTAL_DATASET_RECORDING) 231 + 232 + static void 233 + jsonAddJoint(cJSON *into_this, xrt_pose loc, const char *name) 234 + { 235 + 236 + cJSON *container = cJSON_CreateObject(); 237 + cJSON *joint_loc = cJSON_CreateArray(); 238 + cJSON_AddItemToArray(joint_loc, cJSON_CreateNumber(loc.position.x)); 239 + cJSON_AddItemToArray(joint_loc, cJSON_CreateNumber(loc.position.y)); 240 + cJSON_AddItemToArray(joint_loc, cJSON_CreateNumber(loc.position.z)); 241 + 242 + cJSON_AddItemToObject(container, "position", joint_loc); 243 + 244 + cJSON *joint_rot = cJSON_CreateArray(); 245 + 246 + 247 + cJSON_AddItemToArray(joint_rot, cJSON_CreateNumber(loc.orientation.x)); 248 + cJSON_AddItemToArray(joint_rot, cJSON_CreateNumber(loc.orientation.y)); 249 + cJSON_AddItemToArray(joint_rot, cJSON_CreateNumber(loc.orientation.z)); 250 + cJSON_AddItemToArray(joint_rot, cJSON_CreateNumber(loc.orientation.w)); 251 + 252 + cJSON_AddItemToObject(container, "rotation_quat_xyzw", joint_rot); 253 + 254 + cJSON_AddItemToObject(into_this, name, container); 255 + } 256 + 257 + void 258 + jsonMaybeAddSomeHands(struct ht_device *htd, bool err) 259 + { 260 + if (!htd->tracking_should_record_dataset) { 261 + return; 262 + } 263 + cJSON *j_this_frame = cJSON_CreateObject(); 264 + cJSON_AddItemToObject(j_this_frame, "seq_since_start", cJSON_CreateNumber(htd->gst.current_index)); 265 + cJSON_AddItemToObject(j_this_frame, "seq_src", cJSON_CreateNumber(htd->frame_for_process->source_sequence)); 266 + cJSON_AddItemToObject(j_this_frame, "ts", cJSON_CreateNumber(htd->gst.last_frame_ns)); 267 + 268 + cJSON *j_hands_in_frame = cJSON_AddArrayToObject(j_this_frame, "detected_hands"); 269 + if (!err) { 270 + for (size_t idx_hand = 0; idx_hand < htd->histories_3d.size(); idx_hand++) { 271 + cJSON *j_hand_in_frame = cJSON_CreateObject(); 272 + 273 + cJSON *j_uuid = cJSON_CreateNumber(htd->histories_3d[idx_hand].uuid); 274 + cJSON_AddItemToObject(j_hand_in_frame, "uuid", j_uuid); 275 + 276 + cJSON *j_handedness = cJSON_CreateNumber(htd->histories_3d[idx_hand].handedness); 277 + cJSON_AddItemToObject(j_hand_in_frame, "handedness", j_handedness); 278 + 279 + static const char *keys[21] = { 280 + "WRIST", 281 + 282 + "THMB_MCP", "THMB_PXM", "THMB_DST", "THMB_TIP", 283 + 284 + "INDX_PXM", "INDX_INT", "INDX_DST", "INDX_TIP", 285 + 286 + "MIDL_PXM", "MIDL_INT", "MIDL_DST", "MIDL_TIP", 287 + 288 + "RING_PXM", "RING_INT", "RING_DST", "RING_TIP", 289 + 290 + "LITL_PXM", "LITL_INT", "LITL_DST", "LITL_TIP", 291 + }; 292 + 293 + for (int idx_joint = 0; idx_joint < 21; idx_joint++) { 294 + // const char* key = keys[idx_joint]; 295 + cJSON *j_vec3 = cJSON_AddArrayToObject(j_hand_in_frame, keys[idx_joint]); 296 + cJSON_AddItemToArray( 297 + j_vec3, 298 + cJSON_CreateNumber( 299 + htd->histories_3d[idx_hand].last_hands_unfiltered[0]->kps[idx_joint].x)); 300 + cJSON_AddItemToArray( 301 + j_vec3, 302 + cJSON_CreateNumber( 303 + htd->histories_3d[idx_hand].last_hands_unfiltered[0]->kps[idx_joint].y)); 304 + cJSON_AddItemToArray( 305 + j_vec3, 306 + cJSON_CreateNumber( 307 + htd->histories_3d[idx_hand].last_hands_unfiltered[0]->kps[idx_joint].z)); 308 + } 309 + 310 + 311 + cJSON_AddItemToArray(j_hands_in_frame, j_hand_in_frame); 312 + } 313 + } 314 + cJSON_AddItemToArray(htd->gst.output_array, j_this_frame); 315 + } 316 + 317 + #endif 318 + 319 + 320 + 321 + static void 322 + htExitFrame(struct ht_device *htd, 323 + bool err, 324 + struct xrt_hand_joint_set final_hands_ordered_by_handedness[2], 325 + uint64_t timestamp) 326 + { 327 + 328 + os_mutex_lock(&htd->openxr_hand_data_mediator); 329 + if (err) { 330 + htd->hands_for_openxr[0].is_active = false; 331 + htd->hands_for_openxr[1].is_active = false; 332 + } else { 333 + memcpy(&htd->hands_for_openxr[0], &final_hands_ordered_by_handedness[0], 334 + sizeof(struct xrt_hand_joint_set)); 335 + memcpy(&htd->hands_for_openxr[1], &final_hands_ordered_by_handedness[1], 336 + sizeof(struct xrt_hand_joint_set)); 337 + htd->hands_for_openxr_timestamp = timestamp; 338 + HT_DEBUG(htd, "Adding ts %zu", htd->hands_for_openxr_timestamp); 339 + } 340 + os_mutex_unlock(&htd->openxr_hand_data_mediator); 341 + #ifdef EXPERIMENTAL_DATASET_RECORDING 342 + if (htd->tracking_should_record_dataset) { 343 + // Add nothing-entry to json file. 344 + jsonMaybeAddSomeHands(htd, err); 345 + htd->gst.current_index++; 346 + } 347 + #endif 348 + } 349 + 350 + 351 + static void 352 + htJointDisparityMath(struct ht_device *htd, Hand2D *hand_in_left, Hand2D *hand_in_right, Hand3D *out_hand) 353 + { 354 + for (int i = 0; i < 21; i++) { 355 + // Believe it or not, this is where the 3D stuff happens! 356 + float t = htd->baseline / (hand_in_left->kps[i].x - hand_in_right->kps[i].x); 357 + 358 + out_hand->kps[i].z = -t; 359 + 360 + out_hand->kps[i].x = (hand_in_left->kps[i].x * t); 361 + out_hand->kps[i].y = -hand_in_left->kps[i].y * t; 362 + 363 + out_hand->kps[i].x += htd->baseline + (hand_in_right->kps[i].x * t); 364 + out_hand->kps[i].y += -hand_in_right->kps[i].y * t; 365 + 366 + out_hand->kps[i].x *= .5; 367 + out_hand->kps[i].y *= .5; 368 + } 369 + } 370 + int64_t last_frame, this_frame; 371 + 372 + void 373 + htRunAlgorithm(struct ht_device *htd) 374 + { 375 + XRT_TRACE_MARKER(); 376 + 377 + #ifdef EXPERIMENTAL_DATASET_RECORDING 378 + 379 + if (htd->tracking_should_record_dataset) { 380 + U_LOG_E("PUSHING!"); 381 + uint64_t start = os_monotonic_get_ns(); 382 + xrt_sink_push_frame(htd->gst.sink, htd->frame_for_process); 383 + uint64_t end = os_monotonic_get_ns(); 384 + 385 + if ((end - start) > 0.1 * U_TIME_1MS_IN_NS) { 386 + U_LOG_E("Encoder overloaded!"); 387 + } 388 + 389 + htd->gst.offset_ns = gstreamer_sink_get_timestamp_offset(htd->gst.gs); 390 + htd->gst.last_frame_ns = htd->frame_for_process->timestamp - htd->gst.offset_ns; 391 + } 392 + #endif 393 + 394 + htd->current_frame_timestamp = htd->frame_for_process->timestamp; 395 + 396 + int64_t start, end; 397 + start = os_monotonic_get_ns(); 398 + 399 + 400 + /* 401 + * Setup views. 402 + */ 403 + 404 + const int full_width = htd->frame_for_process->width; 405 + const int full_height = htd->frame_for_process->height; 406 + const int view_width = htd->camera.one_view_size_px.w; 407 + const int view_height = htd->camera.one_view_size_px.h; 408 + 409 + // assert(full_width == view_width * 2); 410 + assert(full_height == view_height); 411 + 412 + const cv::Size full_size = cv::Size(full_width, full_height); 413 + const cv::Size view_size = cv::Size(view_width, view_height); 414 + const cv::Point view_offsets[2] = {cv::Point(0, 0), cv::Point(view_width, 0)}; 415 + 416 + cv::Mat full_frame(full_size, CV_8UC3, htd->frame_for_process->data, htd->frame_for_process->stride); 417 + htd->views[0].run_model_on_this = full_frame(cv::Rect(view_offsets[0], view_size)); 418 + htd->views[1].run_model_on_this = full_frame(cv::Rect(view_offsets[1], view_size)); 419 + 420 + htd->mat_for_process = &full_frame; 421 + 422 + // Check this every frame. We really, really, really don't want it to ever suddenly be null. 423 + htd->debug_scribble = htd->debug_sink.sink != nullptr; 424 + 425 + cv::Mat debug_output = {}; 426 + xrt_frame *debug_frame = nullptr; // only use if htd->debug_scribble 427 + 428 + if (htd->debug_scribble) { 429 + u_frame_clone(htd->frame_for_process, &debug_frame); 430 + debug_output = cv::Mat(full_size, CV_8UC3, debug_frame->data, debug_frame->stride); 431 + htd->views[0].debug_out_to_this = debug_output(cv::Rect(view_offsets[0], view_size)); 432 + htd->views[1].debug_out_to_this = debug_output(cv::Rect(view_offsets[1], view_size)); 433 + } 434 + 435 + 436 + /* 437 + * Do the hand tracking! 438 + */ 439 + 440 + std::future<std::vector<Hand2D>> future_left = 441 + std::async(std::launch::async, htImageToKeypoints, &htd->views[0]); 442 + std::future<std::vector<Hand2D>> future_right = 443 + std::async(std::launch::async, htImageToKeypoints, &htd->views[1]); 444 + std::vector<Hand2D> hands_in_left_view = future_left.get(); 445 + std::vector<Hand2D> hands_in_right_view = future_right.get(); 446 + 447 + end = os_monotonic_get_ns(); 448 + 449 + 450 + this_frame = os_monotonic_get_ns(); 451 + 452 + double time_ms = (double)(end - start) / (double)U_TIME_1MS_IN_NS; 453 + double _1_time = 1 / (time_ms * 0.001); 454 + 455 + char t[64]; 456 + char t2[64]; 457 + sprintf(t, "% 8.2f ms", time_ms); 458 + sprintf(t2, "% 8.2f fps", _1_time); 459 + last_frame = this_frame; 460 + 461 + 462 + if (htd->debug_scribble) { 463 + cv::putText(debug_output, t, cv::Point(30, 60), cv::FONT_HERSHEY_SIMPLEX, 1.0f, cv::Scalar(0, 255, 0), 464 + 4); 465 + cv::putText(debug_output, t2, cv::Point(30, 100), cv::FONT_HERSHEY_SIMPLEX, 1.0f, cv::Scalar(0, 255, 0), 466 + 4); 467 + } else { 468 + HT_DEBUG(htd, "%s", t); 469 + HT_DEBUG(htd, "%s", t2); 470 + } 471 + 472 + 473 + // Convenience 474 + uint64_t timestamp = htd->frame_for_process->timestamp; 475 + 476 + if (htd->debug_scribble) { 477 + u_sink_debug_push_frame(&htd->debug_sink, debug_frame); 478 + xrt_frame_reference(&debug_frame, NULL); 479 + } 480 + 481 + // Bail early this frame if no hands were detected. 482 + // In the long run, this'll be a silly thing - we shouldn't always take the detection model's word for it 483 + // especially when part of the pipeline is an arbitrary confidence threshold. 484 + if (hands_in_left_view.size() == 0 || hands_in_right_view.size() == 0) { 485 + htExitFrame(htd, true, NULL, 0); 486 + return; 487 + } 488 + 489 + 490 + 491 + std::vector<Hand3D> possible_3d_hands; 492 + 493 + // for every possible combination of hands in left view and hands in right view, 494 + for (size_t idx_l = 0; idx_l < hands_in_left_view.size(); idx_l++) { 495 + for (size_t idx_r = 0; idx_r < hands_in_right_view.size(); idx_r++) { 496 + Hand3D cur_hand = {}; 497 + 498 + Hand2D &left_2d = hands_in_left_view[idx_l]; 499 + Hand2D &right_2d = hands_in_right_view[idx_r]; 500 + 501 + // Calculate a 3D hand for this combination 502 + htJointDisparityMath(htd, &hands_in_left_view[idx_l], &hands_in_right_view[idx_r], &cur_hand); 503 + cur_hand.timestamp = timestamp; 504 + cur_hand.rejected_by_smush = false; 505 + 506 + cur_hand.idx_l = idx_l; 507 + cur_hand.idx_r = idx_r; 508 + 509 + // Calculate a y-disparity for this combination 510 + cur_hand.y_disparity_error = errHandDisparity(&left_2d, &right_2d); 511 + 512 + possible_3d_hands.push_back(cur_hand); 513 + } 514 + } 515 + 516 + HT_DEBUG(htd, "Starting with %zu hands!", possible_3d_hands.size()); 517 + 518 + // For each pair of 3D hands we just made 519 + for (size_t idx_one = 0; idx_one < possible_3d_hands.size(); idx_one++) { 520 + for (size_t idx_two = 0; idx_two < possible_3d_hands.size(); idx_two++) { 521 + if ((idx_one <= idx_two)) { 522 + continue; 523 + } 524 + 525 + // See if this pair is suspiciously close together. 526 + // If it is, then this pairing is wrong - this is what was causing the "hands smushing together" 527 + // issue - we weren't catching these reliably. 528 + float errr = sumOfHandJointDistances(&possible_3d_hands[idx_one], &possible_3d_hands[idx_two]); 529 + HT_TRACE(htd, "%zu %zu is smush %f", idx_one, idx_two, errr); 530 + if (errr < 0.03f * 21.0f) { 531 + possible_3d_hands[idx_one].rejected_by_smush = true; 532 + possible_3d_hands[idx_two].rejected_by_smush = true; 533 + } 534 + } 535 + } 536 + 537 + std::vector<Hand3D> hands_unfiltered; 538 + 539 + for (Hand3D hand : possible_3d_hands) { 540 + // If none of these are false, then all our heuristics indicate this is a real hand, so we add it to our 541 + // list of real hands. 542 + bool selected = !hand.rejected_by_smush && // 543 + hand.y_disparity_error < 1.0f && // 544 + rejectTooClose(htd, &hand) && // 545 + rejectTooFar(htd, &hand) && // 546 + rejectTinyPalm(htd, &hand); 547 + if (selected) { 548 + HT_TRACE(htd, "Pushing back with y-error %f", hand.y_disparity_error); 549 + hands_unfiltered.push_back(hand); 550 + } 551 + } 552 + 553 + 554 + std::vector<bool> past_hands_taken; 555 + std::vector<bool> present_hands_taken; 556 + 557 + std::vector<size_t> past_indices; 558 + std::vector<size_t> present_indices; 559 + std::vector<float> flow_errors; 560 + 561 + 562 + float max_dist_between_frames = 1.0f; 563 + 564 + naive_sort_permutation_by_error<HandHistory3D, Hand3D>(htd->histories_3d, // past 565 + hands_unfiltered, // present 566 + 567 + 568 + // outputs 569 + past_hands_taken, present_hands_taken, past_indices, 570 + present_indices, flow_errors, errHandHistory, 571 + (max_dist_between_frames * 21.0f) 572 + 573 + ); 574 + 575 + 576 + for (size_t i = 0; i < past_indices.size(); i++) { 577 + htd->histories_3d[past_indices[i]].last_hands_unfiltered.push(hands_unfiltered[present_indices[i]]); 578 + } 579 + // The above may not do anything, because we'll start out with no hand histories! All the numbers of elements 580 + // should be zero. 581 + 582 + 583 + for (size_t i = 0; i < present_hands_taken.size(); i++) { 584 + if (present_hands_taken[i] == false) { 585 + // if this hand never got assigned to a history 586 + HandHistory3D history_new; 587 + history_new.uuid = rand(); // Not a great uuid, huh? Good enough for us, this only has to be 588 + // unique across say an hour period max. 589 + handEuroFiltersInit(&history_new, FCMIN_HAND, FCMIN_D_HAND, BETA_HAND); 590 + history_new.last_hands_unfiltered.push(hands_unfiltered[i]); 591 + // history_new. 592 + htd->histories_3d.push_back( 593 + history_new); // Add something to the end - don't initialize any of it. 594 + } 595 + } 596 + 597 + int bob = 0; 598 + for (size_t i = 0; i < past_hands_taken.size(); i++) { 599 + if (past_hands_taken[i] == false) { 600 + htd->histories_3d.erase(htd->histories_3d.begin() + i + bob); 601 + bob--; 602 + } 603 + } 604 + 605 + if (htd->histories_3d.size() == 0) { 606 + HT_DEBUG(htd, "Bailing"); 607 + htExitFrame(htd, true, NULL, 0); 608 + return; 609 + } 610 + 611 + size_t num_hands = htd->histories_3d.size(); 612 + // if (num_hands > 2) { 613 + HT_DEBUG(htd, "Ending with %zu hands!", 614 + num_hands); // this is quite bad, but rarely happens. 615 + // } 616 + 617 + // Here, we go back to our bbox_histories and remove the histories for any bounding boxes that never turned into 618 + // good hands. 619 + 620 + // Iterate over all hands we're keeping track of, compute their current handedness. 621 + std::vector<size_t> valid_2d_idxs[2]; 622 + 623 + 624 + for (size_t i = 0; i < htd->histories_3d.size(); i++) { 625 + // U_LOG_E("Valid hand %zu l_idx %i r_idx %i", i, htd->histories_3d[i].last_hands[0]->idx_l, 626 + // htd->histories_3d[i].last_hands[0]->idx_r); 627 + valid_2d_idxs[0].push_back(htd->histories_3d[i].last_hands_unfiltered[0]->idx_l); 628 + valid_2d_idxs[1].push_back(htd->histories_3d[i].last_hands_unfiltered[0]->idx_r); 629 + handednessHandHistory3D(&htd->histories_3d[i]); 630 + } 631 + 632 + // Almost certainly not the cleanest way of doing this but leave me alone 633 + // Per camera view 634 + for (int view = 0; view < 2; view++) { 635 + // Per entry in bbox_histories 636 + for (size_t hist_idx = 0; hist_idx < htd->views[view].bbox_histories.size(); hist_idx++) { 637 + // See if this entry in bbox_histories ever turned into a 3D hand. If not, we notify (in a very 638 + // silly way) htImageToKeypoints that it should go away because it was an erroneous detection. 639 + for (size_t valid_idx : valid_2d_idxs[view]) { 640 + if (valid_idx == hist_idx) { 641 + htd->views[view].bbox_histories[hist_idx].htAlgorithm_approves = true; 642 + break; 643 + } else { 644 + htd->views[view].bbox_histories[hist_idx].htAlgorithm_approves = false; 645 + } 646 + } 647 + } 648 + } 649 + 650 + // Whoo! Okay, now we have some unfiltered hands in htd->histories_3d[i].last_hands[0]! Euro filter them! 651 + 652 + std::vector<Hand3D> filtered_hands(num_hands); 653 + 654 + for (size_t hand_index = 0; hand_index < num_hands; hand_index++) { 655 + handEuroFiltersRun(htd, &htd->histories_3d[hand_index], &filtered_hands[hand_index]); 656 + htd->histories_3d[hand_index].last_hands_filtered.push(filtered_hands[hand_index]); 657 + applyThumbIndexDrag(&filtered_hands[hand_index]); 658 + filtered_hands[hand_index].handedness = htd->histories_3d[hand_index].handedness; 659 + } 660 + 661 + std::vector<size_t> xr_indices; 662 + std::vector<Hand3D *> hands_to_use; 663 + 664 + if (filtered_hands.size() == 1) { 665 + if (filtered_hands[0].handedness < 0) { 666 + // Left 667 + xr_indices = {0}; 668 + hands_to_use = {&filtered_hands[0]}; 669 + } else { 670 + xr_indices = {1}; 671 + hands_to_use = {&filtered_hands[0]}; 672 + } 673 + } else { 674 + // filtered_hands better be two for now. 675 + if (filtered_hands[0].handedness < filtered_hands[1].handedness) { 676 + xr_indices = {0, 1}; 677 + hands_to_use = {&filtered_hands[0], &filtered_hands[1]}; 678 + } else { 679 + xr_indices = {1, 0}; 680 + hands_to_use = {&filtered_hands[0], &filtered_hands[1]}; 681 + } 682 + } 683 + 684 + struct xrt_hand_joint_set final_hands_ordered_by_handedness[2]; 685 + memset(&final_hands_ordered_by_handedness[0], 0, sizeof(xrt_hand_joint_set)); 686 + memset(&final_hands_ordered_by_handedness[1], 0, sizeof(xrt_hand_joint_set)); 687 + final_hands_ordered_by_handedness[0].is_active = false; 688 + final_hands_ordered_by_handedness[1].is_active = false; 689 + 690 + for (size_t i = 0; (i < xr_indices.size()); i++) { 691 + Hand3D *hand = hands_to_use[i]; 692 + 693 + struct xrt_hand_joint_set *put_in_set = &final_hands_ordered_by_handedness[xr_indices[i]]; 694 + 695 + xrt_vec3 wrist = hand->kps[0]; 696 + 697 + xrt_vec3 index_prox = hand->kps[5]; 698 + xrt_vec3 middle_prox = hand->kps[9]; 699 + xrt_vec3 ring_prox = hand->kps[13]; 700 + xrt_vec3 pinky_prox = hand->kps[17]; 701 + 702 + xrt_vec3 middle_to_index = m_vec3_sub(index_prox, middle_prox); 703 + xrt_vec3 middle_to_ring = m_vec3_sub(ring_prox, middle_prox); 704 + xrt_vec3 middle_to_pinky = m_vec3_sub(pinky_prox, middle_prox); 705 + 706 + xrt_vec3 three_fourths_down_middle_mcp = 707 + m_vec3_add(m_vec3_mul_scalar(wrist, 3.0f / 4.0f), m_vec3_mul_scalar(middle_prox, 1.0f / 4.0f)); 708 + 709 + xrt_vec3 middle_metacarpal = three_fourths_down_middle_mcp; 710 + 711 + float s = 0.6f; 712 + 713 + xrt_vec3 index_metacarpal = middle_metacarpal + m_vec3_mul_scalar(middle_to_index, s); 714 + xrt_vec3 ring_metacarpal = middle_metacarpal + m_vec3_mul_scalar(middle_to_ring, s); 715 + xrt_vec3 pinky_metacarpal = middle_metacarpal + m_vec3_mul_scalar(middle_to_pinky, s); 716 + 717 + float palm_ness = 0.33; 718 + xrt_vec3 palm = 719 + m_vec3_add(m_vec3_mul_scalar(wrist, palm_ness), m_vec3_mul_scalar(middle_prox, (1.0f - palm_ness))); 720 + 721 + 722 + 723 + htProcessJoint(htd, palm, put_in_set, XRT_HAND_JOINT_PALM); 724 + 725 + htProcessJoint(htd, hand->kps[0], put_in_set, XRT_HAND_JOINT_WRIST); 726 + htProcessJoint(htd, hand->kps[1], put_in_set, XRT_HAND_JOINT_THUMB_METACARPAL); 727 + htProcessJoint(htd, hand->kps[2], put_in_set, XRT_HAND_JOINT_THUMB_PROXIMAL); 728 + htProcessJoint(htd, hand->kps[3], put_in_set, XRT_HAND_JOINT_THUMB_DISTAL); 729 + htProcessJoint(htd, hand->kps[4], put_in_set, XRT_HAND_JOINT_THUMB_TIP); 730 + 731 + htProcessJoint(htd, index_metacarpal, put_in_set, XRT_HAND_JOINT_INDEX_METACARPAL); 732 + htProcessJoint(htd, hand->kps[5], put_in_set, XRT_HAND_JOINT_INDEX_PROXIMAL); 733 + htProcessJoint(htd, hand->kps[6], put_in_set, XRT_HAND_JOINT_INDEX_INTERMEDIATE); 734 + htProcessJoint(htd, hand->kps[7], put_in_set, XRT_HAND_JOINT_INDEX_DISTAL); 735 + htProcessJoint(htd, hand->kps[8], put_in_set, XRT_HAND_JOINT_INDEX_TIP); 736 + 737 + htProcessJoint(htd, middle_metacarpal, put_in_set, XRT_HAND_JOINT_MIDDLE_METACARPAL); 738 + htProcessJoint(htd, hand->kps[9], put_in_set, XRT_HAND_JOINT_MIDDLE_PROXIMAL); 739 + htProcessJoint(htd, hand->kps[10], put_in_set, XRT_HAND_JOINT_MIDDLE_INTERMEDIATE); 740 + htProcessJoint(htd, hand->kps[11], put_in_set, XRT_HAND_JOINT_MIDDLE_DISTAL); 741 + htProcessJoint(htd, hand->kps[12], put_in_set, XRT_HAND_JOINT_MIDDLE_TIP); 742 + 743 + htProcessJoint(htd, ring_metacarpal, put_in_set, XRT_HAND_JOINT_RING_METACARPAL); 744 + htProcessJoint(htd, hand->kps[13], put_in_set, XRT_HAND_JOINT_RING_PROXIMAL); 745 + htProcessJoint(htd, hand->kps[14], put_in_set, XRT_HAND_JOINT_RING_INTERMEDIATE); 746 + htProcessJoint(htd, hand->kps[15], put_in_set, XRT_HAND_JOINT_RING_DISTAL); 747 + htProcessJoint(htd, hand->kps[16], put_in_set, XRT_HAND_JOINT_RING_TIP); 748 + 749 + htProcessJoint(htd, pinky_metacarpal, put_in_set, XRT_HAND_JOINT_LITTLE_METACARPAL); 750 + htProcessJoint(htd, hand->kps[17], put_in_set, XRT_HAND_JOINT_LITTLE_PROXIMAL); 751 + htProcessJoint(htd, hand->kps[18], put_in_set, XRT_HAND_JOINT_LITTLE_INTERMEDIATE); 752 + htProcessJoint(htd, hand->kps[19], put_in_set, XRT_HAND_JOINT_LITTLE_DISTAL); 753 + htProcessJoint(htd, hand->kps[20], put_in_set, XRT_HAND_JOINT_LITTLE_TIP); 754 + 755 + put_in_set->is_active = true; 756 + math_pose_identity(&put_in_set->hand_pose.pose); 757 + 758 + 759 + put_in_set->hand_pose.pose.orientation = htd->stereo_camera_to_left_camera; 760 + 761 + put_in_set->hand_pose.relation_flags = valid_flags_ht; 762 + 763 + applyJointWidths(put_in_set); 764 + applyJointOrientations(put_in_set, xr_indices[i]); 765 + } 766 + 767 + htExitFrame(htd, false, final_hands_ordered_by_handedness, filtered_hands[0].timestamp); 768 + }
+2 -763
src/xrt/drivers/ht/ht_algorithm.hpp
··· 9 9 10 10 #pragma once 11 11 12 - #include "cjson/cJSON.h" 13 - #include "math/m_filter_one_euro.h" 14 - #include "math/m_vec2.h" 15 - #include "os/os_time.h" 16 - #include "util/u_frame.h" 17 - 18 - #include "templates/NaivePermutationSort.hpp" 19 - 20 - #include "ht_driver.hpp" 21 - #include "ht_models.hpp" 22 - #include "ht_hand_math.hpp" 23 - #include "ht_image_math.hpp" 24 - #include "util/u_time.h" 25 - 26 - #include <opencv2/imgcodecs.hpp> 27 - #include <opencv2/imgproc.hpp> 28 - 29 - 30 - // Flags to tell state tracker that these are indeed valid joints 31 - static enum xrt_space_relation_flags valid_flags_ht = (enum xrt_space_relation_flags)( 32 - XRT_SPACE_RELATION_ORIENTATION_VALID_BIT | XRT_SPACE_RELATION_ORIENTATION_TRACKED_BIT | 33 - XRT_SPACE_RELATION_POSITION_VALID_BIT | XRT_SPACE_RELATION_POSITION_TRACKED_BIT); 34 - 35 - 36 - static void 37 - htProcessJoint(struct ht_device *htd, 38 - struct xrt_vec3 model_out, 39 - struct xrt_hand_joint_set *hand, 40 - enum xrt_hand_joint idx) 41 - { 42 - hand->values.hand_joint_set_default[idx].relation.relation_flags = valid_flags_ht; 43 - hand->values.hand_joint_set_default[idx].relation.pose.position.x = model_out.x; 44 - hand->values.hand_joint_set_default[idx].relation.pose.position.y = model_out.y; 45 - hand->values.hand_joint_set_default[idx].relation.pose.position.z = model_out.z; 46 - } 47 - 48 - static float 49 - errHistory2D(HandHistory2DBBox *past, Palm7KP *present) 50 - { 51 - if (!past->htAlgorithm_approves) { 52 - // U_LOG_E("Returning big number because htAlgorithm told me to!"); 53 - return 100000000000000000000000000000.0f; 54 - } 55 - float sum_of_lengths = m_vec2_len(*past->wrist_unfiltered[0] - *past->middle_unfiltered[0]) + 56 - m_vec2_len(present->kps[WRIST_7KP] - present->kps[MIDDLE_7KP]); 57 - 58 - float sum_of_distances = (m_vec2_len(*past->wrist_unfiltered[0] - present->kps[WRIST_7KP]) + 59 - m_vec2_len(*past->middle_unfiltered[0] - present->kps[MIDDLE_7KP])); 60 - 61 - 62 - float final = sum_of_distances / sum_of_lengths; 63 - 64 - return final; 65 - } 66 - 67 - static std::vector<Hand2D> 68 - htImageToKeypoints(struct ht_view *htv) 69 - { 70 - int view = htv->view; 71 - struct ht_device *htd = htv->htd; 72 - 73 - 74 - cv::Mat raw_input = htv->run_model_on_this; 75 - 76 - // Get a list of palms - drop confidences and ssd bounding boxes, just keypoints. 77 - 78 - std::vector<Palm7KP> hand_detections = htv->run_detection_model(htv, raw_input); 79 - 80 - std::vector<bool> used_histories; 81 - std::vector<bool> used_detections; 82 - 83 - std::vector<size_t> history_indices; 84 - std::vector<size_t> detection_indices; 85 - std::vector<float> dontuse; 86 - 87 - 88 - // Strategy here is: We have a big list of palms. Match 'em up to previous palms. 89 - naive_sort_permutation_by_error<HandHistory2DBBox, Palm7KP>(htv->bbox_histories, hand_detections, 90 - 91 - // bools 92 - used_histories, used_detections, 93 - 94 - history_indices, detection_indices, dontuse, 95 - errHistory2D, 1.0f); 96 - 97 - // Here's the trick - we use the associated bbox_filter to get an output but *never commit* the noisy 128x128 98 - // detection; instead later on we commit the (hopefully) nicer palm and wrist from the 224x224 keypoint 99 - // estimation. 100 - 101 - // Add extra detections! 102 - for (size_t i = 0; i < used_detections.size(); i++) { 103 - if ((used_detections[i] == false) && hand_detections[i].confidence > 0.65) { 104 - // Confidence to get in the door is 0.65, confidence to stay in is 0.3 105 - HandHistory2DBBox hist_new = {}; 106 - m_filter_euro_vec2_init(&hist_new.m_filter_center, FCMIN_BBOX_POSITION, FCMIN_D_BB0X_POSITION, 107 - BETA_BB0X_POSITION); 108 - m_filter_euro_vec2_init(&hist_new.m_filter_direction, FCMIN_BBOX_ORIENTATION, 109 - FCMIN_D_BB0X_ORIENTATION, BETA_BB0X_ORIENTATION); 110 - 111 - htv->bbox_histories.push_back(hist_new); 112 - history_indices.push_back(htv->bbox_histories.size() - 1); 113 - detection_indices.push_back(i); 114 - } 115 - } 116 - 117 - // Do the things for each active bbox history! 118 - for (size_t i = 0; i < history_indices.size(); i++) { 119 - HandHistory2DBBox *hist_of_interest = &htv->bbox_histories[history_indices[i]]; 120 - hist_of_interest->wrist_unfiltered.push(hand_detections[detection_indices[i]].kps[WRIST_7KP]); 121 - hist_of_interest->index_unfiltered.push(hand_detections[detection_indices[i]].kps[INDEX_7KP]); 122 - hist_of_interest->middle_unfiltered.push(hand_detections[detection_indices[i]].kps[MIDDLE_7KP]); 123 - hist_of_interest->pinky_unfiltered.push(hand_detections[detection_indices[i]].kps[LITTLE_7KP]); 124 - // Eh do the rest later 125 - } 126 - 127 - // Prune stale detections! (After we don't need {history,detection}_indices to be correct) 128 - int bob = 0; 129 - for (size_t i = 0; i < used_histories.size(); i++) { 130 - if (used_histories[i] == false) { 131 - // history never got assigned a present hand to it. treat it as stale delete it. 132 - 133 - HT_TRACE(htv->htd, "Removing bbox from history!\n"); 134 - htv->bbox_histories.erase(htv->bbox_histories.begin() + i + bob); 135 - bob--; 136 - } 137 - } 138 - if (htv->bbox_histories.size() == 0) { 139 - return {}; // bail early 140 - } 141 - 142 - 143 - 144 - std::vector<Hand2D> list_of_hands_in_bbox( 145 - htv->bbox_histories.size()); // all of these are same size as htv->bbox_histories 146 - 147 - std::vector<std::future<Hand2D>> await_list_of_hand_in_bbox; //(htv->bbox_histories.size()); 148 - 149 - std::vector<DetectionModelOutput> blah(htv->bbox_histories.size()); 150 - 151 - std::vector<Hand2D> output; 152 - 153 - if (htv->bbox_histories.size() > 2) { 154 - HT_DEBUG(htd, "More than two hands (%zu) in 2D view %i", htv->bbox_histories.size(), htv->view); 155 - } 156 - 157 - 158 - for (size_t i = 0; i < htv->bbox_histories.size(); i++) { //(BBoxHistory * entry : htv->bbox_histories) { 159 - HandHistory2DBBox *entry = &htv->bbox_histories[i]; 160 - cv::Mat hand_rect = cv::Mat(224, 224, CV_8UC3); 161 - xrt_vec2 unfiltered_middle; 162 - xrt_vec2 unfiltered_direction; 163 - 164 - 165 - centerAndRotationFromJoints(htv, entry->wrist_unfiltered[0], entry->index_unfiltered[0], 166 - entry->middle_unfiltered[0], entry->pinky_unfiltered[0], &unfiltered_middle, 167 - &unfiltered_direction); 168 - 169 - xrt_vec2 filtered_middle; 170 - xrt_vec2 filtered_direction; 171 - 172 - m_filter_euro_vec2_run_no_commit(&entry->m_filter_center, htv->htd->current_frame_timestamp, 173 - &unfiltered_middle, &filtered_middle); 174 - m_filter_euro_vec2_run_no_commit(&entry->m_filter_direction, htv->htd->current_frame_timestamp, 175 - &unfiltered_direction, &filtered_direction); 176 - 177 - rotatedRectFromJoints(htv, filtered_middle, filtered_direction, &blah[i]); 178 - 179 - warpAffine(raw_input, hand_rect, blah[i].warp_there, hand_rect.size()); 180 - 181 - await_list_of_hand_in_bbox.push_back( 182 - std::async(std::launch::async, htd->views[view].run_keypoint_model, &htd->views[view], hand_rect)); 183 - } 184 - 185 - // cut here 186 - 187 - for (size_t i = 0; i < htv->bbox_histories.size(); i++) { 188 - 189 - Hand2D in_bbox = await_list_of_hand_in_bbox[i].get(); 190 - 191 - cv::Matx23f warp_back = blah[i].warp_back; 192 - 193 - Hand2D in_image_ray_coords; 194 - Hand2D in_image_px_coords; 195 - 196 - for (int i = 0; i < 21; i++) { 197 - struct xrt_vec3 vec = in_bbox.kps[i]; 198 - 199 - #if 1 200 - xrt_vec3 rr = transformVecBy2x3(vec, warp_back); 201 - rr.z = vec.z; 202 - #else 203 - xrt_vec3 rr; 204 - rr.x = (vec.x * warp_back(0, 0)) + (vec.y * warp_back(0, 1)) + warp_back(0, 2); 205 - rr.y = (vec.x * warp_back(1, 0)) + (vec.y * warp_back(1, 1)) + warp_back(1, 2); 206 - rr.z = vec.z; 207 - #endif 208 - in_image_px_coords.kps[i] = rr; 209 - 210 - in_image_ray_coords.kps[i] = raycoord(htv, rr); 211 - if (htd->debug_scribble && htd->dynamic_config.scribble_2d_keypoints) { 212 - handDot(htv->debug_out_to_this, {rr.x, rr.y}, fmax((-vec.z + 100 - 20) * .08, 2), 213 - ((float)i) / 21.0f, 0.95f, cv::FILLED); 214 - } 215 - } 216 - xrt_vec2 wrist_in_px_coords = {in_image_px_coords.kps[WRIST].x, in_image_px_coords.kps[WRIST].y}; 217 - xrt_vec2 index_in_px_coords = {in_image_px_coords.kps[INDX_PXM].x, in_image_px_coords.kps[INDX_PXM].y}; 218 - xrt_vec2 middle_in_px_coords = {in_image_px_coords.kps[MIDL_PXM].x, in_image_px_coords.kps[MIDL_PXM].y}; 219 - xrt_vec2 little_in_px_coords = {in_image_px_coords.kps[LITL_PXM].x, in_image_px_coords.kps[LITL_PXM].y}; 220 - xrt_vec2 dontuse; 221 - 222 - xrt_vec2 unfiltered_middle, unfiltered_direction; 223 - 224 - centerAndRotationFromJoints(htv, &wrist_in_px_coords, &index_in_px_coords, &middle_in_px_coords, 225 - &little_in_px_coords, &unfiltered_middle, &unfiltered_direction); 226 - 227 - m_filter_euro_vec2_run(&htv->bbox_histories[i].m_filter_center, htv->htd->current_frame_timestamp, 228 - &unfiltered_middle, &dontuse); 229 - 230 - m_filter_euro_vec2_run(&htv->bbox_histories[i].m_filter_direction, htv->htd->current_frame_timestamp, 231 - &unfiltered_direction, &dontuse); 232 - 233 - output.push_back(in_image_ray_coords); 234 - } 235 - return output; 236 - } 237 - 238 - #if defined(EXPERIMENTAL_DATASET_RECORDING) 239 - 240 - static void 241 - jsonAddJoint(cJSON *into_this, xrt_pose loc, const char *name) 242 - { 243 - 244 - cJSON *container = cJSON_CreateObject(); 245 - cJSON *joint_loc = cJSON_CreateArray(); 246 - cJSON_AddItemToArray(joint_loc, cJSON_CreateNumber(loc.position.x)); 247 - cJSON_AddItemToArray(joint_loc, cJSON_CreateNumber(loc.position.y)); 248 - cJSON_AddItemToArray(joint_loc, cJSON_CreateNumber(loc.position.z)); 249 - 250 - cJSON_AddItemToObject(container, "position", joint_loc); 251 - 252 - cJSON *joint_rot = cJSON_CreateArray(); 253 - 254 - 255 - cJSON_AddItemToArray(joint_rot, cJSON_CreateNumber(loc.orientation.x)); 256 - cJSON_AddItemToArray(joint_rot, cJSON_CreateNumber(loc.orientation.y)); 257 - cJSON_AddItemToArray(joint_rot, cJSON_CreateNumber(loc.orientation.z)); 258 - cJSON_AddItemToArray(joint_rot, cJSON_CreateNumber(loc.orientation.w)); 259 - 260 - cJSON_AddItemToObject(container, "rotation_quat_xyzw", joint_rot); 261 - 262 - cJSON_AddItemToObject(into_this, name, container); 263 - } 12 + struct ht_device; 264 13 265 14 void 266 - jsonMaybeAddSomeHands(struct ht_device *htd, bool err) 267 - { 268 - if (!htd->tracking_should_record_dataset) { 269 - return; 270 - } 271 - cJSON *j_this_frame = cJSON_CreateObject(); 272 - cJSON_AddItemToObject(j_this_frame, "seq_since_start", cJSON_CreateNumber(htd->gst.current_index)); 273 - cJSON_AddItemToObject(j_this_frame, "seq_src", cJSON_CreateNumber(htd->frame_for_process->source_sequence)); 274 - cJSON_AddItemToObject(j_this_frame, "ts", cJSON_CreateNumber(htd->gst.last_frame_ns)); 275 - 276 - cJSON *j_hands_in_frame = cJSON_AddArrayToObject(j_this_frame, "detected_hands"); 277 - if (!err) { 278 - for (size_t idx_hand = 0; idx_hand < htd->histories_3d.size(); idx_hand++) { 279 - cJSON *j_hand_in_frame = cJSON_CreateObject(); 280 - 281 - cJSON *j_uuid = cJSON_CreateNumber(htd->histories_3d[idx_hand].uuid); 282 - cJSON_AddItemToObject(j_hand_in_frame, "uuid", j_uuid); 283 - 284 - cJSON *j_handedness = cJSON_CreateNumber(htd->histories_3d[idx_hand].handedness); 285 - cJSON_AddItemToObject(j_hand_in_frame, "handedness", j_handedness); 286 - 287 - static const char *keys[21] = { 288 - "WRIST", 289 - 290 - "THMB_MCP", "THMB_PXM", "THMB_DST", "THMB_TIP", 291 - 292 - "INDX_PXM", "INDX_INT", "INDX_DST", "INDX_TIP", 293 - 294 - "MIDL_PXM", "MIDL_INT", "MIDL_DST", "MIDL_TIP", 295 - 296 - "RING_PXM", "RING_INT", "RING_DST", "RING_TIP", 297 - 298 - "LITL_PXM", "LITL_INT", "LITL_DST", "LITL_TIP", 299 - }; 300 - 301 - for (int idx_joint = 0; idx_joint < 21; idx_joint++) { 302 - // const char* key = keys[idx_joint]; 303 - cJSON *j_vec3 = cJSON_AddArrayToObject(j_hand_in_frame, keys[idx_joint]); 304 - cJSON_AddItemToArray( 305 - j_vec3, 306 - cJSON_CreateNumber( 307 - htd->histories_3d[idx_hand].last_hands_unfiltered[0]->kps[idx_joint].x)); 308 - cJSON_AddItemToArray( 309 - j_vec3, 310 - cJSON_CreateNumber( 311 - htd->histories_3d[idx_hand].last_hands_unfiltered[0]->kps[idx_joint].y)); 312 - cJSON_AddItemToArray( 313 - j_vec3, 314 - cJSON_CreateNumber( 315 - htd->histories_3d[idx_hand].last_hands_unfiltered[0]->kps[idx_joint].z)); 316 - } 317 - 318 - 319 - cJSON_AddItemToArray(j_hands_in_frame, j_hand_in_frame); 320 - } 321 - } 322 - cJSON_AddItemToArray(htd->output_array, j_this_frame); 323 - } 324 - 325 - #endif 326 - 327 - 328 - 329 - static void 330 - htExitFrame(struct ht_device *htd, 331 - bool err, 332 - struct xrt_hand_joint_set final_hands_ordered_by_handedness[2], 333 - uint64_t timestamp) 334 - { 335 - 336 - os_mutex_lock(&htd->openxr_hand_data_mediator); 337 - if (err) { 338 - htd->hands_for_openxr[0].is_active = false; 339 - htd->hands_for_openxr[1].is_active = false; 340 - } else { 341 - memcpy(&htd->hands_for_openxr[0], &final_hands_ordered_by_handedness[0], 342 - sizeof(struct xrt_hand_joint_set)); 343 - memcpy(&htd->hands_for_openxr[1], &final_hands_ordered_by_handedness[1], 344 - sizeof(struct xrt_hand_joint_set)); 345 - htd->hands_for_openxr_timestamp = timestamp; 346 - HT_DEBUG(htd, "Adding ts %zu", htd->hands_for_openxr_timestamp); 347 - } 348 - os_mutex_unlock(&htd->openxr_hand_data_mediator); 349 - #ifdef EXPERIMENTAL_DATASET_RECORDING 350 - if (htd->tracking_should_record_dataset) { 351 - // Add nothing-entry to json file. 352 - jsonMaybeAddSomeHands(htd, err); 353 - htd->gst.current_index++; 354 - } 355 - #endif 356 - } 357 - 358 - 359 - static void 360 - htJointDisparityMath(struct ht_device *htd, Hand2D *hand_in_left, Hand2D *hand_in_right, Hand3D *out_hand) 361 - { 362 - for (int i = 0; i < 21; i++) { 363 - // Believe it or not, this is where the 3D stuff happens! 364 - float t = htd->baseline / (hand_in_left->kps[i].x - hand_in_right->kps[i].x); 365 - 366 - out_hand->kps[i].z = -t; 367 - 368 - out_hand->kps[i].x = (hand_in_left->kps[i].x * t); 369 - out_hand->kps[i].y = -hand_in_left->kps[i].y * t; 370 - 371 - out_hand->kps[i].x += htd->baseline + (hand_in_right->kps[i].x * t); 372 - out_hand->kps[i].y += -hand_in_right->kps[i].y * t; 373 - 374 - out_hand->kps[i].x *= .5; 375 - out_hand->kps[i].y *= .5; 376 - } 377 - } 378 - int64_t last_frame, this_frame; 379 - 380 - static void 381 - htRunAlgorithm(struct ht_device *htd) 382 - { 383 - XRT_TRACE_MARKER(); 384 - 385 - #ifdef EXPERIMENTAL_DATASET_RECORDING 386 - 387 - if (htd->tracking_should_record_dataset) { 388 - U_LOG_E("PUSHING!"); 389 - uint64_t start = os_monotonic_get_ns(); 390 - xrt_sink_push_frame(htd->gst.sink, htd->frame_for_process); 391 - uint64_t end = os_monotonic_get_ns(); 392 - 393 - if ((end - start) > 0.1 * U_TIME_1MS_IN_NS) { 394 - U_LOG_E("Encoder overloaded!"); 395 - } 396 - 397 - htd->gst.offset_ns = gstreamer_sink_get_timestamp_offset(htd->gst.gs); 398 - htd->gst.last_frame_ns = htd->frame_for_process->timestamp - htd->gst.offset_ns; 399 - } 400 - #endif 401 - 402 - htd->current_frame_timestamp = htd->frame_for_process->timestamp; 403 - 404 - int64_t start, end; 405 - start = os_monotonic_get_ns(); 406 - 407 - 408 - /* 409 - * Setup views. 410 - */ 411 - 412 - const int full_width = htd->frame_for_process->width; 413 - const int full_height = htd->frame_for_process->height; 414 - const int view_width = htd->camera.one_view_size_px.w; 415 - const int view_height = htd->camera.one_view_size_px.h; 416 - 417 - // assert(full_width == view_width * 2); 418 - assert(full_height == view_height); 419 - 420 - const cv::Size full_size = cv::Size(full_width, full_height); 421 - const cv::Size view_size = cv::Size(view_width, view_height); 422 - const cv::Point view_offsets[2] = {cv::Point(0, 0), cv::Point(view_width, 0)}; 423 - 424 - cv::Mat full_frame(full_size, CV_8UC3, htd->frame_for_process->data, htd->frame_for_process->stride); 425 - htd->views[0].run_model_on_this = full_frame(cv::Rect(view_offsets[0], view_size)); 426 - htd->views[1].run_model_on_this = full_frame(cv::Rect(view_offsets[1], view_size)); 427 - 428 - htd->mat_for_process = &full_frame; 429 - 430 - // Check this every frame. We really, really, really don't want it to ever suddenly be null. 431 - htd->debug_scribble = htd->debug_sink.sink != nullptr; 432 - 433 - cv::Mat debug_output = {}; 434 - xrt_frame *debug_frame = nullptr; // only use if htd->debug_scribble 435 - 436 - if (htd->debug_scribble) { 437 - u_frame_clone(htd->frame_for_process, &debug_frame); 438 - debug_output = cv::Mat(full_size, CV_8UC3, debug_frame->data, debug_frame->stride); 439 - htd->views[0].debug_out_to_this = debug_output(cv::Rect(view_offsets[0], view_size)); 440 - htd->views[1].debug_out_to_this = debug_output(cv::Rect(view_offsets[1], view_size)); 441 - } 442 - 443 - 444 - /* 445 - * Do the hand tracking! 446 - */ 447 - 448 - std::future<std::vector<Hand2D>> future_left = 449 - std::async(std::launch::async, htImageToKeypoints, &htd->views[0]); 450 - std::future<std::vector<Hand2D>> future_right = 451 - std::async(std::launch::async, htImageToKeypoints, &htd->views[1]); 452 - std::vector<Hand2D> hands_in_left_view = future_left.get(); 453 - std::vector<Hand2D> hands_in_right_view = future_right.get(); 454 - 455 - end = os_monotonic_get_ns(); 456 - 457 - 458 - this_frame = os_monotonic_get_ns(); 459 - 460 - double time_ms = (double)(end - start) / (double)U_TIME_1MS_IN_NS; 461 - double _1_time = 1 / (time_ms * 0.001); 462 - 463 - char t[64]; 464 - char t2[64]; 465 - sprintf(t, "% 8.2f ms", time_ms); 466 - sprintf(t2, "% 8.2f fps", _1_time); 467 - last_frame = this_frame; 468 - 469 - 470 - if (htd->debug_scribble) { 471 - cv::putText(debug_output, t, cv::Point(30, 60), cv::FONT_HERSHEY_SIMPLEX, 1.0f, cv::Scalar(0, 255, 0), 472 - 4); 473 - cv::putText(debug_output, t2, cv::Point(30, 100), cv::FONT_HERSHEY_SIMPLEX, 1.0f, cv::Scalar(0, 255, 0), 474 - 4); 475 - } else { 476 - HT_DEBUG(htd, "%s", t); 477 - HT_DEBUG(htd, "%s", t2); 478 - } 479 - 480 - 481 - // Convenience 482 - uint64_t timestamp = htd->frame_for_process->timestamp; 483 - 484 - if (htd->debug_scribble) { 485 - u_sink_debug_push_frame(&htd->debug_sink, debug_frame); 486 - xrt_frame_reference(&debug_frame, NULL); 487 - } 488 - 489 - // Bail early this frame if no hands were detected. 490 - // In the long run, this'll be a silly thing - we shouldn't always take the detection model's word for it 491 - // especially when part of the pipeline is an arbitrary confidence threshold. 492 - if (hands_in_left_view.size() == 0 || hands_in_right_view.size() == 0) { 493 - htExitFrame(htd, true, NULL, 0); 494 - return; 495 - } 496 - 497 - 498 - 499 - std::vector<Hand3D> possible_3d_hands; 500 - 501 - // for every possible combination of hands in left view and hands in right view, 502 - for (size_t idx_l = 0; idx_l < hands_in_left_view.size(); idx_l++) { 503 - for (size_t idx_r = 0; idx_r < hands_in_right_view.size(); idx_r++) { 504 - Hand3D cur_hand = {}; 505 - 506 - Hand2D &left_2d = hands_in_left_view[idx_l]; 507 - Hand2D &right_2d = hands_in_right_view[idx_r]; 508 - 509 - // Calculate a 3D hand for this combination 510 - htJointDisparityMath(htd, &hands_in_left_view[idx_l], &hands_in_right_view[idx_r], &cur_hand); 511 - cur_hand.timestamp = timestamp; 512 - cur_hand.rejected_by_smush = false; 513 - 514 - cur_hand.idx_l = idx_l; 515 - cur_hand.idx_r = idx_r; 516 - 517 - // Calculate a y-disparity for this combination 518 - cur_hand.y_disparity_error = errHandDisparity(&left_2d, &right_2d); 519 - 520 - possible_3d_hands.push_back(cur_hand); 521 - } 522 - } 523 - 524 - HT_DEBUG(htd, "Starting with %zu hands!", possible_3d_hands.size()); 525 - 526 - // For each pair of 3D hands we just made 527 - for (size_t idx_one = 0; idx_one < possible_3d_hands.size(); idx_one++) { 528 - for (size_t idx_two = 0; idx_two < possible_3d_hands.size(); idx_two++) { 529 - if ((idx_one <= idx_two)) { 530 - continue; 531 - } 532 - 533 - // See if this pair is suspiciously close together. 534 - // If it is, then this pairing is wrong - this is what was causing the "hands smushing together" 535 - // issue - we weren't catching these reliably. 536 - float errr = sumOfHandJointDistances(&possible_3d_hands[idx_one], &possible_3d_hands[idx_two]); 537 - HT_TRACE(htd, "%zu %zu is smush %f", idx_one, idx_two, errr); 538 - if (errr < 0.03f * 21.0f) { 539 - possible_3d_hands[idx_one].rejected_by_smush = true; 540 - possible_3d_hands[idx_two].rejected_by_smush = true; 541 - } 542 - } 543 - } 544 - 545 - std::vector<Hand3D> hands_unfiltered; 546 - 547 - for (Hand3D hand : possible_3d_hands) { 548 - // If none of these are false, then all our heuristics indicate this is a real hand, so we add it to our 549 - // list of real hands. 550 - bool selected = !hand.rejected_by_smush && // 551 - hand.y_disparity_error < 1.0f && // 552 - rejectTooClose(htd, &hand) && // 553 - rejectTooFar(htd, &hand) && // 554 - rejectTinyPalm(htd, &hand); 555 - if (selected) { 556 - HT_TRACE(htd, "Pushing back with y-error %f", hand.y_disparity_error); 557 - hands_unfiltered.push_back(hand); 558 - } 559 - } 560 - 561 - 562 - std::vector<bool> past_hands_taken; 563 - std::vector<bool> present_hands_taken; 564 - 565 - std::vector<size_t> past_indices; 566 - std::vector<size_t> present_indices; 567 - std::vector<float> flow_errors; 568 - 569 - 570 - float max_dist_between_frames = 1.0f; 571 - 572 - naive_sort_permutation_by_error<HandHistory3D, Hand3D>(htd->histories_3d, // past 573 - hands_unfiltered, // present 574 - 575 - 576 - // outputs 577 - past_hands_taken, present_hands_taken, past_indices, 578 - present_indices, flow_errors, errHandHistory, 579 - (max_dist_between_frames * 21.0f) 580 - 581 - ); 582 - 583 - 584 - for (size_t i = 0; i < past_indices.size(); i++) { 585 - htd->histories_3d[past_indices[i]].last_hands_unfiltered.push(hands_unfiltered[present_indices[i]]); 586 - } 587 - // The above may not do anything, because we'll start out with no hand histories! All the numbers of elements 588 - // should be zero. 589 - 590 - 591 - for (size_t i = 0; i < present_hands_taken.size(); i++) { 592 - if (present_hands_taken[i] == false) { 593 - // if this hand never got assigned to a history 594 - HandHistory3D history_new; 595 - history_new.uuid = rand(); // Not a great uuid, huh? Good enough for us, this only has to be 596 - // unique across say an hour period max. 597 - handEuroFiltersInit(&history_new, FCMIN_HAND, FCMIN_D_HAND, BETA_HAND); 598 - history_new.last_hands_unfiltered.push(hands_unfiltered[i]); 599 - // history_new. 600 - htd->histories_3d.push_back( 601 - history_new); // Add something to the end - don't initialize any of it. 602 - } 603 - } 604 - 605 - int bob = 0; 606 - for (size_t i = 0; i < past_hands_taken.size(); i++) { 607 - if (past_hands_taken[i] == false) { 608 - htd->histories_3d.erase(htd->histories_3d.begin() + i + bob); 609 - bob--; 610 - } 611 - } 612 - 613 - if (htd->histories_3d.size() == 0) { 614 - HT_DEBUG(htd, "Bailing"); 615 - htExitFrame(htd, true, NULL, 0); 616 - return; 617 - } 618 - 619 - size_t num_hands = htd->histories_3d.size(); 620 - // if (num_hands > 2) { 621 - HT_DEBUG(htd, "Ending with %zu hands!", 622 - num_hands); // this is quite bad, but rarely happens. 623 - // } 624 - 625 - // Here, we go back to our bbox_histories and remove the histories for any bounding boxes that never turned into 626 - // good hands. 627 - 628 - // Iterate over all hands we're keeping track of, compute their current handedness. 629 - std::vector<size_t> valid_2d_idxs[2]; 630 - 631 - 632 - for (size_t i = 0; i < htd->histories_3d.size(); i++) { 633 - // U_LOG_E("Valid hand %zu l_idx %i r_idx %i", i, htd->histories_3d[i].last_hands[0]->idx_l, 634 - // htd->histories_3d[i].last_hands[0]->idx_r); 635 - valid_2d_idxs[0].push_back(htd->histories_3d[i].last_hands_unfiltered[0]->idx_l); 636 - valid_2d_idxs[1].push_back(htd->histories_3d[i].last_hands_unfiltered[0]->idx_r); 637 - handednessHandHistory3D(&htd->histories_3d[i]); 638 - } 639 - 640 - // Almost certainly not the cleanest way of doing this but leave me alone 641 - // Per camera view 642 - for (int view = 0; view < 2; view++) { 643 - // Per entry in bbox_histories 644 - for (size_t hist_idx = 0; hist_idx < htd->views[view].bbox_histories.size(); hist_idx++) { 645 - // See if this entry in bbox_histories ever turned into a 3D hand. If not, we notify (in a very 646 - // silly way) htImageToKeypoints that it should go away because it was an erroneous detection. 647 - for (size_t valid_idx : valid_2d_idxs[view]) { 648 - if (valid_idx == hist_idx) { 649 - htd->views[view].bbox_histories[hist_idx].htAlgorithm_approves = true; 650 - break; 651 - } else { 652 - htd->views[view].bbox_histories[hist_idx].htAlgorithm_approves = false; 653 - } 654 - } 655 - } 656 - } 657 - 658 - // Whoo! Okay, now we have some unfiltered hands in htd->histories_3d[i].last_hands[0]! Euro filter them! 659 - 660 - std::vector<Hand3D> filtered_hands(num_hands); 661 - 662 - for (size_t hand_index = 0; hand_index < num_hands; hand_index++) { 663 - handEuroFiltersRun(htd, &htd->histories_3d[hand_index], &filtered_hands[hand_index]); 664 - htd->histories_3d[hand_index].last_hands_filtered.push(filtered_hands[hand_index]); 665 - applyThumbIndexDrag(&filtered_hands[hand_index]); 666 - filtered_hands[hand_index].handedness = htd->histories_3d[hand_index].handedness; 667 - } 668 - 669 - std::vector<size_t> xr_indices; 670 - std::vector<Hand3D *> hands_to_use; 671 - 672 - if (filtered_hands.size() == 1) { 673 - if (filtered_hands[0].handedness < 0) { 674 - // Left 675 - xr_indices = {0}; 676 - hands_to_use = {&filtered_hands[0]}; 677 - } else { 678 - xr_indices = {1}; 679 - hands_to_use = {&filtered_hands[0]}; 680 - } 681 - } else { 682 - // filtered_hands better be two for now. 683 - if (filtered_hands[0].handedness < filtered_hands[1].handedness) { 684 - xr_indices = {0, 1}; 685 - hands_to_use = {&filtered_hands[0], &filtered_hands[1]}; 686 - } else { 687 - xr_indices = {1, 0}; 688 - hands_to_use = {&filtered_hands[0], &filtered_hands[1]}; 689 - } 690 - } 691 - 692 - struct xrt_hand_joint_set final_hands_ordered_by_handedness[2]; 693 - memset(&final_hands_ordered_by_handedness[0], 0, sizeof(xrt_hand_joint_set)); 694 - memset(&final_hands_ordered_by_handedness[1], 0, sizeof(xrt_hand_joint_set)); 695 - final_hands_ordered_by_handedness[0].is_active = false; 696 - final_hands_ordered_by_handedness[1].is_active = false; 697 - 698 - for (size_t i = 0; (i < xr_indices.size()); i++) { 699 - Hand3D *hand = hands_to_use[i]; 700 - 701 - struct xrt_hand_joint_set *put_in_set = &final_hands_ordered_by_handedness[xr_indices[i]]; 702 - 703 - xrt_vec3 wrist = hand->kps[0]; 704 - 705 - xrt_vec3 index_prox = hand->kps[5]; 706 - xrt_vec3 middle_prox = hand->kps[9]; 707 - xrt_vec3 ring_prox = hand->kps[13]; 708 - xrt_vec3 pinky_prox = hand->kps[17]; 709 - 710 - xrt_vec3 middle_to_index = m_vec3_sub(index_prox, middle_prox); 711 - xrt_vec3 middle_to_ring = m_vec3_sub(ring_prox, middle_prox); 712 - xrt_vec3 middle_to_pinky = m_vec3_sub(pinky_prox, middle_prox); 713 - 714 - xrt_vec3 three_fourths_down_middle_mcp = 715 - m_vec3_add(m_vec3_mul_scalar(wrist, 3.0f / 4.0f), m_vec3_mul_scalar(middle_prox, 1.0f / 4.0f)); 716 - 717 - xrt_vec3 middle_metacarpal = three_fourths_down_middle_mcp; 718 - 719 - float s = 0.6f; 720 - 721 - xrt_vec3 index_metacarpal = middle_metacarpal + m_vec3_mul_scalar(middle_to_index, s); 722 - xrt_vec3 ring_metacarpal = middle_metacarpal + m_vec3_mul_scalar(middle_to_ring, s); 723 - xrt_vec3 pinky_metacarpal = middle_metacarpal + m_vec3_mul_scalar(middle_to_pinky, s); 724 - 725 - float palm_ness = 0.33; 726 - xrt_vec3 palm = 727 - m_vec3_add(m_vec3_mul_scalar(wrist, palm_ness), m_vec3_mul_scalar(middle_prox, (1.0f - palm_ness))); 728 - 729 - 730 - 731 - htProcessJoint(htd, palm, put_in_set, XRT_HAND_JOINT_PALM); 732 - 733 - htProcessJoint(htd, hand->kps[0], put_in_set, XRT_HAND_JOINT_WRIST); 734 - htProcessJoint(htd, hand->kps[1], put_in_set, XRT_HAND_JOINT_THUMB_METACARPAL); 735 - htProcessJoint(htd, hand->kps[2], put_in_set, XRT_HAND_JOINT_THUMB_PROXIMAL); 736 - htProcessJoint(htd, hand->kps[3], put_in_set, XRT_HAND_JOINT_THUMB_DISTAL); 737 - htProcessJoint(htd, hand->kps[4], put_in_set, XRT_HAND_JOINT_THUMB_TIP); 738 - 739 - htProcessJoint(htd, index_metacarpal, put_in_set, XRT_HAND_JOINT_INDEX_METACARPAL); 740 - htProcessJoint(htd, hand->kps[5], put_in_set, XRT_HAND_JOINT_INDEX_PROXIMAL); 741 - htProcessJoint(htd, hand->kps[6], put_in_set, XRT_HAND_JOINT_INDEX_INTERMEDIATE); 742 - htProcessJoint(htd, hand->kps[7], put_in_set, XRT_HAND_JOINT_INDEX_DISTAL); 743 - htProcessJoint(htd, hand->kps[8], put_in_set, XRT_HAND_JOINT_INDEX_TIP); 744 - 745 - htProcessJoint(htd, middle_metacarpal, put_in_set, XRT_HAND_JOINT_MIDDLE_METACARPAL); 746 - htProcessJoint(htd, hand->kps[9], put_in_set, XRT_HAND_JOINT_MIDDLE_PROXIMAL); 747 - htProcessJoint(htd, hand->kps[10], put_in_set, XRT_HAND_JOINT_MIDDLE_INTERMEDIATE); 748 - htProcessJoint(htd, hand->kps[11], put_in_set, XRT_HAND_JOINT_MIDDLE_DISTAL); 749 - htProcessJoint(htd, hand->kps[12], put_in_set, XRT_HAND_JOINT_MIDDLE_TIP); 750 - 751 - htProcessJoint(htd, ring_metacarpal, put_in_set, XRT_HAND_JOINT_RING_METACARPAL); 752 - htProcessJoint(htd, hand->kps[13], put_in_set, XRT_HAND_JOINT_RING_PROXIMAL); 753 - htProcessJoint(htd, hand->kps[14], put_in_set, XRT_HAND_JOINT_RING_INTERMEDIATE); 754 - htProcessJoint(htd, hand->kps[15], put_in_set, XRT_HAND_JOINT_RING_DISTAL); 755 - htProcessJoint(htd, hand->kps[16], put_in_set, XRT_HAND_JOINT_RING_TIP); 756 - 757 - htProcessJoint(htd, pinky_metacarpal, put_in_set, XRT_HAND_JOINT_LITTLE_METACARPAL); 758 - htProcessJoint(htd, hand->kps[17], put_in_set, XRT_HAND_JOINT_LITTLE_PROXIMAL); 759 - htProcessJoint(htd, hand->kps[18], put_in_set, XRT_HAND_JOINT_LITTLE_INTERMEDIATE); 760 - htProcessJoint(htd, hand->kps[19], put_in_set, XRT_HAND_JOINT_LITTLE_DISTAL); 761 - htProcessJoint(htd, hand->kps[20], put_in_set, XRT_HAND_JOINT_LITTLE_TIP); 762 - 763 - put_in_set->is_active = true; 764 - math_pose_identity(&put_in_set->hand_pose.pose); 765 - 766 - 767 - put_in_set->hand_pose.pose.orientation = htd->stereo_camera_to_left_camera; 768 - 769 - put_in_set->hand_pose.relation_flags = valid_flags_ht; 770 - 771 - applyJointWidths(put_in_set); 772 - applyJointOrientations(put_in_set, xr_indices[i]); 773 - } 774 - 775 - htExitFrame(htd, false, final_hands_ordered_by_handedness, filtered_hands[0].timestamp); 776 - } 15 + htRunAlgorithm(struct ht_device *htd);
+1 -2
src/xrt/drivers/ht/ht_driver.cpp
··· 39 39 #include "tracking/t_frame_cv_mat_wrapper.hpp" 40 40 #include "tracking/t_calibration_opencv.hpp" 41 41 42 - #include "templates/NaivePermutationSort.hpp" 43 - 44 42 #include "ht_algorithm.hpp" 43 + #include "ht_models.hpp" 45 44 46 45 #include <cjson/cJSON.h> 47 46 #include <opencv2/core/mat.hpp>
+1 -1
src/xrt/drivers/meson.build
··· 87 87 lib_drv_ht = static_library( 88 88 'drv_ht', 89 89 files( 90 + 'ht/ht_algorithm.cpp', 90 91 'ht/ht_driver.cpp', 91 92 'ht/ht_driver.hpp', 92 93 'ht/ht_interface.h', ··· 94 95 'ht/ht_hand_math.cpp', 95 96 'ht/ht_image_math.cpp', 96 97 'ht/ht_nms.cpp', 97 - 'ht/templates/NaivePermutationSort.hpp', 98 98 ), 99 99 include_directories: [xrt_include, cjson_include], 100 100 dependencies: [aux, opencv, onnxruntime, eigen3],