···11+// Copyright 2022, Collabora, Ltd.
22+// SPDX-License-Identifier: BSL-1.0
33+/*!
44+ * @file
55+ * @brief Interface for Levenberg-Marquardt kinematic optimizer
66+ * @author Moses Turner <moses@collabora.com>
77+ * @ingroup tracking
88+ */
99+#pragma once
1010+#include "xrt/xrt_defines.h"
1111+#include "util/u_logging.h"
1212+// #include "lm_defines.hpp"
1313+#include "../kine_common.hpp"
1414+1515+namespace xrt::tracking::hand::mercury::lm {
1616+1717+// Yes, this is a weird in-between-C-and-C++ API. Fight me, I like it this way.
1818+1919+// Opaque struct.
2020+struct KinematicHandLM;
2121+2222+// Constructor
2323+void
2424+optimizer_create(xrt_pose left_in_right,
2525+ bool is_right,
2626+ u_logging_level log_level,
2727+ KinematicHandLM **out_kinematic_hand);
2828+2929+/*!
3030+ * The main tracking code calls this function with some 2D(ish) camera observations of the hand, and this function
3131+ * calculates a good 3D hand pose and writes it to out_viz_hand.
3232+ *
3333+ * @param observation The observation of the hand joints. Warning, this function will mutate the observation
3434+ * unpredictably. Keep a copy of it if you need it after.
3535+ * @param hand_was_untracked_last_frame: If the hand was untracked last frame (it was out of view, obscured, ML models
3636+ * failed, etc.) - if it was, we don't want to enforce temporal consistency because we have no good previous hand state
3737+ * with which to do that.
3838+ * @param optimize_hand_size: Whether or not it's allowed to tweak the hand size - when we're calibrating the user's
3939+ * hand size, we want to do that; afterwards we don't want to waste the compute.
4040+ * @param target_hand_size: The hand size we want it to get close to
4141+ * @param hand_size_err_mul: A multiplier to help determine how close it has to get to that hand size
4242+ * @param[out] out_hand: The xrt_hand_joint_set to output its result to
4343+ * @param[out] out_hand_size: The hand size it ended up at
4444+ * @param[out] out_reprojection_error: The reprojection error it ended up at
4545+ */
4646+4747+void
4848+optimizer_run(KinematicHandLM *hand,
4949+ one_frame_input &observation,
5050+ bool hand_was_untracked_last_frame,
5151+ bool optimize_hand_size,
5252+ float target_hand_size,
5353+ float hand_size_err_mul,
5454+ xrt_hand_joint_set &out_hand,
5555+ float &out_hand_size,
5656+ float &out_reprojection_error);
5757+5858+// Destructor
5959+void
6060+optimizer_destroy(KinematicHandLM **hand);
6161+6262+} // namespace xrt::tracking::hand::mercury::lm
+992
src/xrt/tracking/hand/mercury/kine_lm/lm_main.cpp
···11+// Copyright 2022, Collabora, Ltd.
22+// SPDX-License-Identifier: BSL-1.0
33+/*!
44+ * @file
55+ * @brief Levenberg-Marquardt kinematic optimizer
66+ * @author Moses Turner <moses@collabora.com>
77+ * @author Charlton Rodda <charlton.rodda@collabora.com>
88+ * @ingroup tracking
99+ */
1010+1111+#include "math/m_api.h"
1212+#include "math/m_vec3.h"
1313+#include "os/os_time.h"
1414+#include "util/u_logging.h"
1515+#include "util/u_misc.h"
1616+#include "util/u_trace_marker.h"
1717+1818+#include "tinyceres/tiny_solver.hpp"
1919+#include "tinyceres/tiny_solver_autodiff_function.hpp"
2020+#include "lm_rotations.inl"
2121+2222+#include <iostream>
2323+#include <cmath>
2424+#include <random>
2525+#include "lm_interface.hpp"
2626+#include "lm_optimizer_params_packer.inl"
2727+#include "lm_defines.hpp"
2828+2929+/*
3030+3131+Some notes:
3232+Everything templated with <typename T> is basically just a scalar template, usually taking float or ceres::Jet<float, N>
3333+3434+*/
3535+3636+namespace xrt::tracking::hand::mercury::lm {
3737+3838+template <typename T> struct StereographicObservation
3939+{
4040+ // T obs[kNumNNJoints][2];
4141+ Vec2<T> obs[kNumNNJoints];
4242+};
4343+4444+struct KinematicHandLM
4545+{
4646+ bool first_frame = true;
4747+ bool use_stability = false;
4848+ bool optimize_hand_size = true;
4949+ bool is_right = false;
5050+ int num_observation_views = 0;
5151+ one_frame_input *observation;
5252+5353+ HandScalar target_hand_size;
5454+ HandScalar hand_size_err_mul;
5555+ u_logging_level log_level;
5656+5757+5858+ StereographicObservation<HandScalar> sgo[2];
5959+6060+ Quat<HandScalar> last_frame_pre_rotation;
6161+ OptimizerHand<HandScalar> last_frame;
6262+6363+ // The pose that will take you from the right camera's space to the left camera's space.
6464+ xrt_pose left_in_right;
6565+6666+ // The translation part of the same pose, just easier for Ceres to consume
6767+ Vec3<HandScalar> left_in_right_translation;
6868+6969+ // The orientation part of the same pose, just easier for Ceres to consume
7070+ Quat<HandScalar> left_in_right_orientation;
7171+7272+ Eigen::Matrix<HandScalar, calc_input_size(true), 1> TinyOptimizerInput;
7373+};
7474+7575+template <typename T> struct Translations55
7676+{
7777+ Vec3<T> t[kNumFingers][kNumJointsInFinger];
7878+};
7979+8080+template <typename T> struct Orientations54
8181+{
8282+ Quat<T> q[kNumFingers][kNumJointsInFinger];
8383+};
8484+8585+template <bool optimize_hand_size> struct CostFunctor
8686+{
8787+ KinematicHandLM &parent;
8888+ size_t num_residuals_;
8989+9090+ template <typename T>
9191+ bool
9292+ operator()(const T *const x, T *residual) const;
9393+9494+ CostFunctor(KinematicHandLM &in_last_hand, size_t const &num_residuals)
9595+ : parent(in_last_hand), num_residuals_(num_residuals)
9696+ {}
9797+9898+ size_t
9999+ NumResiduals() const
100100+ {
101101+ return num_residuals_;
102102+ }
103103+};
104104+105105+template <typename T>
106106+static inline void
107107+eval_hand_set_rel_translations(const OptimizerHand<T> &opt, Translations55<T> &rel_translations)
108108+{
109109+ // Basically, we're walking up rel_translations, writing strictly sequentially. Hopefully this is fast.
110110+111111+ // Thumb metacarpal translation.
112112+ rel_translations.t[0][0] = {(T)0.33097, T(-0.1), (T)-0.25968};
113113+114114+ // Comes after the invisible joint.
115115+ rel_translations.t[0][1] = {T(0), T(0), T(0)};
116116+ // prox, distal, tip
117117+ rel_translations.t[0][2] = {T(0), T(0), T(-0.389626)};
118118+ rel_translations.t[0][3] = {T(0), T(0), T(-0.311176)};
119119+ rel_translations.t[0][4] = {T(0), T(0), (T)-0.232195};
120120+121121+ // What's the best place to put this? Here works, but is there somewhere we could put it where it gets accessed
122122+ // faster?
123123+ T finger_joint_lengths[4][4] = {
124124+ {
125125+ T(-0.66),
126126+ T(-0.365719),
127127+ T(-0.231581),
128128+ T(-0.201790),
129129+ },
130130+ {
131131+ T(-0.645),
132132+ T(-0.404486),
133133+ T(-0.247749),
134134+ T(-0.210121),
135135+ },
136136+ {
137137+ T(-0.58),
138138+ T(-0.365639),
139139+ T(-0.225666),
140140+ T(-0.187089),
141141+ },
142142+ {
143143+ T(-0.52),
144144+ T(-0.278197),
145145+ T(-0.176178),
146146+ T(-0.157566),
147147+ },
148148+ };
149149+150150+ // Index metacarpal
151151+ rel_translations.t[1][0] = {T(0.16926), T(0), T(-0.34437)};
152152+ // Middle
153153+ rel_translations.t[2][0] = {T(0.034639), T(0.01), T(-0.35573)};
154154+ // Ring
155155+ rel_translations.t[3][0] = {T(-0.063625), T(0.005), T(-0.34164)};
156156+ // Little
157157+ rel_translations.t[4][0] = {T(-0.1509), T(-0.005), T(-0.30373)};
158158+159159+ // Index to little finger
160160+ for (int finger = 0; finger < 4; finger++) {
161161+ for (int i = 0; i < 4; i++) {
162162+ int bone = i + 1;
163163+ rel_translations.t[finger + 1][bone] = {T(0), T(0), T(finger_joint_lengths[finger][i])};
164164+ }
165165+ }
166166+167167+ // This is done in UnitQuaternionRotateAndScale now.
168168+ // for (int finger = 0; finger < 5; finger++) {
169169+ // for (int bone = 0; bone < 5; bone++) {
170170+ // rel_translations[finger][bone][0] *= opt.hand_size;
171171+ // rel_translations[finger][bone][1] *= opt.hand_size;
172172+ // rel_translations[finger][bone][2] *= opt.hand_size;
173173+ // }
174174+ // }
175175+}
176176+177177+178178+179179+template <typename T>
180180+inline void
181181+eval_hand_set_rel_orientations(const OptimizerHand<T> &opt, Orientations54<T> &rel_orientations)
182182+{
183183+184184+// Thumb MCP hidden orientation
185185+#if 0
186186+ Vec2<T> mcp_root_swing;
187187+188188+ mcp_root_swing.x = rad<T>((T)(-10));
189189+ mcp_root_swing.y = rad<T>((T)(-40));
190190+191191+ T mcp_root_twist = rad<T>((T)(-80));
192192+193193+ SwingTwistToQuaternion(mcp_root_swing, mcp_root_twist, rel_orientations.q[0][0]);
194194+195195+ std::cout << "\n\n\n\nHIDDEN ORIENTATION\n";
196196+ std::cout << std::setprecision(100);
197197+ std::cout << rel_orientations.q[0][0].w << std::endl;
198198+ std::cout << rel_orientations.q[0][0].x << std::endl;
199199+ std::cout << rel_orientations.q[0][0].y << std::endl;
200200+ std::cout << rel_orientations.q[0][0].z << std::endl;
201201+#else
202202+ // This should be exactly equivalent to the above
203203+ rel_orientations.q[0][0].w = T(0.716990172863006591796875);
204204+ rel_orientations.q[0][0].x = T(0.1541481912136077880859375);
205205+ rel_orientations.q[0][0].y = T(-0.31655871868133544921875);
206206+ rel_orientations.q[0][0].z = T(-0.6016261577606201171875);
207207+#endif
208208+209209+ // Thumb MCP orientation
210210+ SwingTwistToQuaternion(opt.thumb.metacarpal.swing, //
211211+ opt.thumb.metacarpal.twist, //
212212+ rel_orientations.q[0][1]);
213213+214214+ // Thumb curls
215215+ CurlToQuaternion(opt.thumb.rots[0], rel_orientations.q[0][2]);
216216+ CurlToQuaternion(opt.thumb.rots[1], rel_orientations.q[0][3]);
217217+218218+ // Finger orientations
219219+ for (int i = 0; i < 4; i++) {
220220+ SwingTwistToQuaternion(opt.finger[i].metacarpal.swing, //
221221+ opt.finger[i].metacarpal.twist, //
222222+ rel_orientations.q[i + 1][0]);
223223+224224+ SwingToQuaternion(opt.finger[i].proximal_swing, //
225225+ rel_orientations.q[i + 1][1]);
226226+227227+ CurlToQuaternion(opt.finger[i].rots[0], rel_orientations.q[i + 1][2]);
228228+ CurlToQuaternion(opt.finger[i].rots[1], rel_orientations.q[i + 1][3]);
229229+ }
230230+}
231231+232232+233233+234234+template <typename T>
235235+void
236236+eval_hand_with_orientation(const OptimizerHand<T> &opt,
237237+ bool is_right,
238238+ Translations55<T> &translations_absolute,
239239+ Orientations54<T> &orientations_absolute)
240240+241241+{
242242+ XRT_TRACE_MARKER();
243243+244244+245245+ Translations55<T> rel_translations; //[kNumFingers][kNumJointsInFinger];
246246+ Orientations54<T> rel_orientations; //[kNumFingers][kNumOrientationsInFinger];
247247+248248+ eval_hand_set_rel_orientations(opt, rel_orientations);
249249+250250+ eval_hand_set_rel_translations(opt, rel_translations);
251251+252252+ Quat<T> orientation_root;
253253+254254+ Quat<T> post_orientation_quat;
255255+256256+ AngleAxisToQuaternion(opt.wrist_post_orientation_aax, post_orientation_quat);
257257+258258+ QuaternionProduct(opt.wrist_pre_orientation_quat, post_orientation_quat, orientation_root);
259259+260260+ // Get each joint's tracking-relative orientation by rotating its parent-relative orientation by the
261261+ // tracking-relative orientation of its parent.
262262+ for (size_t finger = 0; finger < kNumFingers; finger++) {
263263+ Quat<T> *last_orientation = &orientation_root;
264264+ for (size_t bone = 0; bone < kNumOrientationsInFinger; bone++) {
265265+ Quat<T> &out_orientation = orientations_absolute.q[finger][bone];
266266+ Quat<T> &rel_orientation = rel_orientations.q[finger][bone];
267267+268268+ QuaternionProduct(*last_orientation, rel_orientation, out_orientation);
269269+ last_orientation = &out_orientation;
270270+ }
271271+ }
272272+273273+ // Get each joint's tracking-relative position by rotating its parent-relative translation by the
274274+ // tracking-relative orientation of its parent, then adding that to its parent's tracking-relative position.
275275+ for (size_t finger = 0; finger < kNumFingers; finger++) {
276276+ const Vec3<T> *last_translation = &opt.wrist_location;
277277+ const Quat<T> *last_orientation = &orientation_root;
278278+ for (size_t bone = 0; bone < kNumJointsInFinger; bone++) {
279279+ Vec3<T> &out_translation = translations_absolute.t[finger][bone];
280280+ Vec3<T> &rel_translation = rel_translations.t[finger][bone];
281281+282282+ UnitQuaternionRotateAndScalePoint(*last_orientation, rel_translation, opt.hand_size,
283283+ out_translation);
284284+285285+ // If this is a right hand, mirror it.
286286+ if (is_right) {
287287+ out_translation.x *= -1;
288288+ }
289289+290290+ out_translation.x += last_translation->x;
291291+ out_translation.y += last_translation->y;
292292+ out_translation.z += last_translation->z;
293293+294294+ // Next iteration, the orientation to rotate by should be the tracking-relative orientation of
295295+ // this joint.
296296+297297+ // If bone < 4 so we don't go over the end of orientations_absolute. I hope this gets optimized
298298+ // out anyway.
299299+ if (bone < 4) {
300300+ last_orientation = &orientations_absolute.q[finger][bone];
301301+ // Ditto for translation
302302+ last_translation = &out_translation;
303303+ }
304304+ }
305305+ }
306306+}
307307+308308+template <typename T>
309309+void
310310+computeResidualStability_Finger(const OptimizerFinger<T> &finger,
311311+ const OptimizerFinger<HandScalar> &finger_last,
312312+ ResidualHelper<T> &helper)
313313+{
314314+ helper.AddValue((finger.metacarpal.swing.x - finger_last.metacarpal.swing.x) * kStabilityFingerMCPSwing);
315315+316316+ helper.AddValue((finger.metacarpal.swing.y - finger_last.metacarpal.swing.y) * kStabilityFingerMCPSwing);
317317+318318+319319+320320+ helper.AddValue((finger.metacarpal.twist - finger_last.metacarpal.twist) * kStabilityFingerMCPTwist);
321321+322322+323323+324324+ helper.AddValue((finger.proximal_swing.x - finger_last.proximal_swing.x) * kStabilityFingerPXMSwingX);
325325+ helper.AddValue((finger.proximal_swing.y - finger_last.proximal_swing.y) * kStabilityFingerPXMSwingY);
326326+327327+ helper.AddValue((finger.rots[0] - finger_last.rots[0]) * kStabilityCurlRoot);
328328+ helper.AddValue((finger.rots[1] - finger_last.rots[1]) * kStabilityCurlRoot);
329329+330330+#ifdef USE_HAND_PLAUSIBILITY
331331+ if (finger.rots[0] < finger.rots[1]) {
332332+ helper.AddValue((finger.rots[0] - finger.rots[1]) * kPlausibilityCurlSimilarityHard);
333333+ } else {
334334+ helper.AddValue((finger.rots[0] - finger.rots[1]) * kPlausibilityCurlSimilaritySoft);
335335+ }
336336+#endif
337337+}
338338+339339+template <bool optimize_hand_size, typename T>
340340+void
341341+computeResidualStability(const OptimizerHand<T> &hand,
342342+ const OptimizerHand<HandScalar> &last_hand,
343343+ KinematicHandLM &state,
344344+ ResidualHelper<T> &helper)
345345+{
346346+347347+ if constexpr (optimize_hand_size) {
348348+ helper.AddValue( //
349349+ (hand.hand_size - state.target_hand_size) * (T)(kStabilityHandSize * state.hand_size_err_mul));
350350+ }
351351+ if (state.first_frame) {
352352+ return;
353353+ }
354354+355355+ helper.AddValue((last_hand.wrist_location.x - hand.wrist_location.x) * kStabilityRootPosition);
356356+ helper.AddValue((last_hand.wrist_location.y - hand.wrist_location.y) * kStabilityRootPosition);
357357+ helper.AddValue((last_hand.wrist_location.z - hand.wrist_location.z) * kStabilityRootPosition);
358358+359359+360360+ helper.AddValue((hand.wrist_post_orientation_aax.x) * (T)(kStabilityHandOrientation));
361361+ helper.AddValue((hand.wrist_post_orientation_aax.y) * (T)(kStabilityHandOrientation));
362362+ helper.AddValue((hand.wrist_post_orientation_aax.z) * (T)(kStabilityHandOrientation));
363363+364364+365365+366366+ helper.AddValue((hand.thumb.metacarpal.swing.x - last_hand.thumb.metacarpal.swing.x) * kStabilityThumbMCPSwing);
367367+ helper.AddValue((hand.thumb.metacarpal.swing.y - last_hand.thumb.metacarpal.swing.y) * kStabilityThumbMCPSwing);
368368+ helper.AddValue((hand.thumb.metacarpal.twist - last_hand.thumb.metacarpal.twist) * kStabilityThumbMCPTwist);
369369+370370+ helper.AddValue((hand.thumb.rots[0] - last_hand.thumb.rots[0]) * kStabilityCurlRoot);
371371+ helper.AddValue((hand.thumb.rots[1] - last_hand.thumb.rots[1]) * kStabilityCurlRoot);
372372+#ifdef USE_HAND_PLAUSIBILITY
373373+ helper.AddValue((hand.finger[1].proximal_swing.x - hand.finger[2].proximal_swing.x) *
374374+ kPlausibilityProximalSimilarity);
375375+ helper.AddValue((hand.finger[2].proximal_swing.x - hand.finger[3].proximal_swing.x) *
376376+ kPlausibilityProximalSimilarity);
377377+#endif
378378+379379+380380+ for (int finger_idx = 0; finger_idx < 4; finger_idx++) {
381381+ const OptimizerFinger<HandScalar> &finger_last = last_hand.finger[finger_idx];
382382+383383+ const OptimizerFinger<T> &finger = hand.finger[finger_idx];
384384+385385+ computeResidualStability_Finger(finger, finger_last, helper);
386386+ }
387387+}
388388+389389+template <typename T>
390390+inline void
391391+normalize_vector_inplace(Vec3<T> &vector)
392392+{
393393+ T len = (T)(0);
394394+395395+ len += vector.x * vector.x;
396396+ len += vector.y * vector.y;
397397+ len += vector.z * vector.z;
398398+399399+ len = sqrt(len);
400400+401401+ // This is *a* solution ;)
402402+ //!@todo any good template ways to get epsilon for float,double,jet?
403403+ if (len <= FLT_EPSILON) {
404404+ vector.z = T(-1);
405405+ return;
406406+ }
407407+408408+ vector.x /= len;
409409+ vector.y /= len;
410410+ vector.z /= len;
411411+}
412412+413413+// in size: 3, out size: 2
414414+template <typename T>
415415+static inline void
416416+unit_vector_stereographic_projection(const Vec3<T> &in, Vec2<T> &out)
417417+{
418418+ out.x = in.x / ((T)1 - in.z);
419419+ out.y = in.y / ((T)1 - in.z);
420420+}
421421+422422+423423+template <typename T>
424424+static inline void
425425+unit_xrt_vec3_stereographic_projection(const xrt_vec3 in, Vec2<T> &out)
426426+{
427427+ Vec3<T> vec;
428428+ vec.x = (T)(in.x);
429429+ vec.y = (T)(in.y);
430430+ vec.z = (T)(in.z);
431431+432432+ normalize_vector_inplace(vec);
433433+434434+ unit_vector_stereographic_projection(vec, out);
435435+}
436436+437437+template <typename T>
438438+static void
439439+diff(const Vec3<T> &model_joint_pos,
440440+ const Vec3<T> &move_joint_translation,
441441+ const Quat<T> &move_joint_orientation,
442442+ const StereographicObservation<HandScalar> &observation,
443443+ const HandScalar *confidences,
444444+ const HandScalar amount_we_care,
445445+ int &hand_joint_idx,
446446+ ResidualHelper<T> &helper)
447447+{
448448+449449+ Vec3<T> model_joint_dir_rel_camera;
450450+ UnitQuaternionRotatePoint(move_joint_orientation, model_joint_pos, model_joint_dir_rel_camera);
451451+452452+ model_joint_dir_rel_camera.x = model_joint_dir_rel_camera.x + move_joint_translation.x;
453453+ model_joint_dir_rel_camera.y = model_joint_dir_rel_camera.y + move_joint_translation.y;
454454+ model_joint_dir_rel_camera.z = model_joint_dir_rel_camera.z + move_joint_translation.z;
455455+456456+ normalize_vector_inplace(model_joint_dir_rel_camera);
457457+458458+ Vec2<T> stereographic_model_dir;
459459+ unit_vector_stereographic_projection(model_joint_dir_rel_camera, stereographic_model_dir);
460460+461461+462462+ const HandScalar confidence = confidences[hand_joint_idx] * amount_we_care;
463463+ const Vec2<T> &observed_ray_sg = observation.obs[hand_joint_idx];
464464+465465+ helper.AddValue((stereographic_model_dir.x - (T)(observed_ray_sg.x)) * confidence);
466466+ helper.AddValue((stereographic_model_dir.y - (T)(observed_ray_sg.y)) * confidence);
467467+468468+ hand_joint_idx++;
469469+}
470470+471471+472472+473473+template <typename T>
474474+void
475475+CostFunctor_PositionsPart(OptimizerHand<T> &hand, KinematicHandLM &state, ResidualHelper<T> &helper)
476476+{
477477+478478+ Translations55<T> translations_absolute;
479479+ Orientations54<T> orientations_absolute;
480480+481481+ HandScalar we_care_joint[] = {1.3, 0.9, 0.9, 1.3};
482482+ HandScalar we_care_finger[] = {1.0, 1.0, 0.8, 0.8, 0.8};
483483+484484+ eval_hand_with_orientation(hand, state.is_right, translations_absolute, orientations_absolute);
485485+486486+ for (int view = 0; view < 2; view++) {
487487+ if (!state.observation->views[view].active) {
488488+ continue;
489489+ }
490490+ Vec3<T> move_direction;
491491+ Quat<T> move_orientation;
492492+493493+ if (view == 0) {
494494+ move_direction = Vec3<T>::Zero();
495495+ move_orientation = Quat<T>::Identity();
496496+ } else {
497497+ move_direction.x = T(state.left_in_right_translation.x);
498498+ move_direction.y = T(state.left_in_right_translation.y);
499499+ move_direction.z = T(state.left_in_right_translation.z);
500500+501501+ move_orientation.w = T(state.left_in_right_orientation.w);
502502+ move_orientation.x = T(state.left_in_right_orientation.x);
503503+ move_orientation.y = T(state.left_in_right_orientation.y);
504504+ move_orientation.z = T(state.left_in_right_orientation.z);
505505+ }
506506+ int joint_acc_idx = 0;
507507+508508+ HandScalar *confidences = state.observation->views[view].confidences;
509509+510510+ diff<T>(hand.wrist_location, move_direction, move_orientation, state.sgo[view], confidences, 1.5,
511511+ joint_acc_idx, helper);
512512+513513+ for (int finger_idx = 0; finger_idx < 5; finger_idx++) {
514514+ for (int joint_idx = 0; joint_idx < 4; joint_idx++) {
515515+ diff<T>(translations_absolute.t[finger_idx][joint_idx + 1], move_direction,
516516+ move_orientation, state.sgo[view], confidences,
517517+ we_care_finger[finger_idx] * we_care_joint[joint_idx], joint_acc_idx, helper);
518518+ }
519519+ }
520520+ }
521521+}
522522+523523+// template <typename T>
524524+// void CostFunctor_HandSizePart(OptimizerHand<T> &hand, KinematicHandLM &state, T *residual, size_t &out_residual_idx)
525525+// {
526526+527527+// }
528528+529529+template <bool optimize_hand_size>
530530+template <typename T>
531531+bool
532532+CostFunctor<optimize_hand_size>::operator()(const T *const x, T *residual) const
533533+{
534534+ struct KinematicHandLM &state = this->parent;
535535+ OptimizerHand<T> hand = {};
536536+ // ??? should I do the below? probably.
537537+ Quat<T> tmp = this->parent.last_frame_pre_rotation;
538538+ OptimizerHandInit<T>(hand, tmp);
539539+ OptimizerHandUnpackFromVector(x, state.optimize_hand_size, T(state.target_hand_size), hand);
540540+541541+ XRT_MAYBE_UNUSED size_t residual_size =
542542+ calc_residual_size(state.use_stability, optimize_hand_size, state.num_observation_views);
543543+544544+// When you're hacking, you want to set the residuals to always-0 so that any of them you forget to touch keep their 0
545545+// gradient.
546546+// But then later this just becomes a waste.
547547+#if 0
548548+ for (size_t i = 0; i < residual_size; i++) {
549549+ residual[i] = (T)(0);
550550+ }
551551+#endif
552552+553553+ ResidualHelper<T> helper(residual);
554554+555555+ CostFunctor_PositionsPart(hand, state, helper);
556556+ computeResidualStability<optimize_hand_size, T>(hand, state.last_frame, state, helper);
557557+558558+ // Bounds checking - we should have written exactly to the end.
559559+ // U_LOG_E("%zu %zu", helper.out_residual_idx, residual_size);
560560+ assert(helper.out_residual_idx == residual_size);
561561+ // If you're hacking, feel free to turn this off; just remember to not write off the end, and to initialize
562562+ // everything somewhere (maybe change the above to an #if 1? )
563563+564564+ return true;
565565+}
566566+567567+568568+template <typename T>
569569+static inline void
570570+zldtt_ori_right(Quat<T> &orientation, xrt_quat *out)
571571+{
572572+ struct xrt_quat tmp;
573573+ tmp.w = orientation.w;
574574+ tmp.x = orientation.x;
575575+ tmp.y = orientation.y;
576576+ tmp.z = orientation.z;
577577+578578+ xrt_vec3 x = XRT_VEC3_UNIT_X;
579579+ xrt_vec3 z = XRT_VEC3_UNIT_Z;
580580+581581+ math_quat_rotate_vec3(&tmp, &x, &x);
582582+ math_quat_rotate_vec3(&tmp, &z, &z);
583583+584584+ // This is a very squashed change-of-basis from left-handed coordinate systems to right-handed coordinate
585585+ // systems: you multiply everything by (-1 0 0) then negate the X axis.
586586+587587+ x.y *= -1;
588588+ x.z *= -1;
589589+590590+ z.x *= -1;
591591+592592+ math_quat_from_plus_x_z(&x, &z, out);
593593+}
594594+595595+template <typename T>
596596+static inline void
597597+zldtt_ori_left(Quat<T> &orientation, xrt_quat *out)
598598+{
599599+ out->w = orientation.w;
600600+ out->x = orientation.x;
601601+ out->y = orientation.y;
602602+ out->z = orientation.z;
603603+}
604604+605605+template <typename T>
606606+static inline void
607607+zldtt(Vec3<T> &trans, Quat<T> &orientation, bool is_right, xrt_space_relation &out)
608608+{
609609+610610+ out.relation_flags = (enum xrt_space_relation_flags)(
611611+ XRT_SPACE_RELATION_ORIENTATION_VALID_BIT | XRT_SPACE_RELATION_ORIENTATION_TRACKED_BIT |
612612+ XRT_SPACE_RELATION_POSITION_VALID_BIT | XRT_SPACE_RELATION_POSITION_TRACKED_BIT);
613613+ out.pose.position.x = trans.x;
614614+ out.pose.position.y = trans.y;
615615+ out.pose.position.z = trans.z;
616616+ if (is_right) {
617617+ zldtt_ori_right(orientation, &out.pose.orientation);
618618+ } else {
619619+ zldtt_ori_left(orientation, &out.pose.orientation);
620620+ }
621621+}
622622+623623+static void
624624+eval_to_viz_hand(KinematicHandLM &state, xrt_hand_joint_set &out_viz_hand)
625625+{
626626+ XRT_TRACE_MARKER();
627627+628628+ //!@todo It's _probably_ fine to have the bigger size?
629629+ Eigen::Matrix<HandScalar, calc_input_size(true), 1> pose = state.TinyOptimizerInput.cast<HandScalar>();
630630+631631+ OptimizerHand<HandScalar> opt = {};
632632+ OptimizerHandInit(opt, state.last_frame_pre_rotation);
633633+ OptimizerHandUnpackFromVector(pose.data(), state.optimize_hand_size, state.target_hand_size, opt);
634634+635635+ Translations55<HandScalar> translations_absolute;
636636+ Orientations54<HandScalar> orientations_absolute;
637637+ // Vec3<HandScalar> translations_absolute[kNumFingers][kNumJointsInFinger];
638638+ // Quat<HandScalar> orientations_absolute[kNumFingers][kNumOrientationsInFinger];
639639+640640+ eval_hand_with_orientation(opt, state.is_right, translations_absolute, orientations_absolute);
641641+642642+ Quat<HandScalar> post_wrist_orientation;
643643+644644+ AngleAxisToQuaternion(opt.wrist_post_orientation_aax, post_wrist_orientation);
645645+646646+ Quat<HandScalar> pre_wrist_orientation = state.last_frame_pre_rotation;
647647+648648+ // for (int i = 0; i < 4; i++) {
649649+ // pre_wrist_orientation[i] = state.last_frame_pre_rotation[i];
650650+ // }
651651+652652+ Quat<HandScalar> final_wrist_orientation;
653653+654654+ QuaternionProduct(pre_wrist_orientation, post_wrist_orientation, final_wrist_orientation);
655655+656656+ int joint_acc_idx = 0;
657657+658658+ // Palm.
659659+660660+ Vec3<HandScalar> palm_position;
661661+ palm_position.x = (translations_absolute.t[2][0].x + translations_absolute.t[2][1].x) / 2;
662662+ palm_position.y = (translations_absolute.t[2][0].y + translations_absolute.t[2][1].y) / 2;
663663+ palm_position.z = (translations_absolute.t[2][0].z + translations_absolute.t[2][1].z) / 2;
664664+665665+ Quat<HandScalar> &palm_orientation = orientations_absolute.q[2][0];
666666+667667+ zldtt(palm_position, palm_orientation, state.is_right,
668668+ out_viz_hand.values.hand_joint_set_default[joint_acc_idx++].relation);
669669+670670+ // Wrist.
671671+ zldtt(opt.wrist_location, final_wrist_orientation, state.is_right,
672672+ out_viz_hand.values.hand_joint_set_default[joint_acc_idx++].relation);
673673+674674+ for (int finger = 0; finger < 5; finger++) {
675675+ for (int joint = 0; joint < 5; joint++) {
676676+ // This one is necessary
677677+ if (finger == 0 && joint == 0) {
678678+ continue;
679679+ }
680680+ Quat<HandScalar> *orientation;
681681+ if (joint != 4) {
682682+ orientation = &orientations_absolute.q[finger][joint];
683683+ } else {
684684+ orientation = &orientations_absolute.q[finger][joint - 1];
685685+ }
686686+ zldtt(translations_absolute.t[finger][joint], *orientation, state.is_right,
687687+ out_viz_hand.values.hand_joint_set_default[joint_acc_idx++].relation);
688688+ }
689689+ }
690690+ out_viz_hand.is_active = true;
691691+}
692692+693693+//!@todo
694694+// This reprojection error thing is probably the wrong way of doing it.
695695+// I think that the right way is to hack (?) TinySolver such that we get the last set of residuals back, and use those
696696+// somehow. Maybe scale them simply by the spread of the input hand rays? What about handling stereographic projections?
697697+// worth it? The main bit is that we should just use the last set of residuals somehow, calculating all this is a waste
698698+// of cycles when we have something that already should work.
699699+700700+static void
701701+repro_error_make_joint_ray(const xrt_hand_joint_value &joint, const xrt_pose &cam_pose, xrt_vec3 &out_ray)
702702+{
703703+ // by VALUE
704704+ xrt_vec3 position = joint.relation.pose.position;
705705+706706+ math_quat_rotate_vec3(&cam_pose.orientation, &position, &position);
707707+ position = position + cam_pose.position;
708708+709709+ out_ray = m_vec3_normalize(position);
710710+}
711711+712712+static enum xrt_hand_joint
713713+joint_from_21(int finger, int joint)
714714+{
715715+ if (finger > 0) {
716716+ return xrt_hand_joint(2 + (finger * 5) + joint);
717717+ } else {
718718+ return xrt_hand_joint(joint + 3);
719719+ }
720720+}
721721+722722+static inline float
723723+simple_vec3_difference(const xrt_vec3 one, const xrt_vec3 two)
724724+{
725725+ return (1.0 - m_vec3_dot(one, two));
726726+}
727727+728728+float
729729+reprojection_error_2d(KinematicHandLM &state, const one_frame_input &observation, const xrt_hand_joint_set &set)
730730+{
731731+ float final_err = 0;
732732+ int views_looked_at = 0;
733733+ for (int view = 0; view < 2; view++) {
734734+ if (!observation.views[view].active) {
735735+ continue;
736736+ }
737737+ views_looked_at++;
738738+ xrt_pose move_amount;
739739+740740+ if (view == 0) {
741741+ // left camera.
742742+ move_amount = XRT_POSE_IDENTITY;
743743+ } else {
744744+ move_amount = state.left_in_right;
745745+ }
746746+747747+ xrt_vec3 lm_rays[21];
748748+ const xrt_vec3 *obs_rays = observation.views[view].rays;
749749+750750+751751+ int acc_idx = 0;
752752+753753+ repro_error_make_joint_ray(set.values.hand_joint_set_default[XRT_HAND_JOINT_WRIST], move_amount,
754754+ lm_rays[acc_idx++]);
755755+756756+ for (int finger = 0; finger < 5; finger++) {
757757+ for (int joint = 0; joint < 4; joint++) {
758758+ repro_error_make_joint_ray(
759759+ set.values.hand_joint_set_default[joint_from_21(finger, joint)], move_amount,
760760+ lm_rays[acc_idx++]);
761761+ }
762762+ }
763763+764764+ xrt_vec3 obs_center = {};
765765+ xrt_vec3 lm_center = {};
766766+767767+ float err = 0;
768768+ float obs_spread = 0;
769769+ float lm_spread = 0;
770770+771771+ for (int i = 0; i < 21; i++) {
772772+ obs_center += obs_rays[i];
773773+ lm_center += lm_rays[i];
774774+ err += simple_vec3_difference(lm_rays[i], obs_rays[i]);
775775+ }
776776+777777+ math_vec3_scalar_mul(1.0f / 21.0f, &obs_center);
778778+ math_vec3_scalar_mul(1.0f / 21.0f, &lm_center);
779779+780780+ for (int i = 0; i < 21; i++) {
781781+ obs_spread += simple_vec3_difference(obs_center, obs_rays[i]);
782782+ }
783783+ for (int i = 0; i < 21; i++) {
784784+ lm_spread += simple_vec3_difference(lm_center, lm_rays[i]);
785785+ }
786786+787787+788788+789789+ // std::cout << err << std::endl;
790790+ // std::cout << err / (obs_spread + lm_spread) << std::endl;
791791+792792+ // return ;
793793+ final_err += err / (obs_spread + lm_spread);
794794+ }
795795+ return final_err / (float)views_looked_at;
796796+}
797797+798798+template <bool optimize_hand_size>
799799+inline float
800800+opt_run(KinematicHandLM &state, one_frame_input &observation, xrt_hand_joint_set &out_viz_hand)
801801+{
802802+ constexpr size_t input_size = calc_input_size(optimize_hand_size);
803803+804804+ size_t residual_size = calc_residual_size(state.use_stability, optimize_hand_size, state.num_observation_views);
805805+806806+ LM_DEBUG(state, "Running with %zu inputs and %zu residuals, viewed in %d cameras", input_size, residual_size,
807807+ state.num_observation_views);
808808+809809+ CostFunctor<optimize_hand_size> cf(state, residual_size);
810810+811811+ using AutoDiffCostFunctor =
812812+ ceres::TinySolverAutoDiffFunction<CostFunctor<optimize_hand_size>, Eigen::Dynamic, input_size, HandScalar>;
813813+814814+ AutoDiffCostFunctor f(cf);
815815+816816+817817+ // Okay I have no idea if this should be {}-initialized or not. Previous me seems to have thought no, but it
818818+ // works either way.
819819+ ceres::TinySolver<AutoDiffCostFunctor> solver = {};
820820+ solver.options.max_num_iterations = 50;
821821+ // We need to do a parameter sweep for the trust region and see what's fastest.
822822+ // solver.options.initial_trust_region_radius = 1e3;
823823+ solver.options.function_tolerance = 1e-6;
824824+825825+ Eigen::Matrix<HandScalar, input_size, 1> inp = state.TinyOptimizerInput.head<input_size>();
826826+827827+ XRT_MAYBE_UNUSED uint64_t start = os_monotonic_get_ns();
828828+ XRT_MAYBE_UNUSED auto summary = solver.Solve(f, &inp);
829829+ XRT_MAYBE_UNUSED uint64_t end = os_monotonic_get_ns();
830830+831831+ //!@todo Is there a zero-copy way of doing this?
832832+ state.TinyOptimizerInput.head<input_size>() = inp;
833833+834834+ if (state.log_level <= U_LOGGING_DEBUG) {
835835+836836+ uint64_t diff = end - start;
837837+ double time_taken = (double)diff / (double)U_TIME_1MS_IN_NS;
838838+839839+ const char *status;
840840+841841+ switch (summary.status) {
842842+ case 0: {
843843+ status = "GRADIENT_TOO_SMALL";
844844+ } break;
845845+ case 1: {
846846+ status = "RELATIVE_STEP_SIZE_TOO_SMALL";
847847+ } break;
848848+ case 2: {
849849+ status = "COST_TOO_SMALL";
850850+ } break;
851851+ case 3: {
852852+ status = "HIT_MAX_ITERATIONS";
853853+ } break;
854854+ case 4: {
855855+ status = "COST_CHANGE_TOO_SMALL";
856856+ } break;
857857+ }
858858+859859+ LM_DEBUG(state, "Status: %s, num_iterations %d, max_norm %E, gtol %E", status, summary.iterations,
860860+ summary.gradient_max_norm, solver.options.gradient_tolerance);
861861+ LM_DEBUG(state, "Took %f ms", time_taken);
862862+ if (summary.iterations < 3) {
863863+ LM_DEBUG(state, "Suspiciouisly low number of iterations!");
864864+ }
865865+ }
866866+ return 0;
867867+}
868868+869869+870870+void
871871+hand_was_untracked(KinematicHandLM *hand)
872872+{
873873+ hand->first_frame = true;
874874+ hand->last_frame_pre_rotation.w = 1.0;
875875+ hand->last_frame_pre_rotation.x = 0.0;
876876+ hand->last_frame_pre_rotation.y = 0.0;
877877+ hand->last_frame_pre_rotation.z = 0.0;
878878+879879+ OptimizerHandInit(hand->last_frame, hand->last_frame_pre_rotation);
880880+ OptimizerHandPackIntoVector(hand->last_frame, hand->optimize_hand_size, hand->TinyOptimizerInput.data());
881881+}
882882+883883+void
884884+optimizer_run(KinematicHandLM *hand,
885885+ one_frame_input &observation,
886886+ bool hand_was_untracked_last_frame,
887887+ bool optimize_hand_size,
888888+ float target_hand_size,
889889+ float hand_size_err_mul,
890890+ xrt_hand_joint_set &out_viz_hand,
891891+ float &out_hand_size,
892892+ float &out_reprojection_error) // NOLINT(bugprone-easily-swappable-parameters)
893893+{
894894+ KinematicHandLM &state = *hand;
895895+896896+ if (hand_was_untracked_last_frame) {
897897+ hand_was_untracked(hand);
898898+ }
899899+900900+ state.num_observation_views = 0;
901901+ for (int i = 0; i < 2; i++) {
902902+ if (observation.views[i].active) {
903903+ state.num_observation_views++;
904904+ }
905905+ }
906906+907907+ state.optimize_hand_size = optimize_hand_size;
908908+ state.target_hand_size = target_hand_size;
909909+ state.hand_size_err_mul = hand_size_err_mul;
910910+911911+ state.use_stability = !state.first_frame;
912912+913913+ state.observation = &observation;
914914+915915+ for (int i = 0; i < 21; i++) {
916916+ for (int view = 0; view < 2; view++) {
917917+ unit_xrt_vec3_stereographic_projection(observation.views[view].rays[i], state.sgo[view].obs[i]);
918918+ }
919919+ }
920920+921921+922922+ // For now, we have to statically instantiate different versions of the optimizer depending on how many input
923923+ // parameters there are. For now, there are only two cases - either we are optimizing the hand size or we are
924924+ // not optimizing it.
925925+ // !@todo Can we make a magic template that automatically instantiates the right one, and also make it so we can
926926+ // decide to either make the residual size dynamic or static? Currently, it's dynamic, which is easier for us
927927+ // and makes compile times a lot lower, but it probably makes things some amount slower at runtime.
928928+929929+ if (optimize_hand_size) {
930930+ opt_run<true>(state, observation, out_viz_hand);
931931+ } else {
932932+ opt_run<false>(state, observation, out_viz_hand);
933933+ }
934934+935935+936936+937937+ // Postfix - unpack,
938938+ OptimizerHandUnpackFromVector(state.TinyOptimizerInput.data(), state.optimize_hand_size, state.target_hand_size,
939939+ state.last_frame);
940940+941941+942942+943943+ // Squash the orientations
944944+ OptimizerHandSquashRotations(state.last_frame, state.last_frame_pre_rotation);
945945+946946+ // Repack - brings the curl values back into original domain. Look at ModelToLM/LMToModel, we're using sin/asin.
947947+ OptimizerHandPackIntoVector(state.last_frame, hand->optimize_hand_size, state.TinyOptimizerInput.data());
948948+949949+950950+951951+ eval_to_viz_hand(state, out_viz_hand);
952952+953953+ state.first_frame = false;
954954+955955+ out_hand_size = state.last_frame.hand_size;
956956+957957+ out_reprojection_error = reprojection_error_2d(state, observation, out_viz_hand);
958958+}
959959+960960+961961+962962+void
963963+optimizer_create(xrt_pose left_in_right, bool is_right, u_logging_level log_level, KinematicHandLM **out_kinematic_hand)
964964+{
965965+ KinematicHandLM *hand = new KinematicHandLM;
966966+967967+ hand->is_right = is_right;
968968+ hand->left_in_right = left_in_right;
969969+ hand->log_level = log_level;
970970+971971+ hand->left_in_right_translation.x = left_in_right.position.x;
972972+ hand->left_in_right_translation.y = left_in_right.position.y;
973973+ hand->left_in_right_translation.z = left_in_right.position.z;
974974+975975+ hand->left_in_right_orientation.w = left_in_right.orientation.w;
976976+ hand->left_in_right_orientation.x = left_in_right.orientation.x;
977977+ hand->left_in_right_orientation.y = left_in_right.orientation.y;
978978+ hand->left_in_right_orientation.z = left_in_right.orientation.z;
979979+980980+ // Probably unnecessary.
981981+ hand_was_untracked(hand);
982982+983983+ *out_kinematic_hand = hand;
984984+}
985985+986986+void
987987+optimizer_destroy(KinematicHandLM **hand)
988988+{
989989+ delete *hand;
990990+ hand = NULL;
991991+}
992992+} // namespace xrt::tracking::hand::mercury::lm