The primary limitation of Motion Matching is that its memory usage (and, to some degree, its runtime performance) scales linearly with the amount of animation data provided and the number of features to be matched.
Inspiration
Recent methods have shown that neural-network-based models can be effectively applied to generate realistic motion in a number of difficult cases including navigation over rough terrain [Holden et al. 2017], quadruped motion [Zhang et al. 2018], and interactions with the environment or other characters [Lee et al. 2018; Starke et al. 2019].
Yet such models are often difficult to control, behave unpredictably, take a long time to train, and can produce animation of lower quality than the original training data.
Methods
Basic Motion Matching
A feature vector $\pmb{x}$ describes the features we wish to match at each frame. Its components are listed below, followed by a sketch of how such a vector might be assembled.

$$ \pmb{x} = \lbrace \pmb{t}^t, \pmb{t}^d, \pmb{f}^t, \pmb{\dot{f}}^t, \pmb{\dot{h}}^t \rbrace \in R^{27} $$
- $\pmb{t}^t \in R^6$: the 2D future trajectory positions, projected onto the ground, 20, 40, and 60 frames in the future (at 60 Hz), local to the character.
- $\pmb{t}^d \in R^6$: the future trajectory facing directions 20, 40, and 60 frames in the future, local to the character.
- $\pmb{f}^t \in R^6$: the positions of the two foot joints, local to the character.
- $\pmb{\dot{f}}^t \in R^6$: the velocities of the two foot joints, local to the character.
- $\pmb{\dot{h}}^t \in R^3$: the velocity of the hip joint, local to the character.
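To make the layout concrete, the sketch below assembles this 27-dimensional vector for a single database frame. It is a minimal sketch: the helpers (`trajectory_position_at`, `trajectory_direction_at`, `joint_position`, `joint_velocity`) and the joint indices are hypothetical placeholders, not part of the original source, and all quantities are assumed to already be expressed local to the character.

```cpp
// Hypothetical sketch of assembling the 27-dimensional feature vector
// for frame i, following the layout of x above. All helper functions and
// joint indices are placeholders standing in for database lookups.
void compute_features(slice2d<float> features, const int i)
{
    int offset = 0;

    // t^t: 2D ground-projected trajectory positions 20, 40, 60 frames ahead
    for (int t : {20, 40, 60})
    {
        vec3 p = trajectory_position_at(i, t);
        features(i, offset++) = p.x;
        features(i, offset++) = p.z;
    }

    // t^d: 2D trajectory facing directions 20, 40, 60 frames ahead
    for (int t : {20, 40, 60})
    {
        vec3 d = trajectory_direction_at(i, t);
        features(i, offset++) = d.x;
        features(i, offset++) = d.z;
    }

    // f^t: positions of the two foot joints
    for (int foot : {JOINT_LEFT_FOOT, JOINT_RIGHT_FOOT})
    {
        vec3 p = joint_position(i, foot);
        features(i, offset++) = p.x;
        features(i, offset++) = p.y;
        features(i, offset++) = p.z;
    }

    // f-dot^t: velocities of the two foot joints
    for (int foot : {JOINT_LEFT_FOOT, JOINT_RIGHT_FOOT})
    {
        vec3 v = joint_velocity(i, foot);
        features(i, offset++) = v.x;
        features(i, offset++) = v.y;
        features(i, offset++) = v.z;
    }

    // h-dot^t: velocity of the hip joint
    vec3 hv = joint_velocity(i, JOINT_HIP);
    features(i, offset++) = hv.x;
    features(i, offset++) = hv.y;
    features(i, offset++) = hv.z;
}
```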
```cpp
// Z-score normalization: (x - mu) / std
void normalize_feature(
    slice2d<float> features,
    slice1d<float> features_offset,
    slice1d<float> features_scale,
    const int offset,
    const int size,
    const float weight = 1.0f)
{
    // First compute what is essentially the mean
    // value for each feature dimension
    for (int j = 0; j < size; j++)
    {
        features_offset(offset + j) = 0.0f;
    }

    for (int i = 0; i < features.rows; i++)
    {
        for (int j = 0; j < size; j++)
        {
            features_offset(offset + j) += features(i, offset + j) / features.rows;
        }
    }

    // Now compute the variance of each feature dimension
    array1d<float> vars(size);
    vars.zero();

    for (int i = 0; i < features.rows; i++)
    {
        for (int j = 0; j < size; j++)
        {
            vars(j) += squaref(features(i, offset + j) - features_offset(offset + j)) / features.rows;
        }
    }

    // We compute the overall std of the feature as the average
    // std across all dimensions
    float std = 0.0f;
    for (int j = 0; j < size; j++)
    {
        std += sqrtf(vars(j)) / size;
    }

    // Features with no variation can have zero std which is
    // almost always a bug.
    assert(std > 0.0);

    // The scale of a feature is just the std divided by the weight
    for (int j = 0; j < size; j++)
    {
        features_scale(offset + j) = std / weight;
    }

    // Using the offset and scale we can then normalize the features
    for (int i = 0; i < features.rows; i++)
    {
        for (int j = 0; j < size; j++)
        {
            features(i, offset + j) =
                (features(i, offset + j) - features_offset(offset + j)) /
                features_scale(offset + j);
        }
    }
}
```
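Normalization is then applied block by block, with the per-feature weight controlling how strongly that feature influences the distance metric. A minimal usage sketch, assuming the feature layout above; the weight values are illustrative examples, not taken from the original source.

```cpp
// Normalize each feature block separately. The offsets follow the layout
// of x above; the weights are made-up example values.
normalize_feature(features, features_offset, features_scale,  0, 6, 1.0f);  // t^t
normalize_feature(features, features_offset, features_scale,  6, 6, 1.5f);  // t^d
normalize_feature(features, features_offset, features_scale, 12, 6, 0.75f); // f^t
normalize_feature(features, features_offset, features_scale, 18, 6, 1.0f);  // f-dot^t
normalize_feature(features, features_offset, features_scale, 24, 3, 1.0f);  // h-dot^t
```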
A pose vector $\pmb{y}$ contains all the pose information for a single frame of animation. Its components are listed below; one possible storage layout is sketched after the list.

$$ \pmb{y} = \lbrace \pmb{y}^t, \pmb{y}^r, \pmb{\dot{y}}^t, \pmb{\dot{y}}^r, \pmb{\dot{r}}^t, \pmb{\dot{r}}^r, \pmb{o}^{\ast} \rbrace $$
- $\pmb{y}^t, \pmb{y}^r$: the joint local translations and rotations, local to the character's forward-facing direction.
- $\pmb{\dot{y}}^t, \pmb{\dot{y}}^r$: the joint local translational and rotational velocities, local to the character's forward-facing direction.
- $\pmb{\dot{r}}^t, \pmb{\dot{r}}^r$: the character root translational and rotational velocities, local to the character's forward-facing direction.
- $\pmb{o}^{\ast}$: any additional task-specific outputs, such as foot contact information, the position or trajectory of other characters in the scene, or the future positions of some of the character's joints.
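One possible storage layout for $\pmb{y}$ is sketched below. The struct and field names are assumptions, not taken from the original source; quaternions are assumed for rotations, and foot contacts stand in as an example of $\pmb{o}^{\ast}$.

```cpp
// Hypothetical storage for a single frame's pose vector y, using the
// array1d/vec3/quat types of the surrounding code. Everything is local
// to the character's forward-facing direction.
struct Pose
{
    array1d<vec3> joint_translations;             // y^t
    array1d<quat> joint_rotations;                // y^r
    array1d<vec3> joint_translational_velocities; // y-dot^t
    array1d<vec3> joint_rotational_velocities;    // y-dot^r (angular velocities)
    vec3 root_translational_velocity;             // r-dot^t
    vec3 root_rotational_velocity;                // r-dot^r
    bool contact_left, contact_right;             // o*: e.g. foot contacts
};
```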
At runtime, every $N$ frames, or when the user input changes significantly, a query vector $\pmb{\hat{x}}$ containing the desired feature values is constructed. To control the features corresponding to the future trajectory positions and directions, we use a spring-damper-based system which generates a desired velocity and direction from the current state of the joystick.
```cpp
/*
 * Convert the gamepad stick direction into a desired target velocity in world space,
 * allowing for separate speeds for forward, backward, and sideways movement.
 */
vec3 desired_velocity_update(
    const vec3 gamepadstick_left,
    const float camera_azimuth,
    const quat simulation_rotation,
    const float fwrd_speed,
    const float side_speed,
    const float back_speed)
{
    // Find stick position in world space by rotating using camera azimuth
    vec3 global_stick_direction = quat_mul_vec3(
        quat_from_angle_axis(camera_azimuth, vec3(0, 1, 0)), gamepadstick_left);

    // Find stick position local to current facing direction
    vec3 local_stick_direction = quat_inv_mul_vec3(
        simulation_rotation, global_stick_direction);

    // Scale stick by forward, sideways and backwards speeds
    vec3 local_desired_velocity = local_stick_direction.z > 0.0 ?
        vec3(side_speed, 0.0f, fwrd_speed) * local_stick_direction :
        vec3(side_speed, 0.0f, back_speed) * local_stick_direction;

    // Re-orient into world space
    return quat_mul_vec3(simulation_rotation, local_desired_velocity);
}

quat desired_rotation_update(
    const quat desired_rotation,
    const vec3 gamepadstick_left,
    const vec3 gamepadstick_right,
    const float camera_azimuth,
    const bool desired_strafe,
    const vec3 desired_velocity)
{
    quat desired_rotation_curr = desired_rotation;

    // If strafe is active then the desired direction comes from the right
    // stick as long as that stick is being used, otherwise we assume
    // forward facing
    if (desired_strafe)
    {
        vec3 desired_direction = quat_mul_vec3(
            quat_from_angle_axis(camera_azimuth, vec3(0, 1, 0)), vec3(0, 0, -1));

        if (length(gamepadstick_right) > 0.01f)
        {
            desired_direction = quat_mul_vec3(
                quat_from_angle_axis(camera_azimuth, vec3(0, 1, 0)),
                normalize(gamepadstick_right));
        }

        return quat_from_angle_axis(
            atan2f(desired_direction.x, desired_direction.z), vec3(0, 1, 0));
    }
    // If strafe is not active the desired direction comes from the left
    // stick as long as that stick is being used
    else if (length(gamepadstick_left) > 0.01f)
    {
        vec3 desired_direction = normalize(desired_velocity);
        return quat_from_angle_axis(
            atan2f(desired_direction.x, desired_direction.z), vec3(0, 1, 0));
    }
    // Otherwise the desired direction remains the same
    else
    {
        return desired_rotation_curr;
    }
}
```
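The future trajectory points themselves can then be predicted by easing the character's velocity toward the desired velocity over time. Below is a simplified sketch using a plain exponential decay of velocity toward the target (a stand-in for the spring-damper; `halflife`, the function name, and the assumed `vec3` scalar operators are not from the original source).

```cpp
#include <math.h>

// Simplified sketch: predict the trajectory position and velocity t
// seconds ahead, assuming velocity decays exponentially toward
// desired_velocity with the given halflife. This is the closed-form
// solution of dv/dt = -lambda * (v - v_des), integrated once for position.
void trajectory_predict(
    vec3& future_position,
    vec3& future_velocity,
    const vec3 position,
    const vec3 velocity,
    const vec3 desired_velocity,
    const float halflife,
    const float t)
{
    float lambda = logf(2.0f) / halflife;
    float decay = expf(-lambda * t);

    future_velocity = desired_velocity + (velocity - desired_velocity) * decay;
    future_position = position + desired_velocity * t +
        (velocity - desired_velocity) * ((1.0f - decay) / lambda);
}
```

Evaluating this at $t$ = 20/60, 40/60, and 60/60 seconds yields the three future trajectory positions used in the query vector $\pmb{\hat{x}}$.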
Find the entry in the Matching Database which minimizes the squared Euclidean distance to the query vector:

$$ k^{\ast} = \underset{k}{\text{argmin}} \; \lVert \pmb{\hat{x}} - \pmb{x}_k \rVert^2 $$
```cpp
// The Motion Matching search function essentially consists of comparing
// every feature vector in the database against the query feature vector,
// first checking the query distance to the axis-aligned bounding boxes
// used for the acceleration structure.
void motion_matching_search(
    int& __restrict__ best_index,
    float& __restrict__ best_cost,
    const slice1d<int> range_starts,
    const slice1d<int> range_stops,
    const slice2d<float> features,
    const slice1d<float> features_offset,
    const slice1d<float> features_scale,
    const slice2d<float> bound_sm_min,
    const slice2d<float> bound_sm_max,
    const slice2d<float> bound_lr_min,
    const slice2d<float> bound_lr_max,
    const slice1d<float> query_normalized,
    const float transition_cost,
    const int ignore_range_end,
    const int ignore_surrounding)
{
    int nfeatures = query_normalized.size;
    int nranges = range_starts.size;

    int curr_index = best_index;

    // Find cost for current frame
    if (best_index != -1)
    {
        best_cost = 0.0;
        for (int i = 0; i < nfeatures; i++)
        {
            best_cost += squaref(query_normalized(i) - features(best_index, i));
        }
    }

    float curr_cost = 0.0f;

    // Search rest of database
    for (int r = 0; r < nranges; r++)
    {
        // Exclude end of ranges from search
        int i = range_starts(r);
        int range_end = range_stops(r) - ignore_range_end;

        while (i < range_end)
        {
            // Find index of current and next large box
            int i_lr = i / BOUND_LR_SIZE;
            int i_lr_next = (i_lr + 1) * BOUND_LR_SIZE;

            // Find distance to box
            curr_cost = transition_cost;
            for (int j = 0; j < nfeatures; j++)
            {
                curr_cost += squaref(query_normalized(j) -
                    clampf(query_normalized(j), bound_lr_min(i_lr, j), bound_lr_max(i_lr, j)));

                if (curr_cost >= best_cost)
                {
                    break;
                }
            }

            // If distance is greater than current best jump to next box
            if (curr_cost >= best_cost)
            {
                i = i_lr_next;
                continue;
            }

            // Check against small box
            while (i < i_lr_next && i < range_end)
            {
                // Find index of current and next small box
                int i_sm = i / BOUND_SM_SIZE;
                int i_sm_next = (i_sm + 1) * BOUND_SM_SIZE;

                // Find distance to box
                curr_cost = transition_cost;
                for (int j = 0; j < nfeatures; j++)
                {
                    curr_cost += squaref(query_normalized(j) -
                        clampf(query_normalized(j), bound_sm_min(i_sm, j), bound_sm_max(i_sm, j)));

                    if (curr_cost >= best_cost)
                    {
                        break;
                    }
                }

                // If distance is greater than current best jump to next box
                if (curr_cost >= best_cost)
                {
                    i = i_sm_next;
                    continue;
                }

                // Search inside small box
                while (i < i_sm_next && i < range_end)
                {
                    // Skip surrounding frames
                    if (curr_index != -1 && abs(i - curr_index) < ignore_surrounding)
                    {
                        i++;
                        continue;
                    }

                    // Check against each frame inside small box
                    curr_cost = transition_cost;
                    for (int j = 0; j < nfeatures; j++)
                    {
                        curr_cost += squaref(query_normalized(j) - features(i, j));
                        if (curr_cost >= best_cost)
                        {
                            break;
                        }
                    }

                    // If cost is lower than current best then update best
                    if (curr_cost < best_cost)
                    {
                        best_index = i;
                        best_cost = curr_cost;
                    }

                    i++;
                }
            }
        }
    }
}
```
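The small and large bounding boxes used above can be precomputed once over the normalized features. A minimal sketch, assuming a `features.cols` member and `minf`/`maxf` helpers in the style of the surrounding code, with `FLT_MAX` from `<float.h>`:

```cpp
#include <float.h>

// Build axis-aligned bounding boxes over groups of box_size consecutive
// frames: each box stores, per feature dimension, the min and max value
// over all frames it covers. Called once with BOUND_SM_SIZE and once
// with BOUND_LR_SIZE.
void build_bounds(
    slice2d<float> bound_min,
    slice2d<float> bound_max,
    const slice2d<float> features,
    const int box_size)
{
    int nfeatures = features.cols;

    // Start with empty (inverted) boxes
    for (int b = 0; b < bound_min.rows; b++)
    {
        for (int j = 0; j < nfeatures; j++)
        {
            bound_min(b, j) =  FLT_MAX;
            bound_max(b, j) = -FLT_MAX;
        }
    }

    // Grow each box to contain the frames it covers
    for (int i = 0; i < features.rows; i++)
    {
        int b = i / box_size;
        for (int j = 0; j < nfeatures; j++)
        {
            bound_min(b, j) = minf(bound_min(b, j), features(i, j));
            bound_max(b, j) = maxf(bound_max(b, j), features(i, j));
        }
    }
}
```

Because the distance from the query to a box is a lower bound on the distance to every frame inside it, whole boxes can be skipped as soon as that bound exceeds the current best cost.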
Once found, if the current frame $i$ does not already match the nearest frame $k^{\ast}$, animation playback continues from that point, $i := k^{\ast}$, and a transition is smoothed over using inertialization blending [Bollo 2016, 2017].
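Inertialization avoids evaluating two animations at once: at the transition, the offset between the old and new pose is recorded, then decayed toward zero over time and added on top of the new animation. A minimal scalar sketch, substituting a simple exponential decay with a halflife for the polynomial decay profile of [Bollo 2016, 2017]; in practice this is applied per joint to translations and rotations.

```cpp
#include <math.h>

// Record the discontinuity between the previously playing value and the
// newly selected animation's value at the moment of transition.
float inertialize_transition(const float src_value, const float dst_value)
{
    return src_value - dst_value;
}

// Each frame: decay the stored offset toward zero and apply it on top of
// the newly playing animation, hiding the jump at the transition.
float inertialize_update(
    float& offset,
    const float animation_value,
    const float halflife,
    const float dt)
{
    offset *= expf(-logf(2.0f) * dt / halflife);
    return animation_value + offset;
}
```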