@@ -591,6 +591,62 @@ void ComposeXinvXImpl(const double* X_BA, const double* X_BC, double* X_AC) {
591
591
hn::StoreN (stu_, tag, X_AC + 9 , 3 ); // 3-wide write to stay in bounds
592
592
}
593
593
594
+ /* Re-express SpatialVector via V_A = R_AB * V_B.
595
+
596
+ R_AB is 9 consecutive doubles in column-major order, the vectors are 6
597
+ consecutive elements comprising two independent 3-vectors. It is allowable for
598
+ V_A and V_B to be the same memory.
599
+
600
+ R_AB = abcdefghi
601
+ V_B = xyzrst
602
+ V_A = XYZRST
603
+
604
+ We want to perform two matrix-vector products:
605
+
606
+ V_A R_AB V_B
607
+
608
+ X a d g x
609
+ Y = b e h @ y 15 flops
610
+ Z c f i z
611
+
612
+ R a d g r
613
+ S = b e h @ s 15 flops
614
+ T c f i t
615
+
616
+ We can do this in 6 SIMD instructions. We end up doing 40 flops and throwing
617
+ 10 of them away.
618
+ */
619
+ void ReexpressSpatialVectorImpl (const double * R_AB,
620
+ const double * V_B,
621
+ double * V_A) {
622
+ const hn::FixedTag<double , 4 > tag;
623
+
624
+ const auto abc_ = hn::LoadU (tag, R_AB); // (d is loaded but unused)
625
+ const auto def_ = hn::LoadU (tag, R_AB + 3 ); // (g is loaded but unused)
626
+ const auto ghi_ = hn::LoadN (tag, R_AB + 6 , 3 );
627
+
628
+ const auto xxx_ = hn::Set (tag, V_B[0 ]);
629
+ const auto yyy_ = hn::Set (tag, V_B[1 ]);
630
+ const auto zzz_ = hn::Set (tag, V_B[2 ]);
631
+
632
+ // Vector XYZ: X Y Z _
633
+ auto XYZ_ = hn::Mul (abc_, xxx_); // ax bx cx _
634
+ XYZ_ = hn::MulAdd (def_, yyy_, XYZ_); // +dy +ey +fy _
635
+ XYZ_ = hn::MulAdd (ghi_, zzz_, XYZ_); // +gz +hz +iz _
636
+
637
+ const auto rrr_ = hn::Set (tag, V_B[3 ]);
638
+ const auto sss_ = hn::Set (tag, V_B[4 ]);
639
+ const auto ttt_ = hn::Set (tag, V_B[5 ]);
640
+
641
+ // Vector RST: R S T _
642
+ auto RST_ = hn::Mul (abc_, rrr_); // ar br cr _
643
+ RST_ = hn::MulAdd (def_, sss_, RST_); // +ds +es +fs _
644
+ RST_ = hn::MulAdd (ghi_, ttt_, RST_); // +gt +ht +it _
645
+
646
+ hn::StoreU (XYZ_, tag, V_A); // 4-wide write temporarily overwrites R
647
+ hn::StoreN (RST_, tag, V_A + 3 , 3 ); // 3-wide write to stay in bounds
648
+ }
649
+
594
650
#else // HWY_MAX_BYTES
595
651
596
652
/* The portable versions are always defined. They should be written to maximize
@@ -697,6 +753,20 @@ void ComposeXinvXImpl(const double* X_BA, const double* X_BC, double* X_AC) {
697
753
ComposeXinvXNoAlias (X_BA, X_BC, X_AC_temp);
698
754
std::copy (X_AC_temp, X_AC_temp + 12 , X_AC);
699
755
}
756
+ void ReexpressSpatialVectorImpl (const double * R_AB,
757
+ const double * V_B,
758
+ double * V_A) {
759
+ DRAKE_ASSERT (V_A != nullptr );
760
+ double x, y, z; // Protect from overlap with V_B.
761
+ x = row_x_col (&R_AB[0 ], &V_B[0 ]);
762
+ y = row_x_col (&R_AB[1 ], &V_B[0 ]);
763
+ z = row_x_col (&R_AB[2 ], &V_B[0 ]);
764
+ V_A[0 ] = x; V_A[1 ] = y; V_A[2 ] = z;
765
+ x = row_x_col (&R_AB[0 ], &V_B[3 ]);
766
+ y = row_x_col (&R_AB[1 ], &V_B[3 ]);
767
+ z = row_x_col (&R_AB[2 ], &V_B[3 ]);
768
+ V_A[3 ] = x; V_A[4 ] = y; V_A[5 ] = z;
769
+ }
700
770
701
771
#endif // HWY_MAX_BYTES
702
772
@@ -732,6 +802,10 @@ HWY_EXPORT(ComposeXinvXImpl);
732
802
struct ChooseBestComposeXinvX {
733
803
auto operator ()() { return HWY_DYNAMIC_POINTER (ComposeXinvXImpl); }
734
804
};
805
+ HWY_EXPORT (ReexpressSpatialVectorImpl);
806
+ struct ChooseBestReexpressSpatialVector {
807
+ auto operator ()() { return HWY_DYNAMIC_POINTER (ReexpressSpatialVectorImpl); }
808
+ };
735
809
736
810
// These sugar functions convert C++ types into bare arrays.
737
811
const double * GetRawData (const RotationMatrix<double >& R) {
@@ -750,6 +824,12 @@ double* GetRawData(RigidTransform<double>* X) {
750
824
// the rotation matrix first followed by the translation.
751
825
return const_cast <double *>(X->rotation ().matrix ().data ());
752
826
}
827
+ const double * GetRawData (const Vector6<double >& V) {
828
+ return V.data ();
829
+ }
830
+ double * GetRawData (Vector6<double >* V) {
831
+ return V->data ();
832
+ }
753
833
754
834
} // namespace
755
835
@@ -781,6 +861,13 @@ void ComposeXinvX(const RigidTransform<double>& X_BA,
781
861
GetRawData (X_BA), GetRawData (X_BC), GetRawData (X_AC));
782
862
}
783
863
864
+ void ReexpressSpatialVector (const RotationMatrix<double >& R_AB,
865
+ const Vector6<double >& V_B,
866
+ Vector6<double >* V_A) {
867
+ LateBoundFunction<ChooseBestReexpressSpatialVector>::Call (
868
+ GetRawData (R_AB), GetRawData (V_B), GetRawData (V_A));
869
+ }
870
+
784
871
} // namespace internal
785
872
} // namespace math
786
873
} // namespace drake
0 commit comments