1- // File: exp_log.hpp
2- // Purpose: Provide routines for taking bivector/motor exponentials and
3- // logarithms.
4-
51#pragma once
62
7- #include " sse.hpp"
8- #include < cmath>
9-
10- namespace kln
11- {
12- inline namespace detail
13- {
14- // Partition memory layouts
15- // LSB --> MSB
16- // p0: (e0, e1, e2, e3)
17- // p1: (1, e23, e31, e12)
18- // p2: (e0123, e01, e02, e03)
19- // p3: (e123, e032, e013, e021)
20-
21- // a := p1
22- // b := p2
23- // a + b is a general bivector but it is most likely *non-simple* meaning
24- // that it is neither purely real nor purely ideal.
25- // Exponentiates the bivector and returns the motor defined by partitions 1
26- // and 2.
27- KLN_INLINE void KLN_VEC_CALL exp (__m128 const & a,
28- __m128 const & b,
29- __m128& p1_out,
30- __m128& p2_out)
31- {
32- // The exponential map produces a continuous group of rotations about an
33- // axis. We'd *like* to evaluate the exp(a + b) as exp(a)exp(b) but we
34- // cannot do that in general because a and b do not commute (consider
35- // the differences between the Taylor expansion of exp(ab) and
36- // exp(a)exp(b)).
37-
38- // First, we need to decompose the bivector into the sum of two
39- // commutative bivectors (the product of these two parts will be a
40- // scalar multiple of the pseudoscalar; see "Bivector times its ideal
41- // axis and vice versa in demo.klein"). To do this, we compute the
42- // squared norm of the bivector:
43- //
44- // NOTE: a sign flip is introduced since the square of a Euclidean
45- // line is negative
46- //
47- // (a1^2 + a2^2 + a3^2) - 2(a1 b1 + a2 b2 + a3 b3) e0123
48-
49- // Broadcast dot(a, a) ignoring the scalar component to all components
50- // of a2
51- __m128 a2 = _mm_dp_ps (a, a, 0b11101111 );
52- __m128 ab = _mm_dp_ps (a, b, 0b11101111 );
53-
54- // Next, we need the sqrt of that quantity. Since e0123 squares to 0,
55- // this has a closed form solution.
56- //
57- // sqrt(a1^2 + a2^2 + a3^2)
58- // - (a1 b1 + a2 b2 + a3 b3) / sqrt(a1^2 + a2^2 + a3^2) e0123
59- //
60- // (relabeling) = u + vI
61- //
62- // (square the above quantity yourself to quickly verify the claim)
63- // Maximum relative error < 1.5*2e-12
64- __m128 a2_sqrt_rcp = _mm_rsqrt_ps (a2);
65- __m128 u = _mm_rcp_ps (a2_sqrt_rcp);
66- // Don't forget the minus later!
67- __m128 minus_v = _mm_mul_ps (ab, a2_sqrt_rcp);
68-
69- // Last, we need the reciprocal of the norm to compute the normalized
70- // bivector.
71- //
72- // 1 / sqrt(a1^2 + a2^2 + a3^2)
73- // + (a1 b1 + a2 b2 + a3 b3) / (a1^2 + a2^2 + a3^2)^(3/2) e0123
74- //
75- // The original bivector * the inverse norm gives us a normalized
76- // bivector.
77- __m128 norm_real = _mm_mul_ps (a, a2_sqrt_rcp);
78- __m128 norm_ideal = _mm_mul_ps (b, a2_sqrt_rcp);
79- // The real part of the bivector also interacts with the pseudoscalar to
80- // produce a portion of the normalized ideal part
81- // e12 e0123 = -e03, e31 e0123 = -e02, e23 e0123 = -e01
82- // Notice how the products above actually commute
83- norm_ideal = _mm_sub_ps (
84- norm_ideal,
85- _mm_mul_ps (
86- a, _mm_mul_ps (ab, _mm_mul_ps (a2_sqrt_rcp, _mm_rcp_ps (a2)))));
87-
88- // The norm * our normalized bivector is the original bivector (a + b).
89- // Thus, we have:
90- //
91- // (u + vI)n = u n + v n e0123
92- //
93- // Note that n and n e0123 are perpendicular (n e0123 lies on the ideal
94- // plane, and all ideal components of n are extinguished after
95- // polarization). As a result, we can now decompose the exponential.
96- //
97- // e^(u n + v n e0123) = e^(u n) e^(v n e0123) =
98- // (cosu + sinu n) * (1 + v n e0123) =
99- // cosu + sinu n + v n cosu e0123 + v sinu n^2 e0123 =
100- // cosu + sinu n + v n cosu e0123 - v sinu e0123
101- //
102- // where we've used the fact that n is normalized and squares to -1.
103- float uv[2 ];
104- _mm_store_ss (uv, u);
105- // Note the v here corresponds to minus_v
106- _mm_store_ss (uv + 1 , minus_v);
107-
108- float sincosu[2 ];
109- sincosu[0 ] = std::sin (uv[0 ]);
110- sincosu[1 ] = std::cos (uv[0 ]);
111-
112- __m128 sinu = _mm_set1_ps (sincosu[0 ]);
113- p1_out = _mm_add_ps (_mm_set_ss (sincosu[1 ]), _mm_mul_ps (sinu, norm_real));
114-
115- // The second partition has contributions from both the real and ideal
116- // parts.
117- __m128 cosu = _mm_set_ps (sincosu[1 ], sincosu[1 ], sincosu[1 ], 0 .f );
118- __m128 minus_vcosu = _mm_mul_ps (minus_v, cosu);
119- p2_out = _mm_mul_ps (sinu, norm_ideal);
120- p2_out = _mm_add_ps (p2_out, _mm_mul_ps (minus_vcosu, norm_real));
121- float minus_vsinu = uv[1 ] * sincosu[0 ];
122- p2_out = _mm_add_ps (_mm_set_ss (minus_vsinu), p2_out);
123- }
124-
125- KLN_INLINE void KLN_VEC_CALL log (__m128 const & p1,
126- __m128 const & p2,
127- __m128& p1_out,
128- __m128& p2_out)
129- {
130- // The logarithm follows from the derivation of the exponential. Working
131- // backwards, we ended up computing the exponential like so:
132- //
133- // cosu + sinu n + v n cosu e0123 - v sinu e0123 =
134- // (cosu - v sinu e0123) + (sinu + v cosu e0123) n
135- //
136- // where n is the normalized bivector. If we compute the norm, that will
137- // allow us to match it to sinu + vcosu e0123, which will then allow us
138- // to deduce u and v.
139-
140- // The first thing we need to do is extract only the bivector components
141- // from the motor.
142- __m128 bv_mask = _mm_set_ps (1 .f , 1 .f , 1 .f , 0 .f );
143- __m128 a = _mm_mul_ps (bv_mask, p1);
144- __m128 b = _mm_mul_ps (bv_mask, p2);
145-
146- // Next, we need to compute the norm as in the exponential.
147- __m128 a2 = _mm_dp_ps (a, a, 0b11101111 );
148- // TODO: handle case when a2 is 0
149- __m128 ab = _mm_dp_ps (a, b, 0b11101111 );
150- __m128 s = _mm_sqrt_ps (a2);
151- __m128 a2_sqrt_rcp = _mm_rcp_ps (s);
152- __m128 minus_t = _mm_mul_ps (ab, a2_sqrt_rcp);
153- // s + t e0123 is the norm of our bivector.
154-
155- // Store the scalar component
156- float p;
157- _mm_store_ss (&p, p1);
158-
159- // Store the pseudoscalar component
160- float q;
161- _mm_store_ss (&q, p2);
162-
163- float s_scalar;
164- _mm_store_ss (&s_scalar, s);
165- float t_scalar;
166- _mm_store_ss (&t_scalar, minus_t );
167- t_scalar *= -1 .f ;
168- // p = cosu
169- // q = -v sinu
170- // s_scalar = sinu
171- // t_scalar = v cosu
172-
173- bool p_zero = std::abs (p) < 1e-6 ;
174- float u = p_zero ? std::atan2 (-q, t_scalar) : std::atan2 (s_scalar, p);
175- float v = p_zero ? -q / s_scalar : t_scalar / p;
176-
177- // Now, (u + v e0123) * n when exponentiated will give us the motor, so
178- // (u + v e0123) * n is the logarithm. To proceed, we need to compute
179- // the normalized bivector.
180- __m128 norm_real = _mm_mul_ps (a, a2_sqrt_rcp);
181- __m128 norm_ideal = _mm_mul_ps (b, a2_sqrt_rcp);
182- norm_ideal = _mm_sub_ps (
183- norm_ideal,
184- _mm_mul_ps (
185- a, _mm_mul_ps (ab, _mm_mul_ps (a2_sqrt_rcp, _mm_rcp_ps (a2)))));
186-
187- __m128 uvec = _mm_set1_ps (u);
188- p1_out = _mm_mul_ps (uvec, norm_real);
189- p2_out = _mm_mul_ps (uvec, norm_ideal);
190- p2_out = _mm_sub_ps (p2_out, _mm_mul_ps (_mm_set1_ps (v), norm_real));
191- }
192- } // namespace detail
193- } // namespace kln
3+ #include " x86/x86_exp_log.hpp"
0 commit comments