-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy path0003-Make-assignment-constexpr.patch
305 lines (286 loc) · 16.4 KB
/
0003-Make-assignment-constexpr.patch
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Tyler Veness <[email protected]>
Date: Sun, 12 Jan 2025 21:04:07 -0800
Subject: [PATCH 3/3] Make assignment constexpr
---
Eigen/src/Core/AssignEvaluator.h | 165 +++++++++++--------
Eigen/src/Core/EigenBase.h | 2 +-
Eigen/src/Core/functors/AssignmentFunctors.h | 2 +-
3 files changed, 102 insertions(+), 67 deletions(-)
diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h
index f7f0b238b8ca70bbc9100262479cc1dbebab9979..9c2436afa7fe98692a036e6ef255ed104a5bf388 100644
--- a/Eigen/src/Core/AssignEvaluator.h
+++ b/Eigen/src/Core/AssignEvaluator.h
@@ -263,7 +263,7 @@ struct copy_using_evaluator_innervec_CompleteUnrolling {
DstAlignment = Kernel::AssignmentTraits::DstAlignment
};
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) {
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) {
kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, inner);
enum { NextIndex = Index + unpacket_traits<PacketType>::size };
copy_using_evaluator_innervec_CompleteUnrolling<Kernel, NextIndex, Stop>::run(kernel);
@@ -431,17 +431,25 @@ struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, NoUnrolling> {
template <typename Kernel>
struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, CompleteUnrolling> {
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel& kernel) {
- typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
- typedef typename Kernel::PacketType PacketType;
-
- enum {
- size = DstXprType::SizeAtCompileTime,
- packetSize = unpacket_traits<PacketType>::size,
- alignedSize = (int(size) / packetSize) * packetSize
- };
-
- copy_using_evaluator_linearvec_CompleteUnrolling<Kernel, 0, alignedSize>::run(kernel);
- copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, alignedSize, size>::run(kernel);
+ if (internal::is_constant_evaluated()) {
+ for (Index outer = 0; outer < kernel.outerSize(); ++outer) {
+ for (Index inner = 0; inner < kernel.innerSize(); ++inner) {
+ kernel.assignCoeffByOuterInner(outer, inner);
+ }
+ }
+ } else {
+ typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
+ typedef typename Kernel::PacketType PacketType;
+
+ enum {
+ size = DstXprType::SizeAtCompileTime,
+ packetSize = unpacket_traits<PacketType>::size,
+ alignedSize = (int(size) / packetSize) * packetSize
+ };
+
+ copy_using_evaluator_linearvec_CompleteUnrolling<Kernel, 0, alignedSize>::run(kernel);
+ copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, alignedSize, size>::run(kernel);
+ }
}
};
@@ -465,9 +473,17 @@ struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, NoUnrolling> {
template <typename Kernel>
struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, CompleteUnrolling> {
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) {
- typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
- copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) {
+ if (internal::is_constant_evaluated()) {
+ for (Index outer = 0; outer < kernel.outerSize(); ++outer) {
+ for (Index inner = 0; inner < kernel.innerSize(); ++inner) {
+ kernel.assignCoeffByOuterInner(outer, inner);
+ }
+ }
+ } else {
+ typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
+ copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
+ }
}
};
@@ -498,8 +514,16 @@ struct dense_assignment_loop<Kernel, LinearTraversal, NoUnrolling> {
template <typename Kernel>
struct dense_assignment_loop<Kernel, LinearTraversal, CompleteUnrolling> {
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel& kernel) {
- typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
- copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
+ if (internal::is_constant_evaluated()) {
+ for (Index outer = 0; outer < kernel.outerSize(); ++outer) {
+ for (Index inner = 0; inner < kernel.innerSize(); ++inner) {
+ kernel.assignCoeffByOuterInner(outer, inner);
+ }
+ }
+ } else {
+ typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
+ copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, 0, DstXprType::SizeAtCompileTime>::run(kernel);
+ }
}
};
@@ -510,41 +534,49 @@ struct dense_assignment_loop<Kernel, LinearTraversal, CompleteUnrolling> {
template <typename Kernel>
struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling> {
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel& kernel) {
- typedef typename Kernel::Scalar Scalar;
- typedef typename Kernel::PacketType PacketType;
- enum {
- packetSize = unpacket_traits<PacketType>::size,
- requestedAlignment = int(Kernel::AssignmentTraits::InnerRequiredAlignment),
- alignable =
- packet_traits<Scalar>::AlignedOnScalar || int(Kernel::AssignmentTraits::DstAlignment) >= sizeof(Scalar),
- dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment) >= int(requestedAlignment),
- dstAlignment = alignable ? int(requestedAlignment) : int(Kernel::AssignmentTraits::DstAlignment)
- };
- const Scalar* dst_ptr = kernel.dstDataPtr();
- if ((!bool(dstIsAligned)) && (std::uintptr_t(dst_ptr) % sizeof(Scalar)) > 0) {
- // the pointer is not aligned-on scalar, so alignment is not possible
- return dense_assignment_loop<Kernel, DefaultTraversal, NoUnrolling>::run(kernel);
- }
- const Index packetAlignedMask = packetSize - 1;
- const Index innerSize = kernel.innerSize();
- const Index outerSize = kernel.outerSize();
- const Index alignedStep = alignable ? (packetSize - kernel.outerStride() % packetSize) & packetAlignedMask : 0;
- Index alignedStart =
- ((!alignable) || bool(dstIsAligned)) ? 0 : internal::first_aligned<requestedAlignment>(dst_ptr, innerSize);
-
- for (Index outer = 0; outer < outerSize; ++outer) {
- const Index alignedEnd = alignedStart + ((innerSize - alignedStart) & ~packetAlignedMask);
- // do the non-vectorizable part of the assignment
- for (Index inner = 0; inner < alignedStart; ++inner) kernel.assignCoeffByOuterInner(outer, inner);
-
- // do the vectorizable part of the assignment
- for (Index inner = alignedStart; inner < alignedEnd; inner += packetSize)
- kernel.template assignPacketByOuterInner<dstAlignment, Unaligned, PacketType>(outer, inner);
-
- // do the non-vectorizable part of the assignment
- for (Index inner = alignedEnd; inner < innerSize; ++inner) kernel.assignCoeffByOuterInner(outer, inner);
-
- alignedStart = numext::mini((alignedStart + alignedStep) % packetSize, innerSize);
+ if (internal::is_constant_evaluated()) {
+ for (Index outer = 0; outer < kernel.outerSize(); ++outer) {
+ for (Index inner = 0; inner < kernel.innerSize(); ++inner) {
+ kernel.assignCoeffByOuterInner(outer, inner);
+ }
+ }
+ } else {
+ typedef typename Kernel::Scalar Scalar;
+ typedef typename Kernel::PacketType PacketType;
+ enum {
+ packetSize = unpacket_traits<PacketType>::size,
+ requestedAlignment = int(Kernel::AssignmentTraits::InnerRequiredAlignment),
+ alignable =
+ packet_traits<Scalar>::AlignedOnScalar || int(Kernel::AssignmentTraits::DstAlignment) >= sizeof(Scalar),
+ dstIsAligned = int(Kernel::AssignmentTraits::DstAlignment) >= int(requestedAlignment),
+ dstAlignment = alignable ? int(requestedAlignment) : int(Kernel::AssignmentTraits::DstAlignment)
+ };
+ const Scalar* dst_ptr = kernel.dstDataPtr();
+ if ((!bool(dstIsAligned)) && (std::uintptr_t(dst_ptr) % sizeof(Scalar)) > 0) {
+ // the pointer is not aligned-on scalar, so alignment is not possible
+ return dense_assignment_loop<Kernel, DefaultTraversal, NoUnrolling>::run(kernel);
+ }
+ const Index packetAlignedMask = packetSize - 1;
+ const Index innerSize = kernel.innerSize();
+ const Index outerSize = kernel.outerSize();
+ const Index alignedStep = alignable ? (packetSize - kernel.outerStride() % packetSize) & packetAlignedMask : 0;
+ Index alignedStart =
+ ((!alignable) || bool(dstIsAligned)) ? 0 : internal::first_aligned<requestedAlignment>(dst_ptr, innerSize);
+
+ for (Index outer = 0; outer < outerSize; ++outer) {
+ const Index alignedEnd = alignedStart + ((innerSize - alignedStart) & ~packetAlignedMask);
+ // do the non-vectorizable part of the assignment
+ for (Index inner = 0; inner < alignedStart; ++inner) kernel.assignCoeffByOuterInner(outer, inner);
+
+ // do the vectorizable part of the assignment
+ for (Index inner = alignedStart; inner < alignedEnd; inner += packetSize)
+ kernel.template assignPacketByOuterInner<dstAlignment, Unaligned, PacketType>(outer, inner);
+
+ // do the non-vectorizable part of the assignment
+ for (Index inner = alignedEnd; inner < innerSize; ++inner) kernel.assignCoeffByOuterInner(outer, inner);
+
+ alignedStart = numext::mini((alignedStart + alignedStep) % packetSize, innerSize);
+ }
}
}
};
@@ -594,9 +626,9 @@ class generic_dense_assignment_kernel {
typedef copy_using_evaluator_traits<DstEvaluatorTypeT, SrcEvaluatorTypeT, Functor> AssignmentTraits;
typedef typename AssignmentTraits::PacketType PacketType;
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE generic_dense_assignment_kernel(DstEvaluatorType& dst,
- const SrcEvaluatorType& src,
- const Functor& func, DstXprType& dstExpr)
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE constexpr generic_dense_assignment_kernel(DstEvaluatorType& dst, const SrcEvaluatorType& src,
+ const Functor& func, DstXprType& dstExpr)
: m_dst(dst), m_src(src), m_functor(func), m_dstExpr(dstExpr) {
#ifdef EIGEN_DEBUG_ASSIGN
AssignmentTraits::debug();
@@ -614,7 +646,7 @@ class generic_dense_assignment_kernel {
EIGEN_DEVICE_FUNC const SrcEvaluatorType& srcEvaluator() const EIGEN_NOEXCEPT { return m_src; }
/// Assign src(row,col) to dst(row,col) through the assignment functor.
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index row, Index col) {
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void assignCoeff(Index row, Index col) {
m_functor.assignCoeff(m_dst.coeffRef(row, col), m_src.coeff(row, col));
}
@@ -624,7 +656,7 @@ class generic_dense_assignment_kernel {
}
/// \sa assignCoeff(Index,Index)
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeffByOuterInner(Index outer, Index inner) {
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void assignCoeffByOuterInner(Index outer, Index inner) {
Index row = rowIndexByOuterInner(outer, inner);
Index col = colIndexByOuterInner(outer, inner);
assignCoeff(row, col);
@@ -648,7 +680,7 @@ class generic_dense_assignment_kernel {
assignPacket<StoreMode, LoadMode, Packet>(row, col);
}
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index rowIndexByOuterInner(Index outer, Index inner) {
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr Index rowIndexByOuterInner(Index outer, Index inner) {
typedef typename DstEvaluatorType::ExpressionTraits Traits;
return int(Traits::RowsAtCompileTime) == 1 ? 0
: int(Traits::ColsAtCompileTime) == 1 ? inner
@@ -656,7 +688,7 @@ class generic_dense_assignment_kernel {
: inner;
}
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Index colIndexByOuterInner(Index outer, Index inner) {
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr Index colIndexByOuterInner(Index outer, Index inner) {
typedef typename DstEvaluatorType::ExpressionTraits Traits;
return int(Traits::ColsAtCompileTime) == 1 ? 0
: int(Traits::RowsAtCompileTime) == 1 ? inner
@@ -708,8 +740,8 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void resize_if_allowed(DstXprType& dst, co
}
template <typename DstXprType, typename SrcXprType, typename T1, typename T2>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void resize_if_allowed(DstXprType& dst, const SrcXprType& src,
- const internal::assign_op<T1, T2>& /*func*/) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void resize_if_allowed(DstXprType& dst, const SrcXprType& src,
+ const internal::assign_op<T1, T2>& /*func*/) {
Index dstRows = src.rows();
Index dstCols = src.cols();
if (((dst.rows() != dstRows) || (dst.cols() != dstCols))) dst.resize(dstRows, dstCols);
@@ -790,7 +822,7 @@ struct Assignment;
// not has to bother about these annoying details.
template <typename Dst, typename Src>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_assignment(Dst& dst, const Src& src) {
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_assignment(Dst& dst, const Src& src) {
call_assignment(dst, src, internal::assign_op<typename Dst::Scalar, typename Src::Scalar>());
}
template <typename Dst, typename Src>
@@ -807,7 +839,7 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void call_assignment(
}
template <typename Dst, typename Src, typename Func>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_assignment(
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_assignment(
Dst& dst, const Src& src, const Func& func, std::enable_if_t<!evaluator_assume_aliasing<Src>::value, void*> = 0) {
call_assignment_no_alias(dst, src, func);
}
@@ -891,9 +923,12 @@ EIGEN_DEVICE_FUNC void check_for_aliasing(const Dst& dst, const Src& src);
// both partial specialization+SFINAE without ambiguous specialization
template <typename DstXprType, typename SrcXprType, typename Functor, typename Weak>
struct Assignment<DstXprType, SrcXprType, Functor, Dense2Dense, Weak> {
- EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(DstXprType& dst, const SrcXprType& src, const Functor& func) {
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(DstXprType& dst, const SrcXprType& src,
+ const Functor& func) {
#ifndef EIGEN_NO_DEBUG
- internal::check_for_aliasing(dst, src);
+ if (!internal::is_constant_evaluated()) {
+ internal::check_for_aliasing(dst, src);
+ }
#endif
call_dense_assignment_loop(dst, src, func);
diff --git a/Eigen/src/Core/EigenBase.h b/Eigen/src/Core/EigenBase.h
index 6d167006a094181fa3693b19f6b9daeb6f2afb79..894bfc13b15eb994abd90f100da15de5bd8b22b7 100644
--- a/Eigen/src/Core/EigenBase.h
+++ b/Eigen/src/Core/EigenBase.h
@@ -50,7 +50,7 @@ struct EigenBase {
/** \returns a const reference to the derived object */
EIGEN_DEVICE_FUNC constexpr const Derived& derived() const { return *static_cast<const Derived*>(this); }
- EIGEN_DEVICE_FUNC inline Derived& const_cast_derived() const {
+ EIGEN_DEVICE_FUNC inline constexpr Derived& const_cast_derived() const {
return *static_cast<Derived*>(const_cast<EigenBase*>(this));
}
EIGEN_DEVICE_FUNC inline const Derived& const_derived() const { return *static_cast<const Derived*>(this); }
diff --git a/Eigen/src/Core/functors/AssignmentFunctors.h b/Eigen/src/Core/functors/AssignmentFunctors.h
index 09d1da8ca2bcb41384520f46e2b793ba8b28a798..3687bb20db4dfe1a2f6cf1342b4fcbd8f91f1f68 100644
--- a/Eigen/src/Core/functors/AssignmentFunctors.h
+++ b/Eigen/src/Core/functors/AssignmentFunctors.h
@@ -23,7 +23,7 @@ namespace internal {
*/
template <typename DstScalar, typename SrcScalar>
struct assign_op {
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(DstScalar& a, const SrcScalar& b) const { a = b; }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void assignCoeff(DstScalar& a, const SrcScalar& b) const { a = b; }
template <int Alignment, typename Packet>
EIGEN_STRONG_INLINE void assignPacket(DstScalar* a, const Packet& b) const {