Skip to content

Commit b32e806

Browse files
authored
Merge pull request #4357 from pleroy/AVXSSEBuild
A mechanism for building SSE and AVX binaries separately with Clang
2 parents c938a6f + 41ec51d commit b32e806

File tree

4 files changed

+99
-66
lines changed

4 files changed

+99
-66
lines changed

Makefile

Lines changed: 67 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,20 @@
11
.SECONDEXPANSION:
22
PERCENT := %
33

4-
# detect OS
4+
# Detect OS.
55
UNAME_S := $(shell uname -s)
66
UNAME_M := $(shell uname -m)
77

88
CXX := clang++
99
MSBUILD := msbuild
10-
OSX_DEPLOYMENT_TARGET ?= 13
10+
PRINCIPIA_CLANG_VERSION ?= 20
11+
PRINCIPIA_MACOS_VERSION_MIN ?= 13
12+
PRINCIPIA_TARGET ?= x64
13+
ifneq ($(PRINCIPIA_TARGET),x64_AVX_FMA)
14+
ifneq ($(PRINCIPIA_TARGET),x64)
15+
$(error PRINCIPIA_TARGET must be 'x64_AVX_FMA' or 'x64')
16+
endif
17+
endif
1118

1219
VERSION_TRANSLATION_UNIT := base/version.generated.cc
1320

@@ -36,17 +43,16 @@ PROTO_FILES := $(wildcard */*.proto)
3643
PROTO_TRANSLATION_UNITS := $(PROTO_FILES:.proto=.pb.cc)
3744
PROTO_HEADERS := $(PROTO_FILES:.proto=.pb.h)
3845

39-
DEP_DIR := deps/
40-
41-
OBJ_DIRECTORY := obj/
46+
DEPS_DIRECTORY := deps/
4247

43-
BIN_DIRECTORY := bin/
48+
OBJ_DIRECTORY := obj/$(PRINCIPIA_TARGET)/
49+
BIN_DIRECTORY := bin/$(PRINCIPIA_TARGET)/
4450
TOOLS_BIN := $(BIN_DIRECTORY)tools
4551

4652
GMOCK_TRANSLATION_UNITS := \
47-
$(DEP_DIR)googletest/googlemock/src/gmock-all.cc \
48-
$(DEP_DIR)googletest/googletest/src/gtest-all.cc
49-
GMOCK_MAIN_TRANSLATION_UNIT := $(DEP_DIR)googletest/googlemock/src/gmock_main.cc
53+
$(DEPS_DIRECTORY)googletest/googlemock/src/gmock-all.cc \
54+
$(DEPS_DIRECTORY)googletest/googletest/src/gtest-all.cc
55+
GMOCK_MAIN_TRANSLATION_UNIT := $(DEPS_DIRECTORY)googletest/googlemock/src/gmock_main.cc
5056

5157
GENERATED_PROFILES := \
5258
journal/profiles.generated.h \
@@ -62,47 +68,54 @@ ADAPTER_CONFIGURATION := Release
6268
FINAL_PRODUCTS_DIR := Release/
6369
ADAPTER := $(ADAPTER_BUILD_DIR)$(ADAPTER_CONFIGURATION)/ksp_plugin_adapter.dll
6470

71+
# TODO(phl): Change the OS names once the loader is ready.
6572
ifeq ($(UNAME_S),Linux)
6673
PLUGIN_DIRECTORY := $(FINAL_PRODUCTS_DIR)GameData/Principia/Linux64/
6774
endif
6875
ifeq ($(UNAME_S),Darwin)
6976
PLUGIN_DIRECTORY := $(FINAL_PRODUCTS_DIR)GameData/Principia/MacOS64/
7077
endif
7178

72-
TEST_LIBS := $(DEP_DIR)benchmark/src/libbenchmark.a $(DEP_DIR)protobuf/src/.libs/libprotobuf.a
79+
TEST_LIBS := \
80+
$(DEPS_DIRECTORY)benchmark/src/libbenchmark.a \
81+
$(DEPS_DIRECTORY)protobuf/src/.libs/libprotobuf.a
7382
ABSL_LIBS := \
74-
$(DEP_DIR)abseil-cpp/absl/base/libabsl_*.a \
75-
$(DEP_DIR)abseil-cpp/absl/container/libabsl_*.a \
76-
$(DEP_DIR)abseil-cpp/absl/debugging/libabsl_*.a \
77-
$(DEP_DIR)abseil-cpp/absl/flags/libabsl_*.a \
78-
$(DEP_DIR)abseil-cpp/absl/hash/libabsl_*.a \
79-
$(DEP_DIR)abseil-cpp/absl/numeric/libabsl_*.a \
80-
$(DEP_DIR)abseil-cpp/absl/status/libabsl_*.a \
81-
$(DEP_DIR)abseil-cpp/absl/strings/libabsl_*.a \
82-
$(DEP_DIR)abseil-cpp/absl/synchronization/libabsl_synchronization.a \
83-
$(DEP_DIR)abseil-cpp/absl/time/libabsl_*.a
83+
$(DEPS_DIRECTORY)abseil-cpp/absl/base/libabsl_*.a \
84+
$(DEPS_DIRECTORY)abseil-cpp/absl/container/libabsl_*.a \
85+
$(DEPS_DIRECTORY)abseil-cpp/absl/debugging/libabsl_*.a \
86+
$(DEPS_DIRECTORY)abseil-cpp/absl/flags/libabsl_*.a \
87+
$(DEPS_DIRECTORY)abseil-cpp/absl/hash/libabsl_*.a \
88+
$(DEPS_DIRECTORY)abseil-cpp/absl/numeric/libabsl_*.a \
89+
$(DEPS_DIRECTORY)abseil-cpp/absl/status/libabsl_*.a \
90+
$(DEPS_DIRECTORY)abseil-cpp/absl/strings/libabsl_*.a \
91+
$(DEPS_DIRECTORY)abseil-cpp/absl/synchronization/libabsl_synchronization.a \
92+
$(DEPS_DIRECTORY)abseil-cpp/absl/time/libabsl_*.a
8493
ifeq ($(UNAME_S),Linux)
8594
ABSL_GROUP_LIBS = -Wl,--start-group $(ABSL_LIBS) -Wl,--end-group
8695
else
8796
ABSL_GROUP_LIBS = $(ABSL_LIBS)
8897
endif
89-
LIBS := $(DEP_DIR)protobuf/src/.libs/libprotobuf.a \
90-
$(DEP_DIR)gipfeli/libgipfeli.a \
98+
LIBS := $(DEPS_DIRECTORY)protobuf/src/.libs/libprotobuf.a \
99+
$(DEPS_DIRECTORY)gipfeli/libgipfeli.a \
91100
$(ABSL_GROUP_LIBS) \
92-
$(DEP_DIR)core-math/libcore-math.a \
93-
$(DEP_DIR)zfp/build/lib/libzfp.a \
94-
$(DEP_DIR)glog/.libs/libglog.a -lpthread -lc++abi
101+
$(DEPS_DIRECTORY)core-math/libcore-math.a \
102+
$(DEPS_DIRECTORY)zfp/build/lib/libzfp.a \
103+
$(DEPS_DIRECTORY)glog/.libs/libglog.a -lpthread -lc++abi
95104
TEST_INCLUDES := \
96-
-I$(DEP_DIR)googletest/googlemock/include -I$(DEP_DIR)googletest/googletest/include \
97-
-I$(DEP_DIR)googletest/googlemock/ -I$(DEP_DIR)googletest/googletest/ -I$(DEP_DIR)benchmark/include
98-
INCLUDES := -I. -I$(DEP_DIR)glog/src \
99-
-I$(DEP_DIR)protobuf/src \
100-
-I$(DEP_DIR)gipfeli/include \
101-
-I$(DEP_DIR)abseil-cpp \
102-
-I$(DEP_DIR)core-math/include \
103-
-I$(DEP_DIR)zfp/include \
104-
-I$(DEP_DIR)config/include \
105-
-I$(DEP_DIR)multiprecision/include
105+
-I$(DEPS_DIRECTORY)googletest/googlemock/include \
106+
-I$(DEPS_DIRECTORY)googletest/googletest/include \
107+
-I$(DEPS_DIRECTORY)googletest/googlemock/ \
108+
-I$(DEPS_DIRECTORY)googletest/googletest/ \
109+
-I$(DEPS_DIRECTORY)benchmark/include
110+
INCLUDES := -I. \
111+
-I$(DEPS_DIRECTORY)glog/src \
112+
-I$(DEPS_DIRECTORY)protobuf/src \
113+
-I$(DEPS_DIRECTORY)gipfeli/include \
114+
-I$(DEPS_DIRECTORY)abseil-cpp \
115+
-I$(DEPS_DIRECTORY)core-math/include \
116+
-I$(DEPS_DIRECTORY)zfp/include \
117+
-I$(DEPS_DIRECTORY)config/include \
118+
-I$(DEPS_DIRECTORY)multiprecision/include
106119
SHARED_ARGS := \
107120
-std=c++23 -stdlib=libc++ -O3 -g \
108121
--system-header-prefix=serialization/ \
@@ -133,27 +146,33 @@ SHARED_ARGS := \
133146
-Wno-mathematical-notation-identifier-extension \
134147
-Wno-nested-anon-types \
135148
-Wno-unknown-pragmas \
136-
-DPRINCIPIA_REQUIRES_AVX=0 \
137-
-DPRINCIPIA_REQUIRES_FMA=0 \
138149
-DPROJECT_DIR='std::filesystem::path("$(PROJECT_DIR)")' \
139150
-DSOLUTION_DIR='std::filesystem::path("$(SOLUTION_DIR)")' \
140151
-DTEMP_DIR='std::filesystem::path("/tmp")' \
141152
-DNDEBUG
142153

154+
ifeq ($(PRINCIPIA_TARGET),x64_AVX_FMA)
155+
SHARED_ARGS += \
156+
-DPRINCIPIA_REQUIRES_AVX=1 \
157+
-DPRINCIPIA_REQUIRES_FMA=1 \
158+
-mfma \
159+
-mavx
160+
else
161+
SHARED_ARGS += \
162+
-DPRINCIPIA_REQUIRES_AVX=0 \
163+
-DPRINCIPIA_REQUIRES_FMA=0
164+
endif
165+
143166
ifeq ($(UNAME_S),Linux)
144-
ifeq ($(UNAME_M),x86_64)
145-
SHARED_ARGS += -m64 -msse4.1
146-
else
147-
SHARED_ARGS += -m32
148-
endif
167+
SHARED_ARGS += -m64 -msse4.1
149168
LIBS += \
150169
-lsupc++ \
151170
-lc++
152171
TEST_LIBS += -lsupc++
153172
SHAREDFLAG := -shared
154173
endif
155174
ifeq ($(UNAME_S),Darwin)
156-
LLVM_PATH = $(shell brew --prefix llvm@20)
175+
# Resolve the Homebrew LLVM prefix once, at parse time.  A recursive '='
# assignment would re-run `brew` every time LLVM_PATH is expanded.
LLVM_PATH := $(shell brew --prefix llvm@$(PRINCIPIA_CLANG_VERSION))
157176
INCLUDES += \
158177
-include "base/macos_allocator_replacement.hpp" \
159178
-I$(LLVM_PATH)/include
@@ -163,7 +182,7 @@ ifeq ($(UNAME_S),Darwin)
163182
-L$(LLVM_PATH)/lib/unwind \
164183
-lunwind
165184
SHARED_ARGS += \
166-
-mmacosx-version-min=$(OSX_DEPLOYMENT_TARGET) \
185+
-mmacosx-version-min=$(PRINCIPIA_MACOS_VERSION_MIN) \
167186
-arch x86_64 \
168187
-D_LIBCPP_STD_VER=20 \
169188
-D_LIBCPP_NO_EXCEPTIONS
@@ -220,7 +239,8 @@ $(VERSION_TRANSLATION_UNIT): .git
220239

221240
# We don't do dependency resolution on the protos; we compile them all at once.
222241
$(PROTO_HEADERS) $(PROTO_TRANSLATION_UNITS): $(PROTO_FILES)
223-
$(DEP_DIR)/protobuf/src/protoc -I $(DEP_DIR)/protobuf/src/ -I . $^ --cpp_out=.
242+
# DEPS_DIRECTORY already ends in '/', so no extra separator is added here.
$(DEPS_DIRECTORY)protobuf/src/protoc \
243+
-I $(DEPS_DIRECTORY)protobuf/src/ -I . $^ --cpp_out=.
224244

225245
$(GENERATED_PROFILES) : $(TOOLS_BIN)
226246
$^ generate_profiles
@@ -303,8 +323,8 @@ $(KSP_PLUGIN) : $(PROTO_OBJECTS) $(PLUGIN_OBJECTS) $(JOURNAL_LIB_OBJECTS) $(BASE
303323

304324
TEST_BINS := $(addprefix $(BIN_DIRECTORY), $(TEST_TRANSLATION_UNITS:.cpp=))
305325
PACKAGE_TEST_BINS := $(addprefix $(BIN_DIRECTORY), $(addsuffix test, $(sort $(dir $(TEST_TRANSLATION_UNITS)))))
306-
PLUGIN_DEPENDENT_TEST_BINS := $(filter bin/ksp_plugin_test/% bin/journal/%, $(TEST_BINS))
307-
PLUGIN_DEPENDENT_PACKAGE_TEST_BINS := $(filter bin/ksp_plugin_test/% bin/journal/%, $(PACKAGE_TEST_BINS))
326+
PLUGIN_DEPENDENT_TEST_BINS := $(filter $(BIN_DIRECTORY)ksp_plugin_test/% $(BIN_DIRECTORY)journal/%, $(TEST_BINS))
327+
PLUGIN_DEPENDENT_PACKAGE_TEST_BINS := $(filter $(BIN_DIRECTORY)ksp_plugin_test/% $(BIN_DIRECTORY)journal/%, $(PACKAGE_TEST_BINS))
308328
PLUGIN_INDEPENDENT_TEST_BINS := $(filter-out $(PLUGIN_DEPENDENT_TEST_BINS), $(TEST_BINS))
309329
PLUGIN_INDEPENDENT_PACKAGE_TEST_BINS := $(filter-out $(PLUGIN_DEPENDENT_PACKAGE_TEST_BINS), $(PACKAGE_TEST_BINS))
310330
PRINCIPIA_TEST_BIN := $(BIN_DIRECTORY)test

astronomy/orbit_analysis_test.cpp

Lines changed: 29 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -371,11 +371,13 @@ TEST_F(OrbitAnalysisTest, 北斗MEO) {
371371
EXPECT_THAT(elements.mean_inclination_interval().midpoint(),
372372
IsNear(55.10_(1) * Degree));
373373
EXPECT_THAT(elements.mean_eccentricity_interval().midpoint(),
374-
AnyOf(IsNear(0.000554_(1)), // Windows, Ubuntu.
374+
AnyOf(IsNear(0.000554_(1)), // Windows, Ubuntu SSE.
375+
IsNear(0.000553_(1)), // Ubuntu AVX.
375376
IsNear(0.000550_(1)))); // macOS.
376377
EXPECT_THAT(elements.mean_argument_of_periapsis_interval().midpoint(),
377378
AnyOf(IsNear(0.7875_(1) * Degree), // Windows.
378-
IsNear(1.349_(1) * Degree), // Ubuntu.
379+
IsNear(1.349_(1) * Degree), // Ubuntu SSE.
380+
IsNear(1.456_(1) * Degree), // Ubuntu AVX.
379381
IsNear(1.011_(1) * Degree))); // macOS.
380382
}
381383

@@ -412,19 +414,22 @@ TEST_F(OrbitAnalysisTest, GalileoNominalSlot) {
412414
AbsoluteErrorFrom(
413415
nominal_anomalistic_mean_motion,
414416
AnyOf(IsNear(0.46_(1) * Degree / Day), // Windows.
415-
IsNear(0.65_(1) * Degree / Day), // Ubuntu.
417+
IsNear(0.65_(1) * Degree / Day), // Ubuntu SSE.
418+
IsNear(0.39_(1) * Degree / Day), // Ubuntu AVX.
416419
IsNear(0.63_(1) * Degree / Day))), // macOS.
417420
RelativeErrorFrom(
418421
nominal_anomalistic_mean_motion,
419422
AnyOf(IsNear(0.00075_(1)), // Windows.
420-
IsNear(0.00106_(1)))))); // Ubuntu, macOS.
423+
IsNear(0.00064_(1)), // Ubuntu AVX.
424+
IsNear(0.00106_(1)))))); // Ubuntu SSE, macOS.
421425

422426
EXPECT_THAT(elements.mean_semimajor_axis_interval().midpoint(),
423427
AbsoluteErrorFrom(29'599.8 * Kilo(Metre),
424428
IsNear(0.33_(1) * Kilo(Metre))));
425429
EXPECT_THAT(elements.mean_semimajor_axis_interval().measure(),
426430
AnyOf(IsNear(0.087_(1) * Kilo(Metre)), // Windows.
427-
IsNear(0.089_(1) * Kilo(Metre)), // Ubuntu.
431+
IsNear(0.089_(1) * Kilo(Metre)), // Ubuntu SSE.
432+
IsNear(0.091_(1) * Kilo(Metre)), // Ubuntu AVX.
428433
IsNear(0.092_(1) * Kilo(Metre)))); // macOS.
429434

430435
// Nominal: 0.0.
@@ -433,7 +438,8 @@ TEST_F(OrbitAnalysisTest, GalileoNominalSlot) {
433438
IsNear(0.000'18_(1)))); // macOS.
434439
EXPECT_THAT(elements.mean_eccentricity_interval().measure(),
435440
AnyOf(IsNear(0.000'020_(1)), // Windows.
436-
IsNear(0.000'017_(1)))); // Ubuntu, macOS.
441+
IsNear(0.000'023_(1)), // Ubuntu AVX.
442+
IsNear(0.000'017_(1)))); // Ubuntu SSE, macOS.
437443

438444
EXPECT_THAT(elements.mean_inclination_interval().midpoint(),
439445
AbsoluteErrorFrom(56.0 * Degree, IsNear(0.61_(1) * Degree)));
@@ -454,7 +460,8 @@ TEST_F(OrbitAnalysisTest, GalileoNominalSlot) {
454460
IsNear(89_(1) * Degree))); // Ubuntu.
455461
EXPECT_THAT(elements.mean_argument_of_periapsis_interval().measure(),
456462
AnyOf(IsNear(5.2_(1) * Degree), // Windows.
457-
IsNear(7.5_(1) * Degree), // Ubuntu.
463+
IsNear(7.5_(1) * Degree), // Ubuntu SSE.
464+
IsNear(5.6_(1) * Degree), // Ubuntu AVX.
458465
IsNear(6.2_(1) * Degree))); // macOS.
459466

460467
// Since the reference parameters conventionally set ω = 0, the given mean
@@ -502,12 +509,13 @@ TEST_F(OrbitAnalysisTest, GalileoExtendedSlot) {
502509
RelativeErrorFrom(nominal_anomalistic_mean_motion,
503510
IsNear(1.7e-06_(1)))));
504511

505-
EXPECT_THAT(
506-
elements.mean_semimajor_axis_interval().midpoint(),
507-
AbsoluteErrorFrom(27'977.6 * Kilo(Metre),
508-
AnyOf(IsNear(0.0534_(1) * Kilo(Metre)), // Windows.
509-
IsNear(0.0490_(1) * Kilo(Metre)), // Ubuntu.
510-
IsNear(0.0503_(1) * Kilo(Metre))))); // macOS.
512+
EXPECT_THAT(elements.mean_semimajor_axis_interval().midpoint(),
513+
AbsoluteErrorFrom(
514+
27'977.6 * Kilo(Metre),
515+
AnyOf(IsNear(0.0534_(1) * Kilo(Metre)), // Windows.
516+
IsNear(0.0490_(1) * Kilo(Metre)), // Ubuntu SSE.
517+
IsNear(0.0586_(1) * Kilo(Metre)), // Ubuntu AVX.
518+
IsNear(0.0503_(1) * Kilo(Metre))))); // macOS.
511519
EXPECT_THAT(elements.mean_semimajor_axis_interval().measure(),
512520
AnyOf(IsNear(00'000.101_(1) * Kilo(Metre)), // Windows.
513521
IsNear(00'000.105_(1) * Kilo(Metre)), // Ubuntu.
@@ -607,7 +615,8 @@ TEST_F(OrbitAnalysisTest, TOPEXPoséidon) {
607615
elements.mean_semimajor_axis_interval().midpoint(),
608616
DifferenceFrom(7714.42938 * Kilo(Metre),
609617
AnyOf(IsNear(2.73_(1) * Metre), // Windows.
610-
IsNear(2.15_(1) * Metre), // Ubuntu.
618+
IsNear(2.15_(1) * Metre), // Ubuntu SSE.
619+
IsNear(2.27_(1) * Metre), // Ubuntu AVX.
611620
IsNear(1.91_(1) * Metre)))); // macOS.
612621
// Reference inclination from the legend of figure 9 of [BSFL98]; that
613622
// value is given as 66.040° in table 1 of [BSFL98], 66.039° in [BS96], and
@@ -644,11 +653,13 @@ TEST_F(OrbitAnalysisTest, TOPEXPoséidon) {
644653
EXPECT_THAT(elements.mean_argument_of_periapsis_interval(),
645654
AllOf(Field(&Interval<Angle>::min,
646655
AnyOf(IsNear(74.7_(1) * Degree), // Windows.
647-
IsNear(75.1_(1) * Degree), // Ubuntu.
656+
IsNear(75.1_(1) * Degree), // Ubuntu SSE.
657+
IsNear(74.5_(1) * Degree), // Ubuntu AVX.
648658
IsNear(75.0_(1) * Degree))), // macOS.
649659
Field(&Interval<Angle>::max,
650660
AnyOf(IsNear(99.0_(1) * Degree), // Windows.
651-
IsNear(98.9_(1) * Degree), // Ubuntu.
661+
IsNear(98.9_(1) * Degree), // Ubuntu SSE.
662+
IsNear(99.9_(1) * Degree), // Ubuntu AVX.
652663
IsNear(98.8_(1) * Degree))))); // macOS.
653664

654665
// Nominal longitude of the equatorial crossing of the first ascending pass
@@ -732,7 +743,8 @@ TEST_F(OrbitAnalysisTest, SPOT5) {
732743
IsNear(0.0012_(1)));
733744
EXPECT_THAT(elements.mean_argument_of_periapsis_interval().midpoint(),
734745
AnyOf(IsNear(89.58_(1) * Degree), // Windows.
735-
IsNear(89.41_(1) * Degree), // Ubuntu.
746+
IsNear(89.41_(1) * Degree), // Ubuntu SSE.
747+
IsNear(89.35_(1) * Degree), // Ubuntu AVX.
736748
IsNear(89.64_(1) * Degree))); // macOS.
737749

738750
// The nominal mean solar times of the nodes are 22:30 ascending, 10:30

physics/ephemeris_test.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -459,8 +459,9 @@ TEST_P(EphemerisTest, EarthProbe) {
459459
// on the use of FMA in polynomial evaluation.
460460
EXPECT_THAT(probe_positions.size(),
461461
AnyOf(Eq(358), // MSVC no FMA/0
462-
Eq(366), // Clang Linux
462+
Eq(366), // Ubuntu SSE.
463463
Eq(373), // MSVC all FMA/0
464+
Eq(379), // Ubuntu AVX.
464465
Eq(406), // MSVC all FMA/1
465466
Eq(420), // MSVC FMA in libm only/1
466467
Eq(421))); // MSVC no FMA/1

principia_make.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ elif [ "${AGENT_OS?}" == "Linux" ]; then
2121
export LD_LIBRARY_PATH="./deps/protobuf/src/.libs:$LD_LIBRARY_PATH"
2222
fi
2323

24-
make test
24+
make -j 4 test
2525
if [ "${AGENT_OS?}" == "Darwin" ]; then
2626
# See https://github.com/actions/virtual-environments/issues/2619#issuecomment-788397841
2727
# for why this is needed.

0 commit comments

Comments
 (0)