Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tuning suite #2006

Draft
wants to merge 4 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 53 additions & 25 deletions Makefile.in
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,11 @@
# Directory layout, relative to the directory that holds this makefile.
FLINT_DIR:=.
SRC_DIR:=src
BUILD_DIR:=build
TUNE_DIR:=src/tune
# Absolute counterparts of the directories above.  ABS_FLINT_DIR is the
# directory of the makefile currently being parsed with its trailing slash
# removed; the value is wrapped in literal single quotes.
ABS_FLINT_DIR:='$(patsubst %/,%, $(dir $(abspath $(lastword $(MAKEFILE_LIST)))))'
ABS_SRC_DIR:=$(ABS_FLINT_DIR)/$(SRC_DIR)
# This used to be derived from $(SRC_DIR), which made it a duplicate of
# ABS_SRC_DIR rather than the absolute path of the build tree.
ABS_BUILD_DIR:=$(ABS_FLINT_DIR)/$(BUILD_DIR)
ABS_TUNE_DIR:=$(ABS_FLINT_DIR)/$(TUNE_DIR)

FLINT_VERSION:=@FLINT_VERSION_FULL@
FLINT_MAJOR_SO:=@FLINT_MAJOR_SO@
Expand Down Expand Up @@ -81,6 +83,7 @@ CFLAGS:=@CFLAGS@
# Values written as @NAME@ are placeholders; presumably configure substitutes
# them when it generates the final Makefile from this template.
TESTCFLAGS:=@TESTCFLAGS@
# Every compilation driven by this makefile defines BUILDING_FLINT.
CPPFLAGS:=@CPPFLAGS@ -DBUILDING_FLINT
# CPPFLAGS plus -L pointing at the FLINT top-level directory.
CPPFLAGS2:=-L$(FLINT_DIR) $(CPPFLAGS)
# CPPFLAGS2 plus the tuning directory on the include path; used by the rules
# that build the tuning suite.
CPPFLAGS3:=-I$(TUNE_DIR) $(CPPFLAGS2)
LIB_CPPFLAGS:=@LIB_CPPFLAGS@
CXXFLAGS:=@CXXFLAGS@
LIBS:=@LIBS@
Expand Down Expand Up @@ -218,8 +221,15 @@ TEMPLATE_DIRS := \
fq_poly_templates fq_poly_factor_templates \
fq_embed_templates fq_templates

# Subdirectories of $(TUNE_DIR) that contain tuning sources, one per line.
_TUNE_DIRS := \
    ulong_extras

# The same list rooted in the source tree ...
TUNE_DIRS := $(addprefix $(TUNE_DIR)/,$(_TUNE_DIRS))
# ... and mirrored below $(BUILD_DIR)/tune in the build tree.
TUNE_BUILD_DIRS := $(addprefix $(BUILD_DIR)/tune/,$(_TUNE_DIRS))

BUILD_DIRS := \
$(BUILD_DIR) \
$(TUNE_BUILD_DIRS) \
$(patsubst %, $(BUILD_DIR)/%, $(DIRS)) \
$(patsubst %, $(BUILD_DIR)/%/profile, $(DIRS)) \
$(patsubst %, $(BUILD_DIR)/%/test, $(DIRS)) \
Expand Down Expand Up @@ -299,12 +309,12 @@ ifneq ($(WANT_NTL), 0)
interfaces_TEST_SOURCES := $(SRC_DIR)/interfaces/test/t-NTL-interface.cpp
endif

define xxx_TUNE_SOURCES
$(1)_TUNE_SOURCES := $(wildcard $(SRC_DIR)/$(1)/tune/*.c)
define xxx_OLD_TUNE_SOURCES
$(1)_OLD_TUNE_SOURCES := $(wildcard $(SRC_DIR)/$(1)/tune/*.c)
endef
_TUNE_SOURCES := $(wildcard $(SRC_DIR)/tune/*.c)
$(foreach dir, $(DIRS), $(eval $(call xxx_TUNE_SOURCES,$(dir))))
TUNE_SOURCES := $(foreach dir,$(DIRS),$($(dir)_TUNE_SOURCES)) $(_TUNE_SOURCES)
_OLD_TUNE_SOURCES := $(wildcard $(SRC_DIR)/tune/*.c)
$(foreach dir, $(DIRS), $(eval $(call xxx_OLD_TUNE_SOURCES,$(dir))))
OLD_TUNE_SOURCES := $(foreach dir,$(DIRS),$($(dir)_OLD_TUNE_SOURCES)) $(_OLD_TUNE_SOURCES)

EXMP_SOURCES := $(wildcard $(FLINT_DIR)/examples/*.c)

Expand Down Expand Up @@ -359,12 +369,12 @@ interfaces_TESTS := $(BUILD_DIR)/interfaces/test/t-NTL-interface$(EXEEXT)
endif
TESTS := $(_TESTS) $(foreach dir,$(DIRS),$($(dir)_TESTS)) $(interfaces_TESTS)

define xxx_TUNES
$(1)_TUNES := $(patsubst $(SRC_DIR)/%.c,$(BUILD_DIR)/%$(EXEEXT),$($(1)_TUNE_SOURCES))
define xxx_OLD_TUNES
$(1)_OLD_TUNES := $(patsubst $(SRC_DIR)/%.c,$(BUILD_DIR)/%$(EXEEXT),$($(1)_OLD_TUNE_SOURCES))
endef
_TUNES := $(patsubst $(SRC_DIR)/%.c,$(BUILD_DIR)/%$(EXEEXT),$(_TUNE_SOURCES))
$(foreach dir, $(DIRS), $(eval $(call xxx_TUNES,$(dir))))
TUNES := $(foreach dir,$(DIRS),$($(dir)_TUNES)) $(_TUNES)
_OLD_TUNES := $(patsubst $(SRC_DIR)/%.c,$(BUILD_DIR)/%$(EXEEXT),$(_OLD_TUNE_SOURCES))
$(foreach dir, $(DIRS), $(eval $(call xxx_OLD_TUNES,$(dir))))
OLD_TUNES := $(foreach dir,$(DIRS),$($(dir)_OLD_TUNES)) $(_OLD_TUNES)

EXMPS := $(patsubst $(FLINT_DIR)/%.c,$(BUILD_DIR)/%$(EXEEXT),$(EXMP_SOURCES))

Expand Down Expand Up @@ -675,30 +685,20 @@ endif
endif

ifeq ($(SHARED), 0)
$(BUILD_DIR)/tune/%$(EXEEXT): $(SRC_DIR)/tune/%.c $(FLINT_DIR)/$(FLINT_LIB_STATIC) | $(BUILD_DIR)/tune
@echo " CC $(<:$(SRC_DIR)/%=%)"
@$(CC) $(TESTCFLAGS) $(CPPFLAGS2) $< -o $@ $(EXE_LDFLAGS) $(LIBS2) $(DEPFLAGS)
else
$(BUILD_DIR)/tune/%$(EXEEXT): $(SRC_DIR)/tune/%.c | $(FLINT_DIR)/$(FLINT_LIB_FULL) $(BUILD_DIR)/tune
@echo " CC $(<:$(SRC_DIR)/%=%)"
@$(CC) $(TESTCFLAGS) $(CPPFLAGS2) $< -o $@ $(EXE_LDFLAGS) $(LIBS2) $(DEPFLAGS)
endif

ifeq ($(SHARED), 0)
define xxx_TUNES_rule
define xxx_OLD_TUNES_rule
$(BUILD_DIR)/$(1)/tune/%$(EXEEXT): $(SRC_DIR)/$(1)/tune/%.c $(FLINT_DIR)/$(FLINT_LIB_STATIC) | $(BUILD_DIR)/$(1)/tune
@echo " CC $$(<:$(SRC_DIR)/%=%)"
@$(CC) $(TESTCFLAGS) $(CPPFLAGS2) $$< -o $$@ $(EXE_LDFLAGS) $(LIBS2) $$(DEPFLAGS)
endef
else
define xxx_TUNES_rule
define xxx_OLD_TUNES_rule
$(BUILD_DIR)/$(1)/tune/%$(EXEEXT): $(SRC_DIR)/$(1)/tune/%.c | $(FLINT_DIR)/$(FLINT_LIB_FULL) $(BUILD_DIR)/$(1)/tune
@echo " CC $$(<:$(SRC_DIR)/%=%)"
@$(CC) $(TESTCFLAGS) $(CPPFLAGS2) $$< -o $$@ $(EXE_LDFLAGS) $(LIBS2) $$(DEPFLAGS)
endef
endif

$(foreach dir, $(DIRS), $(eval $(call xxx_TUNES_rule,$(dir))))
$(foreach dir, $(DIRS), $(eval $(call xxx_OLD_TUNES_rule,$(dir))))

ifeq ($(SHARED), 0)
$(BUILD_DIR)/examples/%$(EXEEXT): $(FLINT_DIR)/examples/%.c $(FLINT_DIR)/$(FLINT_LIB_STATIC) | $(BUILD_DIR)/examples $(BUILD_DIR)/include
Expand Down Expand Up @@ -834,7 +834,35 @@ endif
# tuning
################################################################################

tune: library $(TUNES)
# Build only the old tuning programs, i.e. those under src/MOD/tune/.
old_tune: library $(OLD_TUNES)

# Main source file of the tuning suite, and every .c file found in the
# directories listed in TUNE_DIRS.
TUNE_SOURCE:=$(TUNE_DIR)/tune.c
TUNE_DEPS_SOURCES:=$(foreach dir,$(TUNE_DIRS),$(wildcard $(dir)/*.c))

# Headers named as explicit prerequisites of the tuning objects and of the
# tuning executable, so editing either one triggers a rebuild.
_TUNE_HEADERS:=tune.h clock.h
TUNE_HEADERS:=$(patsubst %,$(TUNE_DIR)/%,$(_TUNE_HEADERS))

# One object file per tuning source, mirrored under $(BUILD_DIR)/tune/.
TUNE_DEPS_OBJS:=$(patsubst $(TUNE_DIR)/%.c,$(BUILD_DIR)/tune/%.o,$(TUNE_DEPS_SOURCES))

# The executable produced by the `tune` target.
TUNE_EXE:=$(BUILD_DIR)/tuneup$(EXEEXT)

# Compile a single tuning source into its object file.
# NOTE: every directory in TUNE_BUILD_DIRS is an order-only prerequisite of
# every object.  That is broader than needed (an object only requires its own
# output directory), but being order-only it never forces a recompile.
$(BUILD_DIR)/tune/%.o: $(TUNE_DIR)/%.c $(TUNE_HEADERS) | $(TUNE_BUILD_DIRS)
@echo " CC $(<:$(SRC_DIR)/%=%)"
@$(CC) $(CFLAGS) $(CPPFLAGS3) $(LIB_CPPFLAGS) -c $< -o $@ $(DEPFLAGS)

# Compile TUNE_SOURCE and link it with the tuning objects into TUNE_EXE.
# The two branches share the same recipe and differ only in how the library
# is listed: the static library is a normal prerequisite, so a newer library
# relinks the executable, whereas the shared library is order-only and merely
# has to exist.
ifeq ($(SHARED), 0)
$(TUNE_EXE): $(TUNE_SOURCE) $(TUNE_DEPS_OBJS) $(TUNE_HEADERS) $(FLINT_DIR)/$(FLINT_LIB_STATIC) | $(BUILD_DIR)
@echo " CC $(<:$(SRC_DIR)/%=%)"
@$(CC) $(CFLAGS) $(CPPFLAGS3) $(LIB_CPPFLAGS) $(TUNE_DEPS_OBJS) $< -o $@ $(EXE_LDFLAGS) $(LIBS2) $(DEPFLAGS)
else
$(TUNE_EXE): $(TUNE_SOURCE) $(TUNE_DEPS_OBJS) $(TUNE_HEADERS) | $(FLINT_DIR)/$(FLINT_LIB_FULL) $(BUILD_DIR)
@echo " CC $(<:$(SRC_DIR)/%=%)"
@$(CC) $(CFLAGS) $(CPPFLAGS3) $(LIB_CPPFLAGS) $(TUNE_DEPS_OBJS) $< -o $@ $(EXE_LDFLAGS) $(LIBS2) $(DEPFLAGS)
endif

# `make tune` builds the tuning-suite executable.
tune: $(TUNE_EXE)

################################################################################
# valgrind
Expand Down Expand Up @@ -988,5 +1016,5 @@ dist:
# Debugging aid: `make print-FOO` echoes "FOO=" followed by the expanded
# value of the make variable FOO.
print-%: ; @echo "$*=$($*)"

.PHONY: all library shared static examples checkexamples profile tests check tune valgrind clean distclean install uninstall dist %_TEST_RUN %_TEST_RUN_% %_TEST_DGB_RUN_ARGS %_VALGRIND_RUN print-% coverage coverage_html debug
.PHONY: all library shared static examples checkexamples profile tests check tune old_tune valgrind clean distclean install uninstall dist %_TEST_RUN %_TEST_RUN_% %_TEST_DGB_RUN_ARGS %_VALGRIND_RUN print-% coverage coverage_html debug
.PRECIOUS: $(mpn_extras_PIC_S_SOURCES) $(mpn_extras_S_SOURCES)
2 changes: 2 additions & 0 deletions src/limb_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
extern "C" {
#endif

/* Two ulong values, m0 and m1, bundled into a single object. */
typedef struct
{
    ulong m0;
    ulong m1;
}
nn_pair_t;

#define FLINT_MAX_FACTORS_IN_LIMB 15

typedef struct
Expand Down
13 changes: 2 additions & 11 deletions src/mpn_extras.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#endif

#include <gmp.h>
#include "flint-mparam.h"
#include "longlong.h"

#ifdef __cplusplus
Expand Down Expand Up @@ -594,13 +595,7 @@ FLINT_DLL extern const flint_mpn_sqrhigh_normalised_func_t flint_mpn_sqrhigh_nor

#endif

/* FIXME: this tuning is for x86_64_adx with fft_small */
/* NOTE: we assume that the same cutoff is optimal for both mulhigh and mullow */
#define FLINT_MPN_MULHIGH_MULDERS_CUTOFF 50
#define FLINT_MPN_MULHIGH_MUL_CUTOFF 2000
#define FLINT_MPN_MULHIGH_K_TAB_SIZE 2048

FLINT_DLL extern const signed short flint_mpn_mulhigh_k_tab[FLINT_MPN_MULHIGH_K_TAB_SIZE];
FLINT_DLL extern const short flint_mpn_mulhigh_k_tab[];

mp_limb_t flint_mpn_mullow_basecase(mp_ptr res, mp_srcptr u, mp_srcptr v, mp_size_t n);
void _flint_mpn_mullow_n_mulders_recursive(mp_ptr rp, mp_srcptr np, mp_srcptr mp, mp_size_t n);
Expand Down Expand Up @@ -667,10 +662,6 @@ void flint_mpn_mul_or_mulhigh_n(mp_ptr rp, mp_srcptr xp, mp_srcptr yp, mp_size_t
flint_mpn_mul_n(rp, xp, yp, n);
}

#define FLINT_MPN_SQRHIGH_MULDERS_CUTOFF 90
#define FLINT_MPN_SQRHIGH_SQR_CUTOFF 2000
#define FLINT_MPN_SQRHIGH_K_TAB_SIZE 2048

#if FLINT_HAVE_ASSEMBLY_x86_64_adx
mp_limb_t _flint_mpn_sqrhigh_basecase_even(mp_ptr, mp_srcptr, mp_size_t);
mp_limb_t _flint_mpn_sqrhigh_basecase_odd(mp_ptr, mp_srcptr, mp_size_t);
Expand Down
Loading
Loading