Skip to content

Commit 83de090

Browse files
committed
Start on tuning flint_mpn_mulhigh_n
1 parent b690596 commit 83de090

File tree

4 files changed

+138
-1
lines changed

4 files changed

+138
-1
lines changed

src/tune/mpn_extras/mulhigh_0.c

+25
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
/*
2+
Copyright (C) 2024 Albin Ahlbäck
3+
4+
This file is part of FLINT.
5+
6+
FLINT is free software: you can redistribute it and/or modify it under
7+
the terms of the GNU Lesser General Public License (LGPL) as published
8+
by the Free Software Foundation; either version 3 of the License, or
9+
(at your option) any later version. See <https://www.gnu.org/licenses/>.
10+
*/
11+
12+
#include "flint-mparam.h"
13+
#include "mpn_extras.h"
14+
#include "tune.h"
15+
16+
#undef FLINT_MPN_MULHIGH_K_TAB_SIZE
17+
#undef FLINT_MPN_MULHIGH_K_TAB
18+
#define TUNE_PROGRAM 1
19+
20+
#define FLINT_MPN_MULHIGH_K_TAB_SIZE FLINT_MPN_MULHIGH_K_TAB_MAX_SIZE
21+
#define flint_mpn_mulhigh_k_tab flint_mpn_mulhigh_k_tab_0
22+
#define _flint_mpn_mulhigh_n_mulders_recursive _flint_mpn_mulhigh_n_mulders_recursive_0
23+
#define _flint_mpn_mulhigh_n_mulders _flint_mpn_mulhigh_n_mulders_0
24+
25+
#include "mpn_extras/mulhigh.c"

src/tune/mpn_extras/tune_mulhigh_n.c

+108
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
/*
2+
Copyright (C) 2024 Fredrik Johansson
3+
Copyright (C) 2024 Albin Ahlbäck
4+
5+
This file is part of FLINT.
6+
7+
FLINT is free software: you can redistribute it and/or modify it under
8+
the terms of the GNU Lesser General Public License (LGPL) as published
9+
by the Free Software Foundation; either version 3 of the License, or
10+
(at your option) any later version. See <https://www.gnu.org/licenses/>.
11+
*/
12+
13+
#include "mpn_extras.h"
14+
#include "tune.h"
15+
16+
#undef FLINT_MPN_MULHIGH_K_TAB_SIZE
17+
#define FLINT_MPN_MULHIGH_K_TAB_SIZE FLINT_MPN_MULHIGH_K_TAB_MAX_SIZE
18+
19+
#define flint_mpn_mulhigh_k_tab flint_mpn_mulhigh_k_tab_0
20+
#define _flint_mpn_mulhigh_n_mulders _flint_mpn_mulhigh_n_mulders_0
21+
22+
FLINT_DLL extern short flint_mpn_mulhigh_k_tab[FLINT_MPN_MULHIGH_K_TAB_SIZE];
23+
mp_limb_t _flint_mpn_mulhigh_n_mulders(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
24+
25+
/* IDEA:
26+
27+
1. Set all entries in flint_mpn_mulhigh_k_tab to zeroes.
28+
2. Skip any further altering of those entries having a corresponding
29+
hardcoded version.
30+
3. When reasonable, check if basecase is faster.
31+
4. The sequence of k should be weakly increasing. We only need to consider k
32+
between n / 2 and n.
33+
TODO: How can we prune the search with this information?
34+
5. Run some warmups and run some hotlaps for each k. Only use the fastest
35+
consistent time (see tune.c).
36+
6. Use the k that corresponds to the fastest time and push that to
37+
flint_mpn_mulhigh_k_tab so that it can be used in consecutive runs.
38+
7. For large enough n, check if _flint_mpn_mulhigh_n_mul is faster. When we
39+
have 50 consecutive runs of full multiplication that are faster than high
40+
multiplication, we exit.
41+
*/
42+
43+
#define BASECASE_REASONABLE(n) ((n) < 200)
44+
45+
double measure_func(tune_func_t, void *, int, int);
46+
47+
#if 0
48+
void _tune_flint_mpn_mulhigh_n(void * vparam)
49+
{
50+
struct mulhigh__param_0 * param = vparam;
51+
nn_ptr ap, bp, xp, yp;
52+
slong len;
53+
flint_time_t t0, t1;
54+
slong ix;
55+
56+
ap = param->ap;
57+
bp = param->bp;
58+
xp = param->xp;
59+
yp = param->yp;
60+
len = param->len;
61+
62+
flint_time_get(t0);
63+
for (ix = 0; ix < len; ix++)
64+
func(ap, bp, xp[ix], yp[ix]);
65+
flint_time_get(t1);
66+
67+
return flint_time_nsec_diff(t1, t0);
68+
}
69+
70+
void
71+
tune_flint_mpn_mulhigh_n(int FLINT_UNUSED(warmups), int FLINT_UNUSED(min_runs))
72+
{
73+
slong n;
74+
mp_ptr rp, xp, yp;
75+
flint_rand_t state;
76+
77+
/* Initialize flint_mpn_mulhigh_k_tab */
78+
for (n = 0; n < FLINT_MPN_MULHIGH_K_TAB_MAX_SIZE; n++)
79+
flint_mpn_mulhigh_k_tab[n] = 0;
80+
81+
rp = flint_malloc(2 * sizeof(mp_limb_t) * FLINT_MPN_MULHIGH_K_TAB_MAX_SIZE);
82+
xp = flint_malloc(sizeof(mp_limb_t) * FLINT_MPN_MULHIGH_K_TAB_MAX_SIZE);
83+
yp = flint_malloc(sizeof(mp_limb_t) * FLINT_MPN_MULHIGH_K_TAB_MAX_SIZE);
84+
85+
flint_rand_init(state);
86+
flint_mpn_rrandom(xp, state, FLINT_MPN_MULHIGH_K_TAB_MAX_SIZE);
87+
flint_mpn_rrandom(yp, state, FLINT_MPN_MULHIGH_K_TAB_MAX_SIZE);
88+
flint_rand_clear(state);
89+
90+
for (n = 1; n < FLINT_MPN_MULHIGH_K_TAB_MAX_SIZE; n++)
91+
{
92+
if (FLINT_HAVE_MULHIGH_FUNC(n))
93+
continue;
94+
95+
if (BASECASE_REASONABLE(n))
96+
{
97+
98+
}
99+
else
100+
{
101+
}
102+
}
103+
104+
flint_free(rp);
105+
flint_free(xp);
106+
flint_free(yp);
107+
}
108+
#endif

src/tune/tune.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ int compare_doubles(const void * ap, const void * bp)
5252
return (a < b) ? -1 : (a > b) ? 1 : 0;
5353
}
5454

55-
static double measure_func(tune_func_t fun, void * params, int runs, int warmups)
55+
double measure_func(tune_func_t fun, void * params, int runs, int warmups)
5656
{
5757
double * times = malloc(sizeof(double) * runs);
5858
int ix;

src/tune/tune.h

+4
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,10 @@ void n_param_clear(void *);
4242
double _tune_n_xgcd_0(void *);
4343
double _tune_n_xgcd_1(void *);
4444

45+
/* mpn_extras ****************************************************************/
46+
47+
#define FLINT_MPN_MULHIGH_K_TAB_MAX_SIZE 4096
48+
4549
/* n_mod_vec *****************************************************************/
4650
#if WANT_N_MOD
4751
struct n_mod_vec_param_0

0 commit comments

Comments
 (0)