Skip to content

AVX2 core for OGR-NG #12

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 10 commits into
base: master
Choose a base branch
from
4 changes: 2 additions & 2 deletions common/cliident.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -542,7 +542,7 @@ int CliIsDevelVersion(void)

const char *CliGetFullVersionDescriptor(void)
{
static char buffer[10+32+sizeof("v"CLIENT_VERSIONSTRING"-XXX-99071523-*dev* client for "CLIENT_OS_NAME_EXTENDED)];
static char buffer[10+32+sizeof("v" CLIENT_VERSIONSTRING "-XXX-99071523-*dev* client for " CLIENT_OS_NAME_EXTENDED)];
struct timeval tv; tv.tv_usec = 0;
tv.tv_sec = CliGetNewestModuleTime();
sprintf( buffer, "%s v" CLIENT_VERSIONSTRING "-"
Expand All @@ -559,7 +559,7 @@ const char *CliGetFullVersionDescriptor(void)
"%c" /* limited release or dev branch or public release */
"-%s" /* date is in bugzilla format yymmddhh */
"%s" /* "-*dev*" or "" */
" for "CLIENT_OS_NAME_EXTENDED,
" for " CLIENT_OS_NAME_EXTENDED,
utilGetAppName(),
((ConIsGUI())?('G'):('C')),
((CliIsDevelVersion())?('L'):('R')),
Expand Down
14 changes: 7 additions & 7 deletions common/confopt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -405,7 +405,7 @@ struct optionstruct conf_options[CONF_OPTION_COUNT] = {
"\n"
"It is possible to have the client rotate through this list, updating its\n"
"buffers only once for each pass. To do so, 'Dialup-link detection'\n"
"and '"ADDITIONAL_BUFFLEVEL_CHECK_OPTION_NAME"' must be disabled since a buffer\n"
"and '" ADDITIONAL_BUFFLEVEL_CHECK_OPTION_NAME "' must be disabled since a buffer\n"
"update (new work being made available) would otherwise cause the client\n"
"to go back to the beginning of the load order.\n"
/*) */,CONF_MENU_BUFF,CONF_TYPE_ASCIIZ,NULL,NULL,0,0,NULL,NULL
Expand Down Expand Up @@ -439,13 +439,13 @@ struct optionstruct conf_options[CONF_OPTION_COUNT] = {
CONF_FREQUENT_FREQUENCY , /* CONF_MENU_BUFF */
CFGTXT("Buffer-level check interval"), "0:00 (on buffer change)",
/*CFGTXT(*/
"This option determines how often '"ADDITIONAL_BUFFLEVEL_CHECK_OPTION_NAME"'\n"
"This option determines how often '" ADDITIONAL_BUFFLEVEL_CHECK_OPTION_NAME "'\n"
"should be performed. (More precisely: how much time must elapse between\n"
"buffer-level checks)\n"
"\n"
"This setting is meaningful only if one of the extensions to normal threshold\n"
"management is enabled: either implicitly when 'Dialup detection options' are\n"
"active or explicitly with '"ADDITIONAL_BUFFLEVEL_CHECK_OPTION_NAME"'.\n"
"active or explicitly with '" ADDITIONAL_BUFFLEVEL_CHECK_OPTION_NAME "'.\n"
"\n"
"The interval specified here is in hours and minutes, and the default denotes\n"
"that the client should check buffer-levels whenever it detects a change (by\n"
Expand All @@ -456,13 +456,13 @@ struct optionstruct conf_options[CONF_OPTION_COUNT] = {
CONF_FREQUENT_RETRY_FREQUENCY , /* CONF_MENU_BUFF */
CFGTXT("Buffer-level check retry interval"), "0:00 (no delay)",
/*CFGTXT(*/
"This option determines how often '"ADDITIONAL_BUFFLEVEL_CHECK_OPTION_NAME"'\n"
"This option determines how often '" ADDITIONAL_BUFFLEVEL_CHECK_OPTION_NAME "'\n"
"should be retried after failure. (More precisely: how much time must elapse\n"
"between buffer-level check retries)\n"
"\n"
"This setting is meaningful only if one of the extensions to normal threshold\n"
"management is enabled: either implicitly when 'Dialup detection options' are\n"
"active or explicitly with '"ADDITIONAL_BUFFLEVEL_CHECK_OPTION_NAME"'.\n"
"active or explicitly with '" ADDITIONAL_BUFFLEVEL_CHECK_OPTION_NAME "'.\n"
"\n"
"The interval specified here is in hours and minutes, and the default denotes\n"
"that the client should retry the buffer-level checks at most twice per minute\n"
Expand Down Expand Up @@ -503,7 +503,7 @@ struct optionstruct conf_options[CONF_OPTION_COUNT] = {
"should be used instead. If that too is unspecified, then the client will\n"
"use defaults.\n"
"\n"
"* See also: '"ADDITIONAL_BUFFLEVEL_CHECK_OPTION_NAME"'\n"
"* See also: '" ADDITIONAL_BUFFLEVEL_CHECK_OPTION_NAME "'\n"
,CONF_MENU_BUFF,CONF_TYPE_IARRAY,NULL,NULL,1,0xffff,NULL,NULL
},
{
Expand All @@ -523,7 +523,7 @@ struct optionstruct conf_options[CONF_OPTION_COUNT] = {
"unprocessed packet cannot be predicted.\n"
#endif
"\n"
"* See also: '"ADDITIONAL_BUFFLEVEL_CHECK_OPTION_NAME"'\n"
"* See also: '" ADDITIONAL_BUFFLEVEL_CHECK_OPTION_NAME "'\n"
,CONF_MENU_BUFF,CONF_TYPE_IARRAY,NULL,NULL,0,(14*24),NULL,NULL
},

Expand Down
12 changes: 11 additions & 1 deletion common/core_ogr_ng.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ return "@(#)$Id: core_ogr_ng.cpp,v 1.47 2015/06/27 21:43:52 zebe Exp $"; }
CoreDispatchTable *ogrng64_get_dispatch_table_cj1_generic(void);
CoreDispatchTable *ogrng64_get_dispatch_table_cj1_sse2(void);
CoreDispatchTable *ogrng64_get_dispatch_table_cj1_sse2_lzcnt(void);
CoreDispatchTable *ogrng64_get_dispatch_table_cj1_avx2(void);
#elif (CLIENT_CPU == CPU_SPARC) && (SIZEOF_LONG == 8)
CoreDispatchTable *ogrng64_get_dispatch_table(void);
#elif (CLIENT_CPU == CPU_S390X) && (SIZEOF_LONG == 8)
Expand Down Expand Up @@ -166,6 +167,7 @@ int InitializeCoreTable_ogr_ng(int first_time)
ogrng64_get_dispatch_table_cj1_generic();
ogrng64_get_dispatch_table_cj1_sse2();
ogrng64_get_dispatch_table_cj1_sse2_lzcnt();
ogrng64_get_dispatch_table_cj1_avx2();
#elif (CLIENT_CPU == CPU_S390)
ogrng_get_dispatch_table();
#elif (CLIENT_CPU == CPU_S390X)
Expand Down Expand Up @@ -225,6 +227,7 @@ const char **corenames_for_contest_ogr_ng()
"cj-asm-generic",
"cj-asm-sse2",
"cj-asm-sse2-lzcnt",
"cj-asm-avx2",
#elif (CLIENT_CPU == CPU_ARM)
"FLEGE 2.0",
"FLEGE 2.0 ARMv3",
Expand Down Expand Up @@ -356,6 +359,8 @@ int apply_selcore_substitution_rules_ogr_ng(int cindex)
# endif
# elif (CLIENT_CPU == CPU_AMD64)
unsigned feature = GetProcessorFeatureFlags();
if (cindex == 4 && !(feature & CPU_F_AVX2)) /* Core 4 needs AVX2 */
cindex = 2; /* If no AVX2, try SSE2 */
if (cindex == 3 && !(feature & CPU_F_LZCNT)) /* Core 3 needs LZCNT */
cindex = 2; /* If no LZCNT, try SSE2 */
if (cindex == 2 && !(feature & CPU_F_SSE2)) /* Core 2 needs SSE2 */
Expand Down Expand Up @@ -504,8 +509,11 @@ int selcoreGetPreselectedCoreForProject_ogr_ng()
}
if (cindex == -1)
{
/* Assume that if AVX2 is available it is the best choice */
if (detected_flags & CPU_F_AVX2)
cindex = 4;
/* Assume that LZCNT+SSE2 is better than plain SSE2 everywhere */
if (detected_flags & CPU_F_LZCNT)
else if (detected_flags & CPU_F_LZCNT)
cindex = 3;
else if (detected_flags & CPU_F_SSE2)
cindex = 2; /* sse2 core */
Expand Down Expand Up @@ -638,6 +646,8 @@ int selcoreSelectCore_ogr_ng(Client *client, unsigned int threadindex,
unit_func.ogr = ogrng64_get_dispatch_table_cj1_sse2();
else if (coresel == 3)
unit_func.ogr = ogrng64_get_dispatch_table_cj1_sse2_lzcnt();
else if (coresel == 4)
unit_func.ogr = ogrng64_get_dispatch_table_cj1_avx2();
else
{
unit_func.ogr = ogrng64_get_dispatch_table();
Expand Down
4 changes: 3 additions & 1 deletion common/cpucheck.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1344,7 +1344,8 @@ long __GetRawProcessorID(const char **cpuname, int whattoret = 0 )
* 0x19 - Sandy Bridge Core iX-2xxx
* 0x1A - Ivy Bridge Core iX-3xxx
* 0x1B - Haswell Core iX-4xxx
* 0x1C-1F
* 0x1C - Kaby Lake Core iX-7xxx
* 0x1D-1F
* 0x20 - AMD Bobcat - Embedded APU
* 0x21 - AMD Bulldozer - FX
* 0x22 - AMD Husky - APU
Expand Down Expand Up @@ -1488,6 +1489,7 @@ long __GetRawProcessorID(const char **cpuname, int whattoret = 0 )
{ 0x0006450, 0xFFFFFF0, CPU_F_I686, 0x1B, "Core iX-4xxx (Haswell)" }, /* (#4579) */
{ 0x0006460, 0xFFFFFF0, CPU_F_I686, 0x1B, "Core iX-4xxx (Haswell)" },
{ 0x00065E0, 0xFFFFFF0, CPU_F_I686, 0x1B, "Core iX-6xxx (Skylake)" }, /* (#4615) */
{ 0x00069E0, 0xFFFFFF0, CPU_F_I686, 0x1C, "Core iX-7xxx (Kaby Lake)" },
{ 0x0000000, 0, 0, 0, NULL }
}; internalxref = &intelxref[0];
}
Expand Down
4 changes: 2 additions & 2 deletions common/cpucheck.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,13 @@
#define CPU_F_SSE (0x00002000L)
#define CPU_F_SSE2 (0x00004000L)
#define CPU_F_SSE3 (0x00008000L)
#define CPU_F_HYPERTHREAD (0x00010000L) /* supported and enabled */
#define CPU_F_HYPERTHREAD (0x00010000L) /* supported and enabled */
#define CPU_F_AMD64 (0x00020000L)
#define CPU_F_EM64T (0x00040000L)
#define CPU_F_SSE4_1 (0x00080000L)
#define CPU_F_SSE4_2 (0x00100000L)
#define CPU_F_SSSE3 (0x00200000L)
#define CPU_F_LZCNT (0x00400000L)
#define CPU_F_LZCNT (0x00400000L)
#define CPU_F_AVX_DISABLED (0x00800000L) /* supported but disabled (no OS support) */
#define CPU_F_AVX (0x01000000L) /* supported and enabled */
#define CPU_F_AVX2 (0x02000000L) /* supported and enabled */
Expand Down
4 changes: 2 additions & 2 deletions common/mail.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -564,8 +564,8 @@ static int smtp_send_message_header( void * net,
if (errcode == 0) //send the date
{
sprintf( buffer, "\r\nDate: %s"
"\r\nX-Mailer: distributed.net v"CLIENT_VERSIONSTRING
" client for "CLIENT_OS_NAME_EXTENDED, rfc822Date( buffer + 256 ) );
"\r\nX-Mailer: distributed.net v" CLIENT_VERSIONSTRING
" client for " CLIENT_OS_NAME_EXTENDED, rfc822Date( buffer + 256 ) );
if ( put_smtp_line( net, buffer, strlen( buffer ) ) )
errcode = -1;
}
Expand Down
6 changes: 3 additions & 3 deletions common/problem.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,10 @@
#define CORE_MEM_ALIGNMENT 4
#endif
#else
// For x86, alignment must be 8 for MMX core and 16 for SSE.
#if CORE_MEM_ALIGNMENT < 4
// For x86, alignment must be 8 for MMX core, 16 for SSE and 32 for AVX2.
#if CORE_MEM_ALIGNMENT < 5
#undef CORE_MEM_ALIGNMENT
#define CORE_MEM_ALIGNMENT 4
#define CORE_MEM_ALIGNMENT 5
#endif
#endif
#endif
Expand Down
2 changes: 1 addition & 1 deletion common/util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ void trace_setsrc( const char *filename )
void trace_out( int indlevel, const char *format, ... )
{
static int indentlevel = -1; /* uninitialized */
const char *tracefile = "trace"EXTN_SEP"out";
const char *tracefile = "trace" EXTN_SEP "out";
int old_errno = errno;
FILE *file;
va_list arglist;
Expand Down
2 changes: 2 additions & 0 deletions configure
Original file line number Diff line number Diff line change
Expand Up @@ -659,9 +659,11 @@ add_sources() # $1=os, $2=arch, $3=custom
TARGET_ADDSRCS="$TARGET_ADDSRCS $OGR/amd64/ogrng64-cj1-generic.cpp"
TARGET_ADDSRCS="$TARGET_ADDSRCS $OGR/amd64/ogrng64-cj1-sse2.cpp"
TARGET_ADDSRCS="$TARGET_ADDSRCS $OGR/amd64/ogrng64-cj1-sse2-lzcnt.cpp"
TARGET_ADDSRCS="$TARGET_ADDSRCS $OGR/amd64/ogrng64-cj1-avx2.cpp"
TARGET_ADDNASMS="$TARGET_ADDNASMS $OGR/amd64/ogrng64-cj1-generic-asm.asm"
TARGET_ADDNASMS="$TARGET_ADDNASMS $OGR/amd64/ogrng64-cj1-sse2-asm.asm"
TARGET_ADDNASMS="$TARGET_ADDNASMS $OGR/amd64/ogrng64-cj1-sse2-lzcnt-asm.asm"
TARGET_ADDNASMS="$TARGET_ADDNASMS $OGR/amd64/ogrng64-cj1-avx2-asm.asm"
fi

if [ "$HAVE_OGR_P2" = "1" ]; then
Expand Down
10 changes: 9 additions & 1 deletion makefile.vc
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ ZIPEXTRAS = \

OPTS_MSVC = -nologo -D__WIN32__ \
-W4 -GR- -GA -GF -Gy \
-Dsnprintf=_snprintf -DHAVE_SNPRINTF \
-DHAVE_SNPRINTF \
-D_M_$(OPTS_M_PLAT) $(OPTS_CC_CPU) $(OPTS_CC_DEBUG)
## *** +++++++++++++++++++++++++++++++++++++++++
OPTS_LIBS = advapi32.lib user32.lib kernel32.lib gdi32.lib
Expand All @@ -157,6 +157,12 @@ OPTS_RC = -d_Windows -d_M_$(OPTS_M_PLAT)
# cl 16.00.xxxx = Visual Studio 2010 (VC10)
# cl 17.00.xxxx = Visual Studio 2012 (VC11)
# cl 18.00.xxxx = Visual Studio 2013 (VC12)
# cl 19.00.xxxx = Visual Studio 2015 (VC14)

# snprintf needs to be defined as _snprintf for compilers older than Visual Studio 2015 (cl < 19)
!if ( [plat\win\msvcver.cmd] < 19 )
OPTS_MSVC = $(OPTS_MSVC) -Dsnprintf=_snprintf
!endif

!if ( [plat\win\msvcver.cmd] >= 15 )
OPTS_MSVC = $(OPTS_MSVC) -EHs-c- -GS- -wd4996
Expand Down Expand Up @@ -470,6 +476,8 @@ OGRNG_OBJS = \
$(OUTPUTPATH)/ogrng64-cj1-sse2-asm.obj \
$(OUTPUTPATH)/ogrng64-cj1-sse2-lzcnt.obj \
$(OUTPUTPATH)/ogrng64-cj1-sse2-lzcnt-asm.obj \
$(OUTPUTPATH)/ogrng64-cj1-avx2.obj \
$(OUTPUTPATH)/ogrng64-cj1-avx2-asm.obj \
$(OUTPUTPATH)/ogrng_init.obj \
$(OUTPUTPATH)/ogrng_dat.obj
!elseif "$(PROCESSOR_ARCHITECTURE)" == "x86"
Expand Down
Loading