Skip to content

Apply softAES asm for older CPUs from xmrig via @cppdev-123 #2372

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 63 additions & 11 deletions xmrstak/backend/cpu/cpuType.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,10 @@ void cpuid(uint32_t eax, int32_t ecx, int32_t val[4])
#endif
}

/**
 * Extract the bit field [l, h) of val and return it shifted down to bit 0.
 *
 * h is one past the highest wanted bit, so the manual's EBX[30:22] is
 * requested as get_masked(ebx, 31, 22).
 *
 * @param val raw register value as returned by cpuid()
 * @param h   exclusive upper bit index, valid range l+1 .. 32
 * @param l   inclusive lower bit index, valid range 0 .. 31
 * @return the selected bits as an unsigned value; 0 if the range is empty or
 *         outside the 32-bit register
 */
uint32_t get_masked(int32_t val, int32_t h, int32_t l)
{
	// Out-of-range requests would need a negative or >= 32 bit shift, which is
	// undefined behaviour, so they are answered with an empty field instead.
	if(l < 0 || l > 31 || h <= l || h > 32)
		return 0u;

	// All arithmetic is done unsigned: a field that reaches bit 31 must not be
	// sign-extended by the right shift, and building the mask must not overflow.
	const uint32_t bits = static_cast<uint32_t>(val);
	const int32_t width = h - l;
	const uint32_t mask = (width >= 32) ? 0xFFFFFFFFu : ((1u << width) - 1u);
	return (bits >> l) & mask;
}

bool has_feature(int32_t val, int32_t bit)
Expand All @@ -41,34 +41,86 @@ bool has_feature(int32_t val, int32_t bit)

Model getModel()
{
Model result;

int32_t cpu_HFP = 0; // Highest Function Parameter
int32_t cpu_HEFP = 0; // Highest Extended Function Parameter
int32_t cpu_info[4];
char cpustr[13] = {0};
char brandstr[13] = {0};
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If I set the size of `brandstr` to 64, it solves the first issue.


cpuid(0, 0, cpu_info);
cpu_HFP = cpu_info[0];
std::memcpy(cpustr, &cpu_info[1], 4);
std::memcpy(cpustr + 4, &cpu_info[3], 4);
std::memcpy(cpustr + 8, &cpu_info[2], 4);

Model result;

cpuid(1, 0, cpu_info);

result.model = get_masked(cpu_info[0], 8, 4);
result.family = get_masked(cpu_info[0], 12, 8);
result.model = get_masked(cpu_info[0], 8, 4) | get_masked(cpu_info[0], 20, 16) << 4;
result.type_name = cpustr;
if(result.family == 0x6 || result.family == 0xF)
{
result.model += get_masked(cpu_info[0], 20, 16) << 4;
}
if(result.family != 0xF)
{
result.family += get_masked(cpu_info[0], 28, 20);
}

// feature bits https://en.wikipedia.org/wiki/CPUID
// sse2
// feature bits https://en.wikipedia.org/wiki/CPUID#EAX=1:_Processor_Info_and_Feature_Bits
// sse2/sse3/ssse3
result.sse2 = has_feature(cpu_info[3], 26);
result.sse3 = has_feature(cpu_info[2], 0);
result.ssse3 = has_feature(cpu_info[2], 9);
// aes-ni
result.aes = has_feature(cpu_info[2], 25);
// avx - 27 is the check if the OS overwrote cpu features
result.avx = has_feature(cpu_info[2], 28) && has_feature(cpu_info[2], 27);

// extended feature bits https://en.wikipedia.org/wiki/CPUID#EAX=7,_ECX=0:_Extended_Features
if(cpu_HFP >= 7)
{
cpuid(7, 0, cpu_info);
result.avx2 = has_feature(cpu_info[1], 5);
}
// extended function support https://en.wikipedia.org/wiki/CPUID#EAX=80000000h:_Get_Highest_Extended_Function_Implemented
cpuid(0x80000000, 0, cpu_info);
cpu_HEFP = cpu_info[0];

// processor brand string https://en.wikipedia.org/wiki/CPUID#EAX=80000002h,80000003h,80000004h:_Processor_Brand_String
if(cpu_HEFP >= 0x80000004)
{
for(uint32_t efp=0x80000002; efp<0x80000004; efp++){
cpuid(0x80000002, 0, cpu_info);
std::memcpy(brandstr+(16*(efp-0x80000002)), &cpu_info, 16);
}
result.brand_name = brandstr;
}

if(strcmp(cpustr, "GenuineIntel") == 0)
{
if(result.family == 0x6){
result.isIntelXBridge = (
result.model == 0x2A //Sandy Bridge
|| result.model == 0x3A //Ivy Bridge
);
result.isIntelXWell = (
result.model == 0x3C || result.model == 0x45 || result.model == 0x46 //Haswell
|| result.model == 0x47 || result.model == 0x3D //Broadwell
);
result.isIntelXLake = (
result.model == 0x4E || result.model == 0x5E //Skylake
|| result.model == 0x8E //Kaby/Coffee/Whiskey/Amber Lake
|| result.model == 0x9E //Kaby/Coffee Lake
|| result.model == 0x66 //Cannon Lake
);
}
}
if(strcmp(cpustr, "AuthenticAMD") == 0)
{
if(result.family == 0xF)
result.family += get_masked(cpu_info[0], 28, 20);
result.isAMDHammer = (result.family != 0x15 && result.family >= 0xF && result.family <= 0x16);
result.isAMDBulldozer = (result.family == 0x15);
result.isAMDZen = (result.family == 0x17);
}

return result;
Expand Down
13 changes: 12 additions & 1 deletion xmrstak/backend/cpu/cpuType.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,20 @@ struct Model
{
// CPU identification and feature flags as filled in by getModel().
// Every member keeps its default when getModel() does not assign it.

// Family number derived from cpu_info[0] of CPUID leaf 1.
uint32_t family = 0u;
// Model number derived from cpu_info[0] of CPUID leaf 1.
uint32_t model = 0u;
// Vendor "GenuineIntel", family 0x6, model 0x2A (Sandy Bridge) or 0x3A (Ivy Bridge).
bool isIntelXBridge = false;
// Vendor "GenuineIntel", family 0x6, one of the Haswell/Broadwell model numbers.
bool isIntelXWell = false;
// Vendor "GenuineIntel", family 0x6, one of the Skylake .. Cannon Lake model numbers.
bool isIntelXLake = false;
// Vendor "AuthenticAMD", family in 0xF .. 0x16 but not 0x15.
bool isAMDHammer = false;
// Vendor "AuthenticAMD", family 0x15.
bool isAMDBulldozer = false;
// Vendor "AuthenticAMD", family 0x17.
bool isAMDZen = false;
// AES-NI: CPUID leaf 1, cpu_info[2] bit 25.
bool aes = false;
// SSE2: CPUID leaf 1, cpu_info[3] bit 26.
bool sse2 = false;
// SSE3: CPUID leaf 1, cpu_info[2] bit 0.
bool sse3 = false;
// SSSE3: CPUID leaf 1, cpu_info[2] bit 9.
bool ssse3 = false;
// AVX: CPUID leaf 1, cpu_info[2] bit 28, and bit 27 must be set as well.
bool avx = false;
// AVX2: CPUID leaf 7, cpu_info[1] bit 5; only queried when leaf 7 is reported as available.
bool avx2 = false;
// 12-character vendor string from CPUID leaf 0 (e.g. "GenuineIntel").
std::string type_name = "unknown";
// Processor brand string; only assigned when the highest extended CPUID function is >= 0x80000004.
std::string brand_name = "unknown";
};

Model getModel();
Expand All @@ -24,7 +34,8 @@ Model getModel();
* This enables us to put in values exactly like in the manual
* For example EBX[30:22] is get_masked(cpu_info[1], 31, 22)
*/
int32_t get_masked(int32_t val, int32_t h, int32_t l);
uint32_t get_masked(int32_t val, int32_t h, int32_t l);


} // namespace cpu
} // namespace xmrstak
119 changes: 118 additions & 1 deletion xmrstak/backend/cpu/crypto/CryptonightR_gen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ static inline void add_random_math(uint8_t*& p, const V4_Instruction* code, int

void v4_compile_code(size_t N, cryptonight_ctx* ctx, int code_size)
{
printer::inst()->print_msg(LDEBUG, "CryptonightR update ASM code");
printer::inst()->print_msg(LDEBUG, "CPU CryptonightR update ASM code");
const int allocation_size = 65536;

if(ctx->fun_data == nullptr)
Expand Down Expand Up @@ -127,3 +127,120 @@ void v4_compile_code(size_t N, cryptonight_ctx* ctx, int code_size)
printer::inst()->print_msg(L0, "Error: CPU CryptonightR update ASM code ctx->fun_data is a nullptr");
}
}

void wow_compile_code(size_t N, cryptonight_ctx* ctx, int code_size)
{
printer::inst()->print_msg(LDEBUG, "CPU CryptonightR-WOW update ASM code");
const int allocation_size = 65536;

if(ctx->fun_data == nullptr)
ctx->fun_data = static_cast<uint8_t*>(allocateExecutableMemory(allocation_size));
else
unprotectExecutableMemory(ctx->fun_data, allocation_size);

uint8_t* p0 = ctx->fun_data;
uint8_t* p = p0;
if(ctx->fun_data != nullptr)
{

if(N == 2)
{
add_code(p, CryptonightWOW_template_double_part1, CryptonightWOW_template_double_part2);
add_random_math(p, ctx->cn_r_ctx.code, code_size, instructions, instructions_mov, false, ctx->asm_version);
add_code(p, CryptonightWOW_template_double_part2, CryptonightWOW_template_double_part3);
add_random_math(p, ctx->cn_r_ctx.code, code_size, instructions, instructions_mov, false, ctx->asm_version);
add_code(p, CryptonightWOW_template_double_part3, CryptonightWOW_template_double_part4);
*(int*)(p - 4) = static_cast<int>((((const uint8_t*)CryptonightWOW_template_double_mainloop) - ((const uint8_t*)CryptonightWOW_template_double_part1)) - (p - p0));
add_code(p, CryptonightWOW_template_double_part4, CryptonightWOW_template_double_end);
}
else
{
add_code(p, CryptonightWOW_template_part1, CryptonightWOW_template_part2);
add_random_math(p, ctx->cn_r_ctx.code, code_size, instructions, instructions_mov, false, ctx->asm_version);
add_code(p, CryptonightWOW_template_part2, CryptonightWOW_template_part3);
*(int*)(p - 4) = static_cast<int>((((const uint8_t*)CryptonightWOW_template_mainloop) - ((const uint8_t*)CryptonightWOW_template_part1)) - (p - p0));
add_code(p, CryptonightWOW_template_part3, CryptonightWOW_template_end);
}

ctx->loop_fn = reinterpret_cast<cn_mainloop_fun>(ctx->fun_data);
protectExecutableMemory(ctx->fun_data, allocation_size);
flushInstructionCache(ctx->fun_data, p - p0);
}
else
{
printer::inst()->print_msg(L0, "Error: CPU CryptonightR-WOW update ASM code ctx->fun_data is a nullptr");
}
}

void v4_soft_aes_compile_code(size_t N, cryptonight_ctx* ctx, int code_size)
{
printer::inst()->print_msg(LDEBUG, "CPU CryptonightR update soft-aes ASM code");
const int allocation_size = 65536;

if (ctx->fun_data == nullptr)
ctx->fun_data = static_cast<uint8_t*>(allocateExecutableMemory(allocation_size));
else
unprotectExecutableMemory(ctx->fun_data, allocation_size);

uint8_t* p0 = ctx->fun_data;
uint8_t* p = p0;
if (ctx->fun_data != nullptr)
{
if(N == 2)
{
printer::inst()->print_msg(L0, "Error: CPU CryptonightR update soft-aes ASM code has no double");
}
else
{
add_code(p, CryptonightR_soft_aes_template_part1, CryptonightR_soft_aes_template_part2);
add_random_math(p, ctx->cn_r_ctx.code, code_size, instructions, instructions_mov, false, ctx->asm_version);
add_code(p, CryptonightR_soft_aes_template_part2, CryptonightR_soft_aes_template_part3);
*(int*)(p - 4) = static_cast<int>((((const uint8_t*)CryptonightR_soft_aes_template_mainloop) - ((const uint8_t*)CryptonightR_soft_aes_template_part1)) - (p - p0));
add_code(p, CryptonightR_soft_aes_template_part3, CryptonightR_soft_aes_template_end);
ctx->loop_fn = reinterpret_cast<cn_mainloop_fun>(ctx->fun_data);
protectExecutableMemory(ctx->fun_data, allocation_size);
flushInstructionCache(ctx->fun_data, p - p0);
}
}
else
{
printer::inst()->print_msg(L0, "Error: CPU CryptonightR update soft-aes ASM code ctx->fun_data is a nullptr");
}
}

/**
 * Assemble the soft-AES CryptonightR-WOW main loop into ctx->fun_data and
 * install it as ctx->loop_fn.
 *
 * Only the single-hash variant exists. For N == 2 an error is printed and no
 * code is generated.
 *
 * @param N         must not be 2; there is no soft-AES "double" template
 * @param ctx       context; reads fun_data, cn_r_ctx.code and asm_version,
 *                  writes fun_data (allocated on first use) and, on success,
 *                  loop_fn
 * @param code_size forwarded unchanged to add_random_math
 *
 * NOTE(review): ctx->loop_fn is left untouched on both error paths, so the
 * caller keeps whatever loop function was installed before - confirm callers
 * cope with that.
 */
void wow_soft_aes_compile_code(size_t N, cryptonight_ctx* ctx, int code_size)
{
	printer::inst()->print_msg(LDEBUG, "CPU CryptonightR-WOW update soft-aes ASM code");
	const int allocation_size = 65536;

	// Reuse an existing buffer (after unprotecting it) or allocate one on first use.
	if(ctx->fun_data == nullptr)
		ctx->fun_data = static_cast<uint8_t*>(allocateExecutableMemory(allocation_size));
	else
		unprotectExecutableMemory(ctx->fun_data, allocation_size);

	uint8_t* p0 = ctx->fun_data;
	uint8_t* p = p0;
	if(p0 == nullptr)
	{
		printer::inst()->print_msg(L0, "Error: CPU CryptonightR-WOW update soft-aes ASM code ctx->fun_data is a nullptr");
		return;
	}

	if(N == 2)
	{
		printer::inst()->print_msg(L0, "Error: CPU CryptonightR-WOW update soft-aes ASM code has no double");
		// The buffer may have been unprotected above. Pair that with the same
		// protect call the success path makes, so the error path does not
		// leave previously generated code in an unprotected buffer.
		protectExecutableMemory(ctx->fun_data, allocation_size);
		return;
	}

	add_code(p, CryptonightWOW_soft_aes_template_part1, CryptonightWOW_soft_aes_template_part2);
	add_random_math(p, ctx->cn_r_ctx.code, code_size, instructions, instructions_mov, false, ctx->asm_version);
	add_code(p, CryptonightWOW_soft_aes_template_part2, CryptonightWOW_soft_aes_template_part3);

	// Patch the last 4 emitted bytes with the distance to the main loop label,
	// corrected by how far the emitted code has grown.
	// NOTE(review): presumably the rel32 operand of the loop's back jump - confirm against the template.
	*reinterpret_cast<int*>(p - 4) = static_cast<int>(
		(reinterpret_cast<const uint8_t*>(CryptonightWOW_soft_aes_template_mainloop) -
			reinterpret_cast<const uint8_t*>(CryptonightWOW_soft_aes_template_part1)) -
		(p - p0));

	add_code(p, CryptonightWOW_soft_aes_template_part3, CryptonightWOW_soft_aes_template_end);

	ctx->loop_fn = reinterpret_cast<cn_mainloop_fun>(ctx->fun_data);
	protectExecutableMemory(ctx->fun_data, allocation_size);
	flushInstructionCache(ctx->fun_data, p - p0);
}
Loading