diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..1ff0c42 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,63 @@ +############################################################################### +# Set default behavior to automatically normalize line endings. +############################################################################### +* text=auto + +############################################################################### +# Set default behavior for command prompt diff. +# +# This is need for earlier builds of msysgit that does not have it on by +# default for csharp files. +# Note: This is only used by command line +############################################################################### +#*.cs diff=csharp + +############################################################################### +# Set the merge driver for project and solution files +# +# Merging from the command prompt will add diff markers to the files if there +# are conflicts (Merging from VS is not affected by the settings below, in VS +# the diff markers are never inserted). Diff markers may cause the following +# file extensions to fail to load in VS. An alternative would be to treat +# these files as binary and thus will always conflict and require user +# intervention with every merge. To do so, just uncomment the entries below +############################################################################### +#*.sln merge=binary +#*.csproj merge=binary +#*.vbproj merge=binary +#*.vcxproj merge=binary +#*.vcproj merge=binary +#*.dbproj merge=binary +#*.fsproj merge=binary +#*.lsproj merge=binary +#*.wixproj merge=binary +#*.modelproj merge=binary +#*.sqlproj merge=binary +#*.wwaproj merge=binary + +############################################################################### +# behavior for image files +# +# image files are treated as binary by default. +############################################################################### +#*.jpg binary +#*.png binary +#*.gif binary + +############################################################################### +# diff behavior for common document formats +# +# Convert binary document formats to text before diffing them. This feature +# is only available from the command line. Turn it on by uncommenting the +# entries below. +############################################################################### +#*.doc diff=astextplain +#*.DOC diff=astextplain +#*.docx diff=astextplain +#*.DOCX diff=astextplain +#*.dot diff=astextplain +#*.DOT diff=astextplain +#*.pdf diff=astextplain +#*.PDF diff=astextplain +#*.rtf diff=astextplain +#*.RTF diff=astextplain diff --git a/.gitignore b/.gitignore index fd5204b..131c088 100644 --- a/.gitignore +++ b/.gitignore @@ -181,3 +181,4 @@ UpgradeLog*.htm # Microsoft Fakes FakesAssemblies/ +/diskspd_vs/.vs/diskspd/v15 diff --git a/CmdLineParser/CmdLineParser.cpp b/CmdLineParser/CmdLineParser.cpp index 8ec9319..ae5ccf5 100644 --- a/CmdLineParser/CmdLineParser.cpp +++ b/CmdLineParser/CmdLineParser.cpp @@ -30,9 +30,9 @@ SOFTWARE. #include "CmdLineParser.h" #include "Common.h" #include "XmlProfileParser.h" -#include -#include -#include +#include +#include +#include CmdLineParser::CmdLineParser() : _dwBlockSize(64 * 1024), @@ -42,9 +42,7 @@ CmdLineParser::CmdLineParser() : { } -CmdLineParser::~CmdLineParser() -{ -} +CmdLineParser::~CmdLineParser() = default; // Get size in bytes from a string (it can end with K, M, G for KB, MB, GB and b for block) bool CmdLineParser::_GetSizeInBytes(const char *pszSize, UINT64& ullSize) const @@ -107,11 +105,11 @@ bool CmdLineParser::_GetSizeInBytes(const char *pszSize, UINT64& ullSize) const return fOk; } -bool CmdLineParser::_GetRandomDataWriteBufferData(const string& sArg, UINT64& cb, string& sPath) +bool CmdLineParser::_GetRandomDataWriteBufferData(const string& sArg, UINT64& cb, string& sPath) const { - bool fOk = true; - size_t iComma = sArg.find(','); - if (iComma == sArg.npos) + bool fOk; + const size_t iComma = sArg.find(','); + if (iComma == std::string::npos) { fOk = _GetSizeInBytes(sArg.c_str(), cb); sPath = ""; @@ -124,7 +122,7 @@ bool CmdLineParser::_GetRandomDataWriteBufferData(const string& sArg, UINT64& cb return fOk; } -void CmdLineParser::_DisplayUsageInfo(const char *pszFilename) const +void CmdLineParser::_DisplayUsageInfo(const char *pszFilename) { // ISSUE-REVIEW: this formats badly in the default 80 column command prompt printf("\n"); @@ -179,6 +177,8 @@ void CmdLineParser::_DisplayUsageInfo(const char *pszFilename) const printf(" -o number of outstanding I/O requests per target per thread\n"); printf(" (1=synchronous I/O, unless more than 1 thread is specified with -F)\n"); printf(" [default=2]\n"); + printf(" -O number of outstanding I/O requests per thread - for use with -F\n"); + printf(" (1=synchronous I/O)\n"); printf(" -p start parallel sequential I/O operations with the same offset\n"); printf(" (ignored if -r is specified, makes sense only with -o2 or greater)\n"); printf(" -P enable printing a progress dot after each [default=65536]\n"); @@ -215,6 +215,9 @@ void CmdLineParser::_DisplayUsageInfo(const char *pszFilename) const printf("\n"); printf("Write buffers:\n"); printf(" -Z zero buffers used for write tests\n"); + printf(" -Zr per IO random buffers used for write tests - this incurrs additional run-time\n"); + printf(" overhead to create random content and shouln't be compared to results run\n"); + printf(" without -Zr\n"); printf(" -Z[K|M|G|b] use a buffer filled with random data as a source for write operations.\n"); printf(" -Z[K|M|G|b], use a buffer filled with data from as a source for write operations.\n"); printf("\n"); @@ -391,8 +394,8 @@ bool CmdLineParser::_ParseAffinity(const char *arg, TimeSpan *pTimeSpan) { if (nNum > MAXWORD) { - fprintf(stderr, "ERROR: group %u is out of range\n", nNum); - fOk = false; + fprintf(stderr, "ERROR: group %lu is out of range\n", nNum); + fOk = false; } else { @@ -411,12 +414,12 @@ bool CmdLineParser::_ParseAffinity(const char *arg, TimeSpan *pTimeSpan) { if (nNum > MAXBYTE) { - fprintf(stderr, "ERROR: core %u is out of range\n", nNum); + fprintf(stderr, "ERROR: core %lu is out of range\n", nNum); fOk = false; } else { - pTimeSpan->AddAffinityAssignment((WORD)nGroup, (BYTE)nNum); + pTimeSpan->AddAffinityAssignment(static_cast(nGroup), static_cast(nNum)); nNum = 0; fNum = false; } @@ -445,7 +448,7 @@ bool CmdLineParser::_ParseAffinity(const char *arg, TimeSpan *pTimeSpan) if (fOk && nNum > MAXBYTE) { - fprintf(stderr, "ERROR: core %u is out of range\n", nNum); + fprintf(stderr, "ERROR: core %lu is out of range\n", nNum); fOk = false; } @@ -469,7 +472,7 @@ bool CmdLineParser::_ParseAffinity(const char *arg, TimeSpan *pTimeSpan) if (fOk) { // fprintf(stderr, "FINAL parsed group %d core %d\n", nGroup, nNum); - pTimeSpan->AddAffinityAssignment((WORD)nGroup, (BYTE)nNum); + pTimeSpan->AddAffinityAssignment(static_cast(nGroup), static_cast(nNum)); } return fOk; @@ -483,12 +486,12 @@ bool CmdLineParser::_ReadParametersFromCmdLine(const int argc, const char *argv[ // create targets vector vTargets; - int iFirstFile = -1; + //int iFirstFile = -1; for (int i = 1; i < argc; i++) { if (argv[i][0] != '-' && argv[i][0] != '/') { - iFirstFile = i; + //iFirstFile = i; Target target; target.SetPath(argv[i]); vTargets.push_back(target); @@ -504,10 +507,10 @@ bool CmdLineParser::_ReadParametersFromCmdLine(const int argc, const char *argv[ UINT64 ullBlockSize; if (_GetSizeInBytes(&argv[x][2], ullBlockSize)) { - for (auto i = vTargets.begin(); i != vTargets.end(); i++) + for (auto& vTarget : vTargets) { // TODO: UINT64->DWORD - i->SetBlockSizeInBytes((DWORD)ullBlockSize); + vTarget.SetBlockSizeInBytes(static_cast(ullBlockSize)); } } else @@ -515,7 +518,7 @@ bool CmdLineParser::_ReadParametersFromCmdLine(const int argc, const char *argv[ fprintf(stderr, "Invalid block size passed to -b\n"); return false; } - _dwBlockSize = (DWORD)ullBlockSize; + _dwBlockSize = static_cast(ullBlockSize); break; } } @@ -566,15 +569,15 @@ bool CmdLineParser::_ReadParametersFromCmdLine(const int argc, const char *argv[ // nop - block size has been taken care of before the loop break; - case 'B': //base file offset (offset from the beginning of the file), cannot be used with 'random' + case 'B': //base file offset (offset from the beginning of the file) if (*(arg + 1) != '\0') { UINT64 cb; if (_GetSizeInBytes(arg + 1, cb)) { - for (auto i = vTargets.begin(); i != vTargets.end(); i++) + for (auto& vTarget : vTargets) { - i->SetBaseFileOffsetInBytes(cb); + vTarget.SetBaseFileOffsetInBytes(cb); } } else @@ -595,10 +598,10 @@ bool CmdLineParser::_ReadParametersFromCmdLine(const int argc, const char *argv[ UINT64 cb; if (_GetSizeInBytes(arg + 1, cb)) { - for (auto i = vTargets.begin(); i != vTargets.end(); i++) + for (auto& vTarget : vTargets) { - i->SetFileSize(cb); - i->SetCreateFile(true); + vTarget.SetFileSize(cb); + vTarget.SetCreateFile(true); } } else @@ -615,7 +618,7 @@ bool CmdLineParser::_ReadParametersFromCmdLine(const int argc, const char *argv[ case 'C': //cool down time { - int c = atoi(arg + 1); + const int c = atoi(arg + 1); if (c >= 0) { timeSpan.SetCooldown(c); @@ -629,7 +632,7 @@ bool CmdLineParser::_ReadParametersFromCmdLine(const int argc, const char *argv[ case 'd': //duration { - int x = atoi(arg + 1); + const int x = atoi(arg + 1); if (x > 0) { timeSpan.SetDuration(x); @@ -645,7 +648,7 @@ bool CmdLineParser::_ReadParametersFromCmdLine(const int argc, const char *argv[ { timeSpan.SetCalculateIopsStdDev(true); - int x = atoi(arg + 1); + const int x = atoi(arg + 1); if (x > 0) { timeSpan.SetIoBucketDurationInMilliseconds(x); @@ -666,9 +669,9 @@ bool CmdLineParser::_ReadParametersFromCmdLine(const int argc, const char *argv[ UINT64 cb; if (_GetSizeInBytes(arg + 1, cb)) { - for (auto i = vTargets.begin(); i != vTargets.end(); i++) + for (auto& vTarget : vTargets) { - i->SetMaxFileSize(cb); + vTarget.SetMaxFileSize(cb); } } else @@ -692,21 +695,21 @@ bool CmdLineParser::_ReadParametersFromCmdLine(const int argc, const char *argv[ switch (*arg) { case 'r': - for (auto i = vTargets.begin(); i != vTargets.end(); i++) + for (auto& vTarget : vTargets) { - i->SetRandomAccessHint(true); + vTarget.SetRandomAccessHint(true); } break; case 's': - for (auto i = vTargets.begin(); i != vTargets.end(); i++) + for (auto& vTarget : vTargets) { - i->SetSequentialScanHint(true); + vTarget.SetSequentialScanHint(true); } break; case 't': - for (auto i = vTargets.begin(); i != vTargets.end(); i++) + for (auto& vTarget : vTargets) { - i->SetTemporaryFileHint(true); + vTarget.SetTemporaryFileHint(true); } break; default: @@ -720,7 +723,7 @@ bool CmdLineParser::_ReadParametersFromCmdLine(const int argc, const char *argv[ case 'F': //total number of threads { - int c = atoi(arg + 1); + const int c = atoi(arg + 1); if (c > 0) { timeSpan.SetThreadCount(c); @@ -734,12 +737,12 @@ bool CmdLineParser::_ReadParametersFromCmdLine(const int argc, const char *argv[ case 'g': //throughput in bytes per millisecond { - int c = atoi(arg + 1); + const int c = atoi(arg + 1); if (c > 0) { - for (auto i = vTargets.begin(); i != vTargets.end(); i++) + for (auto& vTarget : vTargets) { - i->SetThroughput(c); + vTarget.SetThroughput(c); } } else @@ -765,13 +768,13 @@ bool CmdLineParser::_ReadParametersFromCmdLine(const int argc, const char *argv[ case 'i': //number of IOs to issue before think time { - int c = atoi(arg + 1); + const int c = atoi(arg + 1); if (c > 0) { - for (auto i = vTargets.begin(); i != vTargets.end(); i++) + for (auto& vTarget : vTargets) { - i->SetBurstSize(c); - i->SetUseBurstSize(true); + vTarget.SetBurstSize(c); + vTarget.SetUseBurstSize(true); } } else @@ -783,13 +786,13 @@ bool CmdLineParser::_ReadParametersFromCmdLine(const int argc, const char *argv[ case 'j': //time to wait between bursts of IOs { - int c = atoi(arg + 1); + const int c = atoi(arg + 1); if (c > 0) { - for (auto i = vTargets.begin(); i != vTargets.end(); i++) + for (auto& vTarget : vTargets) { - i->SetThinkTime(c); - i->SetEnableThinkTime(true); + vTarget.SetThinkTime(c); + vTarget.SetEnableThinkTime(true); } } else @@ -801,13 +804,13 @@ bool CmdLineParser::_ReadParametersFromCmdLine(const int argc, const char *argv[ case 'I': //io priority { - int x = atoi(arg + 1); + const int x = atoi(arg + 1); if (x > 0 && x < 4) { PRIORITY_HINT hint[] = { IoPriorityHintVeryLow, IoPriorityHintLow, IoPriorityHintNormal }; - for (auto i = vTargets.begin(); i != vTargets.end(); i++) + for (auto& vTarget : vTargets) { - i->SetIOPriorityHint(hint[x - 1]); + vTarget.SetIOPriorityHint(hint[x - 1]); } } else @@ -818,9 +821,9 @@ bool CmdLineParser::_ReadParametersFromCmdLine(const int argc, const char *argv[ break; case 'l': //large pages - for (auto i = vTargets.begin(); i != vTargets.end(); i++) + for (auto& vTarget : vTargets) { - i->SetUseLargePages(true); + vTarget.SetUseLargePages(true); } break; @@ -834,12 +837,12 @@ bool CmdLineParser::_ReadParametersFromCmdLine(const int argc, const char *argv[ case 'o': //request count (1==synchronous) { - int c = atoi(arg + 1); + const int c = atoi(arg + 1); if (c > 0) { - for (auto i = vTargets.begin(); i != vTargets.end(); i++) + for (auto& vTarget : vTargets) { - i->SetRequestCount(c); + vTarget.SetRequestCount(c); } } else @@ -849,11 +852,25 @@ bool CmdLineParser::_ReadParametersFromCmdLine(const int argc, const char *argv[ } break; + case 'O': //total number of IOs/thread - for use with -F + { + const int c = atoi(arg + 1); + if (c > 0) + { + timeSpan.SetRequestCount(c); + } + else + { + fError = true; + } + } + break; + case 'p': //start async IO operations with the same offset //makes sense only for -o2 and greater - for (auto i = vTargets.begin(); i != vTargets.end(); i++) + for (auto& vTarget : vTargets) { - i->SetUseParallelAsyncIO(true); + vTarget.SetUseParallelAsyncIO(true); } break; @@ -881,10 +898,10 @@ bool CmdLineParser::_ReadParametersFromCmdLine(const int argc, const char *argv[ } if (!fError) { - for (auto i = vTargets.begin(); i != vTargets.end(); i++) + for (auto& vTarget : vTargets) { - i->SetUseRandomAccessPattern(true); - i->SetBlockAlignmentInBytes(cb); + vTarget.SetUseRandomAccessPattern(true); + vTarget.SetBlockAlignmentInBytes(cb); } } } @@ -920,9 +937,9 @@ bool CmdLineParser::_ReadParametersFromCmdLine(const int argc, const char *argv[ // ISSUE-REVIEW: this does nothing if -r is specified // ISSUE-REVIEW: this does nothing if -p is specified // ISSUE-REVIEW: this does nothing if we are single-threaded - for (auto i = vTargets.begin(); i != vTargets.end(); i++) + for (auto& vTarget : vTargets) { - i->SetUseInterlockedSequential(true); + vTarget.SetUseInterlockedSequential(true); } idx++; @@ -933,9 +950,9 @@ bool CmdLineParser::_ReadParametersFromCmdLine(const int argc, const char *argv[ UINT64 cb; if (_GetSizeInBytes(arg + idx, cb)) { - for (auto i = vTargets.begin(); i != vTargets.end(); i++) + for (auto& vTarget : vTargets) { - i->SetBlockAlignmentInBytes(cb); + vTarget.SetBlockAlignmentInBytes(cb); } } else @@ -1039,12 +1056,12 @@ bool CmdLineParser::_ReadParametersFromCmdLine(const int argc, const char *argv[ case 't': //number of threads per file { - int c = atoi(arg + 1); + const int c = atoi(arg + 1); if (c > 0) { - for (auto i = vTargets.begin(); i != vTargets.end(); i++) + for (auto& vTarget : vTargets) { - i->SetThreadsPerFile(c); + vTarget.SetThreadsPerFile(c); } } else @@ -1059,9 +1076,9 @@ bool CmdLineParser::_ReadParametersFromCmdLine(const int argc, const char *argv[ UINT64 cb; if (_GetSizeInBytes(arg + 1, cb) && (cb > 0)) { - for (auto i = vTargets.begin(); i != vTargets.end(); i++) + for (auto& vTarget : vTargets) { - i->SetThreadStrideInBytes(cb); + vTarget.SetThreadStrideInBytes(cb); } } else @@ -1078,7 +1095,7 @@ bool CmdLineParser::_ReadParametersFromCmdLine(const int argc, const char *argv[ case 'w': //write test [default=read] { - int c = -1; + int c; if (*(arg + 1) == '\0') { c = _ulWriteRatio; @@ -1094,9 +1111,9 @@ bool CmdLineParser::_ReadParametersFromCmdLine(const int argc, const char *argv[ } if (c != -1) { - for (auto i = vTargets.begin(); i != vTargets.end(); i++) + for (auto& vTarget : vTargets) { - i->SetWriteRatio(c); + vTarget.SetWriteRatio(c); } } } @@ -1104,7 +1121,7 @@ bool CmdLineParser::_ReadParametersFromCmdLine(const int argc, const char *argv[ case 'W': //warm up time { - int c = atoi(arg + 1); + const int c = atoi(arg + 1); if (c >= 0) { timeSpan.SetWarmup(c); @@ -1125,8 +1142,8 @@ bool CmdLineParser::_ReadParametersFromCmdLine(const int argc, const char *argv[ { case 's': - _hEventStarted = CreateEvent(NULL, TRUE, FALSE, arg + 2); - if (NULL == _hEventStarted) + _hEventStarted = CreateEvent(nullptr, TRUE, FALSE, arg + 2); + if (nullptr == _hEventStarted) { fprintf(stderr, "Error creating/opening start notification event: '%s'\n", arg + 2); exit(1); // TODO: this class shouldn't terminate the process @@ -1134,8 +1151,8 @@ bool CmdLineParser::_ReadParametersFromCmdLine(const int argc, const char *argv[ break; case 'f': - _hEventFinished = CreateEvent(NULL, TRUE, FALSE, arg + 2); - if (NULL == _hEventFinished) + _hEventFinished = CreateEvent(nullptr, TRUE, FALSE, arg + 2); + if (nullptr == _hEventFinished) { fprintf(stderr, "Error creating/opening finish notification event: '%s'\n", arg + 2); exit(1); // TODO: this class shouldn't terminate the process @@ -1143,8 +1160,8 @@ bool CmdLineParser::_ReadParametersFromCmdLine(const int argc, const char *argv[ break; case 'r': - synch->hStartEvent = CreateEvent(NULL, TRUE, FALSE, arg + 2); - if (NULL == synch->hStartEvent) + synch->hStartEvent = CreateEvent(nullptr, TRUE, FALSE, arg + 2); + if (nullptr == synch->hStartEvent) { fprintf(stderr, "Error creating/opening wait-for-start event: '%s'\n", arg + 2); exit(1); // TODO: this class shouldn't terminate the process @@ -1152,8 +1169,8 @@ bool CmdLineParser::_ReadParametersFromCmdLine(const int argc, const char *argv[ break; case 'p': - synch->hStopEvent = CreateEvent(NULL, TRUE, FALSE, arg + 2); - if (NULL == synch->hStopEvent) + synch->hStopEvent = CreateEvent(nullptr, TRUE, FALSE, arg + 2); + if (nullptr == synch->hStopEvent) { fprintf(stderr, "Error creating/opening force-stop event: '%s'\n", arg + 2); exit(1); // TODO: this class shouldn't terminate the process @@ -1162,8 +1179,8 @@ bool CmdLineParser::_ReadParametersFromCmdLine(const int argc, const char *argv[ case 'e': { - HANDLE hEvent = OpenEvent(EVENT_MODIFY_STATE, FALSE, arg + 2); - if (NULL == hEvent) + HANDLE hEvent = OpenEvent(EVENT_MODIFY_STATE, FALSE, arg + 2); + if (nullptr == hEvent) { fprintf(stderr, "Error opening event '%s'\n", arg + 2); exit(1); // TODO: this class shouldn't terminate the process @@ -1186,11 +1203,11 @@ bool CmdLineParser::_ReadParametersFromCmdLine(const int argc, const char *argv[ case 'z': //random seed if (*(arg + 1) == '\0') { - timeSpan.SetRandSeed((ULONG)GetTickCount64()); + timeSpan.SetRandSeed(static_cast(GetTickCount64())); } else { - int c = atoi(arg + 1); + const int c = atoi(arg + 1); if (c >= 0) { timeSpan.SetRandSeed(c); @@ -1205,21 +1222,25 @@ bool CmdLineParser::_ReadParametersFromCmdLine(const int argc, const char *argv[ case 'Z': //zero write buffers if (*(arg + 1) == '\0') { - for (auto i = vTargets.begin(); i != vTargets.end(); i++) + for (auto& vTarget : vTargets) { - i->SetZeroWriteBuffers(true); + vTarget.SetZeroWriteBuffers(true); } } + else if (*(arg + 1) == 'r' && *(arg + 2) == '\0') + { + timeSpan.SetRandomWriteData(true); + } else { UINT64 cb = 0; string sPath; if (_GetRandomDataWriteBufferData(string(arg + 1), cb, sPath) && (cb > 0)) { - for (auto i = vTargets.begin(); i != vTargets.end(); i++) + for (auto& vTarget : vTargets) { - i->SetRandomDataWriteBufferSize(cb); - i->SetRandomDataWriteBufferSourcePath(sPath); + vTarget.SetRandomDataWriteBufferSize(cb); + vTarget.SetRandomDataWriteBufferSourcePath(sPath); } } else @@ -1254,29 +1275,29 @@ bool CmdLineParser::_ReadParametersFromCmdLine(const int argc, const char *argv[ exit(1); // TODO: this class shouldn't terminate the process } - if (vTargets.size() < 1) + if (vTargets.empty()) { fprintf(stderr, "ERROR: need to provide at least one filename\n"); return false; } // apply resultant cache/writethrough modes to the targets - for (auto i = vTargets.begin(); i != vTargets.end(); i++) + for (auto& vTarget : vTargets) { if (t != TargetCacheMode::Undefined) { - i->SetCacheMode(t); + vTarget.SetCacheMode(t); } if (w != WriteThroughMode::Undefined) { - i->SetWriteThroughMode(w); + vTarget.SetWriteThroughMode(w); } } // ... and apply targets to the timespan - for (auto i = vTargets.begin(); i != vTargets.end(); i++) + for (auto& vTarget : vTargets) { - timeSpan.AddTarget(*i); + timeSpan.AddTarget(vTarget); } pProfile->AddTimeSpan(timeSpan); @@ -1285,15 +1306,14 @@ bool CmdLineParser::_ReadParametersFromCmdLine(const int argc, const char *argv[ bool CmdLineParser::_ReadParametersFromXmlFile(const char *pszPath, Profile *pProfile) { - XmlProfileParser parser; - return parser.ParseFile(pszPath, pProfile); + return XmlProfileParser::ParseFile(pszPath, pProfile); } bool CmdLineParser::ParseCmdLine(const int argc, const char *argv[], Profile *pProfile, struct Synchronization *synch, SystemInformation *pSystem) { assert(nullptr != argv); assert(nullptr != pProfile); - assert(NULL != synch); + assert(nullptr != synch); if (argc < 2) { @@ -1314,7 +1334,7 @@ bool CmdLineParser::ParseCmdLine(const int argc, const char *argv[], Profile *pP pProfile->SetCmdLine(sCmdLine); //check if parameters should be read from an xml file - bool fOk = true; + bool fOk; bool fCmdLine; if (argc == 2 && (argv[1][0] == '-' || argv[1][0] == '/') && argv[1][1] == 'X' && argv[1][2] != '\0') @@ -1337,4 +1357,4 @@ bool CmdLineParser::ParseCmdLine(const int argc, const char *argv[], Profile *pP } return fOk; -} \ No newline at end of file +} diff --git a/CmdRequestCreator/CmdRequestCreator.cpp b/CmdRequestCreator/CmdRequestCreator.cpp index 1d2cfd9..bdfae78 100644 --- a/CmdRequestCreator/CmdRequestCreator.cpp +++ b/CmdRequestCreator/CmdRequestCreator.cpp @@ -31,24 +31,23 @@ SOFTWARE. // #include "CmdRequestCreator.h" -#include -#include -#include -#include "common.h" +//#include +#include +#include "Common.h" #include "errors.h" #include "CmdLineParser.h" #include "XmlProfileParser.h" #include "IORequestGenerator.h" #include "ResultParser.h" -#include "XmlResultParser.h" +#include "xmlresultparser.h" /*****************************************************************************/ // global variables -static HANDLE g_hAbortEvent = NULL; // handle to the 'abort' event +static HANDLE g_hAbortEvent = nullptr; // handle to the 'abort' event // it allows stopping I/O Request Generator in the middle of its work // the results of its work will be passed to the Results Parser -static HANDLE g_hEventStarted = NULL; // event signalled to notify that the actual (measured) test is to be started -static HANDLE g_hEventFinished = NULL; // event signalled to notify that the actual test has finished +static HANDLE g_hEventStarted = nullptr; // event signalled to notify that the actual (measured) test is to be started +static HANDLE g_hEventFinished = nullptr; // event signalled to notify that the actual test has finished /*****************************************************************************/ // wrapper for printf. printf cannot be used directly, because IORequestGenerator.dll @@ -74,34 +73,31 @@ BOOL WINAPI ctrlCRoutine(DWORD dwCtrlType) printf("\n*** Interrupted by Ctrl-C. Stopping I/O Request Generator. ***\n"); if( !SetEvent(g_hAbortEvent) ) { - fprintf(stderr, "Warning: Setting abort event failed (error code: %u)\n", GetLastError()); + fprintf(stderr, "Warning: Setting abort event failed (error code: %lu)\n", GetLastError()); } SetConsoleCtrlHandler(ctrlCRoutine, FALSE); //indicate that the signal has been handled return TRUE; } - else - { - return FALSE; - } + return FALSE; } /*****************************************************************************/ void TestStarted() { - if( (NULL != g_hEventStarted) && !SetEvent(g_hEventStarted) ) + if( (nullptr != g_hEventStarted) && !SetEvent(g_hEventStarted) ) { - fprintf(stderr, "Warning: Setting test start notification event failed (error code: %u)\n", GetLastError()); + fprintf(stderr, "Warning: Setting test start notification event failed (error code: %lu)\n", GetLastError()); } } /*****************************************************************************/ void TestFinished() { - if( (NULL != g_hEventFinished) && !SetEvent(g_hEventFinished) ) + if( (nullptr != g_hEventFinished) && !SetEvent(g_hEventFinished) ) { - fprintf(stderr, "Warning: Setting test finish notification event failed (error code: %u)\n", GetLastError()); + fprintf(stderr, "Warning: Setting test finish notification event failed (error code: %lu)\n", GetLastError()); } } @@ -111,10 +107,10 @@ int __cdecl main(int argc, const char* argv[]) // // parse cmd line parameters // - struct Synchronization synch; //sychronization structure + struct Synchronization synch{}; //sychronization structure synch.ulStructSize = sizeof(synch); - synch.hStopEvent = NULL; - synch.hStartEvent = NULL; + synch.hStopEvent = nullptr; + synch.hStartEvent = nullptr; CmdLineParser cmdLineParser; Profile profile; @@ -130,10 +126,10 @@ int __cdecl main(int argc, const char* argv[]) // create abort event if stop event is not explicitly provided by the user (otherwise use the stop event) // - if (NULL == synch.hStopEvent) + if (nullptr == synch.hStopEvent) { - synch.hStopEvent = CreateEvent(NULL, TRUE, FALSE, NULL); - if( NULL == synch.hStopEvent ) + synch.hStopEvent = CreateEvent(nullptr, TRUE, FALSE, nullptr); + if( nullptr == synch.hStopEvent ) { fprintf(stderr, "Unable to create an abort event for CTRL+C\n"); //FUTURE EXTENSION: change error code @@ -157,7 +153,7 @@ int __cdecl main(int argc, const char* argv[]) // ResultParser resultParser; XmlResultParser xmlResultParser; - IResultParser *pResultParser = nullptr; + IResultParser *pResultParser; if (profile.GetResultsFormat() == ResultsFormat::Xml) { pResultParser = &xmlResultParser; @@ -168,17 +164,17 @@ int __cdecl main(int argc, const char* argv[]) } IORequestGenerator ioGenerator; - if (!ioGenerator.GenerateRequests(profile, *pResultParser, (PRINTF)PrintOut, (PRINTF)PrintError, (PRINTF)PrintOut, &synch)) + if (!ioGenerator.GenerateRequests(profile, *pResultParser, static_cast(PrintOut), static_cast(PrintError), static_cast(PrintOut), &synch)) { fprintf(stderr, "Error generating I/O requests\n"); return 1; } - if( NULL != synch.hStartEvent ) + if( nullptr != synch.hStartEvent ) { CloseHandle(synch.hStartEvent); } - if( NULL != synch.hStopEvent ) + if( nullptr != synch.hStopEvent ) { CloseHandle(synch.hStopEvent); } @@ -186,7 +182,7 @@ int __cdecl main(int argc, const char* argv[]) { CloseHandle(g_hEventStarted); } - if( NULL != g_hEventFinished ) + if( nullptr != g_hEventFinished ) { CloseHandle(g_hEventFinished); } diff --git a/Common/CmdLineParser.h b/Common/CmdLineParser.h index 613d02f..fc3a71e 100644 --- a/Common/CmdLineParser.h +++ b/Common/CmdLineParser.h @@ -38,18 +38,19 @@ class CmdLineParser CmdLineParser(); ~CmdLineParser(); - bool ParseCmdLine(const int argc, const char *argv[], Profile *pProfile, struct Synchronization *synch, SystemInformation *pSystem = nullptr); + bool ParseCmdLine(int argc, const char* argv[], Profile* pProfile, struct Synchronization* synch, + SystemInformation* pSystem = nullptr); private: - bool _ReadParametersFromCmdLine(const int argc, const char *argv[], Profile *pProfile, struct Synchronization *synch); - bool _ReadParametersFromXmlFile(const char *pszPath, Profile *pProfile); + bool _ReadParametersFromCmdLine(int argc, const char *argv[], Profile *pProfile, struct Synchronization *synch); + static bool _ReadParametersFromXmlFile(const char *pszPath, Profile *pProfile); - bool _ParseETWParameter(const char *arg, Profile *pProfile); - bool _ParseAffinity(const char *arg, TimeSpan *pTimeSpan); + static bool _ParseETWParameter(const char *arg, Profile *pProfile); + static bool _ParseAffinity(const char *arg, TimeSpan *pTimeSpan); - void _DisplayUsageInfo(const char *pszFilename) const; + static void _DisplayUsageInfo(const char *pszFilename); bool _GetSizeInBytes(const char *pszSize, UINT64& ullSize) const; - bool _GetRandomDataWriteBufferData(const string& sArg, UINT64& cb, string& sPath); + bool _GetRandomDataWriteBufferData(const string& sArg, UINT64& cb, string& sPath) const; // variables that used to be global DWORD _dwBlockSize; // block size; other parameters may be stated in blocks diff --git a/Common/CmdRequestCreator.h b/Common/CmdRequestCreator.h index f8d2937..2439688 100644 --- a/Common/CmdRequestCreator.h +++ b/Common/CmdRequestCreator.h @@ -35,4 +35,4 @@ SOFTWARE. #pragma once #include -#include \ No newline at end of file +#include \ No newline at end of file diff --git a/Common/Common.cpp b/Common/Common.cpp index 4747320..8b72a8d 100644 --- a/Common/Common.cpp +++ b/Common/Common.cpp @@ -92,6 +92,136 @@ UINT64 PerfTimer::SecondsToPerfTime(const double seconds) return static_cast(TIMER_FREQ * seconds); } +Random::Random(UINT64 ulSeed) +{ + _ulState[0] = 0xf1ea5eed; + _ulState[1] = ulSeed; + _ulState[2] = ulSeed; + _ulState[3] = ulSeed; + + for (UINT32 i = 0; i < 20; i++) { + Rand64(); + } +} + +void Random::RandBuffer(BYTE *pBuffer, UINT32 ulLength, bool fPseudoRandomOkay) +{ + auto Remaining = static_cast(reinterpret_cast(pBuffer) & 7); + UINT64 r1, r2, r3, r4; + + // + // Align to 8 bytes + // + + if (Remaining != 0) { + r1 = Rand64(); + + while (Remaining != 0 && ulLength != 0) { + *pBuffer = static_cast(r1 & 0xFF); + r1 >>= 8; + pBuffer++; + ulLength--; + Remaining--; + } + } + + auto*pBuffer64 = reinterpret_cast(pBuffer); + Remaining = ulLength / 8; + ulLength -= Remaining * 8; + pBuffer += Remaining * 8; + + if (fPseudoRandomOkay) { + + // + // Generate 5 random numbers and then mix them to produce + // 16 random (but correlated) numbers. We want to do 16 + // numbers at a time for optimal cache line alignment. + // Only do this if the caller is okay with numbers that + // aren't independent. A detailed analysis of the data + // could probably detect that the first 5 numbers determine + // the next 11. For most purposes this won't matter (for + // instance it's unlikely compression algorithms will be + // able to detect this and utilize it). + // + + while (Remaining > 16) { + r1 = Rand64(); + r2 = Rand64(); + r3 = Rand64(); + r4 = Rand64(); + const UINT64 r5 = Rand64(); + + pBuffer64[0] = r1; + pBuffer64[1] = r2; + pBuffer64[2] = r3; + pBuffer64[3] = r4; + pBuffer64[4] = r5; + + // + // Throw in some rotates so that the below numbers + // aren't the xor sum of previous numbers. + // + + r1 = _rotl64(r1, 7); + pBuffer64[5] = r1 ^ r2; + pBuffer64[6] = r1 ^ r3; + pBuffer64[7] = r1 ^ r4; + pBuffer64[8] = r1 ^ r5; + + r2 = _rotl64(r2, 13); + pBuffer64[9] = r2 ^ r3; + pBuffer64[10] = r2 ^ r4; + pBuffer64[11] = r2 ^ r5; + + r3 = _rotl64(r3, 19); + pBuffer64[12] = r3 ^ r4; + pBuffer64[13] = r3 ^ r5; + + pBuffer64[14] = r1 ^ r2 ^ r3; + pBuffer64[15] = r1 ^ _rotl64(r4 ^ r5, 39); + + pBuffer64 += 16; + Remaining -= 16; + } + } + + // + // Fill in the tail of the buffer + // + + while (Remaining >= 4) { + r1 = Rand64(); + r2 = Rand64(); + r3 = Rand64(); + r4 = Rand64(); + + pBuffer64[0] = r1; + pBuffer64[1] = r2; + pBuffer64[2] = r3; + pBuffer64[3] = r4; + + pBuffer64 += 4; + Remaining -= 4; + } + + while (Remaining != 0) { + *pBuffer64 = Rand64(); + pBuffer64++; + Remaining--; + } + + if (ulLength != 0) { + r1 = Rand64(); + + while (ulLength != 0) { + *pBuffer = static_cast(r1 & 0xFF); + r1 >>= 8; + pBuffer++; + ulLength--; + } + } +} + string Util::DoubleToStringHelper(const double d) { char szFloatBuffer[100]; @@ -100,13 +230,32 @@ string Util::DoubleToStringHelper(const double d) return string(szFloatBuffer); } +string ThreadTarget::GetXml() const +{ + char buffer[4096]; + string sXml("\n"); + + sprintf_s(buffer, _countof(buffer), "%u\n", _ulThread); + sXml += buffer; + + if (_ulWeight != 0) + { + sprintf_s(buffer, _countof(buffer), "%u\n", _ulWeight); + sXml += buffer; + } + + sXml += "\n"; + + return sXml; +} + string Target::GetXml() const { char buffer[4096]; string sXml("\n"); sXml += "" + _sPath + "\n"; - sprintf_s(buffer, _countof(buffer), "%u\n", _dwBlockSize); + sprintf_s(buffer, _countof(buffer), "%lu\n", _dwBlockSize); sXml += buffer; sprintf_s(buffer, _countof(buffer), "%I64u\n", _ullBaseFileOffset); @@ -126,6 +275,11 @@ string Target::GetXml() const case TargetCacheMode::DisableOSCache: sXml += "true\n"; break; + case TargetCacheMode::Cached: + break; + case TargetCacheMode::Undefined: + /* ? */ + break; } // WriteThroughMode::Off is implied default @@ -134,6 +288,11 @@ string Target::GetXml() const case WriteThroughMode::On: sXml += "true\n"; break; + case WriteThroughMode::Off: + break; + case WriteThroughMode::Undefined: + /* ? */ + break; } sXml += "\n"; @@ -151,7 +310,7 @@ string Target::GetXml() const sXml += "\n"; sprintf_s(buffer, _countof(buffer), "%I64u\n", _cbRandomDataWriteBuffer); sXml += buffer; - if (_sRandomDataWriteBufferSourcePath != "") + if (!_sRandomDataWriteBufferSourcePath.empty()) { sXml += "" + _sRandomDataWriteBufferSourcePath + "\n"; } @@ -163,13 +322,13 @@ string Target::GetXml() const if (_fUseBurstSize) { - sprintf_s(buffer, _countof(buffer), "%u\n", _dwBurstSize); + sprintf_s(buffer, _countof(buffer), "%lu\n", _dwBurstSize); sXml += buffer; } if (_fThinkTime) { - sprintf_s(buffer, _countof(buffer), "%u\n", _dwThinkTime); + sprintf_s(buffer, _countof(buffer), "%lu\n", _dwThinkTime); sXml += buffer; } @@ -201,16 +360,16 @@ string Target::GetXml() const sprintf_s(buffer, _countof(buffer), "%I64u\n", _ullMaxFileSize); sXml += buffer; - sprintf_s(buffer, _countof(buffer), "%u\n", _dwRequestCount); + sprintf_s(buffer, _countof(buffer), "%lu\n", _dwRequestCount); sXml += buffer; sprintf_s(buffer, _countof(buffer), "%u\n", _ulWriteRatio); sXml += buffer; - sprintf_s(buffer, _countof(buffer), "%u\n", _dwThroughputBytesPerMillisecond); + sprintf_s(buffer, _countof(buffer), "%lu\n", _dwThroughputBytesPerMillisecond); sXml += buffer; - sprintf_s(buffer, _countof(buffer), "%u\n", _dwThreadsPerFile); + sprintf_s(buffer, _countof(buffer), "%lu\n", _dwThreadsPerFile); sXml += buffer; if (_ioPriorityHint == IoPriorityHintVeryLow) @@ -230,36 +389,40 @@ string Target::GetXml() const sXml += "* UNSUPPORTED *\n"; } + sprintf_s(buffer, _countof(buffer), "%u\n", _ulWeight); + sXml += buffer; + + for (const auto& threadTarget : _vThreadTargets) + { + sXml += threadTarget.GetXml(); + } + sXml += "\n"; return sXml; } -bool Target::_FillRandomDataWriteBuffer() +bool Target::_FillRandomDataWriteBuffer(Random *pRand) const { assert(_pRandomDataWriteBuffer != nullptr); bool fOk = true; - size_t cb = static_cast(GetRandomDataWriteBufferSize()); - if (GetRandomDataWriteBufferSourcePath() == "") + const auto cb = static_cast(GetRandomDataWriteBufferSize()); + if (GetRandomDataWriteBufferSourcePath().empty()) { - // fill buffer with random data - for (size_t i = 0; i < cb; i++) - { - _pRandomDataWriteBuffer[i] = (rand() % 256); - } + pRand->RandBuffer(_pRandomDataWriteBuffer, static_cast(cb), false); } else { // fill buffer from file - HANDLE hFile = CreateFile(GetRandomDataWriteBufferSourcePath().c_str(), GENERIC_READ, FILE_SHARE_READ, nullptr, OPEN_EXISTING, 0, nullptr); + HANDLE hFile = CreateFile(GetRandomDataWriteBufferSourcePath().c_str(), GENERIC_READ, FILE_SHARE_READ, nullptr, OPEN_EXISTING, 0, nullptr); if (hFile != INVALID_HANDLE_VALUE) { - UINT64 cbLeftToRead = GetRandomDataWriteBufferSize(); + const UINT64 cbLeftToRead = GetRandomDataWriteBufferSize(); BYTE *pBuffer = _pRandomDataWriteBuffer; bool fReadSuccess = true; while (fReadSuccess && cbLeftToRead > 0) { - DWORD cbToRead = static_cast(min(64 * 1024, cbLeftToRead)); + const auto cbToRead = static_cast(min(64 * 1024, cbLeftToRead)); DWORD cbRead; fReadSuccess = ((ReadFile(hFile, pBuffer, cbToRead, &cbRead, nullptr) == TRUE) && (cbRead > 0)); pBuffer += cbRead; @@ -283,29 +446,29 @@ bool Target::_FillRandomDataWriteBuffer() return fOk; } -bool Target::AllocateAndFillRandomDataWriteBuffer() +bool Target::AllocateAndFillRandomDataWriteBuffer(Random *pRand) { assert(_pRandomDataWriteBuffer == nullptr); - bool fOk = true; - size_t cb = static_cast(GetRandomDataWriteBufferSize()); + const auto cb = static_cast(GetRandomDataWriteBufferSize()); assert(cb > 0); // TODO: make sure the size if <= max value for size_t if (GetUseLargePages()) { - size_t cbMinLargePage = GetLargePageMinimum(); - size_t cbRoundedSize = (cb + cbMinLargePage - 1) & ~(cbMinLargePage - 1); - _pRandomDataWriteBuffer = (BYTE *)VirtualAlloc(nullptr, cbRoundedSize, MEM_COMMIT | MEM_RESERVE | MEM_LARGE_PAGES, PAGE_EXECUTE_READWRITE); + const size_t cbMinLargePage = GetLargePageMinimum(); + const size_t cbRoundedSize = (cb + cbMinLargePage - 1) & ~(cbMinLargePage - 1); + _pRandomDataWriteBuffer = static_cast(VirtualAlloc(nullptr, cbRoundedSize, MEM_COMMIT | MEM_RESERVE | MEM_LARGE_PAGES, + PAGE_EXECUTE_READWRITE)); } else { - _pRandomDataWriteBuffer = (BYTE *)VirtualAlloc(nullptr, cb, MEM_COMMIT, PAGE_READWRITE); + _pRandomDataWriteBuffer = static_cast(VirtualAlloc(nullptr, cb, MEM_COMMIT, PAGE_READWRITE)); } - fOk = (_pRandomDataWriteBuffer != nullptr); + auto fOk = (_pRandomDataWriteBuffer != nullptr); if (fOk) { - fOk = _FillRandomDataWriteBuffer(); + fOk = _FillRandomDataWriteBuffer(pRand); } return fOk; } @@ -319,26 +482,26 @@ void Target::FreeRandomDataWriteBuffer() } } -BYTE* Target::GetRandomDataWriteBuffer() +BYTE* Target::GetRandomDataWriteBuffer(Random *pRand) const { - size_t cbBuffer = static_cast(GetRandomDataWriteBufferSize()); - size_t cbBlock = GetBlockSizeInBytes(); + const auto cbBuffer = static_cast(GetRandomDataWriteBufferSize()); + const size_t cbBlock = GetBlockSizeInBytes(); // leave enough bytes in the buffer for one block - size_t randomOffset = rand() % (cbBuffer - (cbBlock - 1)); + size_t randomOffset = pRand->Rand32() % (cbBuffer - (cbBlock - 1)); - bool fUnbufferedIO = (_cacheMode == TargetCacheMode::DisableOSCache); + const bool fUnbufferedIO = (_cacheMode == TargetCacheMode::DisableOSCache); if (fUnbufferedIO) { - // for unbuffered IO, offset in the buffer needs to be DWORD-aligned - const size_t cbAlignment = 4; + // for unbuffered IO, offset in the buffer needs to be 512-byte aligned + const size_t cbAlignment = 512; randomOffset -= (randomOffset % cbAlignment); } - BYTE *pBuffer = reinterpret_cast(reinterpret_cast(_pRandomDataWriteBuffer)+randomOffset); + auto pBuffer = reinterpret_cast(reinterpret_cast(_pRandomDataWriteBuffer)+randomOffset); // unbuffered IO needs aligned addresses - assert(!fUnbufferedIO || (reinterpret_cast(pBuffer) % 4 == 0)); + assert(!fUnbufferedIO || (reinterpret_cast(pBuffer) % 512 == 0)); assert(pBuffer >= _pRandomDataWriteBuffer); assert(pBuffer <= _pRandomDataWriteBuffer + GetRandomDataWriteBufferSize() - GetBlockSizeInBytes()); @@ -364,7 +527,10 @@ string TimeSpan::GetXml() const sprintf_s(buffer, _countof(buffer), "%u\n", _ulCoolDown); sXml += buffer; - sprintf_s(buffer, _countof(buffer), "%u\n", _dwThreadCount); + sprintf_s(buffer, _countof(buffer), "%lu\n", _dwThreadCount); + sXml += buffer; + + sprintf_s(buffer, _countof(buffer), "%lu\n", _dwRequestCount); sXml += buffer; sprintf_s(buffer, _countof(buffer), "%u\n", _ulIoBucketDurationInMilliseconds); @@ -373,7 +539,7 @@ string TimeSpan::GetXml() const sprintf_s(buffer, _countof(buffer), "%u\n", _ulRandSeed); sXml += buffer; - if (_vAffinity.size() > 0) + if (!_vAffinity.empty()) { sXml += "\n"; for (const auto& a : _vAffinity) @@ -394,15 +560,15 @@ string TimeSpan::GetXml() const return sXml; } -void TimeSpan::MarkFilesAsPrecreated(const vector vFiles) +void TimeSpan::MarkFilesAsPrecreated(const vector& vFiles) { - for (auto sFile : vFiles) + for (const auto& sFile : vFiles) { - for (auto pTarget = _vTargets.begin(); pTarget != _vTargets.end(); pTarget++) + for (auto& _vTarget : _vTargets) { - if (sFile == pTarget->GetPath()) + if (sFile == _vTarget.GetPath()) { - pTarget->SetPrecreated(true); + _vTarget.SetPrecreated(true); } } } @@ -413,7 +579,7 @@ string Profile::GetXml() const string sXml("\n"); char buffer[4096]; - sprintf_s(buffer, _countof(buffer), "%u\n", _dwProgress); + sprintf_s(buffer, _countof(buffer), "%lu\n", _dwProgress); sXml += buffer; if (_resultsFormat == ResultsFormat::Text) @@ -469,11 +635,11 @@ string Profile::GetXml() const return sXml; } -void Profile::MarkFilesAsPrecreated(const vector vFiles) +void Profile::MarkFilesAsPrecreated(const vector& vFiles) { - for (auto pTimeSpan = _vTimeSpans.begin(); pTimeSpan != _vTimeSpans.end(); pTimeSpan++) + for (auto& _vTimeSpan : _vTimeSpans) { - pTimeSpan->MarkFilesAsPrecreated(vFiles); + _vTimeSpan.MarkFilesAsPrecreated(vFiles); } } @@ -481,20 +647,7 @@ bool Profile::Validate(bool fSingleSpec, SystemInformation *pSystem) const { bool fOk = true; - // Note that if no SystemInformation is provided, we do not verify the profile - // v. the system content. This is used to limit code churn in the UT. - - if (pSystem != nullptr && - (pSystem->processorTopology._vProcessorGroupInformation.size() > 1 || pSystem->processorTopology._ulProcCount > 64)) - { - fprintf(stderr, "WARNING: Complete CPU utilization cannot currently be gathered within DISKSPD for this system.\n" - " Use alternate mechanisms to gather this data such as perfmon/logman.\n" - " Active KGroups %u > 1 and/or processor count %u > 64.\n", - (int) pSystem->processorTopology._vProcessorGroupInformation.size(), - pSystem->processorTopology._ulProcCount); - } - - if (GetTimeSpans().size() == 0) + if (GetTimeSpans().empty()) { fprintf(stderr, "ERROR: no timespans specified\n"); fOk = false; @@ -511,7 +664,7 @@ bool Profile::Validate(bool fSingleSpec, SystemInformation *pSystem) const { fprintf(stderr, "ERROR: affinity assignment to group %u; system only has %u groups\n", Affinity.wGroup, - (int) pSystem->processorTopology._vProcessorGroupInformation.size()); + static_cast(pSystem->processorTopology._vProcessorGroupInformation.size())); fOk = false; @@ -528,17 +681,20 @@ bool Profile::Validate(bool fSingleSpec, SystemInformation *pSystem) const if (fOk && !pSystem->processorTopology._vProcessorGroupInformation[Affinity.wGroup].IsProcessorActive(Affinity.bProc)) { - fprintf(stderr, "ERROR: affinity assignment to group %u core %u not possible; core is not active (current mask 0x%Ix)\n", + fprintf( + stderr, + "ERROR: affinity assignment to group %u core %u not possible; core is not active (current mask 0x%llx)\n", Affinity.wGroup, Affinity.bProc, - pSystem->processorTopology._vProcessorGroupInformation[Affinity.wGroup]._activeProcessorMask); + pSystem->processorTopology._vProcessorGroupInformation[Affinity.wGroup]. + _activeProcessorMask); fOk = false; } } } - if (timeSpan.GetDisableAffinity() && timeSpan.GetAffinityAssignments().size() > 0) + if (timeSpan.GetDisableAffinity() && !timeSpan.GetAffinityAssignments().empty()) { fprintf(stderr, "ERROR: -n and -a parameters cannot be used together\n"); fOk = false; @@ -567,6 +723,47 @@ bool Profile::Validate(bool fSingleSpec, SystemInformation *pSystem) const fOk = false; } + if (timeSpan.GetThreadCount() > 0 && timeSpan.GetRequestCount() > 0) + { + if (target.GetThroughputInBytesPerMillisecond() > 0) + { + fprintf(stderr, "ERROR: -g throughput control cannot be used with -O outstanding requests per thread\n"); + fOk = false; + } + + if (target.GetThinkTime() > 0) + { + fprintf(stderr, "ERROR: -j think time cannot be used with -O outstanding requests per thread\n"); + fOk = false; + } + + if (target.GetUseParallelAsyncIO()) + { + fprintf(stderr, "ERROR: -p parallel IO cannot be used with -O outstanding requests per thread\n"); + fOk = false; + } + + if (target.GetWeight() == 0) + { + fprintf(stderr, "ERROR: a non-zero target Weight must be specified\n"); + fOk = false; + } + + for (const auto& threadTarget : target.GetThreadTargets()) + { + if (threadTarget.GetThread() >= timeSpan.GetThreadCount()) + { + fprintf(stderr, "ERROR: illegal thread specified for ThreadTarget\n"); + fOk = false; + } + } + } + else if (!target.GetThreadTargets().empty()) + { + fprintf(stderr, "ERROR: ThreadTargets can only be specified when the timespan ThreadCount and RequestCount are specified\n"); + fOk = false; + } + // FIXME: we can no longer do this check, because the target no longer // contains a property that uniquely identifies the case where "-s" or // was passed. @@ -648,7 +845,7 @@ bool Profile::Validate(bool fSingleSpec, SystemInformation *pSystem) const { if (target.GetRandomDataWriteBufferSize() < target.GetBlockSizeInBytes()) { - fprintf(stderr, "ERROR: custom write buffer (-Z) is smaller than the block size. Write buffer size: %I64u block size: %u\n", + fprintf(stderr, "ERROR: custom write buffer (-Z) is smaller than the block size. Write buffer size: %I64u block size: %lu\n", target.GetRandomDataWriteBufferSize(), target.GetBlockSizeInBytes()); fOk = false; @@ -672,21 +869,31 @@ bool Profile::Validate(bool fSingleSpec, SystemInformation *pSystem) const bool ThreadParameters::AllocateAndFillBufferForTarget(const Target& target) { - bool fOk = true; - BYTE *pDataBuffer = nullptr; - size_t cbDataBuffer = target.GetBlockSizeInBytes() * target.GetRequestCount(); + DWORD requestCount = target.GetRequestCount(); + + // Use global request count + if (pTimeSpan->GetThreadCount() != 0 && + pTimeSpan->GetRequestCount() != 0) { + + requestCount = pTimeSpan->GetRequestCount(); + } + + // Create separate read & write buffers so the write content doesn't get overriden by reads + const auto cbDataBuffer = static_cast(target.GetBlockSizeInBytes()) * requestCount * 2; + BYTE *pDataBuffer; if (target.GetUseLargePages()) { - size_t cbMinLargePage = GetLargePageMinimum(); - size_t cbRoundedSize = (cbDataBuffer + cbMinLargePage - 1) & ~(cbMinLargePage - 1); - pDataBuffer = (BYTE *)VirtualAlloc(nullptr, cbRoundedSize, MEM_COMMIT | MEM_RESERVE | MEM_LARGE_PAGES, PAGE_EXECUTE_READWRITE); + const size_t cbMinLargePage = GetLargePageMinimum(); + const size_t cbRoundedSize = (cbDataBuffer + cbMinLargePage - 1) & ~(cbMinLargePage - 1); + pDataBuffer = static_cast(VirtualAlloc(nullptr, cbRoundedSize, MEM_COMMIT | MEM_RESERVE | MEM_LARGE_PAGES, + PAGE_EXECUTE_READWRITE)); } else { - pDataBuffer = (BYTE *)VirtualAlloc(nullptr, cbDataBuffer, MEM_COMMIT, PAGE_READWRITE); + pDataBuffer = static_cast(VirtualAlloc(nullptr, cbDataBuffer, MEM_COMMIT, PAGE_READWRITE)); } - fOk = (pDataBuffer != nullptr); + const bool fOk = (pDataBuffer != nullptr); //fill buffer (useful only for write tests) if (fOk && target.GetWriteRatio() > 0) @@ -699,7 +906,8 @@ bool ThreadParameters::AllocateAndFillBufferForTarget(const Target& target) { for (size_t i = 0; i < cbDataBuffer; i++) { - pDataBuffer[i] = (BYTE)(i % 256); + pDataBuffer[i] = static_cast(i % 256); + /* NB: the writable size may be 'cbRoundedSize' bytes, but '2' bytes might be written */ } } } @@ -707,6 +915,7 @@ bool ThreadParameters::AllocateAndFillBufferForTarget(const Target& target) if (fOk) { vpDataBuffers.push_back(pDataBuffer); + vulReadBufferSize.push_back(cbDataBuffer / 2); } return fOk; @@ -719,17 +928,32 @@ BYTE* ThreadParameters::GetReadBuffer(size_t iTarget, size_t iRequest) BYTE* ThreadParameters::GetWriteBuffer(size_t iTarget, size_t iRequest) { - BYTE *pBuffer = nullptr; + BYTE *pBuffer; Target& target(vTargets[iTarget]); - size_t cb = static_cast(target.GetRandomDataWriteBufferSize()); + const auto cb = static_cast(target.GetRandomDataWriteBufferSize()); if (cb == 0) { - pBuffer = vpDataBuffers[iTarget] + (iRequest * vTargets[iTarget].GetBlockSizeInBytes()); + pBuffer = vpDataBuffers[iTarget] + vulReadBufferSize[iTarget] + (iRequest * vTargets[iTarget].GetBlockSizeInBytes()); + + // + // This is a very efficient algorithm for generating random content at + // run-time. When tested in a single-threaded, CPU limited environment + // with 4K random writes, doing memset to fill the buffer got 112K IOPS, + // this algorithm got 111K IOPS. Using a static buffer got 118K IOPS. + // This was tested with a 64-bit diskspd.exe. With a 32-bit version it + // may be more efficient to do 32-bit operations. + // + + if (pTimeSpan->GetRandomWriteData() && + !target.GetZeroWriteBuffers()) + { + pRand->RandBuffer(pBuffer, vTargets[iTarget].GetBlockSizeInBytes(), true); + } } else { - pBuffer = target.GetRandomDataWriteBuffer(); + pBuffer = target.GetRandomDataWriteBuffer(pRand); } return pBuffer; } @@ -743,5 +967,11 @@ DWORD ThreadParameters::GetTotalRequestCount() const cRequests += t.GetRequestCount(); } + if (pTimeSpan->GetRequestCount() != 0 && + pTimeSpan->GetThreadCount() != 0) + { + cRequests = pTimeSpan->GetRequestCount(); + } + return cRequests; } diff --git a/Common/Common.h b/Common/Common.h index 9862b85..2c77ace 100644 --- a/Common/Common.h +++ b/Common/Common.h @@ -30,12 +30,12 @@ SOFTWARE. #pragma once #include -#include #include #include //ntdll.dll -#include +#include #include "Histogram.h" #include "IoBucketizer.h" +#include "ThroughputMeter.h" using namespace std; @@ -48,11 +48,11 @@ using namespace std; // // Monday, June 16, 2014 12:00:00 AM -#define DISKSPD_RELEASE_TAG "" -#define DISKSPD_NUMERIC_VERSION_STRING "2.0.18a" DISKSPD_RELEASE_TAG -#define DISKSPD_DATE_VERSION_STRING "2016/5/31" +#define DISKSPD_RELEASE_TAG " (MS)" +#define DISKSPD_NUMERIC_VERSION_STRING "2.0.19a" DISKSPD_RELEASE_TAG +#define DISKSPD_DATE_VERSION_STRING "2017/4/28" -typedef void (WINAPI *PRINTF)(const char*, va_list); //function used for displaying formatted data (printf style) +using PRINTF = void(WINAPI *)(const char*, va_list); //function used for displaying formatted data (printf style) struct ETWEventCounters { @@ -138,16 +138,16 @@ class PerfTimer static UINT64 GetTime(); - static double PerfTimeToMicroseconds(const double); - static double PerfTimeToMilliseconds(const double); - static double PerfTimeToSeconds(const double); - static double PerfTimeToMicroseconds(const UINT64); - static double PerfTimeToMilliseconds(const UINT64); - static double PerfTimeToSeconds(const UINT64); + static double PerfTimeToMicroseconds(double); + static double PerfTimeToMilliseconds(double); + static double PerfTimeToSeconds(double); + static double PerfTimeToMicroseconds(UINT64); + static double PerfTimeToMilliseconds(UINT64); + static double PerfTimeToSeconds(UINT64); - static UINT64 MicrosecondsToPerfTime(const double); - static UINT64 MillisecondsToPerfTime(const double); - static UINT64 SecondsToPerfTime(const double); + static UINT64 MicrosecondsToPerfTime(double); + static UINT64 MillisecondsToPerfTime(double); + static UINT64 SecondsToPerfTime(double); private: @@ -157,6 +157,38 @@ class PerfTimer friend class UnitTests::PerfTimerUnitTests; }; +// +// This code implements Bob Jenkins public domain simple random number generator +// See http://burtleburtle.net/bob/rand/smallprng.html for details +// + +class Random +{ +public: + explicit Random(UINT64 ulSeed = 0); + + UINT64 Rand64() + { + const UINT64 e = _ulState[0] - _rotl64(_ulState[1], 7); + _ulState[0] = _ulState[1] ^ _rotl64(_ulState[2], 13); + _ulState[1] = _ulState[2] + _rotl64(_ulState[3], 37); + _ulState[2] = _ulState[3] + e; + _ulState[3] = e + _ulState[0]; + + return _ulState[3]; + } + + UINT32 Rand32() + { + return static_cast(Rand64()); + } + + void RandBuffer(BYTE *pBuffer, UINT32 ulLength, bool fPseudoRandomOkay); + +private: + UINT64 _ulState[4]{}; +}; + struct PercentileDescriptor { double Percentile; @@ -166,7 +198,7 @@ struct PercentileDescriptor class Util { public: - static string DoubleToStringHelper(const double); + static string DoubleToStringHelper(double); template static T QuotientCeiling(T dividend, T divisor) { return (dividend + divisor - 1) / divisor; @@ -197,47 +229,46 @@ class TargetResults void Add(DWORD dwBytesTransferred, IOOperation type, - PUINT64 pullIoStartTime, - PUINT64 pullSpanStartTime, + UINT64 ullIoStartTime, + UINT64 ullSpanStartTime, bool fMeasureLatency, bool fCalculateIopsStdDev ) { - float fDurationMsec = 0; + double lfDurationUsec = 0; UINT64 ullEndTime = 0; + // assume it is worthwhile to stay off of the time query path unless needed (micro-overhead) if (fMeasureLatency || fCalculateIopsStdDev) { ullEndTime = PerfTimer::GetTime(); + const UINT64 ullDuration = ullEndTime - ullIoStartTime; + lfDurationUsec = PerfTimer::PerfTimeToMicroseconds(ullDuration); } if (fMeasureLatency) { - UINT64 ullDuration = ullEndTime - *pullIoStartTime; - fDurationMsec = static_cast(PerfTimer::PerfTimeToMicroseconds(ullDuration)); - if (type == IOOperation::ReadIO) { - readLatencyHistogram.Add(fDurationMsec); + readLatencyHistogram.Add(static_cast(lfDurationUsec)); } else { - writeLatencyHistogram.Add(fDurationMsec); + writeLatencyHistogram.Add(static_cast(lfDurationUsec)); } } - UINT64 ullRelativeCompletionTime = 0; if (fCalculateIopsStdDev) { - ullRelativeCompletionTime = ullEndTime - *pullSpanStartTime; + const UINT64 ullRelativeCompletionTime = ullEndTime - ullSpanStartTime; if (type == IOOperation::ReadIO) { - readBucketizer.Add(ullRelativeCompletionTime); + readBucketizer.Add(ullRelativeCompletionTime, lfDurationUsec); } else { - writeBucketizer.Add(ullRelativeCompletionTime); + writeBucketizer.Add(ullRelativeCompletionTime, lfDurationUsec); } } @@ -290,8 +321,8 @@ class Results vector vSystemProcessorPerfInfo; }; -typedef void (*CALLBACK_TEST_STARTED)(); //callback function to notify that the measured test is about to start -typedef void (*CALLBACK_TEST_FINISHED)(); //callback function to notify that the measured test has just finished +using CALLBACK_TEST_STARTED = void(*)(); //callback function to notify that the measured test is about to start +using CALLBACK_TEST_FINISHED = void(*)(); //callback function to notify that the measured test has just finished class ProcessorGroupInformation { @@ -307,36 +338,21 @@ class ProcessorGroupInformation BYTE ActiveProcessorCount, WORD Group, KAFFINITY ActiveProcessorMask) : - _maximumProcessorCount(MaximumProcessorCount), + _groupNumber(Group), + _maximumProcessorCount(MaximumProcessorCount), _activeProcessorCount(ActiveProcessorCount), - _groupNumber(Group), _activeProcessorMask(ActiveProcessorMask) { } - bool IsProcessorActive(BYTE Processor) + bool IsProcessorActive(BYTE Processor) const { - if (IsProcessorValid(Processor) && - (((KAFFINITY)1 << Processor) & _activeProcessorMask) != 0) - { - return true; - } - else - { - return false; - } + return IsProcessorValid(Processor) && ((static_cast(1) << Processor) & _activeProcessorMask) != 0; } - bool IsProcessorValid(BYTE Processor) + bool IsProcessorValid(BYTE Processor) const { - if (Processor < _maximumProcessorCount) - { - return true; - } - else - { - return false; - } + return Processor < _maximumProcessorCount; } }; @@ -345,19 +361,18 @@ class ProcessorTopology public: vector _vProcessorGroupInformation; DWORD _ulProcCount; + DWORD _ulActiveProcCount; ProcessorTopology() { - BOOL fResult; - PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX pInformation; - DWORD ReturnedLength = 1024; - pInformation = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX) new char[ReturnedLength]; + DWORD ReturnedLength = 1024; + auto pInformation = reinterpret_cast(new char[ReturnedLength]); - fResult = GetLogicalProcessorInformationEx(RelationGroup, pInformation, &ReturnedLength); + BOOL fResult = GetLogicalProcessorInformationEx(RelationGroup, pInformation, &ReturnedLength); if (!fResult && GetLastError() == ERROR_INSUFFICIENT_BUFFER) { delete [] pInformation; - pInformation = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX) new char[ReturnedLength]; + pInformation = reinterpret_cast(new char[ReturnedLength]); fResult = GetLogicalProcessorInformationEx(RelationGroup, pInformation, &ReturnedLength); } @@ -366,6 +381,7 @@ class ProcessorTopology // Group information comes back as a single (large) element, not an array. assert(ReturnedLength == pInformation->Size); _ulProcCount = 0; + _ulActiveProcCount = 0; // Fill in group topology vector so we can answer questions about active/max procs for (WORD i = 0; i < pInformation->Group.ActiveGroupCount; i++) @@ -377,22 +393,16 @@ class ProcessorTopology pInformation->Group.GroupInfo[i].ActiveProcessorMask ); _ulProcCount += _vProcessorGroupInformation[i]._maximumProcessorCount; + _ulActiveProcCount += _vProcessorGroupInformation[i]._activeProcessorCount; } } delete [] pInformation; } - bool IsGroupValid(WORD Group) + bool IsGroupValid(WORD Group) const { - if (Group < _vProcessorGroupInformation.size()) - { - return true; - } - else - { - return false; - } + return Group < _vProcessorGroupInformation.size(); } // Return the next active processor in the system, exclusive (Next = true) @@ -434,15 +444,14 @@ class SystemInformation string sComputerName; ProcessorTopology processorTopology; - SystemInformation() + SystemInformation() : StartTime({}) { // System Name char buffer[64]; DWORD cb = _countof(buffer); - BOOL fResult; #pragma prefast(suppress:38020, "Yes, we're aware this is an ANSI API in a UNICODE project") - fResult = GetComputerNameExA(ComputerNamePhysicalDnsHostname, buffer, &cb); + const BOOL fResult = GetComputerNameExA(ComputerNamePhysicalDnsHostname, buffer, &cb); if (fResult) { sComputerName = buffer; @@ -453,12 +462,12 @@ class SystemInformation } // for unit test, squelch variable timestamp - void SystemInformation::ResetTime() + void ResetTime() { StartTime = { 0 }; } - string SystemInformation::GetXml() const + string GetXml() const { char szBuffer[64]; // enough for 64bit mask (17ch) and timestamp int nWritten; @@ -552,6 +561,29 @@ enum class WriteThroughMode { On, }; +class ThreadTarget +{ +public: + + ThreadTarget() : + _ulThread(0xFFFFFFFF), + _ulWeight(0) + { + } + + void SetThread(UINT32 ulThread) { _ulThread = ulThread; } + UINT32 GetThread() const { return _ulThread; } + + void SetWeight(UINT32 ulWeight) { _ulWeight = ulWeight; } + UINT32 GetWeight() const { return _ulWeight; } + + string GetXml() const; + +private: + UINT32 _ulThread; + UINT32 _ulWeight; +}; + class Target { public: @@ -579,19 +611,20 @@ class Target _dwBurstSize(0), _dwThinkTime(0), _fThinkTime(false), + _dwThroughputBytesPerMillisecond(0), _fSequentialScanHint(false), _fRandomAccessHint(false), _fTemporaryFileHint(false), _fUseLargePages(false), - _ioPriorityHint(IoPriorityHintNormal), - _dwThroughputBytesPerMillisecond(0), _cbRandomDataWriteBuffer(0), - _sRandomDataWriteBufferSourcePath(), - _pRandomDataWriteBuffer(nullptr) + _sRandomDataWriteBufferSourcePath(string()), + _pRandomDataWriteBuffer(nullptr), + _ioPriorityHint(IoPriorityHintNormal), + _ulWeight(1) { } - void SetPath(string sPath) { _sPath = sPath; } + void SetPath(const string& sPath) { _sPath = sPath; } string GetPath() const { return _sPath; } void SetBlockSizeInBytes(DWORD dwBlockSize) { _dwBlockSize = dwBlockSize; } @@ -613,6 +646,7 @@ class Target void SetBaseFileOffsetInBytes(UINT64 ullBaseFileOffset) { _ullBaseFileOffset = ullBaseFileOffset; } UINT64 GetBaseFileOffsetInBytes() const { return _ullBaseFileOffset; } + UINT64 GetThreadBaseFileOffsetInBytes(UINT32 ulThreadNo) const { return _ullBaseFileOffset + ulThreadNo * _ullThreadStride; } void SetSequentialScanHint(bool fBool) { _fSequentialScanHint = fBool; } bool GetSequentialScanHint() const { return _fSequentialScanHint; } @@ -639,9 +673,9 @@ class Target bool GetZeroWriteBuffers() const { return _fZeroWriteBuffers; } void SetRandomDataWriteBufferSize(UINT64 cbWriteBuffer) { _cbRandomDataWriteBuffer = cbWriteBuffer; } - UINT64 GetRandomDataWriteBufferSize(void) const { return _cbRandomDataWriteBuffer; } + UINT64 GetRandomDataWriteBufferSize() const { return _cbRandomDataWriteBuffer; } - void SetRandomDataWriteBufferSourcePath(string sPath) { _sRandomDataWriteBufferSourcePath = sPath; } + void SetRandomDataWriteBufferSourcePath(const string& sPath) { _sRandomDataWriteBufferSourcePath = sPath; } string GetRandomDataWriteBufferSourcePath() const { return _sRandomDataWriteBufferSourcePath; } void SetUseBurstSize(bool fBool) { _fUseBurstSize = fBool; } @@ -687,6 +721,15 @@ class Target } PRIORITY_HINT GetIOPriorityHint() const { return _ioPriorityHint; } + void SetWeight(UINT32 ulWeight) { _ulWeight = ulWeight; } + UINT32 GetWeight() const { return _ulWeight; } + + void AddThreadTarget(const ThreadTarget &threadTarget) + { + _vThreadTargets.push_back(threadTarget); + } + vector GetThreadTargets() const { return _vThreadTargets; } + void SetPrecreated(bool fBool) { _fPrecreated = fBool; } bool GetPrecreated() const { return _fPrecreated; } @@ -695,11 +738,11 @@ class Target string GetXml() const; - bool AllocateAndFillRandomDataWriteBuffer(); + bool AllocateAndFillRandomDataWriteBuffer(Random *pRand); void FreeRandomDataWriteBuffer(); - BYTE* GetRandomDataWriteBuffer(); + BYTE* GetRandomDataWriteBuffer(Random *pRand) const; - DWORD GetCreateFlags(bool fAsync) + DWORD GetCreateFlags(bool fAsync) const { DWORD dwFlags = FILE_ATTRIBUTE_NORMAL; @@ -718,7 +761,7 @@ class Target dwFlags |= FILE_ATTRIBUTE_TEMPORARY; } - if (GetRequestCount() > 1 || fAsync) + if (fAsync) { dwFlags |= FILE_FLAG_OVERLAPPED; } @@ -779,7 +822,10 @@ class Target PRIORITY_HINT _ioPriorityHint; - bool _FillRandomDataWriteBuffer(); + UINT32 _ulWeight; + vector _vThreadTargets; + + bool _FillRandomDataWriteBuffer(Random *pRand) const; friend class UnitTests::ProfileUnitTests; friend class UnitTests::TargetUnitTests; @@ -808,6 +854,8 @@ class TimeSpan _ulCoolDown(0), _ulRandSeed(0), _dwThreadCount(0), + _dwRequestCount(0), + _fRandomWriteData(false), _fDisableAffinity(false), _fCompletionRoutines(false), _fMeasureLatency(false), @@ -830,6 +878,7 @@ class TimeSpan { _vTargets.push_back(Target(target)); } + vector GetTargets() const { return _vTargets; } void SetDuration(UINT32 ulDuration) { _ulDuration = ulDuration; } @@ -844,9 +893,15 @@ class TimeSpan void SetRandSeed(UINT32 ulRandSeed) { _ulRandSeed = ulRandSeed; } UINT32 GetRandSeed() const { return _ulRandSeed; } + void SetRandomWriteData(bool fRandomWriteData) { _fRandomWriteData = fRandomWriteData; } + bool GetRandomWriteData() const { return _fRandomWriteData; } + void SetThreadCount(DWORD dwThreadCount) { _dwThreadCount = dwThreadCount; } DWORD GetThreadCount() const { return _dwThreadCount; } + void SetRequestCount(DWORD dwRequestCount) { _dwRequestCount = dwRequestCount; } + DWORD GetRequestCount() const { return _dwRequestCount; } + void SetDisableAffinity(bool fDisableAffinity) { _fDisableAffinity = fDisableAffinity; } bool GetDisableAffinity() const { return _fDisableAffinity; } @@ -863,7 +918,7 @@ class TimeSpan UINT32 GetIoBucketDurationInMilliseconds() const { return _ulIoBucketDurationInMilliseconds; } string GetXml() const; - void MarkFilesAsPrecreated(const vector vFiles); + void MarkFilesAsPrecreated(const vector& vFiles); private: vector _vTargets; @@ -872,6 +927,8 @@ class TimeSpan UINT32 _ulCoolDown; UINT32 _ulRandSeed; DWORD _dwThreadCount; + DWORD _dwRequestCount; + bool _fRandomWriteData; bool _fDisableAffinity; vector _vAffinity; bool _fCompletionRoutines; @@ -902,6 +959,8 @@ class Profile Profile() : _fVerbose(false), _dwProgress(0), + _resultsFormat(ResultsFormat::Text), + _precreateFiles(PrecreateFiles::None), _fEtwEnabled(false), _fEtwProcess(false), _fEtwThread(false), @@ -914,9 +973,7 @@ class Profile _fEtwUsePagedMemory(false), _fEtwUsePerfTimer(false), _fEtwUseSystemTimer(false), - _fEtwUseCyclesCounter(false), - _resultsFormat(ResultsFormat::Text), - _precreateFiles(PrecreateFiles::None) + _fEtwUseCyclesCounter(false) { } @@ -938,7 +995,7 @@ class Profile void SetProgress(DWORD dwProgress) { _dwProgress = dwProgress; } DWORD GetProgress() const { return _dwProgress; } - void SetCmdLine(string sCmdLine) { _sCmdLine = sCmdLine; } + void SetCmdLine(const string& sCmdLine) { _sCmdLine = sCmdLine; } string GetCmdLine() const { return _sCmdLine; }; void SetResultsFormat(ResultsFormat format) { _resultsFormat = format; } @@ -978,11 +1035,11 @@ class Profile string GetXml() const; bool Validate(bool fSingleSpec, SystemInformation *pSystem = nullptr) const; - void MarkFilesAsPrecreated(const vector vFiles); + void MarkFilesAsPrecreated(const vector& vFiles); -private: - Profile(const Profile& T); + Profile(const Profile& T) = delete; +private: vector_vTimeSpans; bool _fVerbose; DWORD _dwProgress; @@ -1008,31 +1065,122 @@ class Profile friend class UnitTests::ProfileUnitTests; }; -class ThreadParameters +class IORequest { public: - ThreadParameters() : - pProfile(nullptr), - pTimeSpan(nullptr), - pullSharedSequentialOffsets(nullptr), - ulRandSeed(0), - ulThreadNo(0), - ulRelativeThreadNo(0) + explicit IORequest(Random *pRand) : + _overlapped({}), + _ullTotalWeight(0), + _fEqualWeights(true), + _pRand(pRand), + _pCurrentTarget(nullptr), + _ioType(IOOperation::ReadIO), + _ullStartTime(0), + _ulRequestIndex(0xFFFFFFFF) + { + memset(&_overlapped, 0, sizeof(OVERLAPPED)); + _overlapped.Offset = 0xFFFFFFFF; + _overlapped.OffsetHigh = 0xFFFFFFFF; + } + + static IORequest *OverlappedToIORequest(OVERLAPPED *pOverlapped) + { + return CONTAINING_RECORD(pOverlapped, IORequest, _overlapped); + } + + OVERLAPPED *GetOverlapped() { return &_overlapped; } + + void AddTarget(Target *pTarget, UINT32 ulWeight) + { + _vTargets.push_back(pTarget); + _vulTargetWeights.push_back(ulWeight); + _ullTotalWeight += ulWeight; + + if (ulWeight != _vulTargetWeights[0]) { + _fEqualWeights = false; + } + } + + Target *GetCurrentTarget() const { return _pCurrentTarget; } + + Target *GetNextTarget() { + if (_vTargets.size() == 1) { + _pCurrentTarget = _vTargets[0]; + } + else if (_fEqualWeights) { + _pCurrentTarget = _vTargets[_pRand->Rand32() % _vTargets.size()]; + } + else { + UINT64 ullWeight = _pRand->Rand64() % _ullTotalWeight; + + for (int iTarget = 0; iTarget < _vTargets.size(); iTarget++) { + if (ullWeight < _vulTargetWeights[iTarget]) { + _pCurrentTarget = _vTargets[iTarget]; + break; + } + + ullWeight -= _vulTargetWeights[iTarget]; + } + } + + return _pCurrentTarget; } + void SetIoType(IOOperation ioType) { _ioType = ioType; } + IOOperation GetIoType() const { return _ioType; } + + void SetStartTime(UINT64 ullStartTime) { _ullStartTime = ullStartTime; } + UINT64 GetStartTime() const { return _ullStartTime; } + + void SetRequestIndex(UINT32 ulRequestIndex) { _ulRequestIndex = ulRequestIndex; } + UINT32 GetRequestIndex() const { return _ulRequestIndex; } + +private: + OVERLAPPED _overlapped{}; + vector _vTargets; + vector _vulTargetWeights; + UINT64 _ullTotalWeight; + bool _fEqualWeights; + Random *_pRand; + Target *_pCurrentTarget; + IOOperation _ioType; + UINT64 _ullStartTime; + UINT32 _ulRequestIndex; +}; + +class ThreadParameters +{ +public: + ThreadParameters() : + pProfile(nullptr), + pTimeSpan(nullptr), + pullSharedSequentialOffsets(nullptr), + pRand(nullptr), + ulRandSeed(0), + ulThreadNo(0), + ulRelativeThreadNo(0), + pfAccountingOn(nullptr), + pullStartTime(nullptr), + pResults(nullptr), + dwIOCnt(0), + wGroupNum(0), + bProcNum(0), + hStartEvent(nullptr), + hEndEvent(nullptr) + { + } + const Profile *pProfile; const TimeSpan *pTimeSpan; vector vTargets; vector vhTargets; vector vullFileSizes; + vector vulReadBufferSize; vector vpDataBuffers; - vector vOverlapped; // each target has RequestCount OVERLAPPED structures - vector vOverlappedIdToTargetId; - vector vFirstOverlappedIdForTargetId; //id of the first overlapped structure in the vOverlapped vector by target - vector vdwIoType; //as many as vOverlapped; used by the completion routines - vector vIoStartTimes; + vector vIORequest; + vector vThroughputMeters; // For vanilla sequential access (-s): // Private per-thread offsets, incremented directly, indexed to number of targets @@ -1042,6 +1190,8 @@ class ThreadParameters // Pointers to offsets shared between threads, incremented with an interlocked op UINT64* pullSharedSequentialOffsets; + Random *pRand; + UINT32 ulRandSeed; UINT32 ulThreadNo; UINT32 ulRelativeThreadNo; @@ -1051,6 +1201,9 @@ class ThreadParameters PUINT64 pullStartTime; ThreadResults *pResults; + //progress dots + DWORD dwIOCnt; + //group affinity WORD wGroupNum; DWORD bProcNum; @@ -1065,12 +1218,12 @@ class ThreadParameters BYTE* GetWriteBuffer(size_t iTarget, size_t iRequest); DWORD GetTotalRequestCount() const; -private: - ThreadParameters(const ThreadParameters& T); + ThreadParameters(const ThreadParameters& T) = delete; }; class IResultParser { public: - virtual string ParseResults(Profile& profile, const SystemInformation& system, vector vResults) = 0; + virtual ~IResultParser() = default; + virtual string ParseResults(Profile& profile, const SystemInformation& system, vector vResults) = 0; }; diff --git a/Common/Histogram.h b/Common/Histogram.h index ee0a312..b6e0b61 100644 --- a/Common/Histogram.h +++ b/Common/Histogram.h @@ -78,7 +78,7 @@ class Histogram void Add(T v) { - _data[ v ]++; + ++_data[ v ]; /* _data[ v ] just discards result */ _samples++; } @@ -167,11 +167,11 @@ class Histogram double GetMean() const { double sum(0); - unsigned samples = GetSampleSize(); + const unsigned samples = GetSampleSize(); for (auto i : _data) { - double bucket_val = + const double bucket_val = static_cast(i.first) * i.second / samples; if (sum + bucket_val < 0) @@ -186,13 +186,13 @@ class Histogram } double GetStandardDeviation() const - { - double mean(GetMean()); + { + const double mean(GetMean()); double ssd(0); for (auto i : _data) { - double dev = static_cast(i.first) - mean; + const double dev = static_cast(i.first) - mean; double sqdev = dev*dev; ssd += i.second * sqdev; } diff --git a/Common/IORequestGenerator.h b/Common/IORequestGenerator.h index 9e564c0..e23f506 100644 --- a/Common/IORequestGenerator.h +++ b/Common/IORequestGenerator.h @@ -40,6 +40,8 @@ namespace UnitTests class IORequestGeneratorUnitTests; } +#define FIRST_OFFSET 0xFFFFFFFFFFFFFFFFULL + class IORequestGenerator { public: @@ -51,34 +53,32 @@ class IORequestGenerator bool GenerateRequests(Profile& profile, IResultParser& resultParser, PRINTF pPrintOut, PRINTF pPrintError, PRINTF pPrintVerbose, struct Synchronization *pSynch); static UINT64 GetNextFileOffset(ThreadParameters& tp, size_t targetNum, UINT64 prevOffset); - static UINT64 GetStartingFileOffset(ThreadParameters& tp, size_t targetNum); - static UINT64 GetThreadBaseFileOffset(ThreadParameters& tp, size_t targetNum); private: struct CreateFileParameters { - string sPath; - UINT64 ullFileSize; - bool fZeroWriteBuffers; + string sPath; + UINT64 ullFileSize = 0; + bool fZeroWriteBuffers=false; }; bool _GenerateRequestsForTimeSpan(const Profile& profile, const TimeSpan& timeSpan, Results& results, struct Synchronization *pSynch); - void _AbortWorkerThreads(HANDLE hStartEvent, vector& vhThreads) const; - void _CloseOpenFiles(vector& vhFiles) const; - DWORD _CreateDirectoryPath(const char *path) const; + static void _AbortWorkerThreads(HANDLE hStartEvent, vector& vhThreads); + static void _CloseOpenFiles(vector& vhFiles); + DWORD _CreateDirectoryPath(const char *pszPath) const; bool _CreateFile(UINT64 ullFileSize, const char *pszFilename, bool fZeroBuffers, bool fVerbose) const; - void _DisplayFileSizeVerbose(bool fVerbose, UINT64 fsize) const; - bool _GetActiveGroupsAndProcs() const; - struct ETWSessionInfo _GetResultETWSession(const EVENT_TRACE_PROPERTIES *pTraceProperties) const; - bool _GetSystemPerfInfo(SYSTEM_PROCESSOR_PERFORMANCE_INFORMATION *pInfo, UINT32 uCpuCount) const; + static void _DisplayFileSizeVerbose(bool fVerbose, UINT64 fsize); + bool _GetActiveGroupsAndProcs() const; /* not implemented */ + static struct ETWSessionInfo _GetResultETWSession(const EVENT_TRACE_PROPERTIES *pTraceProperties); + static bool _GetSystemPerfInfo(SYSTEM_PROCESSOR_PERFORMANCE_INFORMATION *pInfo, UINT32 uCpuCount); void _InitializeGlobalParameters(); bool _LoadDLLs(); - bool _StopETW(bool fUseETW, TRACEHANDLE hTraceSession) const; - void _TerminateWorkerThreads(vector& vhThreads) const; - bool _ValidateProfile(const Profile& profile) const; - vector _GetFilesToPrecreate(const Profile& profile) const; - void _MarkFilesAsCreated(Profile& profile, const vector& vFiles) const; + static bool _StopETW(bool fUseETW, TRACEHANDLE hTraceSession); + static void _TerminateWorkerThreads(vector& vhThreads); + bool _ValidateProfile(const Profile& profile) const; /* Not implemented*/ + static vector _GetFilesToPrecreate(const Profile& profile); + void _MarkFilesAsCreated(Profile& profile, const vector& vFiles) const; /* Not implemented */ bool _PrecreateFiles(Profile& profile) const; HINSTANCE volatile _hNTDLL; //handle to ntdll.dll diff --git a/Common/IoBucketizer.cpp b/Common/IoBucketizer.cpp index 8be6306..d82a968 100644 --- a/Common/IoBucketizer.cpp +++ b/Common/IoBucketizer.cpp @@ -29,11 +29,26 @@ SOFTWARE. #include "IoBucketizer.h" +/* +Calculating stddev using an online algorithm: +avg = sum(1..n, a[n]) / n +stddev = sqrt(sum(1..n, (a[n] - avg)^2) / n) + = sqrt(sum(1..n, a[n]^2 - 2 * a[n] * avg + avg^2) / n) + = sqrt((sum(1..n, a[n]^2) - 2 * avg * sum(1..n, a[n]) + n * avg^2) / n) + = sqrt((sum(1..n, a[n]^2) - 2 * (sum(1..n, a[n]) / n) * sum(1..n, a[n]) + n * (sum(1..n], a[n]) / n)^2) / n) + = sqrt((sum(1..n, a[n]^2) - (2 / n) * sum(1..n, a[n])^2 + (1 / n) * sum(1..n, a[n])^2) / n) + = sqrt((sum(1..n, a[n]^2) - (1 / n) * sum(1..n, a[n])^2) / n) + +So if we track n, sum(a[n]) and sum(a[n]^2) we can calculate the stddev. This +is used to calculate the stddev of the latencies below. +*/ + const unsigned __int64 INVALID_BUCKET_DURATION = 0; IoBucketizer::IoBucketizer() : _bucketDuration(INVALID_BUCKET_DURATION), - _validBuckets(0) + _validBuckets(0), + _totalBuckets(0) {} void IoBucketizer::Initialize(unsigned __int64 bucketDuration, size_t validBuckets) @@ -49,77 +64,130 @@ void IoBucketizer::Initialize(unsigned __int64 bucketDuration, size_t validBucke _bucketDuration = bucketDuration; _validBuckets = validBuckets; - _vBuckets.reserve(_validBuckets); + _vBuckets.resize(_validBuckets); } -void IoBucketizer::Add(unsigned __int64 ioCompletionTime) +void IoBucketizer::Add(unsigned __int64 ioCompletionTime, double ioDuration) { if (_bucketDuration == INVALID_BUCKET_DURATION) { throw std::runtime_error("IoBucketizer has not been initialized"); } - size_t bucketNumber = static_cast(ioCompletionTime / _bucketDuration); - size_t currentSize = _vBuckets.size(); - if (currentSize < bucketNumber + 1) + const auto bucketNumber = static_cast(ioCompletionTime / _bucketDuration); + _totalBuckets = bucketNumber + 1; + + if (bucketNumber >= _validBuckets) { - _vBuckets.resize(bucketNumber + 1); - // Zero the new entries. Note that size is 1-based and bucketNumber is 0-based. - for (size_t i = currentSize; i <= bucketNumber; i++) - { - _vBuckets[i] = 0; - } + return; + } + + _vBuckets[bucketNumber].lfSumDuration += ioDuration; + _vBuckets[bucketNumber].lfSumSqrDuration += ioDuration * ioDuration; + + if (_vBuckets[bucketNumber].ulCount == 0 || + ioDuration < _vBuckets[bucketNumber].lfMinDuration) + { + _vBuckets[bucketNumber].lfMinDuration = ioDuration; + } + if (_vBuckets[bucketNumber].ulCount == 0 || + ioDuration > _vBuckets[bucketNumber].lfMaxDuration) + { + _vBuckets[bucketNumber].lfMaxDuration = ioDuration; } - _vBuckets[bucketNumber]++; + + _vBuckets[bucketNumber].ulCount++; } size_t IoBucketizer::GetNumberOfValidBuckets() const { - // Buckets beyond this may exist since Add is willing to extend the vector - // beyond the expected number of valid buckets, but they are not comparable - // buckets (straggling IOs over the timespan boundary). - return (_vBuckets.size() > _validBuckets ? _validBuckets : _vBuckets.size()); + return (_totalBuckets > _validBuckets ? _validBuckets : _totalBuckets); +} + +unsigned int IoBucketizer::GetIoBucketCount(size_t bucketNumber) const +{ + if (bucketNumber < _validBuckets) + { + return _vBuckets[bucketNumber].ulCount; + } + + return 0; +} + +double IoBucketizer::GetIoBucketMinDurationUsec(size_t bucketNumber) const +{ + if (bucketNumber < _validBuckets) + { + return _vBuckets[bucketNumber].lfMinDuration; + } + + return 0; +} + +double IoBucketizer::GetIoBucketMaxDurationUsec(size_t bucketNumber) const +{ + if (bucketNumber < _validBuckets) + { + return _vBuckets[bucketNumber].lfMaxDuration; + } + + return 0; } -size_t IoBucketizer::GetNumberOfBuckets() const +double IoBucketizer::GetIoBucketAvgDurationUsec(size_t bucketNumber) const { - return _vBuckets.size(); + if (bucketNumber < _validBuckets && _vBuckets[bucketNumber].ulCount != 0) + { + return _vBuckets[bucketNumber].lfSumDuration / static_cast(_vBuckets[bucketNumber].ulCount); + } + + return 0; } -unsigned int IoBucketizer::GetIoBucket(size_t bucketNumber) const +double IoBucketizer::GetIoBucketDurationStdDevUsec(size_t bucketNumber) const { - return _vBuckets[bucketNumber]; + if (bucketNumber < _validBuckets && _vBuckets[bucketNumber].ulCount != 0) + { + const double sum_of_squares = _vBuckets[bucketNumber].lfSumSqrDuration; + const double square_of_sum = _vBuckets[bucketNumber].lfSumDuration * _vBuckets[bucketNumber].lfSumDuration; + const auto count = static_cast(_vBuckets[bucketNumber].ulCount); + const double square_stddev = (sum_of_squares - (square_of_sum / count)) / count; + + return sqrt(square_stddev); + } + + return 0; } -double IoBucketizer::_GetMean() const -{ - size_t numBuckets = GetNumberOfValidBuckets(); +double IoBucketizer::_GetMeanIOPS() const +{ + const size_t numBuckets = GetNumberOfValidBuckets(); double sum = 0; for (size_t i = 0; i < numBuckets; i++) { - sum += static_cast(_vBuckets[i]) / numBuckets; + sum += static_cast(_vBuckets[i].ulCount) / numBuckets; } return sum; } -double IoBucketizer::GetStandardDeviation() const -{ - size_t numBuckets = GetNumberOfValidBuckets(); +double IoBucketizer::GetStandardDeviationIOPS() const +{ + const size_t numBuckets = GetNumberOfValidBuckets(); if(numBuckets == 0) { return 0.0; } - double mean = _GetMean(); + const double mean = _GetMeanIOPS(); double ssd = 0; for (size_t i = 0; i < numBuckets; i++) { - double dev = static_cast(_vBuckets[i]) - mean; - double sqdev = dev*dev; + const double dev = static_cast(_vBuckets[i].ulCount) - mean; + const double sqdev = dev*dev; ssd += sqdev; } @@ -132,12 +200,28 @@ void IoBucketizer::Merge(const IoBucketizer& other) { _vBuckets.resize(other._vBuckets.size()); } + for(size_t i = 0; i < other._vBuckets.size(); i++) + { + _vBuckets[i].ulCount += other._vBuckets[i].ulCount; + _vBuckets[i].lfSumDuration += other._vBuckets[i].lfSumDuration; + _vBuckets[i].lfSumSqrDuration += other._vBuckets[i].lfSumSqrDuration; + + if (i >= _validBuckets || + other._vBuckets[i].lfMinDuration < _vBuckets[i].lfMinDuration) + { + _vBuckets[i].lfMinDuration = other._vBuckets[i].lfMinDuration; + } + if (other._vBuckets[i].lfMaxDuration > _vBuckets[i].lfMaxDuration) + { + _vBuckets[i].lfMaxDuration = other._vBuckets[i].lfMaxDuration; + } + } if (other._validBuckets > _validBuckets) { _validBuckets = other._validBuckets; } - for(size_t i = 0; i < other._vBuckets.size(); i++) + if (other._totalBuckets > _totalBuckets) { - _vBuckets[i] += other.GetIoBucket(i); + _totalBuckets = other._totalBuckets; } -} \ No newline at end of file +} diff --git a/Common/IoBucketizer.h b/Common/IoBucketizer.h index 512c15d..6d501c4 100644 --- a/Common/IoBucketizer.h +++ b/Common/IoBucketizer.h @@ -38,15 +38,36 @@ class IoBucketizer void Initialize(unsigned __int64 bucketDuration, size_t validBuckets); size_t GetNumberOfValidBuckets() const; - size_t GetNumberOfBuckets() const; - unsigned int GetIoBucket(size_t bucketNumber) const; - void Add(unsigned __int64 ioCompletionTime); - double GetStandardDeviation() const; + unsigned int GetIoBucketCount(size_t bucketNumber) const; + double GetIoBucketMinDurationUsec(size_t bucketNumber) const; + double GetIoBucketMaxDurationUsec(size_t bucketNumber) const; + double GetIoBucketAvgDurationUsec(size_t bucketNumber) const; + double GetIoBucketDurationStdDevUsec(size_t bucketNumber) const; + void Add(unsigned __int64 ioCompletionTime, double ioDuration); + double GetStandardDeviationIOPS() const; void Merge(const IoBucketizer& other); private: - double _GetMean() const; + double _GetMeanIOPS() const; + + struct IoBucket { + IoBucket() : + ulCount(0), + lfMinDuration(0), + lfMaxDuration(0), + lfSumDuration(0), + lfSumSqrDuration(0) + { + } + + unsigned int ulCount; + double lfMinDuration; + double lfMaxDuration; + double lfSumDuration; + double lfSumSqrDuration; + }; unsigned __int64 _bucketDuration; size_t _validBuckets; - std::vector _vBuckets; -}; \ No newline at end of file + size_t _totalBuckets; + std::vector _vBuckets; +}; diff --git a/Common/OverlappedQueue.h b/Common/OverlappedQueue.h index 622ac67..85a8efc 100644 --- a/Common/OverlappedQueue.h +++ b/Common/OverlappedQueue.h @@ -36,11 +36,11 @@ SOFTWARE. class OverlappedQueue { public: - OverlappedQueue(void); + OverlappedQueue(); void Add(OVERLAPPED *pOverlapped); - bool IsEmpty(void) const; - OVERLAPPED * Remove(void); + bool IsEmpty() const; + OVERLAPPED * Remove(); size_t GetCount() const; private: diff --git a/Common/ResultParser.h b/Common/ResultParser.h index 45a2823..18943d9 100644 --- a/Common/ResultParser.h +++ b/Common/ResultParser.h @@ -38,7 +38,7 @@ namespace UnitTests class ResultParser : public IResultParser { public: - string ParseResults(Profile& profile, const SystemInformation& system, vector vResults); + string ParseResults(Profile& profile, const SystemInformation& system, vector vResults) override; private: void _DisplayFileSize(UINT64 fsize); @@ -46,17 +46,17 @@ class ResultParser : public IResultParser void _DisplayETW(struct ETWMask ETWMask, struct ETWEventCounters EtwEventCounters); void _Print(const char *format, ...); void _PrintProfile(const Profile& profile); - void _PrintCpuUtilization(const Results&); + void _PrintCpuUtilization(const Results& results, const SystemInformation& system); enum class _SectionEnum {TOTAL, READ, WRITE}; void _PrintSectionFieldNames(const TimeSpan& timeSpan); void _PrintSectionBorderLine(const TimeSpan& timeSpan); void _PrintSection(_SectionEnum, const TimeSpan&, const Results&); void _PrintLatencyPercentiles(const Results&); - void _PrintLatencyChart(const Histogram& readLatencyHistogram, - const Histogram& writeLatencyHistogram, - const Histogram& totalLatencyHistogram); + void _PrintLatencyChart(const Histogram& readLatencyHistogram, + const Histogram& writeLatencyHistogram, + const Histogram& totalLatencyHistogram); void _PrintTimeSpan(const TimeSpan &timeSpan); - void _PrintTarget(const Target &target, bool fUseThreadsPerFile, bool fCompletionRoutines); + void _PrintTarget(const Target &target, bool fUseThreadsPerFile, bool fUseRequestsPerFile, bool fCompletionRoutines); string _sResult; diff --git a/Common/ThroughputMeter.h b/Common/ThroughputMeter.h index 0375f05..3bd710c 100644 --- a/Common/ThroughputMeter.h +++ b/Common/ThroughputMeter.h @@ -37,15 +37,15 @@ SOFTWARE. class ThroughputMeter { public: - ThroughputMeter(void); + ThroughputMeter(); - bool IsRunning(void) const; + bool IsRunning() const; void Start(DWORD cBytesPerMillisecond, DWORD dwBlockSize, DWORD dwThinkTime, DWORD dwBurstSize); - DWORD GetSleepTime(void) const; + DWORD GetSleepTime() const; void Adjust(size_t cb); private: - DWORD _GetThrottleTime(void) const; + DWORD _GetThrottleTime() const; bool _fRunning; // true = throughput monitoring is on bool _fThrottle; // true = throttling is on diff --git a/Common/XmlProfileParser.h b/Common/XmlProfileParser.h index e52d011..236d395 100644 --- a/Common/XmlProfileParser.h +++ b/Common/XmlProfileParser.h @@ -34,27 +34,29 @@ SOFTWARE. class XmlProfileParser { public: - bool ParseFile(const char *pszPath, Profile *pProfile); + static bool ParseFile(const char *pszPath, Profile *pProfile); private: - HRESULT _ParseEtw(IXMLDOMDocument2 *pXmlDoc, Profile *pProfile); - HRESULT _ParseTimeSpans(IXMLDOMDocument2 *pXmlDoc, Profile *pProfile); - HRESULT _ParseTimeSpan(IXMLDOMNode *pXmlNode, TimeSpan *pTimeSpan); - HRESULT _ParseTargets(IXMLDOMNode *pXmlNode, TimeSpan *pTimeSpan); - HRESULT _ParseRandomDataSource(IXMLDOMNode *pXmlNode, Target *pTarget); - HRESULT _ParseWriteBufferContent(IXMLDOMNode *pXmlNode, Target *pTarget); - HRESULT _ParseTarget(IXMLDOMNode *pXmlNode, Target *pTarget); - HRESULT _ParseAffinityAssignment(IXMLDOMNode *pXmlNode, TimeSpan *pTimeSpan); - HRESULT _ParseAffinityGroupAssignment(IXMLDOMNode *pXmlNode, TimeSpan *pTimeSpan); - - HRESULT _GetString(IXMLDOMNode *pXmlNode, const char *pszQuery, string *psValue) const; - HRESULT _GetUINT32(IXMLDOMNode *pXmlNode, const char *pszQuery, UINT32 *pulValue) const; - HRESULT _GetUINT64(IXMLDOMNode *pXmlNode, const char *pszQuery, UINT64 *pullValue) const; - HRESULT _GetDWORD(IXMLDOMNode *pXmlNode, const char *pszQuery, DWORD *pdwValue) const; - HRESULT _GetBool(IXMLDOMNode *pXmlNode, const char *pszQuery, bool *pfValue) const; - - HRESULT _GetUINT32Attr(IXMLDOMNode *pXmlNode, const char *pszAttr, UINT32 *pulValue) const; - - HRESULT _GetVerbose(IXMLDOMDocument2 *pXmlDoc, bool *pfVerbose); - HRESULT _GetProgress(IXMLDOMDocument2 *pXmlDoc, DWORD *pdwProgress); -}; \ No newline at end of file + static HRESULT _ParseEtw(IXMLDOMDocument2 *pXmlDoc, Profile *pProfile); + static HRESULT _ParseTimeSpans(IXMLDOMDocument2 *pXmlDoc, Profile *pProfile); + static HRESULT _ParseTimeSpan(IXMLDOMNode *pXmlNode, TimeSpan *pTimeSpan); + static HRESULT _ParseTargets(IXMLDOMNode *pXmlNode, TimeSpan *pTimeSpan); + static HRESULT _ParseRandomDataSource(IXMLDOMNode *pXmlNode, Target *pTarget); + static HRESULT _ParseWriteBufferContent(IXMLDOMNode *pXmlNode, Target *pTarget); + static HRESULT _ParseTarget(IXMLDOMNode *pXmlNode, Target *pTarget); + static HRESULT _ParseThreadTargets(IXMLDOMNode *pXmlNode, Target *pTarget); + static HRESULT _ParseThreadTarget(IXMLDOMNode *pXmlNode, ThreadTarget *pThreadTarget); + static HRESULT _ParseAffinityAssignment(IXMLDOMNode *pXmlNode, TimeSpan *pTimeSpan); + static HRESULT _ParseAffinityGroupAssignment(IXMLDOMNode *pXmlNode, TimeSpan *pTimeSpan); + + static HRESULT _GetString(IXMLDOMNode *pXmlNode, const char *pszQuery, string *psValue); + static HRESULT _GetUINT32(IXMLDOMNode *pXmlNode, const char *pszQuery, UINT32 *pulValue); + static HRESULT _GetUINT64(IXMLDOMNode *pXmlNode, const char *pszQuery, UINT64 *pullValue); + static HRESULT _GetDWORD(IXMLDOMNode *pXmlNode, const char *pszQuery, DWORD *pdwValue); + static HRESULT _GetBool(IXMLDOMNode *pXmlNode, const char *pszQuery, bool *pfValue); + + static HRESULT _GetUINT32Attr(IXMLDOMNode *pXmlNode, const char *pszAttr, UINT32 *pulValue); + + static HRESULT _GetVerbose(IXMLDOMDocument2 *pXmlDoc, bool *pfVerbose); + static HRESULT _GetProgress(IXMLDOMDocument2 *pXmlDoc, DWORD *pdwProgress); +}; diff --git a/Common/etw.h b/Common/etw.h index a14b670..1dd16ad 100644 --- a/Common/etw.h +++ b/Common/etw.h @@ -31,7 +31,6 @@ SOFTWARE. #include -#include ///WNODE_HEADER #define INITGUID //Include this #define to use SystemTraceControlGuid in Evntrace.h. #include //ETW #include "Common.h" diff --git a/Common/xmlresultparser.h b/Common/xmlresultparser.h index adecfd3..bc78015 100644 --- a/Common/xmlresultparser.h +++ b/Common/xmlresultparser.h @@ -33,10 +33,10 @@ SOFTWARE. class XmlResultParser: public IResultParser { public: - string ParseResults(Profile& profile, const SystemInformation& system, vector vResults); + string ParseResults(Profile& profile, const SystemInformation& system, vector vResults) override; private: - void _PrintCpuUtilization(const Results& results); + void _PrintCpuUtilization(const Results& results, const SystemInformation& system); void _PrintETW(struct ETWMask ETWMask, struct ETWEventCounters EtwEventCounters); void _PrintETWSessionInfo(struct ETWSessionInfo sessionInfo); void _PrintLatencyPercentiles(const Results& results); diff --git a/DiskSpd_Documentation.docx b/DiskSpd_Documentation.docx index 3b09dc4..9cfd18e 100644 Binary files a/DiskSpd_Documentation.docx and b/DiskSpd_Documentation.docx differ diff --git a/DiskSpd_Documentation.pdf b/DiskSpd_Documentation.pdf index e11a750..2180fb3 100644 Binary files a/DiskSpd_Documentation.pdf and b/DiskSpd_Documentation.pdf differ diff --git a/IORequestGenerator/IORequestGenerator.cpp b/IORequestGenerator/IORequestGenerator.cpp index 5cfc208..73155b0 100644 --- a/IORequestGenerator/IORequestGenerator.cpp +++ b/IORequestGenerator/IORequestGenerator.cpp @@ -34,19 +34,16 @@ SOFTWARE. #define _WIN32_WINNT 0x0601 #endif -#include "common.h" +#include "Common.h" #include "IORequestGenerator.h" -#include -#include -#include //DISK_GEOMETRY +#include +//#include #include -#include - -#include //WNODE_HEADER +//#include #include "etw.h" -#include +#include #include "ThroughputMeter.h" #include "OverlappedQueue.h" @@ -55,12 +52,12 @@ SOFTWARE. // UINT64 GetPartitionSize(HANDLE hFile) { - assert(NULL != hFile && INVALID_HANDLE_VALUE != hFile); + assert(nullptr != hFile && INVALID_HANDLE_VALUE != hFile); GET_LENGTH_INFORMATION pinf; OVERLAPPED ovlp = {}; - ovlp.hEvent = CreateEvent(NULL, FALSE, FALSE, NULL); + ovlp.hEvent = CreateEvent(nullptr, FALSE, FALSE, nullptr); if (ovlp.hEvent == nullptr) { PrintError("ERROR: Failed to create event (error code: %u)\n", GetLastError()); @@ -68,21 +65,19 @@ UINT64 GetPartitionSize(HANDLE hFile) } DWORD rbcnt = 0; - DWORD status = ERROR_SUCCESS; - BOOL rslt; - rslt = DeviceIoControl(hFile, - IOCTL_DISK_GET_LENGTH_INFO, - NULL, - 0, - &pinf, - sizeof(pinf), - &rbcnt, - &ovlp); + BOOL rslt = DeviceIoControl(hFile, + IOCTL_DISK_GET_LENGTH_INFO, + nullptr, + 0, + &pinf, + sizeof(pinf), + &rbcnt, + &ovlp); if (!rslt) { - status = GetLastError(); + const DWORD status = GetLastError(); if (status == ERROR_IO_PENDING) { if (WAIT_OBJECT_0 != WaitForSingleObject(ovlp.hEvent, INFINITE)) @@ -115,12 +110,12 @@ UINT64 GetPartitionSize(HANDLE hFile) // UINT64 GetPhysicalDriveSize(HANDLE hFile) { - assert(NULL != hFile && INVALID_HANDLE_VALUE != hFile); + assert(nullptr != hFile && INVALID_HANDLE_VALUE != hFile); DISK_GEOMETRY geom; OVERLAPPED ovlp = {}; - ovlp.hEvent = CreateEvent(NULL, FALSE, FALSE, NULL); + ovlp.hEvent = CreateEvent(nullptr, FALSE, FALSE, nullptr); if (ovlp.hEvent == nullptr) { PrintError("ERROR: Failed to create event (error code: %u)\n", GetLastError()); @@ -128,21 +123,19 @@ UINT64 GetPhysicalDriveSize(HANDLE hFile) } DWORD rbcnt = 0; - DWORD status = ERROR_SUCCESS; - BOOL rslt; - rslt = DeviceIoControl(hFile, - IOCTL_DISK_GET_DRIVE_GEOMETRY, - NULL, - 0, - &geom, - sizeof(geom), - &rbcnt, - &ovlp); + BOOL rslt = DeviceIoControl(hFile, + IOCTL_DISK_GET_DRIVE_GEOMETRY, + nullptr, + 0, + &geom, + sizeof(geom), + &rbcnt, + &ovlp); if (!rslt) { - status = GetLastError(); + const DWORD status = GetLastError(); if (status == ERROR_IO_PENDING) { if (WAIT_OBJECT_0 != WaitForSingleObject(ovlp.hEvent, INFINITE)) @@ -167,10 +160,10 @@ UINT64 GetPhysicalDriveSize(HANDLE hFile) return 0; } - return (UINT64)geom.BytesPerSector * - (UINT64)geom.SectorsPerTrack * - (UINT64)geom.TracksPerCylinder * - (UINT64)geom.Cylinders.QuadPart; + return static_cast(geom.BytesPerSector) * + static_cast(geom.SectorsPerTrack) * + static_cast(geom.TracksPerCylinder) * + static_cast(geom.Cylinders.QuadPart); } /*****************************************************************************/ @@ -242,9 +235,8 @@ Return Value: --*/ { DWORD BytesReturned = 0; - OVERLAPPED Overlapped = { 0 }; + OVERLAPPED Overlapped = { }; DWORD Status = ERROR_SUCCESS; - BOOL Success = false; Overlapped.hEvent = CreateEvent(nullptr, true, false, nullptr); if (!Overlapped.hEvent) @@ -256,7 +248,7 @@ Return Value: #define FSCTL_DISABLE_LOCAL_BUFFERING CTL_CODE(FILE_DEVICE_FILE_SYSTEM, 174, METHOD_BUFFERED, FILE_ANY_ACCESS) #endif - Success = DeviceIoControl(h, + const BOOL Success = DeviceIoControl(h, FSCTL_DISABLE_LOCAL_BUFFERING, nullptr, 0, @@ -277,7 +269,7 @@ Return Value: } else { - Status = (DWORD) Overlapped.Internal; + Status = static_cast(Overlapped.Internal); } } @@ -299,7 +291,7 @@ __declspec(align(4)) static LONG volatile g_lRunningThreadsCount = 0; //must b static BOOL volatile g_bRun; //used for letting threads know that they should stop working -typedef NTSTATUS (__stdcall *NtQuerySysInfo)(SYSTEM_INFORMATION_CLASS, PVOID, ULONG, PULONG); +using NtQuerySysInfo = NTSTATUS(__stdcall *)(SYSTEM_INFORMATION_CLASS, PVOID, ULONG, PULONG); static NtQuerySysInfo g_pfnNtQuerySysInfo; static PRINTF g_pfnPrintOut = nullptr; @@ -320,21 +312,21 @@ VOID SetProcGroupMask(WORD wGroupNum, DWORD dwProcNum, PGROUP_AFFINITY pGroupAff memset(pGroupAffinity, 0, sizeof(GROUP_AFFINITY)); pGroupAffinity->Group = wGroupNum; - pGroupAffinity->Mask = (KAFFINITY)1<Mask = static_cast(1)<& vhFiles) const +void IORequestGenerator::_CloseOpenFiles(vector& vhFiles) { - for (size_t x = 0; x < vhFiles.size(); ++x) + for (auto& vhFile : vhFiles) { - if ((INVALID_HANDLE_VALUE != vhFiles[x]) && (nullptr != vhFiles[x])) + if ((INVALID_HANDLE_VALUE != vhFile) && (nullptr != vhFile)) { - if (!CloseHandle(vhFiles[x])) + if (!CloseHandle(vhFile)) { PrintError("Warning: unable to close file handle (error code: %u)\n", GetLastError()); } - vhFiles[x] = nullptr; + vhFile = nullptr; } } } @@ -344,9 +336,9 @@ void IORequestGenerator::_CloseOpenFiles(vector& vhFiles) const // may be consumed by gui app which doesn't have stdout static void print(const char *format, ...) { - assert(NULL != format); + assert(nullptr != format); - if( NULL != g_pfnPrintOut ) + if( nullptr != g_pfnPrintOut ) { va_list listArg; va_start(listArg, format); @@ -360,9 +352,9 @@ static void print(const char *format, ...) // may be consumed by gui app which doesn't have stdout void PrintError(const char *format, ...) { - assert(NULL != format); + assert(nullptr != format); - if( NULL != g_pfnPrintError ) + if( nullptr != g_pfnPrintError ) { va_list listArg; @@ -377,9 +369,9 @@ void PrintError(const char *format, ...) // static void printfv(bool fVerbose, const char *format, ...) { - assert(NULL != format); + assert(nullptr != format); - if( NULL != g_pfnPrintVerbose && fVerbose ) + if( nullptr != g_pfnPrintVerbose && fVerbose ) { va_list argList; va_start(argList, format); @@ -396,7 +388,7 @@ DWORD WINAPI etwThreadFunc(LPVOID cookie) UNREFERENCED_PARAMETER(cookie); g_bTracing = TRUE; - BOOL result = TraceEvents(); + const BOOL result = TraceEvents(); g_bTracing = FALSE; return result ? 0 : 1; @@ -405,13 +397,13 @@ DWORD WINAPI etwThreadFunc(LPVOID cookie) /*****************************************************************************/ // display file size in a user-friendly form using 'verbose' stream // -void IORequestGenerator::_DisplayFileSizeVerbose(bool fVerbose, UINT64 fsize) const +void IORequestGenerator::_DisplayFileSizeVerbose(bool fVerbose, UINT64 fsize) { - if( fsize > (UINT64)10*1024*1024*1024 ) // > 10GB + if( fsize > static_cast(10)*1024*1024*1024 ) // > 10GB { printfv(fVerbose, "%I64uGB", fsize >> 30); } - else if( fsize > (UINT64)10*1024*1024 ) // > 10MB + else if( fsize > static_cast(10)*1024*1024 ) // > 10MB { printfv(fVerbose, "%I64uMB", fsize >> 20); } @@ -425,18 +417,6 @@ void IORequestGenerator::_DisplayFileSizeVerbose(bool fVerbose, UINT64 fsize) co } } -/*****************************************************************************/ -// generate 64-bit random number -static ULONG64 rand64() -{ - return - ((((ULONG64)rand()) & 0x7fff) | - ((((ULONG64)rand()) & 0x7fff) << 15) | - ((((ULONG64)rand()) & 0x7fff) << 30) | - (((((ULONG64)rand()) & 0x7fff) << 30) << 15) | - (((((ULONG64)rand()) & 0xF) << 30) << 30)); -} - /*****************************************************************************/ bool IORequestGenerator::_LoadDLLs() { @@ -446,29 +426,72 @@ bool IORequestGenerator::_LoadDLLs() return false; } - g_pfnNtQuerySysInfo = (NtQuerySysInfo)GetProcAddress(_hNTDLL, "NtQuerySystemInformation"); - if( nullptr == g_pfnNtQuerySysInfo ) - { - return false; - } + g_pfnNtQuerySysInfo = reinterpret_cast(GetProcAddress(_hNTDLL, "NtQuerySystemInformation")); - return true; + return nullptr != g_pfnNtQuerySysInfo; } /*****************************************************************************/ -bool IORequestGenerator::_GetSystemPerfInfo(SYSTEM_PROCESSOR_PERFORMANCE_INFORMATION *pInfo, UINT32 uCpuCount) const +bool IORequestGenerator::_GetSystemPerfInfo(SYSTEM_PROCESSOR_PERFORMANCE_INFORMATION *pInfo, UINT32 uCpuCount) { - NTSTATUS Status = NO_ERROR; + UINT32 uCpuCtr; + WORD wActiveGroupCtr; + BYTE bActiveProc; + HANDLE hThread = GetCurrentThread(); + GROUP_AFFINITY GroupAffinity{}; + PROCESSOR_NUMBER procNumber; + bool fOk = true; - assert(NULL != pInfo); + assert(nullptr != pInfo); assert(uCpuCount > 0); - Status = g_pfnNtQuerySysInfo(SystemProcessorPerformanceInformation, - pInfo, - sizeof(*pInfo) * uCpuCount, - NULL); + for (uCpuCtr=0,wActiveGroupCtr=0; wActiveGroupCtr < g_SystemInformation.processorTopology._vProcessorGroupInformation.size(); wActiveGroupCtr++) + { + ProcessorGroupInformation *pGroup = &g_SystemInformation.processorTopology._vProcessorGroupInformation[wActiveGroupCtr]; + + if (pGroup->_activeProcessorCount != 0) { + + // + // Affinitize to the group we're querying counters from + // + + GetCurrentProcessorNumberEx(&procNumber); + + if (procNumber.Group != wActiveGroupCtr) + { + for (bActiveProc = 0; bActiveProc < pGroup->_maximumProcessorCount; bActiveProc++) + { + if (pGroup->IsProcessorActive(bActiveProc)) + { + SetProcGroupMask(wActiveGroupCtr, bActiveProc, &GroupAffinity); + break; + } + } + + if (bActiveProc == pGroup->_maximumProcessorCount || + SetThreadGroupAffinity(hThread, &GroupAffinity, nullptr) == FALSE) + { + fOk = false; + break; + } + } + + const NTSTATUS Status = g_pfnNtQuerySysInfo(SystemProcessorPerformanceInformation, + static_cast(pInfo + uCpuCtr), + (sizeof(*pInfo) * uCpuCount) - (uCpuCtr * sizeof(SYSTEM_PROCESSOR_PERFORMANCE_INFORMATION)), + nullptr); + + if (!NT_SUCCESS(Status)) + { + fOk = false; + break; + } + } + + uCpuCtr += pGroup->_maximumProcessorCount; + } - return NT_SUCCESS(Status); + return fOk; } /*****************************************************************************/ @@ -479,50 +502,64 @@ __inline UINT64 IORequestGenerator::GetNextFileOffset(ThreadParameters& tp, size { Target &target = tp.vTargets[targetNum]; - UINT64 blockAlignment = target.GetBlockAlignmentInBytes(); - UINT64 baseFileOffset = target.GetBaseFileOffsetInBytes(); - UINT64 blockSize = target.GetBlockSizeInBytes(); + const UINT64 blockAlignment = target.GetBlockAlignmentInBytes(); + const UINT64 baseFileOffset = target.GetBaseFileOffsetInBytes(); + const UINT64 baseThreadOffset = target.GetThreadBaseFileOffsetInBytes(tp.ulRelativeThreadNo); + const UINT64 blockSize = target.GetBlockSizeInBytes(); UINT64 nextBlockOffset; - // increment/produce - note, logically relative to base offset - if (target.GetUseRandomAccessPattern()) - { - nextBlockOffset = rand64(); - nextBlockOffset -= (nextBlockOffset % blockAlignment); - } - else if (target.GetUseParallelAsyncIO()) - { - nextBlockOffset = prevOffset - baseFileOffset + blockAlignment; - } - else if (target.GetUseInterlockedSequential()) - { - nextBlockOffset = InterlockedAdd64((PLONGLONG) &tp.pullSharedSequentialOffsets[targetNum], blockAlignment) - blockAlignment; - } - else // normal sequential access pattern - { - nextBlockOffset = (tp.vullPrivateSequentialOffsets[targetNum] += blockAlignment); - } - // now apply bounds for IO offset // aligned target size is the closed interval of byte offsets at which it is legal to issue IO // ISSUE IMPROVEMENT: much of this should be precalculated. It belongs within Target, which will // need discovery of target sizing moved from its current just-in-time at thread launch. UINT64 alignedTargetSize = tp.vullFileSizes[targetNum] - baseFileOffset - blockSize; + if (target.GetUseRandomAccessPattern() || target.GetUseInterlockedSequential()) { - // these access patterns occur on blockaligned boundaries relative to base // convert aligned target size to the open interval alignedTargetSize = ((alignedTargetSize / blockAlignment) + 1) * blockAlignment; - nextBlockOffset %= alignedTargetSize; + + // increment/produce - note, logically relative to base offset + if (target.GetUseRandomAccessPattern()) + { + nextBlockOffset = tp.pRand->Rand64(); + nextBlockOffset -= (nextBlockOffset % blockAlignment); + nextBlockOffset %= alignedTargetSize; + } + else + { + nextBlockOffset = InterlockedAdd64(reinterpret_cast(&tp.pullSharedSequentialOffsets[targetNum]), blockAlignment) - blockAlignment; + nextBlockOffset %= alignedTargetSize; + } } else { + if (prevOffset == FIRST_OFFSET) + { + nextBlockOffset = baseThreadOffset - baseFileOffset; + } + else + { + if (target.GetUseParallelAsyncIO()) + { + nextBlockOffset = prevOffset - baseFileOffset + blockAlignment; + } + else // normal sequential access pattern + { + nextBlockOffset = tp.vullPrivateSequentialOffsets[targetNum] + blockAlignment; + } + } + // parasync and seq bases are potentially modified by threadstride and loop back to the // file base offset + increment which will return them to their initial base offset. - if (nextBlockOffset > alignedTargetSize) + if (nextBlockOffset > alignedTargetSize) { + nextBlockOffset = (baseThreadOffset - baseFileOffset) % blockAlignment; + + } + + if (!target.GetUseParallelAsyncIO()) { - nextBlockOffset = (IORequestGenerator::GetThreadBaseFileOffset(tp, targetNum) - baseFileOffset) % blockAlignment; tp.vullPrivateSequentialOffsets[targetNum] = nextBlockOffset; } } @@ -532,127 +569,184 @@ __inline UINT64 IORequestGenerator::GetNextFileOffset(ThreadParameters& tp, size #ifndef NDEBUG // Don't overrun the end of the file - UINT64 fileSize = tp.vullFileSizes[targetNum]; + const UINT64 fileSize = tp.vullFileSizes[targetNum]; assert(nextBlockOffset + blockSize <= fileSize); #endif return nextBlockOffset; } -__inline UINT64 IORequestGenerator::GetThreadBaseFileOffset(ThreadParameters& tp, size_t targetNum) +/*****************************************************************************/ +// Decide the kind of IO to issue during a mix test +// Future Work: Add more types of distribution in addition to random +__inline static IOOperation DecideIo(Random *pRand, UINT32 ulWriteRatio) +{ + return ((pRand->Rand32() % 100 + 1) > ulWriteRatio) ? IOOperation::ReadIO : IOOperation::WriteIO; +} + +VOID CALLBACK fileIOCompletionRoutine(DWORD dwErrorCode, DWORD dwBytesTransferred, LPOVERLAPPED pOverlapped); + +static bool issueNextIO(ThreadParameters *p, IORequest *pIORequest, DWORD *pdwBytesTransferred, bool useCompletionRoutines) { - const Target &target = tp.vTargets[targetNum]; + OVERLAPPED *pOverlapped = pIORequest->GetOverlapped(); + Target *pTarget = pIORequest->GetCurrentTarget(); + const size_t iTarget = pTarget - &p->vTargets[0]; + const UINT32 iRequest = pIORequest->GetRequestIndex(); + LARGE_INTEGER li; + bool rslt; - UINT64 baseFileOffset = target.GetBaseFileOffsetInBytes(); - UINT64 nextBlockOffset; + li.LowPart = pOverlapped->Offset; + li.HighPart = pOverlapped->OffsetHigh; + + li.QuadPart = IORequestGenerator::GetNextFileOffset(*p, iTarget, li.QuadPart); + + pOverlapped->Offset = li.LowPart; + pOverlapped->OffsetHigh = li.HighPart; + + printfv(p->pProfile->GetVerbose(), "t[%u:%u] new I/O op at %I64u (starting in block: %I64u)\n", + p->ulThreadNo, + iTarget, + li.QuadPart, + li.QuadPart / pTarget->GetBlockSizeInBytes()); - if (target.GetUseRandomAccessPattern()) + const IOOperation readOrWrite = DecideIo(p->pRand, pTarget->GetWriteRatio()); + pIORequest->SetIoType(readOrWrite); + + if (p->pTimeSpan->GetMeasureLatency()) { - nextBlockOffset = IORequestGenerator::GetNextFileOffset(tp, targetNum, 0); + pIORequest->SetStartTime(PerfTimer::GetTime()); + } + + if (readOrWrite == IOOperation::ReadIO) + { + if (useCompletionRoutines) + { + rslt = ReadFileEx(p->vhTargets[iTarget], p->GetReadBuffer(iTarget, iRequest), pTarget->GetBlockSizeInBytes(), pOverlapped, fileIOCompletionRoutine); + } + else + { + rslt = ReadFile(p->vhTargets[iTarget], p->GetReadBuffer(iTarget, iRequest), pTarget->GetBlockSizeInBytes(), pdwBytesTransferred, pOverlapped); + } } else { - // interlocked sequential - thread stride is always zero, enforced during profile validation - // parallel async - apply thread stride - // sequential - apply thread stride - nextBlockOffset = baseFileOffset + tp.ulRelativeThreadNo * target.GetThreadStrideInBytes(); + if (useCompletionRoutines) + { + rslt = WriteFileEx(p->vhTargets[iTarget], p->GetWriteBuffer(iTarget, iRequest), pTarget->GetBlockSizeInBytes(), pOverlapped, fileIOCompletionRoutine); + } + else + { + rslt = WriteFile(p->vhTargets[iTarget], p->GetWriteBuffer(iTarget, iRequest), pTarget->GetBlockSizeInBytes(), pdwBytesTransferred, pOverlapped); + } } - return nextBlockOffset; + if (!p->vThroughputMeters.empty() && p->vThroughputMeters[iTarget].IsRunning()) + { + p->vThroughputMeters[iTarget].Adjust(pTarget->GetBlockSizeInBytes()); + } + + return rslt; } -__inline UINT64 IORequestGenerator::GetStartingFileOffset(ThreadParameters& tp, size_t targetNum) +static void completeIO(ThreadParameters *p, IORequest *pIORequest, DWORD dwBytesTransferred) { - const Target &target = tp.vTargets[targetNum]; + Target *pTarget = pIORequest->GetCurrentTarget(); + const size_t iTarget = pTarget - &p->vTargets[0]; - UINT64 baseFileOffset = target.GetBaseFileOffsetInBytes(); - UINT64 nextBlockOffset; + //check if I/O transferred all of the requested bytes + if (dwBytesTransferred != pTarget->GetBlockSizeInBytes()) + { + PrintError("Warning: thread %u transferred %u bytes instead of %u bytes\n", + p->ulThreadNo, + dwBytesTransferred, + pTarget->GetBlockSizeInBytes()); + } - if (target.GetUseRandomAccessPattern()) + if (*p->pfAccountingOn) { - nextBlockOffset = IORequestGenerator::GetNextFileOffset(tp, targetNum, 0); + p->pResults->vTargetResults[iTarget].Add(dwBytesTransferred, + pIORequest->GetIoType(), + pIORequest->GetStartTime(), + *(p->pullStartTime), + p->pTimeSpan->GetMeasureLatency(), + p->pTimeSpan->GetCalculateIopsStdDev()); } - else + + // check if we should print a progress dot + if (p->pProfile->GetProgress() != 0) { - // interlocked sequential - getnext starts the clock from zero, thread independent - // parallel async - getthreadbase, thread dependent - // sequential - "", and initialize private counter - if (target.GetUseInterlockedSequential()) + const DWORD dwIOCnt = ++p->dwIOCnt; + if (dwIOCnt % p->pProfile->GetProgress() == 0) { - nextBlockOffset = IORequestGenerator::GetNextFileOffset(tp, targetNum, 0); + print("."); } - else - { - nextBlockOffset = IORequestGenerator::GetThreadBaseFileOffset(tp, targetNum); + } +} - if (!target.GetUseParallelAsyncIO()) +/*****************************************************************************/ +// function called from worker thread +// performs synch I/O +// +static bool doWorkUsingSynchronousIO(ThreadParameters *p) +{ + bool fOk = true; + DWORD dwBytesTransferred; + IORequest *pIORequest = &p->vIORequest[0]; + + while(g_bRun && !g_bThreadError) + { + Target *pTarget = pIORequest->GetNextTarget(); + + if (!p->vThroughputMeters.empty()) + { + const size_t iTarget = pTarget - &p->vTargets[0]; + ThroughputMeter *pThroughputMeter = &p->vThroughputMeters[iTarget]; + const DWORD dwSleepTime = pThroughputMeter->GetSleepTime(); + if (pThroughputMeter->IsRunning() && dwSleepTime > 0) { - tp.vullPrivateSequentialOffsets[targetNum] = nextBlockOffset - baseFileOffset; + Sleep(dwSleepTime); + continue; } } + + const BOOL rslt = issueNextIO(p, pIORequest, &dwBytesTransferred, false); + + if (!rslt) + { + PrintError("t[%u] error during %s error code: %u)\n", 0, (pIORequest->GetIoType()== IOOperation::ReadIO ? "read" : "write"), GetLastError()); + fOk = false; + goto cleanup; + } + + completeIO(p, pIORequest, dwBytesTransferred); + + assert(!g_bError); // at this point we shouldn't be seeing initialization error } - return nextBlockOffset; +cleanup: + return fOk; } -/*****************************************************************************/ -// Decide the kind of IO to issue during a mix test -// Future Work: Add more types of distribution in addition to random -__inline static IOOperation DecideIo(UINT32 ulWriteRatio) -{ - return (((UINT32)abs(rand() % 100 + 1)) > ulWriteRatio) ? IOOperation::ReadIO : IOOperation::WriteIO; - } - /*****************************************************************************/ // function called from worker thread // performs asynch I/O using IO Completion Ports // -__inline static bool doWorkUsingIOCompletionPorts(ThreadParameters *p, HANDLE hCompletionPort) +static bool doWorkUsingIOCompletionPorts(ThreadParameters *p, HANDLE hCompletionPort) { assert(nullptr!= p); assert(nullptr != hCompletionPort); bool fOk = true; - - LARGE_INTEGER li; - BOOL rslt = FALSE; - OVERLAPPED * pCompletedOvrp; + OVERLAPPED * pCompletedOvrp; ULONG_PTR ulCompletionKey; DWORD dwBytesTransferred; - DWORD dwIOCnt = 0; OverlappedQueue overlappedQueue; - size_t cOverlapped = p->vOverlapped.size(); - - bool fMeasureLatency = p->pTimeSpan->GetMeasureLatency(); - - size_t cTargets = p->vTargets.size(); - vector vThroughputMeters(cTargets); - bool fUseThrougputMeter = false; - // TODO: move to a separate function - for (size_t i = 0; i < cTargets; i++) - { - Target *pTarget = &p->vTargets[i]; - DWORD dwBurstSize = pTarget->GetBurstSize(); - if (p->pTimeSpan->GetThreadCount() > 0) - { - dwBurstSize /= p->pTimeSpan->GetThreadCount(); - } - else - { - dwBurstSize /= pTarget->GetThreadsPerFile(); - } - - if (pTarget->GetThroughputInBytesPerMillisecond() > 0 || pTarget->GetThinkTime() > 0) - { - fUseThrougputMeter = true; - vThroughputMeters[i].Start(pTarget->GetThroughputInBytesPerMillisecond(), pTarget->GetBlockSizeInBytes(), pTarget->GetThinkTime(), dwBurstSize); - } - } + const size_t cIORequests = p->vIORequest.size(); //start IO operations - for (size_t i = 0; i < cOverlapped; i++) + for (size_t i = 0; i < cIORequests; i++) { - overlappedQueue.Add(&p->vOverlapped[i]); + overlappedQueue.Add(p->vIORequest[i].GetOverlapped()); } // @@ -660,55 +754,40 @@ __inline static bool doWorkUsingIOCompletionPorts(ThreadParameters *p, HANDLE hC // while(g_bRun && !g_bThreadError) { - DWORD dwMinSleepTime = ~((DWORD)0); + DWORD dwMinSleepTime = ~static_cast(0); for (size_t i = 0; i < overlappedQueue.GetCount(); i++) { OVERLAPPED *pReadyOverlapped = overlappedQueue.Remove(); - DWORD iOverlapped = (DWORD)(pReadyOverlapped - &p->vOverlapped[0]); - size_t iTarget = p->vOverlappedIdToTargetId[iOverlapped]; - size_t iRequest = iOverlapped - p->vFirstOverlappedIdForTargetId[iTarget]; - Target *pTarget = &p->vTargets[iTarget]; - ThroughputMeter *pThroughputMeter = &vThroughputMeters[iTarget]; + IORequest *pIORequest = IORequest::OverlappedToIORequest(pReadyOverlapped); + Target *pTarget = pIORequest->GetNextTarget(); - DWORD dwSleepTime = pThroughputMeter->GetSleepTime(); - if (pThroughputMeter->IsRunning() && dwSleepTime > 0) + if (!p->vThroughputMeters.empty()) { - dwMinSleepTime = min(dwMinSleepTime, dwSleepTime); - overlappedQueue.Add(pReadyOverlapped); - continue; - } + const size_t iTarget = pTarget - &p->vTargets[0]; + ThroughputMeter *pThroughputMeter = &p->vThroughputMeters[iTarget]; - if (fMeasureLatency) - { - p->vIoStartTimes[iOverlapped] = PerfTimer::GetTime(); // record IO start time + DWORD dwSleepTime = pThroughputMeter->GetSleepTime(); + if (pThroughputMeter->IsRunning() && dwSleepTime > 0) + { + dwMinSleepTime = min(dwMinSleepTime, dwSleepTime); + overlappedQueue.Add(pReadyOverlapped); + continue; + } } - IOOperation readOrWrite; - readOrWrite = p->vdwIoType[iOverlapped] = DecideIo(pTarget->GetWriteRatio()); - if (readOrWrite == IOOperation::ReadIO) - { - rslt = ReadFile(p->vhTargets[iTarget], p->GetReadBuffer(iTarget, iRequest), pTarget->GetBlockSizeInBytes(), nullptr, pReadyOverlapped); - } - else - { - rslt = WriteFile(p->vhTargets[iTarget], p->GetWriteBuffer(iTarget, iRequest), pTarget->GetBlockSizeInBytes(), nullptr, pReadyOverlapped); - } + const BOOL rslt = issueNextIO(p, pIORequest, nullptr, false); if (!rslt && GetLastError() != ERROR_IO_PENDING) { - PrintError("t[%u] error during %s error code: %u)\n", iOverlapped, (readOrWrite == IOOperation::ReadIO ? "read" : "write"), GetLastError()); + const auto iIORequest = static_cast(pIORequest - &p->vIORequest[0]); + PrintError("t[%u] error during %s error code: %u)\n", iIORequest, (pIORequest->GetIoType()== IOOperation::ReadIO ? "read" : "write"), GetLastError()); fOk = false; goto cleanup; } - - if (pThroughputMeter->IsRunning()) - { - pThroughputMeter->Adjust(pTarget->GetBlockSizeInBytes()); - } } // if no IOs are in flight, wait for the next scheduling time - if (fUseThrougputMeter && (overlappedQueue.GetCount() == p->vOverlapped.size()) && dwMinSleepTime != ~((DWORD)0)) + if ((overlappedQueue.GetCount() == p->vIORequest.size()) && dwMinSleepTime != ~static_cast(0)) { Sleep(dwMinSleepTime); } @@ -717,61 +796,13 @@ __inline static bool doWorkUsingIOCompletionPorts(ThreadParameters *p, HANDLE hC if (GetQueuedCompletionStatus(hCompletionPort, &dwBytesTransferred, &ulCompletionKey, &pCompletedOvrp, 1) != 0) { //find which I/O operation it was (so we know to which buffer should we use) - DWORD iOverlapped = (DWORD)(pCompletedOvrp - &p->vOverlapped[0]); - size_t iTarget = p->vOverlappedIdToTargetId[iOverlapped]; - - //check if I/O transferred all of the requested bytes - Target *pTarget = &p->vTargets[iTarget]; - if (dwBytesTransferred != pTarget->GetBlockSizeInBytes()) - { - PrintError("Warning: thread %u transferred %u bytes instead of %u bytes\n", - p->ulThreadNo, - dwBytesTransferred, - pTarget->GetBlockSizeInBytes()); - } - - li.HighPart = pCompletedOvrp->OffsetHigh; - li.LowPart = pCompletedOvrp->Offset; - - if (*p->pfAccountingOn) - { - p->pResults->vTargetResults[iTarget].Add(dwBytesTransferred, - p->vdwIoType[iOverlapped], - &p->vIoStartTimes[iOverlapped], - p->pullStartTime, - fMeasureLatency, - p->pTimeSpan->GetCalculateIopsStdDev()); - } - - // TODO: move to a separate function - // check if we should print a progress dot - if (p->pProfile->GetProgress() != 0) - { - ++dwIOCnt; - if (dwIOCnt == p->pProfile->GetProgress()) - { - print("."); - dwIOCnt = 0; - } - } - - //restart the I/O operation that just completed - li.QuadPart = IORequestGenerator::GetNextFileOffset(*p, iTarget, li.QuadPart); - - pCompletedOvrp->Offset = li.LowPart; - pCompletedOvrp->OffsetHigh = li.HighPart; - - printfv(p->pProfile->GetVerbose(), "t[%u:%u] new I/O op at %I64u (starting in block: %I64u)\n", - p->ulThreadNo, - iTarget, - li.QuadPart, - li.QuadPart / pTarget->GetBlockSizeInBytes()); - + IORequest *pIORequest = IORequest::OverlappedToIORequest(pCompletedOvrp); + completeIO(p, pIORequest, dwBytesTransferred); overlappedQueue.Add(pCompletedOvrp); } else { - DWORD err = GetLastError(); + const DWORD err = GetLastError(); if (err != WAIT_TIMEOUT) { PrintError("error during overlapped IO operation (error code: %u)\n", err); @@ -791,155 +822,72 @@ __inline static bool doWorkUsingIOCompletionPorts(ThreadParameters *p, HANDLE hC VOID CALLBACK fileIOCompletionRoutine(DWORD dwErrorCode, DWORD dwBytesTransferred, LPOVERLAPPED pOverlapped) { - assert(NULL != pOverlapped); - - BOOL rslt = FALSE; - LARGE_INTEGER li; + assert(nullptr != pOverlapped); - ThreadParameters *p = (ThreadParameters *)pOverlapped->hEvent; - bool fMeasureLatency = p->pTimeSpan->GetMeasureLatency(); + auto*p = static_cast(pOverlapped->hEvent); - assert(NULL != p); + assert(nullptr != p); //check error code if (0 != dwErrorCode) { PrintError("Thread %u failed executing an I/O operation (error code: %u)\n", p->ulThreadNo, dwErrorCode); - goto cleanup; - } - - size_t iOverlapped = (pOverlapped - &p->vOverlapped[0]); - size_t iTarget = p->vOverlappedIdToTargetId[iOverlapped]; - Target *pTarget = &p->vTargets[iTarget]; - - //check if I/O operation transferred requested number of bytes - if (dwBytesTransferred != pTarget->GetBlockSizeInBytes()) - { - PrintError("Warning: thread %u transferred %u bytes instead of %u bytes\n", - p->ulThreadNo, - dwBytesTransferred, - pTarget->GetBlockSizeInBytes()); - } - - // check if we should print a progress dot - // BUGBUG: does not work ... io counter must be global - DWORD cdwIO = 0; - if (p->pProfile->GetProgress() != 0) - { - ++cdwIO; - if (cdwIO == p->pProfile->GetProgress()) - { - print("."); - cdwIO = 0; - } - } - - if (*p->pfAccountingOn) - { - p->pResults->vTargetResults[iTarget].Add(dwBytesTransferred, - p->vdwIoType[iOverlapped], - &p->vIoStartTimes[iOverlapped], - p->pullStartTime, - fMeasureLatency, - p->pTimeSpan->GetCalculateIopsStdDev()); + return; } - //restart the I/O operation that just completed - li.HighPart = pOverlapped->OffsetHigh; - li.LowPart = pOverlapped->Offset; + IORequest *pIORequest = IORequest::OverlappedToIORequest(pOverlapped); - li.QuadPart = IORequestGenerator::GetNextFileOffset(*p, iTarget, li.QuadPart); - - pOverlapped->Offset = li.LowPart; - pOverlapped->OffsetHigh = li.HighPart; - - printfv(p->pProfile->GetVerbose(), "t[%u:%u] new I/O op at %I64u (starting in block: %I64u)\n", - p->ulThreadNo, - iTarget, - li.QuadPart, - li.QuadPart / pTarget->GetBlockSizeInBytes()); + completeIO(p, pIORequest, dwBytesTransferred); // start a new IO operation if (g_bRun && !g_bThreadError) { - size_t iRequest = iOverlapped - p->vFirstOverlappedIdForTargetId[iTarget]; - if (fMeasureLatency) - { - p->vIoStartTimes[iOverlapped] = PerfTimer::GetTime(); // record IO start time - } + Target *pTarget = pIORequest->GetNextTarget(); + const size_t iTarget = pTarget - &p->vTargets[0]; - IOOperation readOrWrite; - readOrWrite = p->vdwIoType[iOverlapped] = DecideIo(pTarget->GetWriteRatio()); - if (readOrWrite == IOOperation::ReadIO) - { - rslt = ReadFileEx(p->vhTargets[iTarget], p->GetReadBuffer(iTarget, iRequest), pTarget->GetBlockSizeInBytes(), pOverlapped, fileIOCompletionRoutine); - } - else - { - rslt = WriteFileEx(p->vhTargets[iTarget], p->GetWriteBuffer(iTarget, iRequest), pTarget->GetBlockSizeInBytes(), pOverlapped, fileIOCompletionRoutine); - } + const BOOL rslt = issueNextIO(p, pIORequest, nullptr, true); if (!rslt) { - PrintError("t[%u:%u] error during %s error code: %u)\n", p->ulThreadNo, iTarget, (readOrWrite == IOOperation::ReadIO ? "read" : "write"), GetLastError()); - goto cleanup; + PrintError("t[%u:%u] error during %s error code: %u)\n", p->ulThreadNo, iTarget, (pIORequest->GetIoType() == IOOperation::ReadIO ? "read" : "write"), GetLastError()); + return; } } -cleanup: - return; +//cleanup: + //return; } /*****************************************************************************/ // function called from worker thread // performs asynch I/O using IO Completion Routines (ReadFileEx, WriteFileEx) // -__inline static bool doWorkUsingCompletionRoutines(ThreadParameters *p) +static bool doWorkUsingCompletionRoutines(ThreadParameters *p) { - assert(NULL != p); + assert(nullptr != p); bool fOk = true; - BOOL rslt = FALSE; - - //start IO operations - size_t iOverlapped = 0; - bool fMeasureLatency = p->pTimeSpan->GetMeasureLatency(); + //start IO operations + const auto cIORequests = static_cast(p->vIORequest.size()); - for (size_t iTarget = 0; iTarget < p->vTargets.size(); iTarget++) - { - Target *pTarget = &p->vTargets[iTarget]; - for (size_t iRequest = 0; iRequest < pTarget->GetRequestCount(); ++iRequest) - { - if (fMeasureLatency) - { - p->vIoStartTimes[iOverlapped] = PerfTimer::GetTime(); // record IO start time - } + for (size_t iIORequest = 0; iIORequest < cIORequests; iIORequest++) { + IORequest *pIORequest = &p->vIORequest[iIORequest]; + Target *pTarget = pIORequest->GetNextTarget(); + const size_t iTarget = pTarget - &p->vTargets[0]; - IOOperation readOrWrite; - readOrWrite = p->vdwIoType[iOverlapped] = DecideIo(pTarget->GetWriteRatio()); - if (readOrWrite == IOOperation::ReadIO) - { - rslt = ReadFileEx(p->vhTargets[iTarget], p->GetReadBuffer(iTarget, iRequest), pTarget->GetBlockSizeInBytes(), &p->vOverlapped[iOverlapped], fileIOCompletionRoutine); - } - else - { - rslt = WriteFileEx(p->vhTargets[iTarget], p->GetWriteBuffer(iTarget, iRequest), pTarget->GetBlockSizeInBytes(), &p->vOverlapped[iOverlapped], fileIOCompletionRoutine); - } + const BOOL rslt = issueNextIO(p, pIORequest, nullptr, true); - if (!rslt) - { - PrintError("t[%u:%u] error during %s error code: %u)\n", p->ulThreadNo, iTarget, (readOrWrite == IOOperation::ReadIO ? "read" : "write"), GetLastError()); - fOk = false; - goto cleanup; - } - iOverlapped++; + if (!rslt) + { + PrintError("t[%u:%u] error during %s error code: %u)\n", p->ulThreadNo, iTarget, (pIORequest->GetIoType() == IOOperation::ReadIO ? "read" : "write"), GetLastError()); + fOk = false; + return fOk; } } - DWORD dwWaitResult = 0; - while( g_bRun && !g_bThreadError ) + while( g_bRun && !g_bThreadError ) { - dwWaitResult = WaitForSingleObjectEx(p->hEndEvent, INFINITE, TRUE); + const DWORD dwWaitResult = WaitForSingleObjectEx(p->hEndEvent, INFINITE, TRUE); assert(WAIT_IO_COMPLETION == dwWaitResult || (WAIT_OBJECT_0 == dwWaitResult && (!g_bRun || g_bThreadError))); @@ -948,36 +896,85 @@ __inline static bool doWorkUsingCompletionRoutines(ThreadParameters *p) { PrintError("Error in thread %u during WaitForSingleObjectEx (in completion routines)\n", p->ulThreadNo); fOk = false; - goto cleanup; + return fOk; } } -cleanup: +//cleanup: return fOk; } +struct UniqueTarget { + string path; + TargetCacheMode caching = TargetCacheMode::Undefined; + PRIORITY_HINT priority = PRIORITY_HINT::IoPriorityHintNormal; + DWORD dwDesiredAccess = 0; + DWORD dwFlags = 0; + + bool operator < (const struct UniqueTarget &ut) const { + if (path < ut.path) { + return true; + } + if (ut.path < path) { + return false; + } + + if (caching < ut.caching) { + return true; + } + if (ut.caching < caching) { + return false; + } + + if (priority < ut.priority) { + return true; + } + if (ut.priority < priority) { + return false; + } + + if (dwDesiredAccess < ut.dwDesiredAccess) { + return true; + } + if (ut.dwDesiredAccess < dwDesiredAccess) { + return false; + } + + return dwFlags < ut.dwFlags; + } +}; + /*****************************************************************************/ // worker thread function // DWORD WINAPI threadFunc(LPVOID cookie) { bool fOk = true; - ThreadParameters *p = reinterpret_cast(cookie); + auto*p = reinterpret_cast(cookie); + const UINT32 cIORequests = p->GetTotalRequestCount(); + const size_t cTargets = p->vTargets.size(); HANDLE hCompletionPort = nullptr; - bool fMeasureLatency = p->pTimeSpan->GetMeasureLatency(); - bool fCalculateIopsStdDev = p->pTimeSpan->GetCalculateIopsStdDev(); + bool fUseThrougputMeter = false; + size_t iTarget = 0; + + // + // A single file can be specified in multiple targets, so only open one + // handle for each unique file. + // + + vector vhUniqueHandles; + map< UniqueTarget, UINT32 > mHandleMap; + + const bool fCalculateIopsStdDev = p->pTimeSpan->GetCalculateIopsStdDev(); UINT64 ioBucketDuration = 0; UINT32 expectedNumberOfBuckets = 0; if(fCalculateIopsStdDev) { - UINT32 ioBucketDurationInMilliseconds = p->pTimeSpan->GetIoBucketDurationInMilliseconds(); + const UINT32 ioBucketDurationInMilliseconds = p->pTimeSpan->GetIoBucketDurationInMilliseconds(); ioBucketDuration = PerfTimer::MillisecondsToPerfTime(ioBucketDurationInMilliseconds); expectedNumberOfBuckets = Util::QuotientCeiling(p->pTimeSpan->GetDuration() * 1000, ioBucketDurationInMilliseconds); } - //set random seed (each thread has a different one) - srand(p->ulRandSeed); - // apply affinity. The specific assignment is provided in the thread profile up front. if (!p->pTimeSpan->GetDisableAffinity()) { @@ -996,9 +993,9 @@ DWORD WINAPI threadFunc(LPVOID cookie) } // adjust thread token if large pages are needed - for (auto pTarget = p->vTargets.begin(); pTarget != p->vTargets.end(); pTarget++) + for (auto& vTarget : p->vTargets) { - if (pTarget->GetUseLargePages()) + if (vTarget.GetUseLargePages()) { if (!SetPrivilege(SE_LOCK_MEMORY_NAME)) { @@ -1010,8 +1007,7 @@ DWORD WINAPI threadFunc(LPVOID cookie) } // TODO: open files - size_t iTarget = 0; - for (auto pTarget = p->vTargets.begin(); pTarget != p->vTargets.end(); pTarget++) + for (auto pTarget = p->vTargets.begin(); pTarget != p->vTargets.end(); ++pTarget) { bool fPhysical = false; bool fPartition = false; @@ -1022,7 +1018,7 @@ DWORD WINAPI threadFunc(LPVOID cookie) const char *fname = nullptr; //filename (can point to physFN) char physFN[32]; //disk/partition name - if (NULL == filename || NULL == *(filename)) + if (nullptr == filename || NULL == *(filename)) { PrintError("FATAL ERROR: invalid filename\n"); fOk = false; @@ -1032,18 +1028,18 @@ DWORD WINAPI threadFunc(LPVOID cookie) //check if it is a physical drive if ('#' == *filename && NULL != *(filename + 1)) { - UINT32 nDriveNo = (UINT32)atoi(filename + 1); + const auto nDriveNo = static_cast(atoi(filename + 1)); fPhysical = true; - sprintf_s(physFN, 32, "\\\\.\\PhysicalDrive%u", nDriveNo); + sprintf_s(physFN, 32, R"(\\.\PhysicalDrive%u)", nDriveNo); fname = physFN; } //check if it is a partition - if (!fPhysical && NULL != *(filename + 1) && NULL == *(filename + 2) && isalpha((unsigned char)filename[0]) && ':' == filename[1]) + if (!fPhysical && NULL != *(filename + 1) && NULL == *(filename + 2) && isalpha(static_cast(filename[0])) && ':' == filename[1]) { fPartition = true; - sprintf_s(physFN, 32, "\\\\.\\%c:", filename[0]); + sprintf_s(physFN, 32, R"(\\.\%c:)", filename[0]); fname = physFN; } @@ -1054,8 +1050,8 @@ DWORD WINAPI threadFunc(LPVOID cookie) } // get/set file flags - DWORD dwFlags = pTarget->GetCreateFlags(p->vTargets.size() > 1); - DWORD dwDesiredAccess = 0; + const DWORD dwFlags = pTarget->GetCreateFlags(cIORequests > 1); + DWORD dwDesiredAccess; if (pTarget->GetWriteRatio() == 0) { dwDesiredAccess = GENERIC_READ; @@ -1069,47 +1065,63 @@ DWORD WINAPI threadFunc(LPVOID cookie) dwDesiredAccess = GENERIC_READ | GENERIC_WRITE; } - HANDLE hFile = CreateFile(fname, - dwDesiredAccess, - FILE_SHARE_READ | FILE_SHARE_WRITE, - nullptr, //security - OPEN_EXISTING, - dwFlags, //flags - nullptr); //template file - if (INVALID_HANDLE_VALUE == hFile) - { - // TODO: error out - PrintError("Error opening file: %s [%u]\n", sPath.c_str(), GetLastError()); - fOk = false; - goto cleanup; - } - - if (pTarget->GetCacheMode() == TargetCacheMode::DisableLocalCache) - { - DWORD Status = DisableLocalCache(hFile); - if (Status != ERROR_SUCCESS) + HANDLE hFile; + UniqueTarget ut; + ut.path = sPath; + ut.priority = pTarget->GetIOPriorityHint(); + ut.caching = pTarget->GetCacheMode(); + ut.dwDesiredAccess = dwDesiredAccess; + ut.dwFlags = dwFlags; + + if (mHandleMap.find(ut) == mHandleMap.end()) { + hFile = CreateFile(fname, + dwDesiredAccess, + FILE_SHARE_READ | FILE_SHARE_WRITE, + nullptr, //security + OPEN_EXISTING, + dwFlags, //flags + nullptr); //template file + if (INVALID_HANDLE_VALUE == hFile) { - PrintError("Failed to disable local caching (error %u). NOTE: only supported on remote filesystems with Windows 8 or newer.\n", Status); + // TODO: error out + PrintError("Error opening file: %s [%u]\n", sPath.c_str(), GetLastError()); fOk = false; goto cleanup; } - } - p->vhTargets.push_back(hFile); + if (pTarget->GetCacheMode() == TargetCacheMode::DisableLocalCache) + { + const DWORD Status = DisableLocalCache(hFile); + if (Status != ERROR_SUCCESS) + { + PrintError("Failed to disable local caching (error %u). NOTE: only supported on remote filesystems with Windows 8 or newer.\n", Status); + fOk = false; + goto cleanup; + } + } - //set IO priority - if (pTarget->GetIOPriorityHint() != IoPriorityHintNormal) - { - _declspec(align(8)) FILE_IO_PRIORITY_HINT_INFO hintInfo; - hintInfo.PriorityHint = pTarget->GetIOPriorityHint(); - if (!SetFileInformationByHandle(hFile, FileIoPriorityHintInfo, &hintInfo, sizeof(hintInfo))) + //set IO priority + if (pTarget->GetIOPriorityHint() != IoPriorityHintNormal) { - PrintError("Error setting IO priority for file: %s [%u]\n", sPath.c_str(), GetLastError()); - fOk = false; - goto cleanup; + _declspec(align(8)) FILE_IO_PRIORITY_HINT_INFO hintInfo; + hintInfo.PriorityHint = pTarget->GetIOPriorityHint(); + if (!SetFileInformationByHandle(hFile, FileIoPriorityHintInfo, &hintInfo, sizeof(hintInfo))) + { + PrintError("Error setting IO priority for file: %s [%u]\n", sPath.c_str(), GetLastError()); + fOk = false; + goto cleanup; + } } + + mHandleMap[ut] = static_cast(vhUniqueHandles.size()); + vhUniqueHandles.push_back(hFile); + } + else { + hFile = vhUniqueHandles[mHandleMap[ut]]; } + p->vhTargets.push_back(hFile); + // obtain file/disk/partition size { UINT64 fsize = 0; //file size @@ -1168,7 +1180,7 @@ DWORD WINAPI threadFunc(LPVOID cookie) p->vullFileSizes.push_back(fsize); } - UINT64 startingFileOffset = IORequestGenerator::GetThreadBaseFileOffset(*p, iTarget); + const UINT64 startingFileOffset = pTarget->GetThreadBaseFileOffsetInBytes(p->ulRelativeThreadNo); // test whether the file is large enough for this thread to do work if (startingFileOffset + pTarget->GetBlockSizeInBytes() >= p->vullFileSizes[iTarget]) @@ -1217,10 +1229,6 @@ DWORD WINAPI threadFunc(LPVOID cookie) printfv(p->pProfile->GetVerbose(), "thread %u started (random seed: %u)\n", p->ulThreadNo, p->ulRandSeed); - // TODO: check if it's still used - LARGE_INTEGER li; //used for setting file positions, etc. - DWORD dwIOCnt = 0; //number of completed I/O operations since last progress dot - p->vullPrivateSequentialOffsets.clear(); p->vullPrivateSequentialOffsets.resize(p->vTargets.size()); p->pResults->vTargetResults.clear(); @@ -1237,283 +1245,176 @@ DWORD WINAPI threadFunc(LPVOID cookie) } // - // synchronous access + // fill the IORequest structures // - //FUTURE EXTENSION: enable asynchronous I/O even if only 1 outstanding I/O per file (requires another parameter) - - if (p->vTargets.size() == 1 && p->vTargets[0].GetRequestCount() == 1) + + p->vIORequest.clear(); + + if (p->pTimeSpan->GetThreadCount() != 0 && + p->pTimeSpan->GetRequestCount() != 0) { - Target *pTarget = &p->vTargets[0]; - DWORD dwBytesTransferred = 0; + p->vIORequest.resize(cIORequests, IORequest(p->pRand)); - //advance file pointer to base file offset - li.QuadPart = IORequestGenerator::GetStartingFileOffset(*p, 0); - printfv(p->pProfile->GetVerbose(), "t[%u] initial I/O op at %I64u (starting in block: %I64u)\n", - p->ulThreadNo, - li.QuadPart, - li.QuadPart / pTarget->GetBlockSizeInBytes()); - //FUTURE EXTENSION: file pointer should be set through OVERLAPPED stucture for consistency with other scenarios (unless this is suspected to be the common way in real scenarios) - if (!SetFilePointerEx(p->vhTargets[0], li, NULL, FILE_BEGIN)) + for (UINT32 iIORequest = 0; iIORequest < cIORequests; iIORequest++) { - PrintError("Error setting file pointer. Error code: %d.\n", GetLastError()); - fOk = false; - goto cleanup; - } + p->vIORequest[iIORequest].SetRequestIndex(iIORequest); - BOOL rslt = FALSE; + for (unsigned int iFile = 0; iFile < p->vTargets.size(); iFile++) + { + Target *pTarget = &p->vTargets[iFile]; + const vector vThreadTargets = pTarget->GetThreadTargets(); + UINT32 ulWeight = pTarget->GetWeight(); - assert(nullptr != p->hStartEvent); + for (auto vThreadTarget : vThreadTargets) + { + if (vThreadTarget.GetThread() == p->ulRelativeThreadNo) + { + if (vThreadTarget.GetWeight() != 0) + { + ulWeight = vThreadTarget.GetWeight(); + } + break; + } + } - //wait for a signal to start - printfv(p->pProfile->GetVerbose(), "thread %u: waiting for a signal to start\n", p->ulThreadNo); - if (WAIT_FAILED == WaitForSingleObject(p->hStartEvent, INFINITE)) - { - PrintError("Waiting for a signal to start failed (error code: %u)\n", GetLastError()); - fOk = false; - goto cleanup; + p->vIORequest[iIORequest].AddTarget(pTarget, ulWeight); + } } - printfv(p->pProfile->GetVerbose(), "thread %u: received signal to start\n", p->ulThreadNo); - - // TODO: check if this is needed - //check if everything is ok - if (g_bError) + } + else + { + for (unsigned int iFile = 0; iFile < p->vTargets.size(); iFile++) { - fOk = false; - goto cleanup; + Target *pTarget = &p->vTargets[iFile]; + + for (DWORD iRequest = 0; iRequest < pTarget->GetRequestCount(); ++iRequest) + { + IORequest ioRequest(p->pRand); + ioRequest.AddTarget(pTarget, 1); + ioRequest.SetRequestIndex(iRequest); + p->vIORequest.push_back(ioRequest); + } } + } - // - // perform work - // - - assert(nullptr != p->vhTargets[0] ); - assert(pTarget->GetBlockSizeInBytes() > 0); - + // + // fill the throughput meter structures + // + for (size_t i = 0; i < cTargets; i++) + { ThroughputMeter throughputMeter; - DWORD dwSleepTime; - + Target *pTarget = &p->vTargets[i]; DWORD dwBurstSize = pTarget->GetBurstSize(); if (p->pTimeSpan->GetThreadCount() > 0) { - dwBurstSize /= p->pTimeSpan->GetThreadCount(); + if (pTarget->GetThreadTargets().empty()) + { + dwBurstSize /= p->pTimeSpan->GetThreadCount(); + } + else + { + dwBurstSize /= static_cast(pTarget->GetThreadTargets().size()); + } } else { dwBurstSize /= pTarget->GetThreadsPerFile(); } - throughputMeter.Start(pTarget->GetThroughputInBytesPerMillisecond(), pTarget->GetBlockSizeInBytes(), pTarget->GetThinkTime(), dwBurstSize); - while(g_bRun && !g_bThreadError) + if (pTarget->GetThroughputInBytesPerMillisecond() > 0 || pTarget->GetThinkTime() > 0) { - if (throughputMeter.IsRunning()) - { - dwSleepTime = throughputMeter.GetSleepTime(); - if (0 != dwSleepTime) - { - Sleep(dwSleepTime); - continue; - } - } - - //start read or write operation (depends of the type of test) - //first access is always performed on base offset (even in case of random access) - - UINT64 ullStartTime = 0; - - if (fMeasureLatency) - { - ullStartTime = PerfTimer::GetTime(); // record IO start time - } - - IOOperation readOrWrite; - readOrWrite = DecideIo(pTarget->GetWriteRatio()); - if (readOrWrite == IOOperation::ReadIO) - { - rslt = ReadFile(p->vhTargets[0], p->GetReadBuffer(0, 0), pTarget->GetBlockSizeInBytes(), &dwBytesTransferred, nullptr); - } - else - { - rslt = WriteFile(p->vhTargets[0], p->GetWriteBuffer(0, 0), pTarget->GetBlockSizeInBytes(), &dwBytesTransferred, nullptr); - } - - if (!rslt) - { - PrintError("t[%u:%u] error during %s error code: %u)\n", p->ulThreadNo, 0, (readOrWrite == IOOperation::ReadIO ? "read" : "write"), GetLastError()); - fOk = false; - goto cleanup; - } - - //check if I/O operation transferred requested number of bytes - if (dwBytesTransferred != pTarget->GetBlockSizeInBytes()) - { - PrintError("Warning: thread %u transfered %u bytes instead of %u bytes\n", p->ulThreadNo, dwBytesTransferred, pTarget->GetBlockSizeInBytes()); - } - - if (throughputMeter.IsRunning()) - { - throughputMeter.Adjust(pTarget->GetBlockSizeInBytes()); - } - - if (*p->pfAccountingOn) - { - p->pResults->vTargetResults[0].Add(dwBytesTransferred, - readOrWrite, - &ullStartTime, - p->pullStartTime, - fMeasureLatency, - fCalculateIopsStdDev); - } - - // check if we should print a progress dot - if (0 != p->pProfile->GetProgress() > 0) - { - ++dwIOCnt; - if (dwIOCnt == p->pProfile->GetProgress()) - { - print("."); - dwIOCnt = 0; - } - } - - li.QuadPart = IORequestGenerator::GetNextFileOffset(*p, 0, li.QuadPart); - - printfv(p->pProfile->GetVerbose(), "t[%u] new I/O op at %I64u (starting in block: %I64u)\n", - p->ulThreadNo, - li.QuadPart, - li.QuadPart / pTarget->GetBlockSizeInBytes()); + fUseThrougputMeter = true; + throughputMeter.Start(pTarget->GetThroughputInBytesPerMillisecond(), pTarget->GetBlockSizeInBytes(), pTarget->GetThinkTime(), dwBurstSize); + } - if (!SetFilePointerEx(p->vhTargets[0], li, NULL, FILE_BEGIN)) - { - PrintError("thread %u: Error setting file pointer\n", p->ulThreadNo); - fOk = false; - goto cleanup; - } + p->vThroughputMeters.push_back(throughputMeter); + } - assert(!g_bError); // at this point we shouldn't be seeing initialization error + if (!fUseThrougputMeter) + { + p->vThroughputMeters.clear(); + } + + //FUTURE EXTENSION: enable asynchronous I/O even if only 1 outstanding I/O per file (requires another parameter) + if (cIORequests == 1) + { + //synchronous IO - no setup needed + } + else if (p->pTimeSpan->GetCompletionRoutines()) + { + //in case of completion routines hEvent field is not used, + //so we can use it to pass a pointer to the thread parameters + for (UINT32 iIORequest = 0; iIORequest < cIORequests; iIORequest++) { + OVERLAPPED *pOverlapped = p->vIORequest[iIORequest].GetOverlapped(); + pOverlapped->hEvent = static_cast(p); } - }//end of synchronous access - // - // overlapped IO operations - // + } else { // - // create IO completion port if not doing completion routines + // create IO completion port if not doing completion routines or synchronous IO // - if (!p->pTimeSpan->GetCompletionRoutines()) + for (auto& vhUniqueHandle : vhUniqueHandles) { - for (unsigned int i = 0; i < p->vTargets.size(); i++) + hCompletionPort = CreateIoCompletionPort(vhUniqueHandle, hCompletionPort, 0, 1); + if (nullptr == hCompletionPort) { - hCompletionPort = CreateIoCompletionPort(p->vhTargets[i], hCompletionPort, 0, 1); - if (nullptr == hCompletionPort) - { - PrintError("unable to create IO completion port (error code: %u)\n", GetLastError()); - fOk = false; - goto cleanup; - } + PrintError("unable to create IO completion port (error code: %u)\n", GetLastError()); + fOk = false; + goto cleanup; } } + } - // - // fill the OVERLAPPED structures - // - - UINT32 cOverlapped = p->GetTotalRequestCount(); - - p->vOverlapped.clear(); - p->vOverlapped.resize(cOverlapped); - - p->vdwIoType.clear(); - p->vdwIoType.resize(cOverlapped); - - p->vIoStartTimes.clear(); - p->vIoStartTimes.resize(cOverlapped); - - p->vFirstOverlappedIdForTargetId.clear(); - - UINT32 iOverlapped = 0; - for (unsigned int iFile = 0; iFile < p->vTargets.size(); iFile++) - { - Target *pTarget = &p->vTargets[iFile]; - - li.QuadPart = IORequestGenerator::GetStartingFileOffset(*p, iFile); - p->vFirstOverlappedIdForTargetId.push_back(iOverlapped); - - for (DWORD iRequest = 0; iRequest < pTarget->GetRequestCount(); ++iRequest) - { - // on increment, get next except in the case of parallel async, which all start at the initial offset. - // note that we must only do this when needed, since it will advance global state. - if (iRequest != 0 && !pTarget->GetUseParallelAsyncIO()) - { - li.QuadPart = IORequestGenerator::GetNextFileOffset(*p, iFile, li.QuadPart); - } - - p->vOverlappedIdToTargetId.push_back(iFile); - if (!p->pTimeSpan->GetCompletionRoutines()) - { - p->vOverlapped[iOverlapped].hEvent = nullptr; //we don't need event, because we use IO completion port - } - else - { - //in case of completion routines hEvent field is not used, - //so we can use it to pass a pointer to the thread parameters - p->vOverlapped[iOverlapped].hEvent = (HANDLE)p; - } - - printfv(p->pProfile->GetVerbose(), "t[%u:%u] initial I/O op at %I64u (starting in block: %I64u)\n", - p->ulThreadNo, - iFile, - li.QuadPart, - li.QuadPart / pTarget->GetBlockSizeInBytes()); - - p->vOverlapped[iOverlapped].Offset = li.LowPart; - p->vOverlapped[iOverlapped].OffsetHigh = li.HighPart; + // + // wait for a signal to start + // + printfv(p->pProfile->GetVerbose(), "thread %u: waiting for a signal to start\n", p->ulThreadNo); + if( WAIT_FAILED == WaitForSingleObject(p->hStartEvent, INFINITE) ) + { + PrintError("Waiting for a signal to start failed (error code: %u)\n", GetLastError()); + fOk = false; + goto cleanup; + } + printfv(p->pProfile->GetVerbose(), "thread %u: received signal to start\n", p->ulThreadNo); - ++iOverlapped; - } - } + //check if everything is ok + if (g_bError) + { + fOk = false; + goto cleanup; + } - // - // wait for a signal to start - // - printfv(p->pProfile->GetVerbose(), "thread %u: waiting for a signal to start\n", p->ulThreadNo); - if( WAIT_FAILED == WaitForSingleObject(p->hStartEvent, INFINITE) ) + //error handling and memory freeing is done in doWorkUsingIOCompletionPorts and doWorkUsingCompletionRoutines + if (cIORequests == 1) + { + // use synchronous IO (it will also clse the event) + if (!doWorkUsingSynchronousIO(p)) { - PrintError("Waiting for a signal to start failed (error code: %u)\n", GetLastError()); fOk = false; goto cleanup; } - printfv(p->pProfile->GetVerbose(), "thread %u: received signal to start\n", p->ulThreadNo); - - //check if everything is ok - if (g_bError) + } + else if (!p->pTimeSpan->GetCompletionRoutines()) + { + // use IO Completion Ports (it will also close the I/O completion port) + if (!doWorkUsingIOCompletionPorts(p, hCompletionPort)) { fOk = false; goto cleanup; } - - //error handling and memory freeing is done in doWorkUsingIOCompletionPorts and doWorkUsingCompletionRoutines - if (!p->pTimeSpan->GetCompletionRoutines()) - { - // use IO Completion Ports (it will also close the I/O completion port) - if (!doWorkUsingIOCompletionPorts(p, hCompletionPort)) - { - fOk = false; - goto cleanup; - } - } - else + } + else + { + //use completion routines + if (!doWorkUsingCompletionRoutines(p)) { - //use completion routines - if (!doWorkUsingCompletionRoutines(p)) - { - fOk = false; - goto cleanup; - } + fOk = false; + goto cleanup; } + } - assert(!g_bError); // at this point we shouldn't be seeing initialization error - } // end of overlapped IO operations + assert(!g_bError); // at this point we shouldn't be seeing initialization error // save results @@ -1524,19 +1425,19 @@ DWORD WINAPI threadFunc(LPVOID cookie) } // free memory allocated with VirtualAlloc - for (auto i = p->vpDataBuffers.begin(); i != p->vpDataBuffers.end(); i++) + for (auto& vpDataBuffer : p->vpDataBuffers) { - if (nullptr != *i) + if (nullptr != vpDataBuffer) { #pragma prefast(suppress:6001, "Prefast does not understand this vector will only contain validly allocated buffer pointers") - VirtualFree(*i, 0, MEM_RELEASE); + VirtualFree(vpDataBuffer, 0, MEM_RELEASE); } } // close files - for (auto i = p->vhTargets.begin(); i != p->vhTargets.end(); i++) + for (auto& vhUniqueHandle : vhUniqueHandles) { - CloseHandle(*i); + CloseHandle(vhUniqueHandle); } // close completion ports @@ -1545,6 +1446,7 @@ DWORD WINAPI threadFunc(LPVOID cookie) CloseHandle(hCompletionPort); } + delete p->pRand; delete p; // notify master thread that we've finished @@ -1554,7 +1456,7 @@ DWORD WINAPI threadFunc(LPVOID cookie) } /*****************************************************************************/ -struct ETWSessionInfo IORequestGenerator::_GetResultETWSession(const EVENT_TRACE_PROPERTIES *pTraceProperties) const +struct ETWSessionInfo IORequestGenerator::_GetResultETWSession(const EVENT_TRACE_PROPERTIES *pTraceProperties) { struct ETWSessionInfo session = {}; if (nullptr != pTraceProperties) @@ -1576,8 +1478,7 @@ struct ETWSessionInfo IORequestGenerator::_GetResultETWSession(const EVENT_TRACE DWORD IORequestGenerator::_CreateDirectoryPath(const char *pszPath) const { - char *c = nullptr; //variable used to browse the path - char dirPath[MAX_PATH]; //copy of the path (it will be altered) + char dirPath[MAX_PATH]; //copy of the path (it will be altered) //only support absolute paths that specify the drive letter if (pszPath[0] == '\0' || pszPath[1] != ':') @@ -1590,7 +1491,7 @@ DWORD IORequestGenerator::_CreateDirectoryPath(const char *pszPath) const return ERROR_BUFFER_OVERFLOW; } - c = dirPath; + char *c = dirPath; while('\0' != *c) { if ('\\' == *c) @@ -1602,7 +1503,7 @@ DWORD IORequestGenerator::_CreateDirectoryPath(const char *pszPath) const //create directory if it doesn't exist if (GetFileAttributes(dirPath) == INVALID_FILE_ATTRIBUTES) { - if (CreateDirectory(dirPath, NULL) == FALSE) + if (CreateDirectory(dirPath, nullptr) == FALSE) { return GetLastError(); } @@ -1635,7 +1536,7 @@ bool IORequestGenerator::_CreateFile(UINT64 ullFileSize, const char *pszFilename // there are various forms of paths we do not support creating subdir hierarchies // for - relative and unc paths specifically. this is fine, and not neccesary to // warn about. we can add support in the future. - DWORD dwError = _CreateDirectoryPath(pszFilename); + const DWORD dwError = _CreateDirectoryPath(pszFilename); if (dwError != ERROR_SUCCESS && dwError != ERROR_NOT_SUPPORTED) { PrintError("WARNING: Could not create intermediate directory (error code: %u)\n", dwError); @@ -1708,31 +1609,28 @@ bool IORequestGenerator::_CreateFile(UINT64 ullFileSize, const char *pszFilename return false; } - UINT32 ulBufSize; - UINT64 ullRemainSize; - - ulBufSize = 1024*1024; - if (ullFileSize < (UINT64)ulBufSize) + UINT32 ulBufSize = 1024*1024; + if (ullFileSize < static_cast(ulBufSize)) { - ulBufSize = (UINT32)ullFileSize; + ulBufSize = static_cast(ullFileSize); } vector vBuf(ulBufSize); for (UINT32 i=0; i(i & 0xFF); } - ullRemainSize = ullFileSize; + UINT64 ullRemainSize = ullFileSize; while (ullRemainSize > 0) { DWORD dwBytesWritten; - if ((UINT64)ulBufSize > ullRemainSize) + if (static_cast(ulBufSize) > ullRemainSize) { - ulBufSize = (UINT32)ullRemainSize; + ulBufSize = static_cast(ullRemainSize); } - if (!WriteFile(hFile, &vBuf[0], ulBufSize, &dwBytesWritten, NULL)) + if (!WriteFile(hFile, &vBuf[0], ulBufSize, &dwBytesWritten, nullptr)) { PrintError("Error while writng during file creation (error code: %u)\n", GetLastError()); CloseHandle(hFile); @@ -1756,7 +1654,7 @@ bool IORequestGenerator::_CreateFile(UINT64 ullFileSize, const char *pszFilename LARGE_INTEGER li; if( GetFileSizeEx(hFile, &li) ) { - assert(li.QuadPart == (LONGLONG)ullFileSize); + assert(li.QuadPart == static_cast(ullFileSize)); } #endif @@ -1766,11 +1664,11 @@ bool IORequestGenerator::_CreateFile(UINT64 ullFileSize, const char *pszFilename } /*****************************************************************************/ -void IORequestGenerator::_TerminateWorkerThreads(vector& vhThreads) const +void IORequestGenerator::_TerminateWorkerThreads(vector& vhThreads) { for (UINT32 x = 0; x < vhThreads.size(); ++x) { - assert(NULL != vhThreads[x]); + assert(nullptr != vhThreads[x]); #pragma warning( push ) #pragma warning( disable : 6258 ) if (!TerminateThread(vhThreads[x], 0)) @@ -1781,11 +1679,11 @@ void IORequestGenerator::_TerminateWorkerThreads(vector& vhThreads) cons } } /*****************************************************************************/ -void IORequestGenerator::_AbortWorkerThreads(HANDLE hStartEvent, vector& vhThreads) const +void IORequestGenerator::_AbortWorkerThreads(HANDLE hStartEvent, vector& vhThreads) { - assert(NULL != hStartEvent); + assert(nullptr != hStartEvent); - if (NULL == hStartEvent) + if (nullptr == hStartEvent) { return; } @@ -1807,7 +1705,7 @@ void IORequestGenerator::_AbortWorkerThreads(HANDLE hStartEvent, vector& } /*****************************************************************************/ -bool IORequestGenerator::_StopETW(bool fUseETW, TRACEHANDLE hTraceSession) const +bool IORequestGenerator::_StopETW(bool fUseETW, TRACEHANDLE hTraceSession) { bool fOk = true; if (fUseETW) @@ -1844,6 +1742,7 @@ void IORequestGenerator::_InitializeGlobalParameters() bool IORequestGenerator::_PrecreateFiles(Profile& profile) const { bool fOk = true; + if (profile.GetPrecreateFiles() != PrecreateFiles::None) { vector vFilesToCreate = _GetFilesToPrecreate(profile); @@ -1863,6 +1762,7 @@ bool IORequestGenerator::_PrecreateFiles(Profile& profile) const profile.MarkFilesAsPrecreated(vCreatedFiles); } } + return fOk; } @@ -1884,7 +1784,7 @@ bool IORequestGenerator::GenerateRequests(Profile& profile, IResultParser& resul } // TODO: show results only for timespans that succeeded - SystemInformation system; + const SystemInformation system; string sResults = resultParser.ParseResults(profile, system, vResults); print("%s", sResults.c_str()); } @@ -1898,7 +1798,7 @@ bool IORequestGenerator::_GenerateRequestsForTimeSpan(const Profile& profile, co //FUTURE EXTENSION: add a check if the folder is compressed (cache is always enabled in case of compressed folders) //check if I/O request generator is already running - LONG lGenState = InterlockedExchange(&g_lGeneratorRunning, 1); + const LONG lGenState = InterlockedExchange(&g_lGeneratorRunning, 1); if (1 == lGenState) { PrintError("FATAL ERROR: I/O Request Generator already running\n"); @@ -1908,12 +1808,11 @@ bool IORequestGenerator::_GenerateRequestsForTimeSpan(const Profile& profile, co //initialize all global parameters (in case of second run, after the first one is finished) _InitializeGlobalParameters(); - HANDLE hStartEvent = nullptr; // start event (used to inform the worker threads that they should start the work) - HANDLE hEndEvent = nullptr; // end event (used only in case of completin routines (not for IO Completion Ports)) + HANDLE hEndEvent = nullptr; // end event (used only in case of completin routines (not for IO Completion Ports)) memset(&g_EtwEventCounters, 0, sizeof(struct ETWEventCounters)); // reset all etw event counters - bool fUseETW = false; //true if user wants ETW + const bool fUseETW = false; //true if user wants ETW // // load dlls @@ -1927,23 +1826,24 @@ bool IORequestGenerator::_GenerateRequestsForTimeSpan(const Profile& profile, co //FUTURE EXTENSION: check for conflicts in alignment (when cache is turned off only sector aligned I/O are permitted) //FUTURE EXTENSION: check if file sizes are enough to have at least first requests not wrapping around - + + Random r; vector vTargets = timeSpan.GetTargets(); // allocate memory for random data write buffers - for (auto i = vTargets.begin(); i != vTargets.end(); i++) + for (auto& vTarget : vTargets) { - if ((i->GetRandomDataWriteBufferSize() > 0) && !i->AllocateAndFillRandomDataWriteBuffer()) + if ((vTarget.GetRandomDataWriteBufferSize() > 0) && !vTarget.AllocateAndFillRandomDataWriteBuffer(&r)) { return false; } } // check if user wanted to create a file - for (auto i = vTargets.begin(); i != vTargets.end(); i++) + for (auto& vTarget : vTargets) { - if ((i->GetFileSize() > 0) && (i->GetPrecreated() == false)) + if ((vTarget.GetFileSize() > 0) && (!vTarget.GetPrecreated())) { - string str = i->GetPath(); + string str = vTarget.GetPath(); if (str.empty()) { PrintError("You have to provide a filename\n"); @@ -1957,7 +1857,7 @@ bool IORequestGenerator::_GenerateRequestsForTimeSpan(const Profile& profile, co } //create only regular files - if (!_CreateFile(i->GetFileSize(), str.c_str(), i->GetZeroWriteBuffers(), profile.GetVerbose())) + if (!_CreateFile(vTarget.GetFileSize(), str.c_str(), vTarget.GetZeroWriteBuffers(), profile.GetVerbose())) { return false; } @@ -1968,9 +1868,9 @@ bool IORequestGenerator::_GenerateRequestsForTimeSpan(const Profile& profile, co UINT32 cThreads = timeSpan.GetThreadCount(); if (cThreads < 1) { - for (auto i = vTargets.begin(); i != vTargets.end(); i++) + for (auto& vTarget : vTargets) { - cThreads += i->GetThreadsPerFile(); + cThreads += vTarget.GetThreadsPerFile(); } } @@ -1988,8 +1888,8 @@ bool IORequestGenerator::_GenerateRequestsForTimeSpan(const Profile& profile, co //create start event // - hStartEvent = CreateEvent(NULL, TRUE, FALSE, ""); - if (NULL == hStartEvent) + HANDLE hStartEvent = CreateEvent(nullptr, TRUE, FALSE, ""); // start event (used to inform the worker threads that they should start the work) + if (nullptr == hStartEvent) { PrintError("Error creating the start event\n"); return false; @@ -2000,14 +1900,22 @@ bool IORequestGenerator::_GenerateRequestsForTimeSpan(const Profile& profile, co // if (timeSpan.GetCompletionRoutines()) { - hEndEvent = CreateEvent(NULL, TRUE, FALSE, ""); - if (NULL == hEndEvent) + hEndEvent = CreateEvent(nullptr, TRUE, FALSE, ""); + if (nullptr == hEndEvent) { PrintError("Error creating the end event\n"); + _AbortWorkerThreads(hStartEvent, vhThreads); return false; } } + // + // set to high priority to ensure the controller thread gets to run immediately + // when signalled. + // + + SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_HIGHEST); + // // create the threads // @@ -2030,7 +1938,7 @@ bool IORequestGenerator::_GenerateRequestsForTimeSpan(const Profile& profile, co for (UINT32 iThread = 0; iThread < cThreads; ++iThread) { printfv(profile.GetVerbose(), "creating thread %u\n", iThread); - ThreadParameters *cookie = new ThreadParameters(); // threadFunc is going to free the memory + auto*cookie = new ThreadParameters(); // threadFunc is going to free the memory if (nullptr == cookie) { PrintError("FATAL ERROR: could not allocate memory\n"); @@ -2038,16 +1946,47 @@ bool IORequestGenerator::_GenerateRequestsForTimeSpan(const Profile& profile, co return false; } + // each thread has a different random seed + auto*pRand = new Random(timeSpan.GetRandSeed() + iThread); + if (nullptr == pRand) + { + PrintError("FATAL ERROR: could not allocate memory\n"); + _AbortWorkerThreads(hStartEvent, vhThreads); + delete cookie; + return false; + } + UINT32 ulRelativeThreadNo = 0; if (timeSpan.GetThreadCount() > 0) { - // fixed thread mode: all threads operate on all files + // fixed thread mode: threads operate on specified files // and receive the entire seq index array. // relative thread number is the same as thread number. - cookie->vTargets = vTargets; cookie->pullSharedSequentialOffsets = &vullSharedSequentialOffsets[0]; ulRelativeThreadNo = iThread; + for (auto& vTarget : vTargets) + { + const vector vThreadTargets = vTarget.GetThreadTargets(); + + // no thread targets specified - add to all threads + if (vThreadTargets.empty()) + { + cookie->vTargets.push_back(vTarget); + } + else + { + // check if the target should be added to the current thread + for (auto vThreadTarget : vThreadTargets) + { + if (vThreadTarget.GetThread() == iThread) + { + cookie->vTargets.push_back(vTarget); + break; + } + } + } + } } else { @@ -2056,7 +1995,7 @@ bool IORequestGenerator::_GenerateRequestsForTimeSpan(const Profile& profile, co auto psi = vullSharedSequentialOffsets.begin(); for (auto i = vTargets.begin(); i != vTargets.end(); - i++, psi++) + ++i, ++psi) { // per-file thread mode: groups of threads operate on individual files // and receive the specific seq index for their file (note: singular). @@ -2093,12 +2032,13 @@ bool IORequestGenerator::_GenerateRequestsForTimeSpan(const Profile& profile, co cookie->pfAccountingOn = &fAccountingOn; cookie->pullStartTime = &ullStartTime; cookie->ulRandSeed = timeSpan.GetRandSeed() + iThread; // each thread has a different random seed + cookie->pRand = pRand; //Set thread group and proc affinity // Default: Round robin cores in order of groups, starting at group 0. // Fill each group before moving to next. - if (vAffinity.size() == 0) + if (vAffinity.empty()) { cookie->wGroupNum = wGroupCtr; cookie->bProcNum = bProcCtr; @@ -2109,7 +2049,7 @@ bool IORequestGenerator::_GenerateRequestsForTimeSpan(const Profile& profile, co // Assigned affinity. Round robin through the assignment list. else { - ULONG i = iThread % vAffinity.size(); + const ULONG i = iThread % vAffinity.size(); cookie->wGroupNum = vAffinity[i].wGroup; cookie->bProcNum = vAffinity[i].bProc; @@ -2120,13 +2060,14 @@ bool IORequestGenerator::_GenerateRequestsForTimeSpan(const Profile& profile, co InterlockedIncrement(&g_lRunningThreadsCount); DWORD dwThreadId; - HANDLE hThread = CreateThread(NULL, 64 * 1024, threadFunc, cookie, 0, &dwThreadId); - if (NULL == hThread) + HANDLE hThread = CreateThread(nullptr, 64 * 1024, threadFunc, cookie, 0, &dwThreadId); + if (nullptr == hThread) { //in case of error terminate running worker threads PrintError("ERROR: unable to create thread (error code: %u)\n", GetLastError()); InterlockedDecrement(&g_lRunningThreadsCount); _AbortWorkerThreads(hStartEvent, vhThreads); + delete pRand; delete cookie; return false; } @@ -2135,11 +2076,7 @@ bool IORequestGenerator::_GenerateRequestsForTimeSpan(const Profile& profile, co vhThreads[iThread] = hThread; } - //FUTURE EXTENSION: SetPriorityClass HIGH/ABOVE_NORMAL - //FUTURE EXTENSION: lower priority so the worker threads will initialize (-2) - //FUTURE EXTENSION: raise priority so this thread will run after the time end - - if (STRUCT_SYNCHRONIZATION_SUPPORTS(pSynch, hStartEvent) && (NULL != pSynch->hStartEvent)) + if (STRUCT_SYNCHRONIZATION_SUPPORTS(pSynch, hStartEvent) && (nullptr != pSynch->hStartEvent)) { if (WAIT_OBJECT_0 != WaitForSingleObject(pSynch->hStartEvent, INFINITE)) { @@ -2152,13 +2089,13 @@ bool IORequestGenerator::_GenerateRequestsForTimeSpan(const Profile& profile, co // // get cycle count (it will be used to calculate actual work time) // - DWORD dwWaitStatus = 0; + DWORD dwWaitStatus; //bAccountingOn = FALSE; // clear the accouning flag so that threads didn't count what they do while in the warmup phase - BOOL bSynchStop = STRUCT_SYNCHRONIZATION_SUPPORTS(pSynch, hStopEvent) && (NULL != pSynch->hStopEvent); + const BOOL bSynchStop = STRUCT_SYNCHRONIZATION_SUPPORTS(pSynch, hStopEvent) && (nullptr != pSynch->hStopEvent); BOOL bBreak = FALSE; - PEVENT_TRACE_PROPERTIES pETWSession = NULL; + PEVENT_TRACE_PROPERTIES pETWSession = nullptr; printfv(profile.GetVerbose(), "starting warm up...\n"); // @@ -2179,7 +2116,7 @@ bool IORequestGenerator::_GenerateRequestsForTimeSpan(const Profile& profile, co { if (bSynchStop) { - assert(NULL != pSynch->hStopEvent); + assert(nullptr != pSynch->hStopEvent); dwWaitStatus = WaitForSingleObject(pSynch->hStopEvent, 1000 * timeSpan.GetWarmup()); if (WAIT_OBJECT_0 != dwWaitStatus && WAIT_TIMEOUT != dwWaitStatus) { @@ -2215,7 +2152,7 @@ bool IORequestGenerator::_GenerateRequestsForTimeSpan(const Profile& profile, co return false; } - if (NULL == CreateThread(NULL, 64 * 1024, etwThreadFunc, NULL, 0, NULL)) + if (nullptr == CreateThread(nullptr, 64 * 1024, etwThreadFunc, nullptr, 0, nullptr)) /* result of CreateThread not stored*/ { PrintError("Warning: unable to create thread for ETW session\n"); _TerminateWorkerThreads(vhThreads); @@ -2230,7 +2167,7 @@ bool IORequestGenerator::_GenerateRequestsForTimeSpan(const Profile& profile, co if (_GetSystemPerfInfo(&vPerfInit[0], g_SystemInformation.processorTopology._ulProcCount) == FALSE) { PrintError("Error reading performance counters\n"); - _StopETW(fUseETW, hTraceSession); + _StopETW(fUseETW, hTraceSession); /* result unused */ _TerminateWorkerThreads(vhThreads); return false; } @@ -2242,7 +2179,7 @@ bool IORequestGenerator::_GenerateRequestsForTimeSpan(const Profile& profile, co // notify the front-end that the test is about to start; // do it before starting timing in order not to perturb measurements // - if (STRUCT_SYNCHRONIZATION_SUPPORTS(pSynch, pfnCallbackTestStarted) && (NULL != pSynch->pfnCallbackTestStarted)) + if (STRUCT_SYNCHRONIZATION_SUPPORTS(pSynch, pfnCallbackTestStarted) && (nullptr != pSynch->pfnCallbackTestStarted)) { pSynch->pfnCallbackTestStarted(); } @@ -2257,12 +2194,12 @@ bool IORequestGenerator::_GenerateRequestsForTimeSpan(const Profile& profile, co assert(timeSpan.GetDuration() > 0); if (bSynchStop) { - assert(NULL != pSynch->hStopEvent); + assert(nullptr != pSynch->hStopEvent); dwWaitStatus = WaitForSingleObject(pSynch->hStopEvent, 1000 * timeSpan.GetDuration()); if (WAIT_OBJECT_0 != dwWaitStatus && WAIT_TIMEOUT != dwWaitStatus) { PrintError("Error during WaitForSingleObject\n"); - _StopETW(fUseETW, hTraceSession); + _StopETW(fUseETW, hTraceSession); /* result unused */ _TerminateWorkerThreads(vhThreads); //FUTURE EXTENSION: worker threads should have a chance to free allocated memory (see also other places calling terminateWorkerThreads()) return FALSE; } @@ -2282,7 +2219,7 @@ bool IORequestGenerator::_GenerateRequestsForTimeSpan(const Profile& profile, co // notify the front-end that the test has just finished; // do it after stopping timing in order not to perturb measurements // - if (STRUCT_SYNCHRONIZATION_SUPPORTS(pSynch, pfnCallbackTestFinished) && (NULL != pSynch->pfnCallbackTestFinished)) + if (STRUCT_SYNCHRONIZATION_SUPPORTS(pSynch, pfnCallbackTestFinished) && (nullptr != pSynch->pfnCallbackTestFinished)) { pSynch->pfnCallbackTestFinished(); } @@ -2290,28 +2227,10 @@ bool IORequestGenerator::_GenerateRequestsForTimeSpan(const Profile& profile, co if (_GetSystemPerfInfo(&vPerfDone[0], g_SystemInformation.processorTopology._ulProcCount) == FALSE) { PrintError("Error getting performance counters\n"); - _StopETW(fUseETW, hTraceSession); + _StopETW(fUseETW, hTraceSession); /* result unused */ _TerminateWorkerThreads(vhThreads); return false; } - - // - // stop etw session - // - if (fUseETW) - { - printfv(profile.GetVerbose(), "stopping ETW session\n"); - pETWSession = StopETWSession(hTraceSession); - if (NULL == pETWSession) - { - PrintError("Error stopping ETW session\n"); - return false; - } - else - { - free(pETWSession); - } - } } else { @@ -2323,7 +2242,7 @@ bool IORequestGenerator::_GenerateRequestsForTimeSpan(const Profile& profile, co { if (bSynchStop) { - assert(NULL != pSynch->hStopEvent); + assert(nullptr != pSynch->hStopEvent); dwWaitStatus = WaitForSingleObject(pSynch->hStopEvent, 1000 * timeSpan.GetCooldown()); if (WAIT_OBJECT_0 != dwWaitStatus && WAIT_TIMEOUT != dwWaitStatus) { @@ -2346,7 +2265,7 @@ bool IORequestGenerator::_GenerateRequestsForTimeSpan(const Profile& profile, co g_bRun = FALSE; if (timeSpan.GetCompletionRoutines()) { - if (!SetEvent(hEndEvent)) + if ((hEndEvent == nullptr) || !SetEvent(hEndEvent)) { PrintError("Error signaling end event\n"); // stopETW(bUseETW, hTraceSession); @@ -2377,12 +2296,12 @@ bool IORequestGenerator::_GenerateRequestsForTimeSpan(const Profile& profile, co // close events' handles // CloseHandle(hStartEvent); - hStartEvent = NULL; + hStartEvent = nullptr; - if (NULL != hEndEvent) + if (nullptr != hEndEvent) { CloseHandle(hEndEvent); - hEndEvent = NULL; + hEndEvent = nullptr; } //FUTURE EXTENSION: hStartEvent and hEndEvent should be closed in case of error too @@ -2394,16 +2313,16 @@ bool IORequestGenerator::_GenerateRequestsForTimeSpan(const Profile& profile, co assert(vPerfDone[p].IdleTime.QuadPart >= vPerfInit[p].IdleTime.QuadPart); assert(vPerfDone[p].KernelTime.QuadPart >= vPerfInit[p].KernelTime.QuadPart); assert(vPerfDone[p].UserTime.QuadPart >= vPerfInit[p].UserTime.QuadPart); - //assert(vPerfDone[p].Reserved1[0].QuadPart >= vPerfInit[p].Reserved1[0].QuadPart); - //assert(vPerfDone[p].Reserved1[1].QuadPart >= vPerfInit[p].Reserved1[1].QuadPart); - //assert(vPerfDone[p].Reserved2 >= vPerfInit[p].Reserved2); + assert(vPerfDone[p].Reserved1[0].QuadPart >= vPerfInit[p].Reserved1[0].QuadPart); + assert(vPerfDone[p].Reserved1[1].QuadPart >= vPerfInit[p].Reserved1[1].QuadPart); + assert(vPerfDone[p].Reserved2 >= vPerfInit[p].Reserved2); vPerfDiff[p].IdleTime.QuadPart = vPerfDone[p].IdleTime.QuadPart - vPerfInit[p].IdleTime.QuadPart; vPerfDiff[p].KernelTime.QuadPart = vPerfDone[p].KernelTime.QuadPart - vPerfInit[p].KernelTime.QuadPart; vPerfDiff[p].UserTime.QuadPart = vPerfDone[p].UserTime.QuadPart - vPerfInit[p].UserTime.QuadPart; - //vPerfDiff[p].Reserved1[0].QuadPart = vPerfDone[p].Reserved1[0].QuadPart - vPerfInit[p].Reserved1[0].QuadPart; - //vPerfDiff[p].Reserved1[1].QuadPart = vPerfDone[p].Reserved1[1].QuadPart - vPerfInit[p].Reserved1[1].QuadPart; - //vPerfDiff[p].Reserved2 = vPerfDone[p].Reserved2 - vPerfInit[p].Reserved2; + vPerfDiff[p].Reserved1[0].QuadPart = vPerfDone[p].Reserved1[0].QuadPart - vPerfInit[p].Reserved1[0].QuadPart; + vPerfDiff[p].Reserved1[1].QuadPart = vPerfDone[p].Reserved1[1].QuadPart - vPerfInit[p].Reserved1[1].QuadPart; + vPerfDiff[p].Reserved2 = vPerfDone[p].Reserved2 - vPerfInit[p].Reserved2; } // @@ -2439,9 +2358,9 @@ bool IORequestGenerator::_GenerateRequestsForTimeSpan(const Profile& profile, co } // free memory used by random data write buffers - for (auto i = vTargets.begin(); i != vTargets.end(); i++) + for (auto& vTarget : vTargets) { - i->FreeRandomDataWriteBuffer(); + vTarget.FreeRandomDataWriteBuffer(); } // TODO: this won't catch error cases, which exit early @@ -2449,7 +2368,7 @@ bool IORequestGenerator::_GenerateRequestsForTimeSpan(const Profile& profile, co return true; } -vector IORequestGenerator::_GetFilesToPrecreate(const Profile& profile) const +vector IORequestGenerator::_GetFilesToPrecreate(const Profile& profile) { vector vFilesToCreate; const vector& vTimeSpans = profile.GetTimeSpans(); @@ -2468,10 +2387,10 @@ vector IORequestGenerator::_Get } } - PrecreateFiles filter = profile.GetPrecreateFiles(); + const PrecreateFiles filter = profile.GetPrecreateFiles(); for (auto fileMapEntry : filesMap) { - if (fileMapEntry.second.size() > 0) + if (!fileMapEntry.second.empty()) { UINT64 ullLastNonZeroSize = fileMapEntry.second[0].ullFileSize; UINT64 ullMaxSize = fileMapEntry.second[0].ullFileSize; @@ -2527,3 +2446,4 @@ vector IORequestGenerator::_Get return vFilesToCreate; } + diff --git a/IORequestGenerator/OverlappedQueue.cpp b/IORequestGenerator/OverlappedQueue.cpp index 23fbd36..251bb6b 100644 --- a/IORequestGenerator/OverlappedQueue.cpp +++ b/IORequestGenerator/OverlappedQueue.cpp @@ -28,9 +28,9 @@ SOFTWARE. */ #include "OverlappedQueue.h" -#include +#include -OverlappedQueue::OverlappedQueue(void) : +OverlappedQueue::OverlappedQueue() : _pHead(nullptr), _pTail(nullptr), _cItems(0) @@ -48,23 +48,23 @@ void OverlappedQueue::Add(OVERLAPPED *pOverlapped) else { assert(_pTail != nullptr); - _pTail->Internal = (ULONG_PTR)pOverlapped; + _pTail->Internal = reinterpret_cast(pOverlapped); } _pTail = pOverlapped; _cItems++; } -bool OverlappedQueue::IsEmpty(void) const +bool OverlappedQueue::IsEmpty() const { return (_pHead == nullptr); } -OVERLAPPED *OverlappedQueue::Remove(void) +OVERLAPPED *OverlappedQueue::Remove() { assert(!IsEmpty()); OVERLAPPED *pOverlapped = _pHead; - _pHead = (OVERLAPPED *)pOverlapped->Internal; + _pHead = reinterpret_cast(pOverlapped->Internal); if (_pHead == nullptr) { _pTail = nullptr; diff --git a/IORequestGenerator/ThroughputMeter.cpp b/IORequestGenerator/ThroughputMeter.cpp index 6b3fcf2..974796e 100644 --- a/IORequestGenerator/ThroughputMeter.cpp +++ b/IORequestGenerator/ThroughputMeter.cpp @@ -29,12 +29,13 @@ SOFTWARE. #include "ThroughputMeter.h" -ThroughputMeter::ThroughputMeter(void) : - _fRunning(false) +ThroughputMeter::ThroughputMeter() : + _fRunning(false), _fThrottle(false), _fThink(false), _cbCompleted(0), _cbBlockSize(0), _cBytesPerMillisecond(0), + _ullStartTimestamp(0), _ullDelayUntil(0), _thinkTime(0), _burstSize(0), _cIO(0) { } -bool ThroughputMeter::IsRunning(void) const +bool ThroughputMeter::IsRunning() const { return _fRunning; } @@ -71,36 +72,27 @@ void ThroughputMeter::Start(DWORD cBytesPerMillisecond, DWORD dwBlockSize, DWORD } } -DWORD ThroughputMeter::GetSleepTime(void) const +DWORD ThroughputMeter::GetSleepTime() const { if (_fThink) { - ULONGLONG ullTimestamp = GetTickCount64(); + const ULONGLONG ullTimestamp = GetTickCount64(); if (ullTimestamp < _ullDelayUntil) { - return (DWORD)(_ullDelayUntil - ullTimestamp); - } - else - { - return (_fThrottle) ? _GetThrottleTime() : 0; - } - } - else - { - if (_fThrottle) // think time has not been specified only check for throttling - { - return _GetThrottleTime(); - } - else - { - return 0; + return static_cast(_ullDelayUntil - ullTimestamp); } + return (_fThrottle) ? _GetThrottleTime() : 0; } + if (_fThrottle) // think time has not been specified only check for throttling + { + return _GetThrottleTime(); + } + return 0; } -DWORD ThroughputMeter::_GetThrottleTime(void) const +DWORD ThroughputMeter::_GetThrottleTime() const { - ULONGLONG cbExpected = (GetTickCount64() - _ullStartTimestamp) * _cBytesPerMillisecond; + const ULONGLONG cbExpected = (GetTickCount64() - _ullStartTimestamp) * _cBytesPerMillisecond; return cbExpected >= (_cbCompleted + _cbBlockSize) ? 0 : 1; } diff --git a/IORequestGenerator/etw.cpp b/IORequestGenerator/etw.cpp index 445c3f4..28f5176 100644 --- a/IORequestGenerator/etw.cpp +++ b/IORequestGenerator/etw.cpp @@ -28,9 +28,7 @@ SOFTWARE. */ #include "etw.h" -#include "common.h" -#include -#include +//#include "Common.h" #include //WNODE_HEADER @@ -122,29 +120,26 @@ DEFINE_GUID ( /* 3d6fa8d1-fe05-11d0-9dda-00c04fd7ba7c */ BOOL TraceEvents() { TRACEHANDLE handles[1]; - EVENT_TRACE_LOGFILE logfile; + EVENT_TRACE_LOGFILE logfile{const_cast(KERNEL_LOGGER_NAME), NULL, EVENT_TRACE_REAL_TIME_MODE, true}; - memset(&logfile, 0, sizeof(EVENT_TRACE_LOGFILE)); + //memset(&logfile, 0, sizeof(EVENT_TRACE_LOGFILE)); - logfile.LoggerName = KERNEL_LOGGER_NAME; - logfile.LogFileName = NULL; - logfile.LogFileMode = EVENT_TRACE_REAL_TIME_MODE; + //logfile.LoggerName = KERNEL_LOGGER_NAME; + //logfile.LogFileName = nullptr; + //logfile.LogFileMode = EVENT_TRACE_REAL_TIME_MODE; - logfile.IsKernelTrace = true; + //logfile.IsKernelTrace = true; handles[0] = OpenTrace(&logfile); - if( (TRACEHANDLE)INVALID_HANDLE_VALUE == handles[0] ) + if( reinterpret_cast(INVALID_HANDLE_VALUE) == handles[0] ) { PrintError("ETW ERROR: OpenTrace failed (error code: %d)\n", GetLastError()); return false; } - else - { - ProcessTrace(handles, 1, 0, 0); - CloseTrace(handles[0]); - } + ProcessTrace(handles, 1, nullptr, nullptr); + CloseTrace(handles[0]); - return true; + return true; } /*****************************************************************************/ @@ -152,25 +147,21 @@ BOOL TraceEvents() // PEVENT_TRACE_PROPERTIES allocateEventTraceProperties() { - PEVENT_TRACE_PROPERTIES pProperties = NULL; - size_t size = 0; - - - size = sizeof(EVENT_TRACE_PROPERTIES)+sizeof(KERNEL_LOGGER_NAME); - pProperties = (PEVENT_TRACE_PROPERTIES)malloc(size); - if( NULL == pProperties ) + const size_t size = sizeof(EVENT_TRACE_PROPERTIES)+sizeof(KERNEL_LOGGER_NAME); + auto pProperties = static_cast(malloc(size)); + if( nullptr == pProperties ) { PrintError("FATAL ERROR: unable to allocate memory (error code: %d)\n", GetLastError()); - return NULL; + return nullptr; } memset(pProperties, 0, size); pProperties->LoggerNameOffset = sizeof(EVENT_TRACE_PROPERTIES); - pProperties->Wnode.BufferSize = (ULONG)size; + pProperties->Wnode.BufferSize = static_cast(size); pProperties->Wnode.Flags = WNODE_FLAG_TRACED_GUID; - strcpy_s((char *)pProperties+pProperties->LoggerNameOffset, + strcpy_s(reinterpret_cast(pProperties)+pProperties->LoggerNameOffset, size-pProperties->LoggerNameOffset, KERNEL_LOGGER_NAME); return pProperties; @@ -372,9 +363,7 @@ void WINAPI eventRegistry(PEVENT_TRACE pEvent) // TRACEHANDLE StartETWSession(const Profile& profile) { - PEVENT_TRACE_PROPERTIES pProperties; - - pProperties = allocateEventTraceProperties(); + PEVENT_TRACE_PROPERTIES pProperties = allocateEventTraceProperties(); if (nullptr == pProperties) { return 0; @@ -459,7 +448,7 @@ TRACEHANDLE StartETWSession(const Profile& profile) pProperties->Wnode.Guid = SystemTraceControlGuid; TRACEHANDLE hTraceSession; - ULONG ret = StartTrace(&hTraceSession, KERNEL_LOGGER_NAME, pProperties); + const ULONG ret = StartTrace(&hTraceSession, KERNEL_LOGGER_NAME, pProperties); free(pProperties); if (ERROR_SUCCESS != ret) { @@ -475,21 +464,17 @@ TRACEHANDLE StartETWSession(const Profile& profile) // PEVENT_TRACE_PROPERTIES StopETWSession(TRACEHANDLE hTraceSession) { - PEVENT_TRACE_PROPERTIES pProperties; - - pProperties = allocateEventTraceProperties(); - if( NULL == pProperties ) + PEVENT_TRACE_PROPERTIES pProperties = allocateEventTraceProperties(); + if( nullptr == pProperties ) { - return NULL; + return nullptr; } - ULONG ret; - - ret = ControlTrace(hTraceSession, NULL, pProperties, EVENT_TRACE_CONTROL_STOP); + const ULONG ret = ControlTrace(hTraceSession, nullptr, pProperties, EVENT_TRACE_CONTROL_STOP); if( ERROR_SUCCESS != ret ) { PrintError("Error stopping trace session\n"); - return NULL; + return nullptr; } //wait diff --git a/ResultParser/ResultParser.cpp b/ResultParser/ResultParser.cpp index d178136..8551630 100644 --- a/ResultParser/ResultParser.cpp +++ b/ResultParser/ResultParser.cpp @@ -31,16 +31,15 @@ SOFTWARE. // #include "ResultParser.h" -#include "common.h" +#include "Common.h" -#include -#include +#include +#include #include //ntdll.dll -#include //WNODE_HEADER -#include +//#include -#include +#include // TODO: refactor to a single function shared with the XmlResultParser void ResultParser::_Print(const char *format, ...) @@ -60,11 +59,11 @@ void ResultParser::_Print(const char *format, ...) void ResultParser::_DisplayFileSize(UINT64 fsize) { - if( fsize > (UINT64)10*1024*1024*1024 ) // > 10GB + if( fsize > static_cast(10)*1024*1024*1024 ) // > 10GB { _Print("%uGiB", fsize >> 30); } - else if( fsize > (UINT64)10*1024*1024 ) // > 10MB + else if( fsize > static_cast(10)*1024*1024 ) // > 10MB { _Print("%uMiB", fsize >> 20); } @@ -217,7 +216,7 @@ void ResultParser::_DisplayETW(struct ETWMask ETWMask, struct ETWEventCounters E } } -void ResultParser::_PrintTarget(const Target &target, bool fUseThreadsPerFile, bool fCompletionRoutines) +void ResultParser::_PrintTarget(const Target &target, bool fUseThreadsPerFile, bool fUseRequestsPerFile, bool fCompletionRoutines) { _Print("\tpath: '%s'\n", target.GetPath().c_str()); _Print("\t\tthink time: %ums\n", target.GetThinkTime()); @@ -235,6 +234,8 @@ void ResultParser::_PrintTarget(const Target &target, bool fUseThreadsPerFile, b case TargetCacheMode::DisableOSCache: _Print("\t\tsoftware cache disabled\n"); break; + case TargetCacheMode::Undefined: + _Print("\t\tcache mode underfined!\n"); } if (target.GetWriteThroughMode() == WriteThroughMode::On) @@ -250,6 +251,8 @@ void ResultParser::_PrintTarget(const Target &target, bool fUseThreadsPerFile, b case TargetCacheMode::DisableOSCache: _Print("\t\thardware write cache disabled, writethrough on\n"); break; + case TargetCacheMode::Undefined: + _Print("\t\thardware write cache mode undefined!\n"); } } else @@ -267,7 +270,7 @@ void ResultParser::_PrintTarget(const Target &target, bool fUseThreadsPerFile, b { _Print("\t\twrite buffer size: %I64u\n", target.GetRandomDataWriteBufferSize()); string sWriteBufferSourcePath = target.GetRandomDataWriteBufferSourcePath(); - if (sWriteBufferSourcePath != "") + if (!sWriteBufferSourcePath.empty()) { _Print("\t\twrite buffer source: '%s'\n", sWriteBufferSourcePath.c_str()); } @@ -308,7 +311,11 @@ void ResultParser::_PrintTarget(const Target &target, bool fUseThreadsPerFile, b } _Print("%I64u)\n", target.GetBlockAlignmentInBytes()); - _Print("\t\tnumber of outstanding I/O operations: %d\n", target.GetRequestCount()); + if (fUseRequestsPerFile) + { + _Print("\t\tnumber of outstanding I/O operations: %d\n", target.GetRequestCount()); + } + if (0 != target.GetBaseFileOffsetInBytes()) { _Print("\t\tbase file offset: %I64u\n", target.GetBaseFileOffsetInBytes()); @@ -390,7 +397,7 @@ void ResultParser::_PrintTimeSpan(const TimeSpan& timeSpan) _Print("\trandom seed: %u\n", timeSpan.GetRandSeed()); const auto& vAffinity = timeSpan.GetAffinityAssignments(); - if ( vAffinity.size() > 0) + if (!vAffinity.empty()) { _Print("\tadvanced affinity round robin (group/core): "); for (unsigned int x = 0; x < vAffinity.size(); ++x) @@ -404,10 +411,17 @@ void ResultParser::_PrintTimeSpan(const TimeSpan& timeSpan) _Print("\n"); } + if (timeSpan.GetRandomWriteData()) + { + _Print("\tgenerating random data for each write IO\n"); + _Print("\t WARNING: this increases the CPU cost of issuing writes and should only\n"); + _Print("\t be compared to other results using the -Zr flag\n"); + } + vector vTargets(timeSpan.GetTargets()); - for (auto i = vTargets.begin(); i != vTargets.end(); i++) + for (auto& vTarget : vTargets) { - _PrintTarget(*i, (timeSpan.GetThreadCount() == 0), timeSpan.GetCompletionRoutines()); + _PrintTarget(vTarget, (timeSpan.GetThreadCount() == 0), (timeSpan.GetThreadCount() == 0 || timeSpan.GetRequestCount() == 0), timeSpan.GetCompletionRoutines()); } } @@ -423,23 +437,31 @@ void ResultParser::_PrintProfile(const Profile& profile) const vector& vTimeSpans = profile.GetTimeSpans(); int c = 1; - for (auto i = vTimeSpans.begin(); i != vTimeSpans.end(); i++) + for (const auto& vTimeSpan : vTimeSpans) { _Print("\ttimespan: %3d\n", c++); _Print("\t-------------\n"); - _PrintTimeSpan(*i); + _PrintTimeSpan(vTimeSpan); _Print("\n"); } } -void ResultParser::_PrintCpuUtilization(const Results& results) +void ResultParser::_PrintCpuUtilization(const Results& results, const SystemInformation& system) { - size_t ulProcCount = results.vSystemProcessorPerfInfo.size(); - double fTime = PerfTimer::PerfTimeToSeconds(results.ullTimeCount); + const size_t ulProcCount = results.vSystemProcessorPerfInfo.size(); + size_t ulBaseProc = 0; + size_t ulActiveProcCount = 0; + const size_t ulNumGroups = system.processorTopology._vProcessorGroupInformation.size(); + const double fTime = PerfTimer::PerfTimeToSeconds(results.ullTimeCount); char szFloatBuffer[1024]; - _Print("\nCPU | Usage | User | Kernel | Idle\n"); + if (ulNumGroups == 1) { + _Print("\nCPU | Usage | User | Kernel | Idle\n"); + } + else { + _Print("\nGroup | CPU | Usage | User | Kernel | Idle\n"); + } _Print("-------------------------------------------\n"); double busyTime = 0; @@ -447,39 +469,69 @@ void ResultParser::_PrintCpuUtilization(const Results& results) double totalUserTime = 0; double totalKrnlTime = 0; - for (unsigned int x = 0; x= ulProcCount) { + break; + } + + for (unsigned int ulProcessor = 0; ulProcessor < pGroup->_maximumProcessorCount; ulProcessor++) { + if (!pGroup->IsProcessorActive(static_cast(ulProcessor))) { + continue; + } - idleTime = 100.0 * results.vSystemProcessorPerfInfo[x].IdleTime.QuadPart / 10000000 / fTime; - krnlTime = 100.0 * results.vSystemProcessorPerfInfo[x].KernelTime.QuadPart / 10000000 / fTime; - userTime = 100.0 * results.vSystemProcessorPerfInfo[x].UserTime.QuadPart / 10000000 / fTime; + const double idleTime = 100.0 * results.vSystemProcessorPerfInfo[ulBaseProc + ulProcessor].IdleTime.QuadPart / 10000000 / fTime; + const double krnlTime = 100.0 * results.vSystemProcessorPerfInfo[ulBaseProc + ulProcessor].KernelTime.QuadPart / 10000000 / fTime; + const double userTime = 100.0 * results.vSystemProcessorPerfInfo[ulBaseProc + ulProcessor].UserTime.QuadPart / 10000000 / fTime; - thisTime = (krnlTime + userTime) - idleTime; + const double thisTime = (krnlTime + userTime) - idleTime; - sprintf_s(szFloatBuffer, sizeof(szFloatBuffer), "%4u| %6.2lf%%| %6.2lf%%| %6.2lf%%| %6.2lf%%\n", - x, - thisTime, - userTime, - krnlTime - idleTime, - idleTime); - _Print("%s", szFloatBuffer); + if (ulNumGroups == 1) { + sprintf_s(szFloatBuffer, sizeof(szFloatBuffer), "%4u| %6.2lf%%| %6.2lf%%| %6.2lf%%| %6.2lf%%\n", + ulProcessor, + thisTime, + userTime, + krnlTime - idleTime, + idleTime); + } + else { + sprintf_s(szFloatBuffer, sizeof(szFloatBuffer), "%6u| %4u| %6.2lf%%| %6.2lf%%| %6.2lf%%| %6.2lf%%\n", + ulGroup, + ulProcessor, + thisTime, + userTime, + krnlTime - idleTime, + idleTime); + } + + _Print("%s", szFloatBuffer); + + busyTime += thisTime; + totalIdleTime += idleTime; + totalUserTime += userTime; + totalKrnlTime += krnlTime; + ulActiveProcCount += 1; + } - busyTime += thisTime; - totalIdleTime += idleTime; - totalUserTime += userTime; - totalKrnlTime += krnlTime; + ulBaseProc += pGroup->_maximumProcessorCount; } + + if (ulActiveProcCount == 0) { + ulActiveProcCount = 1; + } + _Print("-------------------------------------------\n"); - sprintf_s(szFloatBuffer, sizeof(szFloatBuffer), "avg.| %6.2lf%%| %6.2lf%%| %6.2lf%%| %6.2lf%%\n", - busyTime / ulProcCount, - totalUserTime / ulProcCount, - (totalKrnlTime - totalIdleTime) / ulProcCount, - totalIdleTime / ulProcCount); + sprintf_s(szFloatBuffer, sizeof(szFloatBuffer), + ulNumGroups == 1 ? + "avg.| %6.2lf%%| %6.2lf%%| %6.2lf%%| %6.2lf%%\n" : + " avg.| %6.2lf%%| %6.2lf%%| %6.2lf%%| %6.2lf%%\n", + busyTime / ulActiveProcCount, + totalUserTime / ulActiveProcCount, + (totalKrnlTime - totalIdleTime) / ulActiveProcCount, + totalIdleTime / ulActiveProcCount); _Print("%s", szFloatBuffer); } @@ -501,8 +553,8 @@ void ResultParser::_PrintSectionBorderLine(const TimeSpan& timeSpan) void ResultParser::_PrintSection(_SectionEnum section, const TimeSpan& timeSpan, const Results& results) { - double fTime = PerfTimer::PerfTimeToSeconds(results.ullTimeCount); - double fBucketTime = timeSpan.GetIoBucketDurationInMilliseconds() / 1000.0; + const double fTime = PerfTimer::PerfTimeToSeconds(results.ullTimeCount); + const double fBucketTime = timeSpan.GetIoBucketDurationInMilliseconds() / 1000.0; UINT64 ullTotalBytesCount = 0; UINT64 ullTotalIOCount = 0; Histogram totalLatencyHistogram; @@ -515,11 +567,9 @@ void ResultParser::_PrintSection(_SectionEnum section, const TimeSpan& timeSpan, for (unsigned int iThread = 0; iThread < results.vThreadResults.size(); ++iThread) { const ThreadResults& threadResults = results.vThreadResults[iThread]; - for (unsigned int iFile = 0; iFile < threadResults.vTargetResults.size(); iFile++) + for (const auto& targetResults : threadResults.vTargetResults) { - const TargetResults& targetResults = threadResults.vTargetResults[iFile]; - - UINT64 ullBytesCount = 0; + UINT64 ullBytesCount = 0; UINT64 ullIOCount = 0; Histogram latencyHistogram; @@ -565,18 +615,18 @@ void ResultParser::_PrintSection(_SectionEnum section, const TimeSpan& timeSpan, iThread, ullBytesCount, ullIOCount, - (double)ullBytesCount / 1024 / 1024 / fTime, - (double)ullIOCount / fTime); + static_cast(ullBytesCount) / 1024 / 1024 / fTime, + static_cast(ullIOCount) / fTime); if (timeSpan.GetMeasureLatency()) { - double avgLat = latencyHistogram.GetAvg()/1000; + const double avgLat = latencyHistogram.GetAvg()/1000; _Print(" | %8.3f", avgLat); } if (timeSpan.GetCalculateIopsStdDev()) { - double iopsStdDev = ioBucketizer.GetStandardDeviation() / fBucketTime; + const double iopsStdDev = ioBucketizer.GetStandardDeviationIOPS() / fBucketTime; _Print(" | %10.2f", iopsStdDev); } @@ -584,7 +634,7 @@ void ResultParser::_PrintSection(_SectionEnum section, const TimeSpan& timeSpan, { if (latencyHistogram.GetSampleSize() > 0) { - double latStdDev = latencyHistogram.GetStandardDeviation() / 1000; + const double latStdDev = latencyHistogram.GetStandardDeviation() / 1000; _Print(" | %8.3f", latStdDev); } else @@ -615,8 +665,8 @@ void ResultParser::_PrintSection(_SectionEnum section, const TimeSpan& timeSpan, _Print("total: %15llu | %12llu | %10.2f | %10.2f", ullTotalBytesCount, ullTotalIOCount, - (double)ullTotalBytesCount / 1024 / 1024 / fTime, - (double)ullTotalIOCount / fTime); + static_cast(ullTotalBytesCount) / 1024 / 1024 / fTime, + static_cast(ullTotalIOCount) / fTime); if (timeSpan.GetMeasureLatency()) { @@ -625,7 +675,7 @@ void ResultParser::_PrintSection(_SectionEnum section, const TimeSpan& timeSpan, if (timeSpan.GetCalculateIopsStdDev()) { - double iopsStdDev = totalIoBucketizer.GetStandardDeviation() / fBucketTime; + const double iopsStdDev = totalIoBucketizer.GetStandardDeviationIOPS() / fBucketTime; _Print(" | %10.2f", iopsStdDev); } @@ -633,7 +683,7 @@ void ResultParser::_PrintSection(_SectionEnum section, const TimeSpan& timeSpan, { if (totalLatencyHistogram.GetSampleSize() > 0) { - double latStdDev = totalLatencyHistogram.GetStandardDeviation() / 1000; + const double latStdDev = totalLatencyHistogram.GetStandardDeviation() / 1000; _Print(" | %8.3f", latStdDev); } else @@ -647,65 +697,66 @@ void ResultParser::_PrintSection(_SectionEnum section, const TimeSpan& timeSpan, void ResultParser::_PrintLatencyPercentiles(const Results& results) { - //Print one chart for each target - unordered_map> perTargetReadHistogram; - unordered_map> perTargetWriteHistogram; - unordered_map> perTargetTotalHistogram; - - for (const auto& thread : results.vThreadResults) - { - for (const auto& target : thread.vTargetResults) - { - std::string path = target.sPath; - - perTargetReadHistogram[path].Merge(target.readLatencyHistogram); - - perTargetWriteHistogram[path].Merge(target.writeLatencyHistogram); - - perTargetTotalHistogram[path].Merge(target.readLatencyHistogram); - perTargetTotalHistogram[path].Merge(target.writeLatencyHistogram); - } - } - - for (auto i : perTargetReadHistogram) - { - std::string path = i.first; - _Print("\n%10s\n", path.c_str()); - _PrintLatencyChart(perTargetReadHistogram[path], - perTargetWriteHistogram[path], - perTargetTotalHistogram[path]); - } - - - //Print one chart for the latencies aggregated across all targets - Histogram readLatencyHistogram; - Histogram writeLatencyHistogram; - Histogram totalLatencyHistogram; - - for (const auto& thread : results.vThreadResults) - { - for (const auto& target : thread.vTargetResults) - { - readLatencyHistogram.Merge(target.readLatencyHistogram); - - writeLatencyHistogram.Merge(target.writeLatencyHistogram); - - totalLatencyHistogram.Merge(target.writeLatencyHistogram); - totalLatencyHistogram.Merge(target.readLatencyHistogram); - } - } - - _Print("\ntotal:\n"); - _PrintLatencyChart(readLatencyHistogram, writeLatencyHistogram, totalLatencyHistogram); + //Print one chart for each target IF more than one target + unordered_map> perTargetReadHistogram; + unordered_map> perTargetWriteHistogram; + unordered_map> perTargetTotalHistogram; + + for (const auto& thread : results.vThreadResults) + { + for (const auto& target : thread.vTargetResults) + { + const std::string path = target.sPath; + + perTargetReadHistogram[path].Merge(target.readLatencyHistogram); + + perTargetWriteHistogram[path].Merge(target.writeLatencyHistogram); + + perTargetTotalHistogram[path].Merge(target.readLatencyHistogram); + perTargetTotalHistogram[path].Merge(target.writeLatencyHistogram); + } + } + + //Skip if only one target + if (perTargetTotalHistogram.size() > 1) { + for (const auto& i : perTargetTotalHistogram) + { + std::string path = i.first; + _Print("\n%s\n", path.c_str()); + _PrintLatencyChart(perTargetReadHistogram[path], + perTargetWriteHistogram[path], + perTargetTotalHistogram[path]); + } + } + + //Print one chart for the latencies aggregated across all targets + Histogram readLatencyHistogram; + Histogram writeLatencyHistogram; + Histogram totalLatencyHistogram; + + for (const auto& thread : results.vThreadResults) + { + for (const auto& target : thread.vTargetResults) + { + readLatencyHistogram.Merge(target.readLatencyHistogram); + + writeLatencyHistogram.Merge(target.writeLatencyHistogram); + + totalLatencyHistogram.Merge(target.writeLatencyHistogram); + totalLatencyHistogram.Merge(target.readLatencyHistogram); + } + } + + _Print("\ntotal:\n"); + _PrintLatencyChart(readLatencyHistogram, writeLatencyHistogram, totalLatencyHistogram); } void ResultParser::_PrintLatencyChart(const Histogram& readLatencyHistogram, - const Histogram& writeLatencyHistogram, - const Histogram& totalLatencyHistogram) + const Histogram& writeLatencyHistogram, + const Histogram& totalLatencyHistogram) { - - bool fHasReads = readLatencyHistogram.GetSampleSize() > 0; - bool fHasWrites = writeLatencyHistogram.GetSampleSize() > 0; + const bool fHasReads = readLatencyHistogram.GetSampleSize() > 0; + const bool fHasWrites = writeLatencyHistogram.GetSampleSize() > 0; _Print(" %%-ile | Read (ms) | Write (ms) | Total (ms)\n"); _Print("----------------------------------------------\n"); @@ -785,14 +836,12 @@ string ResultParser::ParseResults(Profile& profile, const SystemInformation& sys const Results& results = vResults[iResult]; const TimeSpan& timeSpan = profile.GetTimeSpans()[iResult]; - size_t ulProcCount = results.vSystemProcessorPerfInfo.size(); - double fTime = PerfTimer::PerfTimeToSeconds(results.ullTimeCount); //test duration - - char szFloatBuffer[1024]; + const unsigned int ulProcCount = system.processorTopology._ulActiveProcCount; + const double fTime = PerfTimer::PerfTimeToSeconds(results.ullTimeCount); //test duration // There either is a fixed number of threads for all files to share (GetThreadCount() > 0) or a number of threads per file. // In the latter case vThreadResults.size() == number of threads per file * file count - size_t ulThreadCnt = (timeSpan.GetThreadCount() > 0) ? timeSpan.GetThreadCount() : results.vThreadResults.size(); + const size_t ulThreadCnt = (timeSpan.GetThreadCount() > 0) ? timeSpan.GetThreadCount() : results.vThreadResults.size(); if (fTime < 0.0000001) { @@ -801,14 +850,20 @@ string ResultParser::ParseResults(Profile& profile, const SystemInformation& sys else { // TODO: parameters.bCreateFile; + + char szFloatBuffer[1024]; _Print("\n"); sprintf_s(szFloatBuffer, sizeof(szFloatBuffer), "actual test time:\t%.2lfs\n", fTime); _Print("%s", szFloatBuffer); _Print("thread count:\t\t%u\n", ulThreadCnt); + if (timeSpan.GetThreadCount() != 0 && timeSpan.GetRequestCount() != 0) { + _Print("request count:\t\t%u\n", timeSpan.GetRequestCount()); + } + _Print("proc count:\t\t%u\n", ulProcCount); - _PrintCpuUtilization(results); + _PrintCpuUtilization(results, system); _Print("\nTotal IO\n"); _PrintSection(_SectionEnum::TOTAL, timeSpan, results); @@ -847,45 +902,45 @@ string ResultParser::ParseResults(Profile& profile, const SystemInformation& sys UINT64 cTotalWriteIO = 0; UINT64 cTotalReadIO = 0; UINT64 cTotalTicks = 0; - for (auto pResults = vResults.begin(); pResults != vResults.end(); pResults++) + for (auto& vResult : vResults) { - double time = PerfTimer::PerfTimeToSeconds(pResults->ullTimeCount); + const double time = PerfTimer::PerfTimeToSeconds(vResult.ullTimeCount); if (time >= 0.0000001) // skip timespans that were interrupted { - cTotalTicks += pResults->ullTimeCount; - auto vThreadResults = pResults->vThreadResults; - for (auto pThreadResults = vThreadResults.begin(); pThreadResults != vThreadResults.end(); pThreadResults++) + cTotalTicks += vResult.ullTimeCount; + auto vThreadResults = vResult.vThreadResults; + for (auto& vThreadResult : vThreadResults) { - for (auto pTargetResults = pThreadResults->vTargetResults.begin(); pTargetResults != pThreadResults->vTargetResults.end(); pTargetResults++) - { - cbTotalRead += pTargetResults->ullReadBytesCount; - cbTotalWritten += pTargetResults->ullWriteBytesCount; - cTotalReadIO += pTargetResults->ullReadIOCount; - cTotalWriteIO += pTargetResults->ullWriteIOCount; + for (auto& vTargetResult : vThreadResult.vTargetResults) + { + cbTotalRead += vTargetResult.ullReadBytesCount; + cbTotalWritten += vTargetResult.ullWriteBytesCount; + cTotalReadIO += vTargetResult.ullReadIOCount; + cTotalWriteIO += vTargetResult.ullWriteIOCount; } } } } - double totalTime = PerfTimer::PerfTimeToSeconds(cTotalTicks); + const double totalTime = PerfTimer::PerfTimeToSeconds(cTotalTicks); _Print("write | %15I64u | %12I64u | %10.2lf | %10.2lf\n", cbTotalWritten, cTotalWriteIO, - (double)cbTotalWritten / 1024 / 1024 / totalTime, - (double)cTotalWriteIO / totalTime); + static_cast(cbTotalWritten) / 1024 / 1024 / totalTime, + static_cast(cTotalWriteIO) / totalTime); _Print("read | %15I64u | %12I64u | %10.2lf | %10.2lf\n", cbTotalRead, cTotalReadIO, - (double)cbTotalRead / 1024 / 1024 / totalTime, - (double)cTotalReadIO / totalTime); + static_cast(cbTotalRead) / 1024 / 1024 / totalTime, + static_cast(cTotalReadIO) / totalTime); _Print("-------------------------------------------------------------------------------\n"); _Print("total | %15I64u | %12I64u | %10.2lf | %10.2lf\n\n", cbTotalRead + cbTotalWritten, cTotalReadIO + cTotalWriteIO, - (double)(cbTotalRead + cbTotalWritten) / 1024 / 1024 / totalTime, - (double)(cTotalReadIO + cTotalWriteIO) / totalTime); + static_cast(cbTotalRead + cbTotalWritten) / 1024 / 1024 / totalTime, + static_cast(cTotalReadIO + cTotalWriteIO) / totalTime); _Print("total test time:\t%.2lfs\n", totalTime); } diff --git a/XmlProfileParser/XmlProfileParser.cpp b/XmlProfileParser/XmlProfileParser.cpp index 51a03b2..6073474 100644 --- a/XmlProfileParser/XmlProfileParser.cpp +++ b/XmlProfileParser/XmlProfileParser.cpp @@ -31,7 +31,7 @@ SOFTWARE. #include #include #include -#include +#include HRESULT ReportXmlError( const char *pszName, @@ -43,9 +43,8 @@ HRESULT ReportXmlError( long errorCode = E_FAIL; CComBSTR bReason; BSTR bstr; - HRESULT hr; - hr = pXMLError->get_line(&line); + HRESULT hr = pXMLError->get_line(&line); if (FAILED(hr)) { line = 0; @@ -67,8 +66,8 @@ HRESULT ReportXmlError( } fprintf(stderr, - "ERROR: failed to load %s, line %lu, line position %lu, errorCode %08x\nERROR: reason: %S\n", - pszName, line, linePos, errorCode, (PWCHAR)bReason); + "ERROR: failed to load %s, line %li, line position %li, errorCode %08lx\nERROR: reason: %S\n", + pszName, line, linePos, errorCode, static_cast(bReason)); return errorCode; } @@ -79,17 +78,17 @@ bool XmlProfileParser::ParseFile(const char *pszPath, Profile *pProfile) assert(pProfile != nullptr); // import schema from the named resource - HRSRC hSchemaXmlResource = FindResource(NULL, L"DISKSPD.XSD", RT_HTML); - assert(hSchemaXmlResource != NULL); - HGLOBAL hSchemaXml = LoadResource(NULL, hSchemaXmlResource); - assert(hSchemaXml != NULL); - LPVOID pSchemaXml = LockResource(hSchemaXml); - assert(pSchemaXml != NULL); + HRSRC hSchemaXmlResource = FindResource(nullptr, L"DISKSPD.XSD", RT_HTML); + assert(hSchemaXmlResource != nullptr); + const HGLOBAL hSchemaXml = LoadResource(nullptr, hSchemaXmlResource); + assert(hSchemaXml != nullptr); + const auto pSchemaXml = LockResource(hSchemaXml); + assert(pSchemaXml != nullptr); // convert from utf-8 produced by the xsd authoring tool to utf-16 - int cchSchemaXml = MultiByteToWideChar(CP_UTF8, 0, (LPCSTR)pSchemaXml, -1, NULL, 0); + const int cchSchemaXml = MultiByteToWideChar(CP_UTF8, 0, static_cast(pSchemaXml), -1, nullptr, 0); vector vWideSchemaXml(cchSchemaXml); - int dwcchWritten = MultiByteToWideChar(CP_UTF8, 0, (LPCSTR)pSchemaXml, -1, vWideSchemaXml.data(), cchSchemaXml); + const int dwcchWritten = MultiByteToWideChar(CP_UTF8, 0, static_cast(pSchemaXml), -1, vWideSchemaXml.data(), cchSchemaXml); UNREFERENCED_PARAMETER(dwcchWritten); assert(dwcchWritten == cchSchemaXml); // ... and finally, packed in a bstr for the loadXml interface @@ -171,7 +170,7 @@ bool XmlProfileParser::ParseFile(const char *pszPath, Profile *pProfile) if (SUCCEEDED(hr)) { VARIANT_BOOL fvIsOk; - CComVariant vPath(pszPath); + const CComVariant vPath(pszPath); hr = spXmlDoc->load(vPath, &fvIsOk); if (SUCCEEDED(hr) && fvIsOk != VARIANT_TRUE) { @@ -390,7 +389,7 @@ HRESULT XmlProfileParser::_ParseEtw(IXMLDOMDocument2 *pXmlDoc, Profile *pProfile HRESULT XmlProfileParser::_ParseTimeSpans(IXMLDOMDocument2 *pXmlDoc, Profile *pProfile) { CComPtr spNodeList = nullptr; - CComVariant query("//Profile/TimeSpans/TimeSpan"); + const CComVariant query("//Profile/TimeSpans/TimeSpan"); HRESULT hr = pXmlDoc->selectNodes(query.bstrVal, &spNodeList); if (SUCCEEDED(hr)) { @@ -457,6 +456,16 @@ HRESULT XmlProfileParser::_ParseTimeSpan(IXMLDOMNode *pXmlNode, TimeSpan *pTimeS } } + if (SUCCEEDED(hr)) + { + bool fRandomWriteData; + hr = _GetBool(pXmlNode, "RandomWriteData", &fRandomWriteData); + if (SUCCEEDED(hr) && (hr != S_FALSE)) + { + pTimeSpan->SetRandomWriteData(fRandomWriteData); + } + } + if (SUCCEEDED(hr)) { UINT32 ulThreadCount; @@ -467,6 +476,16 @@ HRESULT XmlProfileParser::_ParseTimeSpan(IXMLDOMNode *pXmlNode, TimeSpan *pTimeS } } + if (SUCCEEDED(hr)) + { + UINT32 ulRequestCount; + hr = _GetUINT32(pXmlNode, "RequestCount", &ulRequestCount); + if (SUCCEEDED(hr) && (hr != S_FALSE)) + { + pTimeSpan->SetRequestCount(ulRequestCount); + } + } + if (SUCCEEDED(hr)) { bool fDisableAffinity; @@ -538,7 +557,7 @@ HRESULT XmlProfileParser::_ParseTimeSpan(IXMLDOMNode *pXmlNode, TimeSpan *pTimeS HRESULT XmlProfileParser::_ParseTargets(IXMLDOMNode *pXmlNode, TimeSpan *pTimeSpan) { - CComVariant query("Targets/Target"); + const CComVariant query("Targets/Target"); CComPtr spNodeList = nullptr; HRESULT hr = pXmlNode->selectNodes(query.bstrVal, &spNodeList); if (SUCCEEDED(hr)) @@ -566,7 +585,7 @@ HRESULT XmlProfileParser::_ParseTargets(IXMLDOMNode *pXmlNode, TimeSpan *pTimeSp HRESULT XmlProfileParser::_ParseRandomDataSource(IXMLDOMNode *pXmlNode, Target *pTarget) { CComPtr spNodeList = nullptr; - CComVariant query("RandomDataSource"); + const CComVariant query("RandomDataSource"); HRESULT hr = pXmlNode->selectNodes(query.bstrVal, &spNodeList); if (SUCCEEDED(hr)) { @@ -600,7 +619,7 @@ HRESULT XmlProfileParser::_ParseRandomDataSource(IXMLDOMNode *pXmlNode, Target * HRESULT XmlProfileParser::_ParseWriteBufferContent(IXMLDOMNode *pXmlNode, Target *pTarget) { CComPtr spNodeList = nullptr; - CComVariant query("WriteBufferContent"); + const CComVariant query("WriteBufferContent"); HRESULT hr = pXmlNode->selectNodes(query.bstrVal, &spNodeList); if (SUCCEEDED(hr)) { @@ -898,6 +917,69 @@ HRESULT XmlProfileParser::_ParseTarget(IXMLDOMNode *pXmlNode, Target *pTarget) pTarget->SetIOPriorityHint(hint[ulIOPriority - 1]); } } + + if (SUCCEEDED(hr)) + { + UINT32 ulWeight; + hr = _GetUINT32(pXmlNode, "Weight", &ulWeight); + if (SUCCEEDED(hr) && (hr != S_FALSE)) + { + pTarget->SetWeight(ulWeight); + } + } + + if (SUCCEEDED(hr)) + { + hr = _ParseThreadTargets(pXmlNode, pTarget); + } + return hr; +} + +HRESULT XmlProfileParser::_ParseThreadTargets(IXMLDOMNode *pXmlNode, Target *pTarget) +{ + const CComVariant query("ThreadTarget"); + CComPtr spNodeList = nullptr; + HRESULT hr = pXmlNode->selectNodes(query.bstrVal, &spNodeList); + if (SUCCEEDED(hr)) + { + long cNodes; + hr = spNodeList->get_length(&cNodes); + if (SUCCEEDED(hr)) + { + for (int i = 0; i < cNodes; i++) + { + CComPtr spNode = nullptr; + hr = spNodeList->get_item(i, &spNode); + if (SUCCEEDED(hr)) + { + ThreadTarget threadTarget; + _ParseThreadTarget(spNode, &threadTarget); + pTarget->AddThreadTarget(threadTarget); + } + } + } + } + return hr; +} + +HRESULT XmlProfileParser::_ParseThreadTarget(IXMLDOMNode *pXmlNode, ThreadTarget *pThreadTarget) +{ + UINT32 ulThread; + HRESULT hr = _GetUINT32(pXmlNode, "Thread", &ulThread); + if (SUCCEEDED(hr) && (hr != S_FALSE)) + { + pThreadTarget->SetThread(ulThread); + } + + if (SUCCEEDED(hr)) + { + UINT32 ulWeight; + hr = _GetUINT32(pXmlNode, "Weight", &ulWeight); + if (SUCCEEDED(hr) && (hr != S_FALSE)) + { + pThreadTarget->SetWeight(ulWeight); + } + } return hr; } @@ -912,7 +994,7 @@ HRESULT XmlProfileParser::_ParseTarget(IXMLDOMNode *pXmlNode, Target *pTarget) HRESULT XmlProfileParser::_ParseAffinityAssignment(IXMLDOMNode *pXmlNode, TimeSpan *pTimeSpan) { CComPtr spNodeList = nullptr; - CComVariant query("Affinity/AffinityAssignment"); + const CComVariant query("Affinity/AffinityAssignment"); HRESULT hr = pXmlNode->selectNodes(query.bstrVal, &spNodeList); if (SUCCEEDED(hr)) { @@ -930,7 +1012,7 @@ HRESULT XmlProfileParser::_ParseAffinityAssignment(IXMLDOMNode *pXmlNode, TimeSp hr = spNode->get_text(&bstrText); if (SUCCEEDED(hr)) { - pTimeSpan->AddAffinityAssignment((WORD)0, (BYTE)_wtoi((wchar_t *)bstrText)); + pTimeSpan->AddAffinityAssignment(static_cast(0), static_cast(_wtoi(static_cast(bstrText)))); SysFreeString(bstrText); } } @@ -945,7 +1027,7 @@ HRESULT XmlProfileParser::_ParseAffinityAssignment(IXMLDOMNode *pXmlNode, TimeSp HRESULT XmlProfileParser::_ParseAffinityGroupAssignment(IXMLDOMNode *pXmlNode, TimeSpan *pTimeSpan) { CComPtr spNodeList = nullptr; - CComVariant query("Affinity/AffinityGroupAssignment"); + const CComVariant query("Affinity/AffinityGroupAssignment"); HRESULT hr = pXmlNode->selectNodes(query.bstrVal, &spNodeList); if (SUCCEEDED(hr)) @@ -964,7 +1046,7 @@ HRESULT XmlProfileParser::_ParseAffinityGroupAssignment(IXMLDOMNode *pXmlNode, T hr = _GetUINT32Attr(spNode, "Group", &dwGroup); if (SUCCEEDED(hr)) { - _GetUINT32Attr(spNode, "Processor", &dwProc); + _GetUINT32Attr(spNode, "Processor", &dwProc); /* result unused */ } if (SUCCEEDED(hr)) { @@ -980,7 +1062,7 @@ HRESULT XmlProfileParser::_ParseAffinityGroupAssignment(IXMLDOMNode *pXmlNode, T } if (SUCCEEDED(hr)) { - pTimeSpan->AddAffinityAssignment((WORD)dwGroup, (BYTE)dwProc); + pTimeSpan->AddAffinityAssignment(static_cast(dwGroup), static_cast(dwProc)); } } @@ -991,10 +1073,10 @@ HRESULT XmlProfileParser::_ParseAffinityGroupAssignment(IXMLDOMNode *pXmlNode, T return hr; } -HRESULT XmlProfileParser::_GetUINT32(IXMLDOMNode *pXmlNode, const char *pszQuery, UINT32 *pulValue) const +HRESULT XmlProfileParser::_GetUINT32(IXMLDOMNode *pXmlNode, const char *pszQuery, UINT32 *pulValue) { CComPtr spNode = nullptr; - CComVariant query(pszQuery); + const CComVariant query(pszQuery); HRESULT hr = pXmlNode->selectSingleNode(query.bstrVal, &spNode); if (SUCCEEDED(hr) && (hr != S_FALSE)) { @@ -1002,29 +1084,29 @@ HRESULT XmlProfileParser::_GetUINT32(IXMLDOMNode *pXmlNode, const char *pszQuery hr = spNode->get_text(&bstrText); if (SUCCEEDED(hr)) { - *pulValue = _wtoi((wchar_t *)bstrText); // TODO: make sure it works on large unsigned ints + *pulValue = _wtoi(static_cast(bstrText)); // TODO: make sure it works on large unsigned ints SysFreeString(bstrText); } } return hr; } -HRESULT XmlProfileParser::_GetUINT32Attr(IXMLDOMNode *pXmlNode, const char *pszAttr, UINT32 *pulValue) const +HRESULT XmlProfileParser::_GetUINT32Attr(IXMLDOMNode *pXmlNode, const char *pszAttr, UINT32 *pulValue) { CComPtr spNamedNodeMap = nullptr; - CComBSTR attr(pszAttr); - HRESULT hr = pXmlNode->get_attributes(&spNamedNodeMap); + const CComBSTR attr(pszAttr); + const HRESULT hr = pXmlNode->get_attributes(&spNamedNodeMap); if (SUCCEEDED(hr) && (hr != S_FALSE)) { CComPtr spNode = nullptr; - HRESULT hr = spNamedNodeMap->getNamedItem(attr, &spNode); - if (SUCCEEDED(hr) && (hr != S_FALSE)) + HRESULT hr2 = spNamedNodeMap->getNamedItem(attr, &spNode); + if (SUCCEEDED(hr2) && (hr2 != S_FALSE)) { BSTR bstrText; - hr = spNode->get_text(&bstrText); - if (SUCCEEDED(hr)) + hr2 = spNode->get_text(&bstrText); + if (SUCCEEDED(hr2)) { - *pulValue = _wtoi((wchar_t *)bstrText); // TODO: make sure it works on large unsigned ints + *pulValue = _wtoi(static_cast(bstrText)); // TODO: make sure it works on large unsigned ints SysFreeString(bstrText); } } @@ -1032,10 +1114,10 @@ HRESULT XmlProfileParser::_GetUINT32Attr(IXMLDOMNode *pXmlNode, const char *pszA return hr; } -HRESULT XmlProfileParser::_GetString(IXMLDOMNode *pXmlNode, const char *pszQuery, string *psValue) const +HRESULT XmlProfileParser::_GetString(IXMLDOMNode *pXmlNode, const char *pszQuery, string *psValue) { CComPtr spNode = nullptr; - CComVariant query(pszQuery); + const CComVariant query(pszQuery); HRESULT hr = pXmlNode->selectSingleNode(query.bstrVal, &spNode); if (SUCCEEDED(hr) && (hr != S_FALSE)) { @@ -1045,7 +1127,9 @@ HRESULT XmlProfileParser::_GetString(IXMLDOMNode *pXmlNode, const char *pszQuery { // TODO: use wstring? char path[MAX_PATH] = {}; - WideCharToMultiByte(CP_UTF8, 0 /*dwFlags*/, (wchar_t *)bstrText, static_cast(wcslen((wchar_t *)bstrText)), path, sizeof(path)-1, 0 /*lpDefaultChar*/, 0 /*lpUsedDefaultChar*/); + WideCharToMultiByte(CP_UTF8, 0 /*dwFlags*/, static_cast(bstrText), + static_cast(wcslen(static_cast(bstrText))), path, sizeof(path) - 1, + nullptr /*lpDefaultChar*/, nullptr /*lpUsedDefaultChar*/); *psValue = string(path); } SysFreeString(bstrText); @@ -1053,10 +1137,10 @@ HRESULT XmlProfileParser::_GetString(IXMLDOMNode *pXmlNode, const char *pszQuery return hr; } -HRESULT XmlProfileParser::_GetUINT64(IXMLDOMNode *pXmlNode, const char *pszQuery, UINT64 *pullValue) const +HRESULT XmlProfileParser::_GetUINT64(IXMLDOMNode *pXmlNode, const char *pszQuery, UINT64 *pullValue) { CComPtr spNode = nullptr; - CComVariant query(pszQuery); + const CComVariant query(pszQuery); HRESULT hr = pXmlNode->selectSingleNode(query.bstrVal, &spNode); if (SUCCEEDED(hr) && (hr != S_FALSE)) { @@ -1064,17 +1148,17 @@ HRESULT XmlProfileParser::_GetUINT64(IXMLDOMNode *pXmlNode, const char *pszQuery hr = spNode->get_text(&bstrText); if (SUCCEEDED(hr)) { - *pullValue = _wtoi64((wchar_t *)bstrText); // TODO: make sure it works on large unsigned ints + *pullValue = _wtoi64(static_cast(bstrText)); // TODO: make sure it works on large unsigned ints } SysFreeString(bstrText); } return hr; } -HRESULT XmlProfileParser::_GetDWORD(IXMLDOMNode *pXmlNode, const char *pszQuery, DWORD *pdwValue) const +HRESULT XmlProfileParser::_GetDWORD(IXMLDOMNode *pXmlNode, const char *pszQuery, DWORD *pdwValue) { UINT32 value = 0; - HRESULT hr = _GetUINT32(pXmlNode, pszQuery, &value); + const HRESULT hr = _GetUINT32(pXmlNode, pszQuery, &value); if (SUCCEEDED(hr)) { *pdwValue = value; @@ -1082,19 +1166,18 @@ HRESULT XmlProfileParser::_GetDWORD(IXMLDOMNode *pXmlNode, const char *pszQuery, return hr; } -HRESULT XmlProfileParser::_GetBool(IXMLDOMNode *pXmlNode, const char *pszQuery, bool *pfValue) const +HRESULT XmlProfileParser::_GetBool(IXMLDOMNode *pXmlNode, const char *pszQuery, bool *pfValue) { - HRESULT hr = S_OK; - CComPtr spNode = nullptr; - CComVariant query(pszQuery); - hr = pXmlNode->selectSingleNode(query.bstrVal, &spNode); + CComPtr spNode = nullptr; + const CComVariant query(pszQuery); + HRESULT hr = pXmlNode->selectSingleNode(query.bstrVal, &spNode); if (SUCCEEDED(hr) && (hr != S_FALSE)) { BSTR bstrText; hr = spNode->get_text(&bstrText); if (SUCCEEDED(hr)) { - *pfValue = (_wcsicmp(L"true", (wchar_t *)bstrText) == 0); + *pfValue = (_wcsicmp(L"true", static_cast(bstrText)) == 0); SysFreeString(bstrText); } } @@ -1109,4 +1192,4 @@ HRESULT XmlProfileParser::_GetVerbose(IXMLDOMDocument2 *pXmlDoc, bool *pfVerbose HRESULT XmlProfileParser::_GetProgress(IXMLDOMDocument2 *pXmlDoc, DWORD *pdwProgress) { return _GetDWORD(pXmlDoc, "//Profile/Progress", pdwProgress); -} \ No newline at end of file +} diff --git a/XmlProfileParser/diskspd.xsd b/XmlProfileParser/diskspd.xsd index 96544e7..3e6e254 100644 --- a/XmlProfileParser/diskspd.xsd +++ b/XmlProfileParser/diskspd.xsd @@ -126,6 +126,26 @@ + + + + + + + + + + + + + @@ -147,10 +167,17 @@ -z set random seed [default=0 if parameter not provided, GetTickCount() if value not provided] --> + + + + + + diff --git a/XmlResultParser/xmlresultparser.cpp b/XmlResultParser/xmlresultparser.cpp index e3babbf..0df8b58 100644 --- a/XmlResultParser/xmlresultparser.cpp +++ b/XmlResultParser/xmlresultparser.cpp @@ -89,15 +89,15 @@ void XmlResultParser::_PrintTargetIops(const IoBucketizer& readBucketizer, const if (readBucketizer.GetNumberOfValidBuckets() > 0) { - _Print("%.3f\n", readBucketizer.GetStandardDeviation() / (bucketTimeInMs / 1000.0)); + _Print("%.3f\n", readBucketizer.GetStandardDeviationIOPS() / (bucketTimeInMs / 1000.0)); } if (writeBucketizer.GetNumberOfValidBuckets() > 0) { - _Print("%.3f\n", writeBucketizer.GetStandardDeviation() / (bucketTimeInMs / 1000.0)); + _Print("%.3f\n", writeBucketizer.GetStandardDeviationIOPS() / (bucketTimeInMs / 1000.0)); } if (totalIoBucketizer.GetNumberOfValidBuckets() > 0) { - _Print("%.3f\n", totalIoBucketizer.GetStandardDeviation() / (bucketTimeInMs / 1000.0)); + _Print("%.3f\n", totalIoBucketizer.GetStandardDeviationIOPS() / (bucketTimeInMs / 1000.0)); } _PrintIops(readBucketizer, writeBucketizer, bucketTimeInMs); _Print("\n"); @@ -198,10 +198,13 @@ void XmlResultParser::_PrintETW(struct ETWMask ETWMask, struct ETWEventCounters _Print("\n"); } -void XmlResultParser::_PrintCpuUtilization(const Results& results) +void XmlResultParser::_PrintCpuUtilization(const Results& results, const SystemInformation& system) { - size_t ulProcCount = results.vSystemProcessorPerfInfo.size(); - double fTime = PerfTimer::PerfTimeToSeconds(results.ullTimeCount); + const size_t ulProcCount = results.vSystemProcessorPerfInfo.size(); + size_t ulBaseProc = 0; + size_t ulActiveProcCount = 0; + const size_t ulNumGroups = system.processorTopology._vProcessorGroupInformation.size(); + const double fTime = PerfTimer::PerfTimeToSeconds(results.ullTimeCount); _Print("\n"); @@ -210,37 +213,54 @@ void XmlResultParser::_PrintCpuUtilization(const Results& results) double totalUserTime = 0; double totalKrnlTime = 0; - for (unsigned int x = 0; x\n"); - _Print("%d\n", x); - _Print("%.2f\n", thisTime); - _Print("%.2f\n", userTime); - _Print("%.2f\n", krnlTime - idleTime); - _Print("%.2f\n", idleTime); - _Print("\n"); - - busyTime += thisTime; - totalIdleTime += idleTime; - totalUserTime += userTime; - totalKrnlTime += krnlTime; + for (unsigned int ulGroup = 0; ulGroup < ulNumGroups; ulGroup++) { + const ProcessorGroupInformation *pGroup = &system.processorTopology._vProcessorGroupInformation[ulGroup]; + + // System has multiple groups but we only have counters for the first one + if (ulBaseProc >= ulProcCount) { + break; + } + + for (unsigned int ulProcessor = 0; ulProcessor < pGroup->_maximumProcessorCount; ulProcessor++) { + if (!pGroup->IsProcessorActive(static_cast(ulProcessor))) { + continue; + } + + const double idleTime = 100.0 * results.vSystemProcessorPerfInfo[ulBaseProc + ulProcessor].IdleTime.QuadPart / 10000000 / fTime; + const double krnlTime = 100.0 * results.vSystemProcessorPerfInfo[ulBaseProc + ulProcessor].KernelTime.QuadPart / 10000000 / fTime; + const double userTime = 100.0 * results.vSystemProcessorPerfInfo[ulBaseProc + ulProcessor].UserTime.QuadPart / 10000000 / fTime; + + const double thisTime = (krnlTime + userTime) - idleTime; + + _Print("\n"); + _Print("%d\n", ulGroup); + _Print("%d\n", ulProcessor); + _Print("%.2f\n", thisTime); + _Print("%.2f\n", userTime); + _Print("%.2f\n", krnlTime - idleTime); + _Print("%.2f\n", idleTime); + _Print("\n"); + + busyTime += thisTime; + totalIdleTime += idleTime; + totalUserTime += userTime; + totalKrnlTime += krnlTime; + + ulActiveProcCount++; + } + + ulBaseProc += pGroup->_maximumProcessorCount; } + + if (ulActiveProcCount == 0) { + ulActiveProcCount = 1; + } + _Print("\n"); - _Print("%.2f\n", busyTime / ulProcCount); - _Print("%.2f\n", totalUserTime / ulProcCount); - _Print("%.2f\n", (totalKrnlTime - totalIdleTime) / ulProcCount); - _Print("%.2f\n", totalIdleTime / ulProcCount); + _Print("%.2f\n", busyTime / ulActiveProcCount); + _Print("%.2f\n", totalUserTime / ulActiveProcCount); + _Print("%.2f\n", (totalKrnlTime - totalIdleTime) / ulActiveProcCount); + _Print("%.2f\n", totalIdleTime / ulActiveProcCount); _Print("\n"); _Print("\n"); @@ -255,21 +275,43 @@ void XmlResultParser::_PrintIops(const IoBucketizer& readBucketizer, const IoBuc done = true; double r = 0.0; + double r_min = 0.0; + double r_max = 0.0; + double r_avg = 0.0; + double r_stddev = 0.0; + double w = 0.0; + double w_min = 0.0; + double w_max = 0.0; + double w_avg = 0.0; + double w_stddev = 0.0; if (readBucketizer.GetNumberOfValidBuckets() > i) { - r = readBucketizer.GetIoBucket(i) / (bucketTimeInMs / 1000.0); + r = readBucketizer.GetIoBucketCount(i) / (bucketTimeInMs / 1000.0); + r_min = readBucketizer.GetIoBucketMinDurationUsec(i) / 1000.0; + r_max = readBucketizer.GetIoBucketMaxDurationUsec(i) / 1000.0; + r_avg = readBucketizer.GetIoBucketAvgDurationUsec(i) / 1000.0; + r_stddev = readBucketizer.GetIoBucketDurationStdDevUsec(i) / 1000.0; done = false; } if (writeBucketizer.GetNumberOfValidBuckets() > i) { - w = writeBucketizer.GetIoBucket(i) / (bucketTimeInMs / 1000.0); + w = writeBucketizer.GetIoBucketCount(i) / (bucketTimeInMs / 1000.0); + w_min = writeBucketizer.GetIoBucketMinDurationUsec(i) / 1000.0; + w_max = writeBucketizer.GetIoBucketMaxDurationUsec(i) / 1000.0; + w_avg = writeBucketizer.GetIoBucketAvgDurationUsec(i) / 1000.0; + w_stddev = writeBucketizer.GetIoBucketDurationStdDevUsec(i) / 1000.0; done = false; } if (!done) { - _Print("\n", bucketTimeInMs*(i + 1), r, w, r + w); + _Print("\n", + bucketTimeInMs*(i + 1), r, w, r + w, + r_min, r_max, r_avg, r_stddev, + w_min, w_max, w_avg, w_stddev); } } } @@ -360,7 +402,7 @@ void XmlResultParser::_PrintLatencyPercentiles(const Results& results) vPercentiles.push_back(make_pair(6, 99.999999)); vPercentiles.push_back(make_pair(7, 99.9999999)); - for (auto p : vPercentiles) + for (const auto p : vPercentiles) { _Print("\n"); _Print("%.*f\n", p.first, p.second); @@ -410,19 +452,20 @@ string XmlResultParser::ParseResults(Profile& profile, const SystemInformation& const TimeSpan& timeSpan = profile.GetTimeSpans()[iResults]; _Print("\n"); - double fTime = PerfTimer::PerfTimeToSeconds(results.ullTimeCount); //test duration + const double fTime = PerfTimer::PerfTimeToSeconds(results.ullTimeCount); //test duration if (fTime >= 0.0000001) { // There either is a fixed number of threads for all files to share (GetThreadCount() > 0) or a number of threads per file. // In the latter case vThreadResults.size() == number of threads per file * file count - size_t ulThreadCnt = (timeSpan.GetThreadCount() > 0) ? timeSpan.GetThreadCount() : results.vThreadResults.size(); - size_t ulProcCount = results.vSystemProcessorPerfInfo.size(); + const size_t ulThreadCnt = (timeSpan.GetThreadCount() > 0) ? timeSpan.GetThreadCount() : results.vThreadResults.size(); + const unsigned int ulProcCount = system.processorTopology._ulActiveProcCount; _Print("%.2f\n", fTime); _Print("%u\n", ulThreadCnt); + _Print("%u\n", timeSpan.GetRequestCount()); _Print("%u\n", ulProcCount); - _PrintCpuUtilization(results); + _PrintCpuUtilization(results, system); if (timeSpan.GetMeasureLatency()) { diff --git a/diskspd_vs/.clang-tidy b/diskspd_vs/.clang-tidy new file mode 100644 index 0000000..b107eba --- /dev/null +++ b/diskspd_vs/.clang-tidy @@ -0,0 +1 @@ +Checks: '*' \ No newline at end of file diff --git a/diskspd_vs/CmdLineParser/CmdLineParser.vcxproj b/diskspd_vs/CmdLineParser/CmdLineParser.vcxproj index 015b2cd..36ea088 100644 --- a/diskspd_vs/CmdLineParser/CmdLineParser.vcxproj +++ b/diskspd_vs/CmdLineParser/CmdLineParser.vcxproj @@ -1,5 +1,5 @@  - + Debug @@ -21,34 +21,34 @@ {0EF5CE78-8E92-4A1B-A255-0F544AADA291} CmdLineParser - 8.1 + 10.0.16299.0 StaticLibrary true MultiByte - v140 + v141 StaticLibrary true MultiByte - v140 + v141 StaticLibrary false true MultiByte - v140 + v141 StaticLibrary false true MultiByte - v140 + v141 @@ -119,12 +119,14 @@ - Level3 + Level4 MaxSpeed true true true true + true + stdcpplatest true diff --git a/diskspd_vs/CmdRequestCreator/CmdRequestCreator.vcxproj b/diskspd_vs/CmdRequestCreator/CmdRequestCreator.vcxproj index 94a3165..a9ae563 100644 --- a/diskspd_vs/CmdRequestCreator/CmdRequestCreator.vcxproj +++ b/diskspd_vs/CmdRequestCreator/CmdRequestCreator.vcxproj @@ -1,5 +1,5 @@  - + Debug @@ -21,34 +21,34 @@ {D238F8AA-DE12-49E7-B4A7-9B69579A69C0} CmdRequestCreator - 8.1 + 10.0.16299.0 Application true MultiByte - v140 + v141 Application true MultiByte - v140 + v141 Application false true MultiByte - v140 + v141 Application false true MultiByte - v140 + v141 @@ -127,12 +127,14 @@ - Level3 + Level4 MaxSpeed true true true true + true + stdcpplatest true diff --git a/diskspd_vs/Common/Common.vcxproj b/diskspd_vs/Common/Common.vcxproj index 9481af5..f3b27e0 100644 --- a/diskspd_vs/Common/Common.vcxproj +++ b/diskspd_vs/Common/Common.vcxproj @@ -1,5 +1,5 @@  - + Debug @@ -21,34 +21,34 @@ {B253AB42-F482-417A-82CE-EDAFCD26F366} Common - 8.1 + 10.0.16299.0 StaticLibrary true MultiByte - v140 + v141 StaticLibrary true MultiByte - v140 + v141 StaticLibrary false true MultiByte - v140 + v141 StaticLibrary false true MultiByte - v140 + v141 @@ -108,12 +108,14 @@ - Level3 + Level4 MaxSpeed true true true true + true + stdcpplatest true diff --git a/diskspd_vs/IORequestGenerator/IORequestGenerator.vcxproj b/diskspd_vs/IORequestGenerator/IORequestGenerator.vcxproj index 61dbc14..cae984a 100644 --- a/diskspd_vs/IORequestGenerator/IORequestGenerator.vcxproj +++ b/diskspd_vs/IORequestGenerator/IORequestGenerator.vcxproj @@ -1,5 +1,5 @@  - + Debug @@ -21,34 +21,34 @@ {62DB1E99-FBA0-45FD-9355-423059BA03B8} IORequestGenerator - 8.1 + 10.0.16299.0 StaticLibrary true MultiByte - v140 + v141 StaticLibrary true MultiByte - v140 + v141 StaticLibrary false true MultiByte - v140 + v141 StaticLibrary false true MultiByte - v140 + v141 @@ -119,12 +119,14 @@ - Level3 + Level4 MaxSpeed true true true true + true + stdcpplatest true diff --git a/diskspd_vs/ResultParser/ResultParser.vcxproj b/diskspd_vs/ResultParser/ResultParser.vcxproj index 88609da..f42a587 100644 --- a/diskspd_vs/ResultParser/ResultParser.vcxproj +++ b/diskspd_vs/ResultParser/ResultParser.vcxproj @@ -1,5 +1,5 @@  - + Debug @@ -21,34 +21,34 @@ {F6C211DC-B076-4716-BCDC-D7DE88973B66} ResultParser - 8.1 + 10.0.16299.0 StaticLibrary true MultiByte - v140 + v141 StaticLibrary true MultiByte - v140 + v141 StaticLibrary false true MultiByte - v140 + v141 StaticLibrary false true MultiByte - v140 + v141 @@ -119,12 +119,14 @@ - Level3 + Level4 MaxSpeed true true true true + true + stdcpplatest true diff --git a/diskspd_vs/XmlProfileParser/XmlProfileParser.vcxproj b/diskspd_vs/XmlProfileParser/XmlProfileParser.vcxproj index 3131bba..cb5081c 100644 --- a/diskspd_vs/XmlProfileParser/XmlProfileParser.vcxproj +++ b/diskspd_vs/XmlProfileParser/XmlProfileParser.vcxproj @@ -1,5 +1,5 @@  - + Debug @@ -21,34 +21,34 @@ {EFF06674-B068-45F1-9661-DB9363B025B3} XmlProfileParser - 8.1 + 10.0.16299.0 StaticLibrary true Unicode - v140 + v141 StaticLibrary true Unicode - v140 + v141 StaticLibrary false true Unicode - v140 + v141 StaticLibrary false true Unicode - v140 + v141 @@ -119,12 +119,14 @@ - Level3 + Level4 MaxSpeed true true true true + true + stdcpplatest true @@ -134,12 +136,9 @@ - - ..\..\XmlProfileParser\diskspd.xsd - - + diff --git a/diskspd_vs/XmlResultParser/XmlResultParser.vcxproj b/diskspd_vs/XmlResultParser/XmlResultParser.vcxproj index 5427343..1616603 100644 --- a/diskspd_vs/XmlResultParser/XmlResultParser.vcxproj +++ b/diskspd_vs/XmlResultParser/XmlResultParser.vcxproj @@ -1,5 +1,5 @@  - + Debug @@ -22,34 +22,34 @@ {60A28E9C-C245-4D99-9C1C-EC911031743F} Win32Proj XmlResultParser - 8.1 + 10.0.16299.0 StaticLibrary true MultiByte - v140 + v141 StaticLibrary true MultiByte - v140 + v141 StaticLibrary false true MultiByte - v140 + v141 StaticLibrary false true MultiByte - v140 + v141 @@ -132,7 +132,7 @@ - Level3 + Level4 MaxSpeed @@ -141,6 +141,8 @@ WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions) true true + true + stdcpplatest Windows diff --git a/diskspd_vs/diskspd.sln b/diskspd_vs/diskspd.sln index 69af998..f0b32a1 100644 --- a/diskspd_vs/diskspd.sln +++ b/diskspd_vs/diskspd.sln @@ -1,7 +1,7 @@  Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio 2013 -VisualStudioVersion = 12.0.30723.0 +# Visual Studio 15 +VisualStudioVersion = 15.0.27428.2011 MinimumVisualStudioVersion = 10.0.40219.1 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CmdLineParser", "CmdLineParser\CmdLineParser.vcxproj", "{0EF5CE78-8E92-4A1B-A255-0F544AADA291}" EndProject @@ -25,6 +25,11 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Common", "Common\Common.vcx EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "XmlResultParser", "XmlResultParser\XmlResultParser.vcxproj", "{60A28E9C-C245-4D99-9C1C-EC911031743F}" EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{480CD062-AF37-41E9-A660-6968CD2AE545}" + ProjectSection(SolutionItems) = preProject + .clang-tidy = .clang-tidy + EndProjectSection +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Win32 = Debug|Win32 @@ -93,4 +98,7 @@ Global GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {C90B928D-6CD3-402A-9704-A0CBEF91C734} + EndGlobalSection EndGlobal