Skip to content

Remove short name option #1910

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 11 commits into
base: develop
Choose a base branch
from
37 changes: 13 additions & 24 deletions tensilelite/Tensile/BenchmarkProblems.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,9 @@
from Tensile.Toolchain.Component import Assembler
from Tensile.SolutionStructs.Problem import ProblemType, ProblemSizes
from Tensile.SolutionStructs.Solution import Solution
from Tensile.SolutionStructs.Validators.MatrixInstruction import matrixInstructionToMIParameters, validateMIParameters
from Tensile.SolutionStructs.Naming import getMinNaming, getNameMin, getSerialNaming, getNameFull, getKeyNoInternalArgs
from Tensile.SolutionStructs.Validators.MatrixInstruction import matrixInstructionToMIParameters, \
validateMIParameters
from Tensile.SolutionStructs.Naming import getKeyNoInternalArgs, getSolutionNameMin, getKernelNameMin

from .BenchmarkStructs import BenchmarkProcess, constructForkPermutations
from .Contractions import ProblemType as ContractionsProblemType
Expand All @@ -55,7 +56,6 @@
from Tensile.Common.GlobalParameters import globalParameters, startTime



def _generateForkedSolutions(problemType, constantParams, forkPermutations, assembler: Assembler, \
debugConfig: DebugConfig, depthUConfig: DepthUConfig, isaInfoMap: Dict[IsaVersion, IsaInfo]):
"""Creates a list with a Solution object for each parameter combination in forkPermutations"""
Expand Down Expand Up @@ -202,7 +202,6 @@ def writeBenchmarkFiles(
asmToolchain: AssemblyToolchain,
srcToolchain: SourceToolchain,
sourcePath: Path,
useShortNames: bool,
debugConfig: DebugConfig,
depthUConfig: DepthUConfig,
deviceId: int,
Expand Down Expand Up @@ -235,14 +234,7 @@ def writeBenchmarkFiles(
kernelHelperObjs.append(ko)
kernelHelperNames.add(kname)

kernelSerialNaming = getSerialNaming(kernels)
kernelMinNaming = getMinNaming(kernels)
kernelWriterAssembly = KernelWriterAssembly(
kernelMinNaming,
kernelSerialNaming,
asmToolchain.assembler,
debugConfig,
)
kernelWriterAssembly = KernelWriterAssembly(asmToolchain.assembler, debugConfig)

cmdLineArchs = [var for isa in isaInfoMap.keys() for var in gfxToVariants(isaToGfx(isa))]
# cmdLineArchs = [variant isaToGfx(isa) for isa in isaInfoMap.keys() for gfxToVariants()]
Expand All @@ -258,14 +250,14 @@ def writeBenchmarkFiles(
kernelWriterAssembly,
debugConfig.splitGSU,
cmdLineArchs,
kernelSerialNaming,
kernelMinNaming,
errorTolerant=True,
generateSourcesAndExit=globalParameters["GenerateSourcesAndExit"], # put in debug config
compress=False,
useShortNames=useShortNames
)
# ^ this is where solutions is mutated
for s in solutions:
s["SolutionNameMin"] = getSolutionNameMin(solution, debugConfig.splitGSU)
s["KernelNameMin"] = getKernelNameMin(solution, debugConfig.splitGSU)

newLibraryDir = ensurePath(sourcePath / 'library')
newLibraryFile = os.path.join(newLibraryDir, "TensileLibrary")
Expand All @@ -278,7 +270,7 @@ def writeBenchmarkFiles(
depthUConfig,
isaInfoMap,
)
newLibrary.applyNaming(debugConfig.splitGSU, kernelMinNaming)
newLibrary.applyNaming(debugConfig.splitGSU)
LibraryIO.write(newLibraryFile, state(newLibrary), globalParameters["LibraryFormat"])

codeObjectFiles = [os.path.relpath(f, sourcePath) \
Expand Down Expand Up @@ -322,7 +314,7 @@ def writeBenchmarkFiles(

def _benchmarkProblemType(problemTypeConfig, problemSizeGroupConfig, problemSizeGroupIdx, useCache,
asmToolchain: AssemblyToolchain, srcToolchain: SourceToolchain, cCompiler: str,
buildTmpPath: Path, benchmarkProblemsPath: Path, useShortNames: bool,
buildTmpPath: Path, benchmarkProblemsPath: Path,
debugConfig: DebugConfig, depthUConfig: DepthUConfig, deviceId: int,
gfxName: str, isaInfoMap: Dict[str, IsaInfo]
):
Expand Down Expand Up @@ -430,7 +422,7 @@ def _benchmarkProblemType(problemTypeConfig, problemSizeGroupConfig, problemSize
printExit(msg)

for solution in solutions:
print2("# ({}:{}) {}".format(0, 0, getNameFull(solution, debugConfig.splitGSU)))
print2("# ({}:{}) {}".format(0, 0, getSolutionNameMin(solution, debugConfig.splitGSU)))
print2(HR)

# write benchmarkFiles
Expand All @@ -439,7 +431,7 @@ def _benchmarkProblemType(problemTypeConfig, problemSizeGroupConfig, problemSize
benchmarkStep.problemSizes, benchmarkStep.biasTypeArgs, \
benchmarkStep.factorDimArgs, benchmarkStep.activationArgs, \
benchmarkStep.icacheFlushArgs, shortName, [], asmToolchain, srcToolchain, \
sourcePath, useShortNames, debugConfig, depthUConfig, deviceId, gfxName, isaInfoMap)
sourcePath, debugConfig, depthUConfig, deviceId, gfxName, isaInfoMap)
# ^ this mutates solutions

# write cache data
Expand All @@ -457,12 +449,11 @@ def _benchmarkProblemType(problemTypeConfig, problemSizeGroupConfig, problemSize
.format(len(solutions), prevCount ))

# add SolutionIndex and SolutionNameMin into benchmark yaml
solutionMinNaming = getMinNaming(solutions)
for i in range(0, len(solutions)):
solution = solutions[i]
solution["SolutionIndex"] = i
solution["SolutionNameMin"] = getNameMin(solution, solutionMinNaming, debugConfig.splitGSU)
solution["KernelNameMin"] = getNameMin(solution, solutionMinNaming, debugConfig.splitGSU, True)
solution["SolutionNameMin"] = getSolutionNameMin(solution, debugConfig.splitGSU)
solution["KernelNameMin"] = getKernelNameMin(solution, debugConfig.splitGSU)
else:
solutions = None
print1("# Using cached solution data")
Expand Down Expand Up @@ -512,7 +503,6 @@ def main(
cCompiler: str,
outputPath: Path,
buildTmpPath: Path,
useShortNames: bool,
debugConfig: DebugConfig,
depthUConfig: DepthUConfig,
deviceId: int,
Expand Down Expand Up @@ -567,7 +557,6 @@ def main(
cCompiler,
buildTmpPath,
benchmarkProblemsPath,
useShortNames,
debugConfig,
depthUConfig,
deviceId,
Expand Down
9 changes: 3 additions & 6 deletions tensilelite/Tensile/ClientWriter.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ class ClientLogLevel(Enum):
################################################################################
# Main
################################################################################
def main(config, assembler: Assembler, cCompiler: str, isaInfoMap, outputPath: Path, deviceId: int, gfxName: str, useShortNames: bool=False):
def main(config, assembler: Assembler, cCompiler: str, isaInfoMap, outputPath: Path, deviceId: int, gfxName: str):

libraryLogicPath = ensurePath(outputPath / LIBRARY_LOGIC_DIR)
clientLibraryPath = ensurePath(outputPath / LIBRARY_CLIENT_DIR)
Expand All @@ -115,7 +115,7 @@ def main(config, assembler: Assembler, cCompiler: str, isaInfoMap, outputPath: P
else:
env["PYTHONPATH"] = module_path

createLibraryScript = getBuildClientLibraryScript(clientLibraryPath, libraryLogicPath, str(assembler.path), isaToGfx(list(isaInfoMap.keys())[0]), useShortNames)
createLibraryScript = getBuildClientLibraryScript(clientLibraryPath, libraryLogicPath, str(assembler.path), isaToGfx(list(isaInfoMap.keys())[0]))
subprocess.run(shlex.split(createLibraryScript), env=env, cwd=clientLibraryPath)
coList = glob(os.path.join(clientLibraryPath, "library/*.co"))
yamlList = glob(os.path.join(clientLibraryPath, "library/*.yaml"))
Expand Down Expand Up @@ -230,7 +230,7 @@ def runClient(libraryLogicPath, forBenchmark, enableTileSelection, cxxCompiler:

return process.returncode

def getBuildClientLibraryScript(buildPath, libraryLogicPath, cxxCompiler, targetGfx, useShortNames: bool=False):
def getBuildClientLibraryScript(buildPath, libraryLogicPath, cxxCompiler, targetGfx):
import io
runScriptFile = io.StringIO()

Expand All @@ -239,9 +239,6 @@ def getBuildClientLibraryScript(buildPath, libraryLogicPath, cxxCompiler, target
if not globalParameters["LazyLibraryLoading"]:
callCreateLibraryCmd += " --no-lazy-library-loading"

if useShortNames:
callCreateLibraryCmd += " --short-file-names"

if globalParameters.get("AsmDebug", False):
callCreateLibraryCmd += " --asm-debug"

Expand Down
1 change: 0 additions & 1 deletion tensilelite/Tensile/Common/GlobalParameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -596,7 +596,6 @@ def assignGlobalParameters(config, isaInfoMap: Dict[IsaVersion, IsaInfo]):
# The following keys may be present in the config, but are not (or no longer) global parameters.
ignoreKeys = [
"Architecture",
"ShortNames",
"PrintLevel",
"Device",
"UseCompression",
Expand Down
98 changes: 98 additions & 0 deletions tensilelite/Tensile/Common/RequiredParameters.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
################################################################################
#
# Copyright (C) 2025 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
################################################################################

from functools import lru_cache
from .ValidParameters import validParameters


@lru_cache
def getRequiredParametersFull() -> frozenset:
    """Return every key of ``validParameters`` as an immutable set.

    The result is a ``frozenset`` (not a mutable ``set``), so callers cannot
    modify the shared cached object.

    Because the function takes no arguments and is wrapped in ``lru_cache``,
    the set is built on the first call only; later changes to
    ``validParameters`` are not reflected in the returned value.

    Returns:
        frozenset: the keys of ``validParameters`` at the time of the first call.
    """
    return frozenset(validParameters.keys())


@lru_cache
def getRequiredParametersMin() -> frozenset:
    """Return the fixed "min" collection of required parameter names.

    The names are hard-coded here rather than derived from another table.
    The result is a ``frozenset`` (not a mutable ``set``), so the shared
    cached object cannot be modified by callers.

    Because the function takes no arguments and is wrapped in ``lru_cache``,
    the same object is returned on every call.

    NOTE(review): presumably these are the parameters that feed the "Min"
    solution/kernel names -- confirm against the callers.

    Returns:
        frozenset: the parameter-name strings listed below.
    """
    # Entries are kept in sorted order to make additions/removals easy to diff.
    return frozenset({
        '1LDSBuffer',
        'ActivationFuncCall',
        'AssertFree0ElementMultiple',
        'AssertFree1ElementMultiple',
        'AssertSummationElementMultiple',
        'ClusterLocalRead',
        'ConvertAfterDS',
        'DirectToVgprA',
        'DirectToVgprB',
        'ExpandPointerSwap',
        'ForceDisableShadowInit',
        'GlobalReadPerMfma',
        'GlobalReadVectorWidthA',
        'GlobalReadVectorWidthB',
        'GlobalSplitUAlgorithm',
        'GroupLoadStore',
        'ISA',
        'InnerUnroll',
        'Kernel',
        'LdsBlockSizePerPadA',
        'LdsBlockSizePerPadB',
        'LdsBlockSizePerPadMetadata',
        'LdsPadA',
        'LdsPadB',
        'LdsPadMetadata',
        'LocalReadVectorWidth',
        'LocalWritePerMfma',
        'MIArchVgpr',
        'MaxOccupancy',
        'NonTemporal',
        'NonTemporalA',
        'NonTemporalB',
        'NonTemporalC',
        'NonTemporalD',
        'NonTemporalMetadata',
        'NumElementsPerBatchStore',
        'NumLoadsCoalescedA',
        'NumLoadsCoalescedB',
        'OptNoLoadLoop',
        'PrefetchGlobalRead',
        'PrefetchLocalRead',
        'PreloadKernArgs',
        'ScheduleIterAlg',
        'SourceSwap',
        'StorePriorityOpt',
        'StoreRemapVectorWidth',
        'StoreSyncOpt',
        'StoreVectorWidth',
        'StreamK',
        'StreamKXCCMapping',
        'TransposeLDS',
        'UnrollLoopSwapGlobalReadOrder',
        'Use64bShadowLimit',
        'UseInstOffsetForGRO',
        'UseSgprForGRO',
        'VectorStore',
        'VectorWidthA',
        'VectorWidthB',
        'WaveSeparateGlobalReadA',
        'WaveSeparateGlobalReadB',
        'WavefrontSize',
        'WorkGroup',
    })
10 changes: 3 additions & 7 deletions tensilelite/Tensile/KernelWriter.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
from .Activation import ActivationModule
from .Common import printWarning, roundUp, print2, DebugConfig, DataDirection, \
INDEX_CHARS, IsaVersion
from Tensile.SolutionStructs.Naming import getKernelName
from Tensile.SolutionStructs.Naming import getKernelNameMin
from Tensile.Toolchain.Component import Assembler

import abc
Expand Down Expand Up @@ -369,13 +369,9 @@ class KernelWriter(metaclass=abc.ABCMeta):
##############################################################################
def __init__(
self,
kernelMinNaming,
kernelSerialNaming,
assembler: Assembler,
debugConfig: DebugConfig,
):
self.kernelMinNaming = kernelMinNaming
self.kernelSerialNaming = kernelSerialNaming
self.assembler = assembler
self.debugConfig = debugConfig

Expand Down Expand Up @@ -3273,7 +3269,7 @@ def _initKernel(self, kernel, tensorParametersA, tensorParametersB):
ti.setKernel(version, kernel["WavefrontSize"])

self.consts = ConstValues()
self.states = StateValues(version=version, kernel=kernel, kernelName=getKernelName(self.kernelMinNaming, self.debugConfig.splitGSU, kernel))
self.states = StateValues(version=version, kernel=kernel, kernelName=getKernelNameMin(kernel, self.debugConfig.splitGSU))
self.vgprs = StateVgprs()
self.sgprs = collections.OrderedDict()
self.codes = CodeModules()
Expand Down Expand Up @@ -5439,7 +5435,7 @@ def getSourceFileString(self, kernel) -> Tuple[int, str]:
pass

def getHeaderFileString(self, kernel):
kernelName = getKernelName(self.kernelMinNaming, self.debugConfig.splitGSU, kernel)
kernelName = getKernelNameMin(kernel, self.debugConfig.splitGSU)
fileString = "" # CHeader
fileString += "extern const unsigned char %s_coba[]; // code object byte array\n" % kernelName

Expand Down
14 changes: 5 additions & 9 deletions tensilelite/Tensile/KernelWriterAssembly.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,16 +93,14 @@ class KernelWriterAssembly(KernelWriter):
##############################################################################
def __init__(
self,
kernelMinNaming,
kernelSerialNaming,
assembler: Assembler,
debugConfig: DebugConfig,
):
super(KernelWriterAssembly, self).__init__(kernelMinNaming, kernelSerialNaming, assembler, debugConfig)
super(KernelWriterAssembly, self).__init__(assembler, debugConfig)


def _getCustomKernelSource(self, useShortNames, kernel, CustomKernelDirectory):
kernelName = getKernelFileBase(useShortNames, self.debugConfig.splitGSU, self.kernelMinNaming, self.kernelSerialNaming, kernel)
def _getCustomKernelSource(self, kernel, CustomKernelDirectory):
kernelName = getKernelFileBase(self.debugConfig.splitGSU, kernel)
with open(os.path.join(CustomKernelDirectory, (kernelName + ".s"))) as f:
rocmVersion = self.assembler.rocm_version
if not (rocmVersion.major >= 6 and rocmVersion.patch >= 32650):
Expand All @@ -123,17 +121,15 @@ def _getCustomKernelSource(self, useShortNames, kernel, CustomKernelDirectory):
return code


def getSourceFileString(self,
kernel,
useShortNames: bool=False) -> Tuple[int, str]:
def getSourceFileString(self, kernel) -> Tuple[int, str]:
assert kernel["KernelLanguage"] == "Assembly"
# Skip if .o files will have already been built for this file
if kernel.duplicate:
self.language = "ASM"
return (0, "") # should this be an non zero number

try:
code = self._getCustomKernelSource(useShortNames, kernel, CUSTOM_KERNEL_PATH) if isCustomKernelConfig(kernel) else self._getKernelSource(kernel)
code = self._getCustomKernelSource(kernel, CUSTOM_KERNEL_PATH) if isCustomKernelConfig(kernel) else self._getKernelSource(kernel)
errcode = 0
except RuntimeError as e:
printWarning(f"Failed to generate assembly source code for {kernel}: {e}")
Expand Down
Loading
Loading