Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions docs/source/features/sampling.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,10 @@ The PyTorch backend supports a wide variety of features, listed below:
There are two sampling backends available.

* Torch Sampler
* TRTLLM Sampler
* TRTLLM Sampler (deprecated)

Torch Sampler currently supports a superset of features of TRTLLM Sampler, and is intended as the long-term solution. One can specify which sampler to use explicitly with:
Torch Sampler is used by default and supports a superset of features of TRTLLM Sampler. TRTLLM Sampler will be removed in release 1.4.
One can specify which sampler to use explicitly with:

```python
from tensorrt_llm import LLM
Expand Down
170 changes: 125 additions & 45 deletions jenkins/TensorRT_LLM_PLC.groovy
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
@Library(['trtllm-jenkins-shared-lib@main']) _
import groovy.json.JsonSlurper

def createKubernetesPodConfig()
{
Expand Down Expand Up @@ -78,23 +79,30 @@ def getLLMRepo () {
def installTools() {
container("cpu") {
sh "apt update"
sh "apt install -y git git-lfs openjdk-17-jdk python3-dev python3-venv curl unzip wget"
sh "apt install -y git git-lfs openjdk-17-jdk python3-dev python3-venv curl zip unzip wget"
}
}

// Validate that a branch name is a well-formed git ref before it is used
// anywhere else in the pipeline. Fails the build via error() otherwise.
def validateBranchName(String branch) {
    container("cpu") {
        // Pass the value through the environment instead of Groovy string
        // interpolation: `branch` comes from a user-controlled job parameter,
        // and interpolating it inside single quotes would let a name
        // containing a quote inject arbitrary shell commands.
        withEnv(["BRANCH_TO_VALIDATE=${branch}"]) {
            def rc = sh(script: 'git check-ref-format --branch "$BRANCH_TO_VALIDATE"', returnStatus: true)
            if (rc != 0) {
                error("Invalid branch name: '${branch}'")
            }
        }
    }
}

def checkoutSource ()
{
container("cpu") {
trtllm_utils.setupGitMirror()
stage("Checkout TRTLLM Source") {
def LLM_REPO = getLLMRepo()
sh "git config --global --add safe.directory ${env.WORKSPACE}"
trtllm_utils.checkoutSource(LLM_REPO, params.branchName, env.WORKSPACE, false, true)
}
def LLM_REPO = getLLMRepo()
sh "git config --global --add safe.directory ${env.WORKSPACE}"
trtllm_utils.checkoutSource(LLM_REPO, params.branchName, env.WORKSPACE, false, true)
}
}

def getPulseToken() {
def getPulseToken(serviceId, scopes) {
def token
//Configure credential 'starfleet-client-id' under Jenkins Credential Manager
withCredentials([usernamePassword(
Expand All @@ -104,10 +112,10 @@ def getPulseToken() {
)]) {
// Do not save AUTH_HEADER to a groovy variable since that
// will expose the auth_header without being masked
token= sh(script: '''
AUTH_HEADER=$(echo -n $SF_CLIENT_ID:$SF_CLIENT_SECRET | base64 -w0)
curl -s --request POST --header "Authorization: Basic $AUTH_HEADER" --header "Content-Type: application/x-www-form-urlencoded" "https://4ubglassowmtsi7ogqwarmut7msn1q5ynts62fwnr1i.ssa.nvidia.com/token?grant_type=client_credentials&scope=verify:nspectid%20sourcecode:blackduck%20update:report" | jq ".access_token" | tr -d '"'
''', returnStdout: true).trim()
token= sh(script: """
AUTH_HEADER=\$(echo -n \$SF_CLIENT_ID:\$SF_CLIENT_SECRET | base64 -w0)
curl -s --request POST --header "Authorization: Basic \$AUTH_HEADER" --header "Content-Type: application/x-www-form-urlencoded" "https://${serviceId}.ssa.nvidia.com/token?grant_type=client_credentials&scope=${scopes}" | jq ".access_token" | tr -d '"'
""", returnStdout: true).trim()
}
return token
}
Expand Down Expand Up @@ -169,22 +177,22 @@ def sonarScan()
}
}

def pulseScan(llmRepo, branchName) {
def pulseScanSourceCode(llmRepo, branchName) {
container("docker") {
sh "apk add jq curl"
def token = getPulseToken()
def token = getPulseToken("4ubglassowmtsi7ogqwarmut7msn1q5ynts62fwnr1i", "verify:nspectid%20sourcecode:blackduck%20update:report")
if (!token) {
throw new Exception("Invalid token get")
}
withCredentials([
usernamePassword(
credentialsId: "svc_tensorrt_gitlab_read_api_token",
usernameVariable: 'USERNAME',
passwordVariable: 'PASSWORD'
usernameVariable: 'GITLAB_USERNAME',
passwordVariable: 'GITLAB_PASSWORD'
),
string(credentialsId: 'default-git-url', variable: 'DEFAULT_GIT_URL')
]) {
trtllm_utils.llmExecStepWithRetry(this, script: "docker login ${DEFAULT_GIT_URL}:5005 -u ${USERNAME} -p ${PASSWORD}")
trtllm_utils.llmExecStepWithRetry(this, script: "docker login ${DEFAULT_GIT_URL}:5005 -u ${GITLAB_USERNAME} -p ${GITLAB_PASSWORD}")
docker.withRegistry("https://${DEFAULT_GIT_URL}:5005") {
docker.image("pstooling/pulse-group/pulse-open-source-scanner/pulse-oss-cli:stable")
.inside("--user 0 --privileged -v /var/run/docker.sock:/var/run/docker.sock") {
Expand All @@ -207,25 +215,96 @@ def pulseScan(llmRepo, branchName) {
}
}
container("cpu") {
sh "cat nspect_scan_report.json"
sh 'unzip -p sbom.zip "*.json" > sbom_toupload.json'
sh "cat sbom_toupload.json"
def outputDir = "scan_report/source_code"
sh "mkdir -p ${outputDir}"
sh "unzip -p sbom.zip \"*.json\" > ${outputDir}/sbom.json"
sh "mv nspect_scan_report.json ${outputDir}/vulns.json"
}
}
// Run the Pulse container scanner over the NGC release images and their
// base images, writing SBOM/scan output under scan_report/<key>/.
// NOTE(review): parameters llmRepo/branchName are currently unused here —
// the branch is read from params.branchName; kept for signature parity
// with the other scan steps.
def pulseScanContainer(llmRepo, branchName) {
    // imageTags: key -> [image: <full image:tag>, platform: <platform or empty>]
    def imageTags = [:]
    container("cpu") {
        // Resolve the release image tags published by the latest
        // L0_PostMerge build for this branch.
        def output = sh(
            script: "python3 ./jenkins/scripts/get_image_key_to_tag.py ${params.branchName}",
            returnStdout: true
        ).trim()
        println(output)
        def containerTagMap = new JsonSlurper().parseText(output)
        imageTags["release_amd64"] = [image: containerTagMap["NGC Release Image amd64"], platform: "linux/amd64"]
        imageTags["release_arm64"] = [image: containerTagMap["NGC Release Image arm64"], platform: "linux/arm64"]

        // The base image/tag are read from the first ARG defaults declared
        // in docker/Dockerfile.multi.
        def baseImage = sh(script: "grep -m1 '^ARG BASE_IMAGE=' docker/Dockerfile.multi | cut -d= -f2", returnStdout: true).trim()
        def baseTag = sh(script: "grep -m1 '^ARG BASE_TAG=' docker/Dockerfile.multi | cut -d= -f2", returnStdout: true).trim()
        imageTags["base_amd64"] = [image: "${baseImage}:${baseTag}", platform: "linux/amd64"]
        imageTags["base_arm64"] = [image: "${baseImage}:${baseTag}", platform: "linux/arm64"]
    }
    container("docker") {
        sh "apk add jq curl"
        def token = getPulseToken("x9thwm-cootr2q1jdv5p7b8iw4fs4ob3x6nqqsoznyk", "nspect.verify%20scan.anchore")
        if (!token) {
            throw new Exception("Invalid token get")
        }
        withCredentials([
            usernamePassword(
                credentialsId: "svc_tensorrt_gitlab_read_api_token",
                usernameVariable: 'GITLAB_USERNAME',
                passwordVariable: 'GITLAB_PASSWORD'
            ),
            usernamePassword(
                credentialsId: "urm-artifactory-creds",
                usernameVariable: 'URM_USERNAME',
                passwordVariable: 'URM_PASSWORD'
            ),
            string(credentialsId: 'default-git-url', variable: 'DEFAULT_GIT_URL'),
        ]) {
            // Log in to both registries: the GitLab registry hosts the
            // scanner image, URM hosts the images being scanned.
            trtllm_utils.llmExecStepWithRetry(this, script: "docker login ${DEFAULT_GIT_URL}:5005 -u ${GITLAB_USERNAME} -p ${GITLAB_PASSWORD}")
            trtllm_utils.llmExecStepWithRetry(this, script: "docker login urm.nvidia.com -u ${URM_USERNAME} -p ${URM_PASSWORD}")
            docker.withRegistry("https://${DEFAULT_GIT_URL}:5005") {
                docker.image("gitlab-master.nvidia.com:5005/pstooling/pulse-group/pulse-container-scanner/pulse-cli:5.1.0")
                    .inside("--user 0 --privileged -v /var/run/docker.sock:/var/run/docker.sock") {
                        // Expand the token via the shell ($SSA_TOKEN), not
                        // Groovy interpolation, so it is masked in the log.
                        withEnv([
                            "NSPECT_ID=NSPECT-95LK-6FZF",
                            "SSA_TOKEN=${token}",
                        ]) {
                            imageTags.each { key, entry ->
                                def outputDir = "scan_report/${key}"
                                sh "mkdir -p ${outputDir}"
                                echo "Scanning ${key}: ${entry.image} (${entry.platform}) -> ${outputDir}"
                                sh "pulse-cli -n \$NSPECT_ID --ssa \$SSA_TOKEN scan-image -i ${entry.image} --platform ${entry.platform} --sbom=cyclonedx-json --output-dir=${outputDir} -o"
                            }
                        }
                    }
            }
        }
    }
}

def processScanResults(branchName) {
container("cpu") {
def ELASTICSEARCH_POST_URL = "http://nvdataflow.nvidia.com/dataflow/swdl-tensorrt-infra-plc-scan/posting"
def ELASTICSEARCH_QUERY_URL = "https://gpuwa.nvidia.com/elasticsearch"
def TRTLLM_ES_INDEX_BASE = "df-swdl-tensorrt-infra-plc-scan"
def TRTLLM_ES_INDEX_PREAPPROVED_BASE = "df-swdl-tensorrt-infra-plc-container-pre-approve"
def jobPath = env.JOB_NAME.replaceAll("/", "%2F")
def pipelineUrl = "${env.JENKINS_URL}blue/organizations/jenkins/${jobPath}/detail/${jobPath}/${env.BUILD_NUMBER}/pipeline"
withCredentials([string(credentialsId: 'trtllm_plc_slack_webhook', variable: 'PLC_SLACK_WEBHOOK')]) {
def ELASTICSEARCH_POST_URL = "http://nvdataflow.nvidia.com/dataflow/swdl-tensorrt-infra-plc/posting"
def ELASTICSEARCH_QUERY_URL = "https://gpuwa.nvidia.com/elasticsearch"
def TRTLLM_ES_INDEX_BASE = "df-swdl-tensorrt-infra-plc"
def jobPath = env.JOB_NAME.replaceAll("/", "%2F")
def pipelineUrl = "${env.JENKINS_URL}blue/organizations/jenkins/${jobPath}/detail/${jobPath}/${env.BUILD_NUMBER}/pipeline"
withEnv([
"TRTLLM_ES_POST_URL=${ELASTICSEARCH_POST_URL}",
"TRTLLM_ES_QUERY_URL=${ELASTICSEARCH_QUERY_URL}",
"TRTLLM_ES_INDEX_BASE=${TRTLLM_ES_INDEX_BASE}",
"TRTLLM_ES_INDEX_PREAPPROVED_BASE=${TRTLLM_ES_INDEX_PREAPPROVED_BASE}",
"TRTLLM_PLC_WEBHOOK=${PLC_SLACK_WEBHOOK}"
]) {
sh """
python3 -m venv venv
venv/bin/pip install requests elasticsearch==7.13.4
venv/bin/python ./jenkins/scripts/submit_vulnerability_report.py --build-url ${pipelineUrl} --build-number ${env.BUILD_NUMBER} --branch ${params.branchName}
venv/bin/python ./jenkins/scripts/pulse_in_pipeline_scanning/main.py \
--build-url ${pipelineUrl} \
--build-number ${env.BUILD_NUMBER} \
--branch ${branchName} \
--report-directory ${pwd()}/scan_report
"""
}
}
Expand Down Expand Up @@ -256,7 +335,6 @@ pipeline {
// Jobs in other folders (e.g. personal/dev pipelines) will have no cron trigger.
parameterizedCron(env.JOB_NAME.startsWith('LLM/helpers/') ? '''
H 2 * * * %branchName=main;repoUrlKey=tensorrt_llm_github
H 3 * * * %branchName=release/1.2;repoUrlKey=tensorrt_llm_github
''' : '')
}
stages {
Expand All @@ -265,40 +343,42 @@ pipeline {
script {
installTools()
checkoutSource()
validateBranchName(params.branchName)
}
}
}
stage('Run TRT-LLM PLC Jobs') {
parallel {
stage("Source Code OSS Scanning"){
stages {
stage("Generate Lock Files"){
steps
{
script {
generateLockFiles(env.LLM_REPO, env.BRANCH_NAME)
}
}
stage("Source Code OSS Scanning") {
steps {
script {
generateLockFiles(env.LLM_REPO, env.BRANCH_NAME)
pulseScanSourceCode(env.LLM_REPO, env.BRANCH_NAME)
}
stage("Run Pulse Scanning"){
steps
{
script {
pulseScan(env.LLM_REPO, env.BRANCH_NAME)
}
}
}
}
stage("Run Container Scanning") {
steps {
script {
pulseScanContainer(env.LLM_REPO, env.BRANCH_NAME)
}
}
}
stage("SonarQube Code Analysis"){
steps
{
stage("SonarQube Code Analysis") {
steps {
script {
sonarScan()
}
}
}
}
}
stage("Process Scan Result") {
steps {
script {
processScanResults(env.BRANCH_NAME)
}
}
}
} // stages
} // pipeline
94 changes: 94 additions & 0 deletions jenkins/scripts/get_image_key_to_tag.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
#!/usr/bin/env python3
# SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import json
import sys
import urllib.error
import urllib.request


def fetch_url(url, timeout=30):
    """Fetch *url* and return a ``(status, body)`` pair.

    Args:
        url: The URL to fetch.
        timeout: Socket timeout in seconds. Without one, ``urlopen`` can
            block indefinitely and hang the CI job on a stuck server.

    Returns:
        tuple: ``(HTTP status, response bytes)`` on success,
        ``(error status, None)`` when the server answers with an HTTP
        error (e.g. 404 for a missing build), and ``(None, None)`` for
        any other failure (malformed URL, DNS/network error, timeout).
    """
    try:
        with urllib.request.urlopen(url, timeout=timeout) as response:
            return response.status, response.read()
    except urllib.error.HTTPError as e:
        # The server responded; surface the status code so callers can
        # distinguish "artifact missing" from "could not connect at all".
        return e.code, None
    except Exception as e:
        print(f"Error fetching {url}: {e}", file=sys.stderr)
        return None, None


def get_latest_build_number(jenkins_base):
    """Return the most recent build number for the given Jenkins job.

    Queries the ``lastBuild`` endpoint first and falls back to
    ``lastCompletedBuild``; returns ``None`` when neither yields a
    parseable build number.
    """
    endpoints = ("lastBuild", "lastCompletedBuild")
    for attempt, endpoint in enumerate(endpoints):
        status, payload = fetch_url(f"{jenkins_base}/{endpoint}/api/json")
        if status == 200 and payload:
            try:
                return json.loads(payload)["number"]
            except (json.JSONDecodeError, KeyError):
                # Malformed or unexpected JSON — treat like a fetch failure.
                pass
        if attempt == 0:
            print(
                "Failed to get last build number. Trying last completed build...", file=sys.stderr
            )
    return None


def main():
    """Print the ``imageKeyToTag.json`` artifact for a branch to stdout.

    Determines the latest L0_PostMerge build for the branch, then walks
    build numbers downward until an artifact is found. Diagnostics go to
    stderr so stdout stays clean for the JSON payload. Exits 0 on
    success, 1 on any failure.
    """
    if len(sys.argv) != 2:
        print(f"Usage: {sys.argv[0]} <branch_name>", file=sys.stderr)
        sys.exit(1)

    branch_name = sys.argv[1]
    jenkins_base = (
        f"https://prod.blsm.nvidia.com/sw-tensorrt-top-1/job/LLM/job/{branch_name}/job/L0_PostMerge"
    )
    artifactory_base = (
        f"https://urm.nvidia.com/artifactory/sw-tensorrt-generic-local/"
        f"llm-artifacts/LLM/{branch_name}/L0_PostMerge"
    )

    print(f"Fetching latest build number from Jenkins for branch: {branch_name}", file=sys.stderr)

    latest = get_latest_build_number(jenkins_base)
    if latest is None:
        print(
            f"Error: Could not determine the latest build number from {jenkins_base}",
            file=sys.stderr,
        )
        sys.exit(1)

    print(f"Latest build number: {latest}", file=sys.stderr)

    # Not every build uploads the artifact (e.g. aborted runs), so walk
    # backwards from the newest build until one has it.
    for build_number in range(latest, 0, -1):
        artifact_url = f"{artifactory_base}/{build_number}/imageKeyToTag.json"
        print(f"Fetching: {artifact_url}", file=sys.stderr)
        status, data = fetch_url(artifact_url)
        if status == 200 and data:
            # Emit the raw JSON on stdout for the calling pipeline to parse.
            sys.stdout.write(data.decode())
            sys.exit(0)
        print(
            f"Got HTTP {status} for build {build_number}, trying build {build_number - 1}...",
            file=sys.stderr,
        )

    print("Error: Could not find imageKeyToTag.json in any recent build", file=sys.stderr)
    sys.exit(1)


# Entry point: run only when executed as a script (not when imported).
if __name__ == "__main__":
    main()
Loading
Loading