From a8cfbb65031fa18595a6d102404a921ad4c40325 Mon Sep 17 00:00:00 2001 From: derrod Date: Thu, 3 Oct 2024 18:02:56 +0200 Subject: [PATCH 1/5] obs-nvenc-test: Add device architecture to device info --- .../obs-nvenc-test/obs-nvenc-test.cpp | 31 +++++++++++++++++-- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/plugins/obs-nvenc/obs-nvenc-test/obs-nvenc-test.cpp b/plugins/obs-nvenc/obs-nvenc-test/obs-nvenc-test.cpp index 211f6f15efced1..d32312f3ddc9d8 100644 --- a/plugins/obs-nvenc/obs-nvenc-test/obs-nvenc-test.cpp +++ b/plugins/obs-nvenc/obs-nvenc-test/obs-nvenc-test.cpp @@ -47,9 +47,25 @@ typedef nvmlReturn_t (*NVML_GET_DEVICE_NAME)(nvmlDevice_t, char *, unsigned); typedef nvmlReturn_t (*NVML_GET_DEVICE_PCIE_GEN)(nvmlDevice_t, unsigned *); typedef nvmlReturn_t (*NVML_GET_DEVICE_PCIE_WIDTH)(nvmlDevice_t, unsigned *); typedef nvmlReturn_t (*NVML_GET_DEVICE_NAME)(nvmlDevice_t, char *, unsigned); +typedef nvmlReturn_t (*NVML_GET_DEVICE_ARCHITECTURE)(nvmlDevice_t, unsigned *); typedef nvmlReturn_t (*NVML_GET_ENCODER_SESSIONS)(nvmlDevice_t, unsigned *, void *); typedef nvmlReturn_t (*NVML_GET_ENCODER_CAPACITY)(nvmlDevice_t, nvmlEncoderType, unsigned *); typedef nvmlReturn_t (*NVML_GET_ENCODER_UTILISATION)(nvmlDevice_t, unsigned *, unsigned *); + +/* Only Kepler is defined in NVIDIA's documentation, but it's also the main one we care about. 
*/ +constexpr uint32_t NVML_DEVICE_ARCH_KEPLER = 2; + +const unordered_map arch_to_name = { + {NVML_DEVICE_ARCH_KEPLER, "Kepler"}, + {3, "Maxwell"}, + {4, "Pascal"}, + {5, "Volta"}, + {6, "Turing"}, + {7, "Ampere"}, + {8, "Ada"}, + {9, "Hopper"}, +}; + /* List of capabilities to be queried per codec */ static const vector> capabilities = { {NV_ENC_CAPS_NUM_MAX_BFRAMES, "bframes"}, @@ -82,6 +98,7 @@ struct device_info { string cuda_uuid; string name; + uint32_t architecture; uint32_t pcie_gen; uint32_t pcie_width; @@ -105,6 +122,7 @@ struct NVML { NVML_GET_DEVICE_NAME getDeviceName; NVML_GET_DEVICE_PCIE_GEN getDevicePCIeGen; NVML_GET_DEVICE_PCIE_WIDTH getDevicePCIeWidth; + NVML_GET_DEVICE_ARCHITECTURE getDeviceArchitecture; NVML_GET_ENCODER_SESSIONS getEncoderSessions; NVML_GET_ENCODER_CAPACITY getEncoderCapacity; NVML_GET_ENCODER_UTILISATION getEncoderUtilisation; @@ -133,13 +151,14 @@ struct NVML { getDeviceName = (NVML_GET_DEVICE_NAME)load_nvml_func("nvmlDeviceGetName"); getDevicePCIeGen = (NVML_GET_DEVICE_PCIE_GEN)load_nvml_func("nvmlDeviceGetCurrPcieLinkGeneration"); getDevicePCIeWidth = (NVML_GET_DEVICE_PCIE_WIDTH)load_nvml_func("nvmlDeviceGetCurrPcieLinkWidth"); + getDeviceArchitecture = (NVML_GET_DEVICE_ARCHITECTURE)load_nvml_func("nvmlDeviceGetArchitecture"); getEncoderSessions = (NVML_GET_ENCODER_SESSIONS)load_nvml_func("nvmlDeviceGetEncoderSessions"); getEncoderCapacity = (NVML_GET_ENCODER_CAPACITY)load_nvml_func("nvmlDeviceGetEncoderCapacity"); getEncoderUtilisation = (NVML_GET_ENCODER_UTILISATION)load_nvml_func("nvmlDeviceGetEncoderUtilization"); if (!init || !shutdown || !getDriverVersion || !getDeviceHandleByPCIBusId || !getDeviceUUID || !getDeviceName || !getDevicePCIeGen || !getDevicePCIeWidth || !getEncoderSessions || - !getEncoderCapacity || !getEncoderUtilisation) { + !getEncoderCapacity || !getEncoderUtilisation || !getDeviceArchitecture) { return false; } @@ -298,6 +317,7 @@ static bool get_adapter_caps(int adapter_idx, codec_caps_map &caps, 
device_info nvml.getDevicePCIeGen(dev, &device_info.pcie_gen); nvml.getDevicePCIeWidth(dev, &device_info.pcie_width); nvml.getEncoderSessions(dev, &device_info.encoder_sessions, nullptr); + nvml.getDeviceArchitecture(dev, &device_info.architecture); nvml.getEncoderUtilisation(dev, &device_info.utilisation, &device_info.sample_period); nvml.getEncoderCapacity(dev, NVML_ENCODER_QUERY_H264, &device_info.capacity_h264); nvml.getEncoderCapacity(dev, NVML_ENCODER_QUERY_HEVC, &device_info.capacity_hevc); @@ -453,12 +473,17 @@ int check_thread() /* Per-device info (mostly for debugging) */ for (size_t idx = 0; idx < device_infos.size(); idx++) { const auto &info = device_infos[idx]; + string_view architecture = "Unknown"; + if (arch_to_name.count(info.architecture)) + architecture = arch_to_name.at(info.architecture); printf("\n[device.%zu]\n" "pci_id=%s\n" "nvml_uuid=%s\n" "cuda_uuid=%s\n" "name=%s\n" + "architecture=%u\n" + "architecture_name=%s\n" "pcie_link_width=%d\n" "pcie_link_gen=%d\n" "encoder_sessions=%u\n" @@ -468,8 +493,8 @@ int check_thread() "capacity_hevc=%u\n" "capacity_av1=%u\n", idx, info.pci_id.c_str(), info.nvml_uuid.c_str(), info.cuda_uuid.c_str(), info.name.c_str(), - info.pcie_width, info.pcie_gen, info.encoder_sessions, info.utilisation, info.sample_period, - info.capacity_h264, info.capacity_hevc, info.capacity_av1); + info.architecture, architecture.data(), info.pcie_width, info.pcie_gen, info.encoder_sessions, + info.utilisation, info.sample_period, info.capacity_h264, info.capacity_hevc, info.capacity_av1); for (const auto &[codec, codec_caps] : info.caps) { printf("\n[device.%zu.%s]\n", idx, codec.c_str()); From 7cfb41db882b9cc7afe05a2704781eaf2ad2a36c Mon Sep 17 00:00:00 2001 From: derrod Date: Thu, 3 Oct 2024 18:03:17 +0200 Subject: [PATCH 2/5] obs-nvenc-test: Query device info even if NVENC fails --- plugins/obs-nvenc/obs-nvenc-test/obs-nvenc-test.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git 
a/plugins/obs-nvenc/obs-nvenc-test/obs-nvenc-test.cpp b/plugins/obs-nvenc/obs-nvenc-test/obs-nvenc-test.cpp index d32312f3ddc9d8..921c542f80099e 100644 --- a/plugins/obs-nvenc/obs-nvenc-test/obs-nvenc-test.cpp +++ b/plugins/obs-nvenc/obs-nvenc-test/obs-nvenc-test.cpp @@ -422,17 +422,17 @@ bool nvenc_checks(codec_caps_map &caps, vector &device_infos) return false; } - if (nvenc_ver < NVENC_CONFIGURED_VERSION) { - printf("reason=outdated_driver\n"); - return false; - } - device_infos.resize(cuda_devices); for (int idx = 0; idx < cuda_devices; idx++) { if (get_adapter_caps(idx, caps, device_infos[idx], nvml)) nvenc_devices++; } + if (nvenc_ver < NVENC_CONFIGURED_VERSION) { + printf("reason=outdated_driver\n"); + return false; + } + printf("nvenc_devices=%d\n", nvenc_devices); if (!nvenc_devices) { printf("reason=no_supported_devices\n"); From d2e6ec1328ef09855cbd9fa7d72658eff91ab4e9 Mon Sep 17 00:00:00 2001 From: derrod Date: Thu, 3 Oct 2024 19:24:46 +0200 Subject: [PATCH 3/5] obs-nvenc-test: Add error message for session limit being hit --- .../obs-nvenc-test/obs-nvenc-test.cpp | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/plugins/obs-nvenc/obs-nvenc-test/obs-nvenc-test.cpp b/plugins/obs-nvenc/obs-nvenc-test/obs-nvenc-test.cpp index 921c542f80099e..6251f47796d92d 100644 --- a/plugins/obs-nvenc/obs-nvenc-test/obs-nvenc-test.cpp +++ b/plugins/obs-nvenc/obs-nvenc-test/obs-nvenc-test.cpp @@ -249,7 +249,7 @@ struct NVSession { ~NVSession() { nv.nvEncDestroyEncoder(ptr); } - bool OpenSession(const CUDACtx &ctx) + NVENCSTATUS OpenSession(const CUDACtx &ctx) { NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS params = {}; params.version = NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER; @@ -257,7 +257,7 @@ struct NVSession { params.device = ctx.ctx; params.deviceType = NV_ENC_DEVICE_TYPE_CUDA; - return nv.nvEncOpenEncodeSessionEx(&params, &ptr) == NV_ENC_SUCCESS; + return nv.nvEncOpenEncodeSessionEx(&params, &ptr); } }; @@ -293,7 +293,8 @@ static bool 
init_cuda() return true; } -static bool get_adapter_caps(int adapter_idx, codec_caps_map &caps, device_info &device_info, NVML &nvml) +static bool get_adapter_caps(int adapter_idx, codec_caps_map &caps, device_info &device_info, NVML &nvml, + bool &session_limit) { CUDACtx cudaCtx; NVSession nvSession; @@ -324,7 +325,9 @@ static bool get_adapter_caps(int adapter_idx, codec_caps_map &caps, device_info nvml.getEncoderCapacity(dev, NVML_ENCODER_QUERY_AV1, &device_info.capacity_av1); } - if (!nvSession.OpenSession(cudaCtx)) + auto res = nvSession.OpenSession(cudaCtx); + session_limit = session_limit || res == NV_ENC_ERR_INCOMPATIBLE_CLIENT_KEY; + if (res != NV_ENC_SUCCESS) return false; uint32_t guid_count = 0; @@ -390,6 +393,7 @@ bool nvenc_checks(codec_caps_map &caps, vector &device_infos) int cuda_devices = 0; int nvenc_devices = 0; char driver_ver[NVML_SYSTEM_DRIVER_VERSION_BUFFER_SIZE]; + bool session_limit = false; /* NVIDIA driver version */ if (nvml.getDriverVersion(driver_ver, sizeof(driver_ver)) == NVML_SUCCESS) { @@ -424,10 +428,15 @@ bool nvenc_checks(codec_caps_map &caps, vector &device_infos) device_infos.resize(cuda_devices); for (int idx = 0; idx < cuda_devices; idx++) { - if (get_adapter_caps(idx, caps, device_infos[idx], nvml)) + if (get_adapter_caps(idx, caps, device_infos[idx], nvml, session_limit)) nvenc_devices++; } + if (session_limit) { + printf("reason=session_limit\n"); + return false; + } + if (nvenc_ver < NVENC_CONFIGURED_VERSION) { printf("reason=outdated_driver\n"); return false; From 6b7b759c6cbccd2452cd2b33e928b63e1b08539b Mon Sep 17 00:00:00 2001 From: derrod Date: Fri, 18 Oct 2024 18:40:26 +0200 Subject: [PATCH 4/5] obs-nvenc-test: Add SDK 13.0 features and Blackwell --- .../obs-nvenc-test/obs-nvenc-test.cpp | 36 +++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/plugins/obs-nvenc/obs-nvenc-test/obs-nvenc-test.cpp b/plugins/obs-nvenc/obs-nvenc-test/obs-nvenc-test.cpp index 6251f47796d92d..e9631fd56b7c23 
100644 --- a/plugins/obs-nvenc/obs-nvenc-test/obs-nvenc-test.cpp +++ b/plugins/obs-nvenc/obs-nvenc-test/obs-nvenc-test.cpp @@ -64,6 +64,7 @@ const unordered_map arch_to_name = { {7, "Ampere"}, {8, "Ada"}, {9, "Hopper"}, + {10, "Blackwell"}, }; /* List of capabilities to be queried per codec */ @@ -83,6 +84,11 @@ static const vector> capabilities = { /* SDK 12.2+ features */ {NV_ENC_CAPS_SUPPORT_TEMPORAL_FILTER, "temporal_filter"}, {NV_ENC_CAPS_SUPPORT_LOOKAHEAD_LEVEL, "lookahead_level"}, + {NV_ENC_CAPS_SUPPORT_UNIDIRECTIONAL_B, "unidirectional_b"}, +#endif +#if NVENCAPI_MAJOR_VERSION >= 13 + /* SDK 13.0+ features */ + {NV_ENC_CAPS_SUPPORT_YUV422_ENCODE, "yuv_422"}, #endif }; @@ -363,9 +369,22 @@ static bool get_adapter_caps(int adapter_idx, codec_caps_map &caps, device_info continue; device_info.caps[codec_name][name] = v; - if (v > caps[codec_name][name]) - caps[codec_name][name] = v; + caps[codec_name][name] = std::max(v, caps[codec_name][name]); } + +#if NVENCAPI_MAJOR_VERSION > 12 || NVENCAPI_MINOR_VERSION >= 2 + /* Explicitly check if UHQ tuning is supported since temporal filtering query is true for all codecs. */ + NV_ENC_PRESET_CONFIG preset_config = {}; + preset_config.version = NV_ENC_PRESET_CONFIG_VER; + preset_config.presetCfg.version = NV_ENC_CONFIG_VER; + + NVENCSTATUS res = nv.nvEncGetEncodePresetConfigEx(nvSession.ptr, *guid, NV_ENC_PRESET_P7_GUID, + NV_ENC_TUNING_INFO_ULTRA_HIGH_QUALITY, + &preset_config); + + device_info.caps[codec_name]["uhq"] = res == NV_ENC_SUCCESS ? 
1 : 0; + caps[codec_name]["uhq"] = std::max(device_info.caps[codec_name]["uhq"], caps[codec_name]["uhq"]); +#endif } return true; @@ -448,6 +467,19 @@ bool nvenc_checks(codec_caps_map &caps, vector &device_infos) return false; } + uint32_t latest_architecture = 0; + string_view architecture = "Unknown"; + + for (auto &info : device_infos) + latest_architecture = std::max(info.architecture, latest_architecture); + + if (arch_to_name.count(latest_architecture)) + architecture = arch_to_name.at(latest_architecture); + + printf("latest_architecture=%u\n" + "latest_architecture_name=%s\n", + latest_architecture, architecture.data()); + return true; } From b7bc77d3b8cfa1ca6ed615fc36fbef74b03148ed Mon Sep 17 00:00:00 2001 From: derrod Date: Fri, 18 Oct 2024 18:40:37 +0200 Subject: [PATCH 5/5] obs-nvenc: Add SDK 13.0 features --- plugins/obs-nvenc/data/locale/en-US.ini | 1 + plugins/obs-nvenc/nvenc-helpers.c | 2 + plugins/obs-nvenc/nvenc-helpers.h | 11 +++- plugins/obs-nvenc/nvenc-internal.h | 5 ++ plugins/obs-nvenc/nvenc-opts-parser.c | 6 ++ plugins/obs-nvenc/nvenc-properties.c | 19 ++++-- plugins/obs-nvenc/nvenc.c | 83 ++++++++++++++++++++++++- 7 files changed, 117 insertions(+), 10 deletions(-) diff --git a/plugins/obs-nvenc/data/locale/en-US.ini b/plugins/obs-nvenc/data/locale/en-US.ini index f1217c56e7a8a0..952a5671f39143 100644 --- a/plugins/obs-nvenc/data/locale/en-US.ini +++ b/plugins/obs-nvenc/data/locale/en-US.ini @@ -50,6 +50,7 @@ SplitEncode.Auto="Auto" SplitEncode.Disabled="Disabled" SplitEncode.Enabled="Two-way split" SplitEncode.ThreeWay="Three-way split" +SplitEncode.FourWay="Four-way split" Opts="Custom Encoder Options" Opts.TT="Space-separated list of options to apply to the rate control and codec settings,\nbased their names in the nvEncodeAPI header.\ne.g. 
\"lookaheadDepth=16 aqStrength=4\"" diff --git a/plugins/obs-nvenc/nvenc-helpers.c b/plugins/obs-nvenc/nvenc-helpers.c index c6ea42131a9571..91fac5cbd6e120 100644 --- a/plugins/obs-nvenc/nvenc-helpers.c +++ b/plugins/obs-nvenc/nvenc-helpers.c @@ -281,6 +281,8 @@ static void read_codec_caps(config_t *config, enum codec_type codec, const char caps->temporal_aq = config_get_bool(config, section, "temporal_aq"); caps->ten_bit = config_get_bool(config, section, "10bit"); caps->four_four_four = config_get_bool(config, section, "yuv_444"); + caps->four_two_two = config_get_bool(config, section, "yuv_422"); + caps->uhq = config_get_bool(config, section, "uhq"); } static bool nvenc_check(void) diff --git a/plugins/obs-nvenc/nvenc-helpers.h b/plugins/obs-nvenc/nvenc-helpers.h index c3c0275074457b..7b2abe1010137d 100644 --- a/plugins/obs-nvenc/nvenc-helpers.h +++ b/plugins/obs-nvenc/nvenc-helpers.h @@ -18,6 +18,10 @@ #define NVENC_12_2_OR_LATER #endif +#if NVENCAPI_MAJOR_VERSION >= 13 +#define NVENC_13_0_OR_LATER +#endif + enum codec_type { CODEC_H264, CODEC_HEVC, @@ -46,18 +50,19 @@ struct encoder_caps { int max_width; int max_height; - /* These don't seem to work correctly, thanks NVIDIA. */ - int temporal_filter; - int lookahead_level; + int temporal_filter; /* Broken prior to the 551.21 driver. */ + int lookahead_level; /* Broken prior to the 570.20 driver. */ bool dyn_bitrate; bool lookahead; bool lossless; bool temporal_aq; + bool uhq; /* Yeah... 
*/ bool ten_bit; bool four_four_four; + bool four_two_two; }; typedef NVENCSTATUS(NVENCAPI *NV_CREATE_INSTANCE_FUNC)(NV_ENCODE_API_FUNCTION_LIST *); diff --git a/plugins/obs-nvenc/nvenc-internal.h b/plugins/obs-nvenc/nvenc-internal.h index 531ae9b222faf2..05645e82a25885 100644 --- a/plugins/obs-nvenc/nvenc-internal.h +++ b/plugins/obs-nvenc/nvenc-internal.h @@ -107,6 +107,11 @@ struct nvenc_data { size_t roi_map_size; uint32_t roi_increment; +#ifdef NVENC_13_0_OR_LATER + CONTENT_LIGHT_LEVEL *cll; + MASTERING_DISPLAY_INFO *mdi; +#endif + struct nvenc_properties props; CUcontext cu_ctx; diff --git a/plugins/obs-nvenc/nvenc-opts-parser.c b/plugins/obs-nvenc/nvenc-opts-parser.c index 4e30b46817f30d..88945a461a83b5 100644 --- a/plugins/obs-nvenc/nvenc-opts-parser.c +++ b/plugins/obs-nvenc/nvenc-opts-parser.c @@ -129,6 +129,9 @@ static bool apply_h264_opt(struct obs_option *opt, NV_ENC_CONFIG_H264 *nv_conf) APPLY_INT_OPT(idrPeriod, uint32_t, PRIu32) APPLY_INT_OPT(useBFramesAsRef, NV_ENC_BFRAME_REF_MODE, PRIu32) +#ifdef NVENC_13_0_OR_LATER + APPLY_INT_OPT(tfLevel, NV_ENC_TEMPORAL_FILTER_LEVEL, PRIu32) +#endif APPLY_BIT_OPT(enableFillerDataInsertion, 1) @@ -162,6 +165,9 @@ static bool apply_av1_opt(struct obs_option *opt, NV_ENC_CONFIG_AV1 *nv_conf) APPLY_INT_OPT(numTileRows, uint32_t, PRIu32) APPLY_INT_OPT(idrPeriod, uint32_t, PRIu32) APPLY_INT_OPT(useBFramesAsRef, NV_ENC_BFRAME_REF_MODE, PRIu32) +#ifdef NVENC_13_0_OR_LATER + APPLY_INT_OPT(tfLevel, NV_ENC_TEMPORAL_FILTER_LEVEL, PRIu32) +#endif APPLY_BIT_OPT(enableBitstreamPadding, 1) diff --git a/plugins/obs-nvenc/nvenc-properties.c b/plugins/obs-nvenc/nvenc-properties.c index b7228544b39315..6a553f8287a656 100644 --- a/plugins/obs-nvenc/nvenc-properties.c +++ b/plugins/obs-nvenc/nvenc-properties.c @@ -157,13 +157,10 @@ obs_properties_t *nvenc_properties_internal(enum codec_type codec) OBS_COMBO_FORMAT_STRING); #define add_tune(val) obs_property_list_add_string(p, obs_module_text("Tuning." 
val), val) -#ifdef NVENC_12_2_OR_LATER - /* The UHQ tune is only supported on Turing or later. - * It uses the temporal filtering feature, so we can use its - * availability as an indicator that we are on a supported GPU. */ - if (codec == CODEC_HEVC && caps->temporal_filter) + /* The UHQ tune is only supported on Turing or later. */ + if (caps->uhq) add_tune("uhq"); -#endif + add_tune("hq"); add_tune("ll"); add_tune("ull"); @@ -189,6 +186,10 @@ obs_properties_t *nvenc_properties_internal(enum codec_type codec) } else if (codec == CODEC_AV1) { add_profile("main"); } else { +#ifdef NVENC_13_0_OR_LATER + if (caps->ten_bit) + add_profile("high10"); +#endif add_profile("high"); add_profile("main"); add_profile("baseline"); @@ -244,6 +245,12 @@ obs_properties_t *nvenc_properties_internal(enum codec_type codec) obs_property_list_add_int(p, obs_module_text("SplitEncode.ThreeWay"), NV_ENC_SPLIT_THREE_FORCED_MODE); } +#ifdef NVENC_13_0_OR_LATER + if (caps->engines > 3) { + obs_property_list_add_int(p, obs_module_text("SplitEncode.FourWay"), + NV_ENC_SPLIT_FOUR_FORCED_MODE); + } +#endif } #endif diff --git a/plugins/obs-nvenc/nvenc.c b/plugins/obs-nvenc/nvenc.c index 9d2e54e9e5a8ed..ab1ec8621eb721 100644 --- a/plugins/obs-nvenc/nvenc.c +++ b/plugins/obs-nvenc/nvenc.c @@ -213,6 +213,11 @@ static bool is_10_bit(const struct nvenc_data *enc) : obs_encoder_video_tex_active(enc->encoder, VIDEO_FORMAT_P010); } +static bool is_hdr(const enum video_colorspace space) +{ + return space == VIDEO_CS_2100_HLG || space == VIDEO_CS_2100_PQ; +} + static bool init_encoder_base(struct nvenc_data *enc, obs_data_t *settings) { UNUSED_PARAMETER(settings); @@ -480,6 +485,13 @@ static bool init_encoder_h264(struct nvenc_data *enc, obs_data_t *settings) if (enc->in_format == VIDEO_FORMAT_I444) { config->profileGUID = NV_ENC_H264_PROFILE_HIGH_444_GUID; h264_config->chromaFormatIDC = 3; +#ifdef NVENC_13_0_OR_LATER + } else if (astrcmpi(enc->props.profile, "high10") == 0) { + config->profileGUID = 
NV_ENC_H264_PROFILE_HIGH_10_GUID; + } else if (is_10_bit(enc)) { + warn("Forcing high10 for P010"); + config->profileGUID = NV_ENC_H264_PROFILE_HIGH_10_GUID; +#endif } else if (astrcmpi(enc->props.profile, "main") == 0) { config->profileGUID = NV_ENC_H264_PROFILE_MAIN_GUID; } else if (astrcmpi(enc->props.profile, "baseline") == 0) { @@ -488,6 +500,14 @@ static bool init_encoder_h264(struct nvenc_data *enc, obs_data_t *settings) config->profileGUID = NV_ENC_H264_PROFILE_HIGH_GUID; } +#ifdef NVENC_13_0_OR_LATER + /* Note: Only supported on Blackwell! */ + h264_config->inputBitDepth = is_10_bit(enc) ? NV_ENC_BIT_DEPTH_10 : NV_ENC_BIT_DEPTH_8; + h264_config->outputBitDepth = memcmp(&config->profileGUID, &NV_ENC_H264_PROFILE_HIGH_10_GUID, sizeof(GUID)) == 0 + ? NV_ENC_BIT_DEPTH_10 + : NV_ENC_BIT_DEPTH_8; +#endif + if (!apply_user_args(enc)) { obs_encoder_set_last_error(enc->encoder, obs_module_text("Opts.Invalid")); return false; @@ -586,7 +606,7 @@ static bool init_encoder_hevc(struct nvenc_data *enc, obs_data_t *settings) config->profileGUID = NV_ENC_HEVC_PROFILE_MAIN10_GUID; profile_is_10bpc = true; } else if (is_10_bit(enc)) { - blog(LOG_WARNING, "[obs-nvenc] Forcing main10 for P010"); + warn("Forcing main10 for P010"); config->profileGUID = NV_ENC_HEVC_PROFILE_MAIN10_GUID; profile_is_10bpc = true; } else { @@ -600,6 +620,13 @@ static bool init_encoder_hevc(struct nvenc_data *enc, obs_data_t *settings) hevc_config->outputBitDepth = profile_is_10bpc ? 
NV_ENC_BIT_DEPTH_10 : NV_ENC_BIT_DEPTH_8; #endif +#ifdef NVENC_13_0_OR_LATER + if (is_10_bit(enc) && is_hdr(voi->colorspace)) { + hevc_config->outputMasteringDisplay = 1; + hevc_config->outputMaxCll = 1; + } +#endif + if (!apply_user_args(enc)) { obs_encoder_set_last_error(enc->encoder, obs_module_text("Opts.Invalid")); return false; @@ -687,6 +714,13 @@ static bool init_encoder_av1(struct nvenc_data *enc, obs_data_t *settings) av1_config->numBwdRefs = 1; av1_config->repeatSeqHdr = 1; +#ifdef NVENC_13_0_OR_LATER + if (is_10_bit(enc) && is_hdr(voi->colorspace)) { + av1_config->outputMasteringDisplay = 1; + av1_config->outputMaxCll = 1; + } +#endif + if (!apply_user_args(enc)) { obs_encoder_set_last_error(enc->encoder, obs_module_text("Opts.Invalid")); return false; @@ -773,6 +807,31 @@ static bool init_encoder(struct nvenc_data *enc, enum codec_type codec, obs_data } } +#ifdef NVENC_13_0_OR_LATER + const bool pq = voi->colorspace == VIDEO_CS_2100_PQ; + const bool hlg = voi->colorspace == VIDEO_CS_2100_HLG; + if (pq || hlg) { + enc->cll = bzalloc(sizeof(CONTENT_LIGHT_LEVEL)); + enc->mdi = bzalloc(sizeof(MASTERING_DISPLAY_INFO)); + const uint16_t hdr_nominal_peak_level = pq ? (uint16_t)obs_get_video_hdr_nominal_peak_level() + : (hlg ? 1000 : 0); + /* Currently these are hardcoded across all encoders. 
*/ + enc->mdi->r.x = 13250; + enc->mdi->r.y = 34500; + enc->mdi->g.x = 7500; + enc->mdi->g.y = 3000; + enc->mdi->b.x = 34000; + enc->mdi->b.y = 16000; + enc->mdi->whitePoint.x = 15635; + enc->mdi->whitePoint.y = 16450; + enc->mdi->maxLuma = hdr_nominal_peak_level * 10000; + enc->mdi->minLuma = 0; + + enc->cll->maxContentLightLevel = hdr_nominal_peak_level; + enc->cll->maxPicAverageLightLevel = hdr_nominal_peak_level; + } +#endif + switch (enc->codec) { case CODEC_HEVC: return init_encoder_hevc(enc, settings); @@ -985,6 +1044,13 @@ static void nvenc_destroy(void *data) bfree(enc->sei); bfree(enc->roi_map); +#ifdef NVENC_13_0_OR_LATER + if (enc->mdi) + bfree(enc->mdi); + if (enc->cll) + bfree(enc->cll); +#endif + deque_free(&enc->dts_list); da_free(enc->surfaces); @@ -1204,6 +1270,21 @@ bool nvenc_encode_base(struct nvenc_data *enc, struct nv_bitstream *bs, void *pi : NV_ENC_BUFFER_FORMAT_NV12; } +#ifdef NVENC_13_0_OR_LATER + if (enc->cll) { + if (enc->codec == CODEC_AV1) + params.codecPicParams.av1PicParams.pMaxCll = enc->cll; + else if (enc->codec == CODEC_HEVC) + params.codecPicParams.hevcPicParams.pMaxCll = enc->cll; + } + if (enc->mdi) { + if (enc->codec == CODEC_AV1) + params.codecPicParams.av1PicParams.pMasteringDisplay = enc->mdi; + else if (enc->codec == CODEC_HEVC) + params.codecPicParams.hevcPicParams.pMasteringDisplay = enc->mdi; + } +#endif + /* Add ROI map if enabled */ if (obs_encoder_has_roi(enc->encoder)) add_roi(enc, &params);