Benchmarks: micro benchmark - source code for evaluating NVDEC decoding performance (#560)

**Description**
Add the source code of a micro-benchmark for evaluating NVDEC decoding performance.
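The benchmark decodes a configurable number of videos through a pool of NVDEC decoder sessions and reports the aggregate FPS together with per-video and per-frame latency percentiles (P50/P90/P95/P99). As a rough usage sketch (the binary name comes from the CMake project below; the flags are documented in the tool's `-h` output): `cuda_decode_performance -i input.mp4 -thread 5 -total 100 -codec h264`.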

---------

Co-authored-by: yukirora <yuting.jiang@microsoft.com>
Yuting Jiang 2023-08-22 18:56:33 +08:00, committed by GitHub
Parent 6c0205cece
Commit 27a10811af
No key found matching this signature
GPG key ID: 4AEE18F83AFDEB23
19 changed files: 5208 additions and 5 deletions

View file

@@ -11,7 +11,7 @@ pool:
container:
image: nvcr.io/nvidia/pytorch:20.12-py3
options: '-v /var/run/docker.sock:/var/run/docker.sock -v /usr/bin/docker:/usr/bin/docker'
options: '-v /var/run/docker.sock:/var/run/docker.sock -v /usr/bin/docker:/usr/bin/docker -v /usr/bin/sudo:/usr/bin/sudo -v /usr/lib/sudo/:/usr/lib/sudo/'
steps:
- script: |
@@ -21,6 +21,8 @@ steps:
python3 -m pip install --upgrade pip setuptools==65.7
python3 -m pip install .[test,nvworker]
make postinstall
sudo DEBIAN_FRONTEND=noninteractive apt-get update
sudo DEBIAN_FRONTEND=noninteractive apt-get install -y ffmpeg libavcodec-dev libavformat-dev libavutil-dev libswresample-dev
displayName: Install dependencies
- script: |
python3 setup.py lint
@@ -31,7 +33,7 @@ steps:
- script: |
SB_MICRO_PATH=$PWD python3 setup.py test
displayName: Run unit tests
timeoutInMinutes: 15
timeoutInMinutes: 30
- script: |
bash <(curl -s https://codecov.io/bash) -cF cuda-unit-test
displayName: Report coverage results

4
.github/workflows/codeql-analysis.yml vendored
View file

@@ -49,6 +49,10 @@ jobs:
steps:
- name: Checkout
uses: actions/checkout@v3
- name: Install Dependency
run: |
DEBIAN_FRONTEND=noninteractive apt-get update
DEBIAN_FRONTEND=noninteractive apt-get install -y ffmpeg libavcodec-dev libavformat-dev libavutil-dev libswresample-dev sudo
- name: Initialize CodeQL
uses: github/codeql-action/init@v2
with:

3
.gitignore vendored
View file

@@ -9,9 +9,6 @@ __pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/

View file

@@ -26,13 +26,18 @@ RUN apt-get update && \
build-essential \
curl \
dmidecode \
ffmpeg \
git \
iproute2 \
jq \
libaio-dev \
libavcodec-dev \
libavformat-dev \
libavutil-dev \
libcap2 \
libnuma-dev \
libpci-dev \
libswresample-dev \
libtinfo5 \
libtool \
lshw \

View file

@@ -25,14 +25,19 @@ RUN apt-get update && \
build-essential \
curl \
dmidecode \
ffmpeg \
git \
iproute2 \
jq \
libaio-dev \
libavcodec-dev \
libavformat-dev \
libavutil-dev \
libboost-program-options-dev \
libcap2 \
libnuma-dev \
libpci-dev \
libswresample-dev \
libtinfo5 \
libtool \
lshw \

AppDecPerf.cpp
View file

@@ -0,0 +1,454 @@
// Copyright(c) Microsoft Corporation.
// Licensed under the MIT License.
#include <algorithm>
#include <chrono>
#include <cuda.h>
#include <cudaProfiler.h>
#include <fstream>
#include <iostream>
#include <memory>
#include <numeric>
#include <stdio.h>
#include <string.h>
#include <string>
#include <thread>
#include "../Utils/FFmpegDemuxer.h"
#include "../Utils/NvCodecUtils.h"
#include "OptimizedNvDecoder.h"
#include "ThreadPoolUtils.h"
// Define the logger required by the third-party utils
simplelogger::Logger *logger = simplelogger::LoggerFactory::CreateConsoleLogger();
// Define the codec map
std::map<std::string, cudaVideoCodec_enum> codecMap = {
{"mpeg1", cudaVideoCodec_MPEG1}, {"mpeg2", cudaVideoCodec_MPEG2}, {"mpeg4", cudaVideoCodec_MPEG4},
{"vc1", cudaVideoCodec_VC1}, {"h264", cudaVideoCodec_H264}, {"jpeg", cudaVideoCodec_JPEG},
{"h264_svc", cudaVideoCodec_H264_SVC}, {"h264_mvc", cudaVideoCodec_H264_MVC}, {"hevc", cudaVideoCodec_HEVC},
{"vp8", cudaVideoCodec_VP8}, {"vp9", cudaVideoCodec_VP9}, {"av1", cudaVideoCodec_AV1}};
/**
* @brief Function to decode video file using OptimizedNvDecoder interface
* @param pDec - Handle to OptimizedNvDecoder
* @param szInFilePath - Path of the input video file to demux and decode
* @param pnFrame - Variable to record the number of frames decoded
* @param ex - Stores current exception in case of failure
*/
void DecProc(OptimizedNvDecoder *pDec, const char *szInFilePath, int *pnFrame, std::exception_ptr &ex) {
try {
std::unique_ptr<FFmpegDemuxer> demuxer(new FFmpegDemuxer(szInFilePath));
int nVideoBytes = 0, nFrameReturned = 0, nFrame = 0;
uint8_t *pVideo = NULL, *pFrame = NULL;
do {
// Demux video from file using FFmpegDemuxer
demuxer->Demux(&pVideo, &nVideoBytes);
// Decode the video frame from demuxed packet
nFrameReturned = pDec->Decode(pVideo, nVideoBytes);
if (!nFrame && nFrameReturned)
LOG(INFO) << pDec->GetVideoInfo();
nFrame += nFrameReturned;
} while (nVideoBytes);
*pnFrame = nFrame;
} catch (std::exception &) {
ex = std::current_exception();
}
}
/**
* @brief Function to show help message and exit
*/
void ShowHelpAndExit(const char *szBadOption = NULL) {
std::ostringstream oss;
bool bThrowError = false;
if (szBadOption) {
bThrowError = true;
oss << "Error parsing \"" << szBadOption << "\"" << std::endl;
}
oss << "Options:" << std::endl
<< "-i Input file path. No default value. One of -i and -multi_input is required." << std::endl
<< "-o Output file path of raw data. No default value. Optional." << std::endl
<< "-gpu Ordinal of GPU to use. Default 0. Optional." << std::endl
<< "-thread Number of decoding thread. Default 5. Optional." << std::endl
<< "-total Number of total video to test. Default 100. Optional." << std::endl
<< "-single (No value) Use single cuda context for every thread. Default is multi-context, one context "
"per thread."
<< std::endl
<< "-host (No value) Copy frame to host memory .Default is device memory)" << std::endl
<< "-multi_input The file path which lists the path of multiple video in each line." << std::endl
<< "-codec The codec of video to test. Default H264." << std::endl;
if (bThrowError) {
throw std::invalid_argument(oss.str());
} else {
std::cout << oss.str();
exit(0);
}
}
/**
* @brief Function to parse commandline arguments
*/
void ParseCommandLine(int argc, char *argv[], char *szInputFileName, int &iGpu, int &nThread, int &nTotalVideo,
bool &bSingle, bool &bHost, std::string &inputFilesListPath, std::string &outputFile,
cudaVideoCodec &codec) {
for (int i = 1; i < argc; i++) {
if (!_stricmp(argv[i], "-h")) {
ShowHelpAndExit();
}
if (!_stricmp(argv[i], "-i")) {
if (++i == argc) {
ShowHelpAndExit("-i");
}
sprintf(szInputFileName, "%s", argv[i]);
continue;
}
if (!_stricmp(argv[i], "-o")) {
if (++i == argc) {
ShowHelpAndExit("-o");
}
outputFile = std::string(argv[i]);
continue;
}
if (!_stricmp(argv[i], "-gpu")) {
if (++i == argc) {
ShowHelpAndExit("-gpu");
}
iGpu = atoi(argv[i]);
continue;
}
if (!_stricmp(argv[i], "-thread")) {
if (++i == argc) {
ShowHelpAndExit("-thread");
}
nThread = atoi(argv[i]);
continue;
}
if (!_stricmp(argv[i], "-total")) {
if (++i == argc) {
ShowHelpAndExit("-total");
}
nTotalVideo = atoi(argv[i]);
continue;
}
if (!_stricmp(argv[i], "-multi_input")) {
if (++i == argc) {
ShowHelpAndExit("-multi_input");
}
inputFilesListPath = std::string(argv[i]);
continue;
}
if (!_stricmp(argv[i], "-single")) {
bSingle = true;
continue;
}
if (!_stricmp(argv[i], "-host")) {
bHost = true;
continue;
}
if (!_stricmp(argv[i], "-codec")) {
if (++i == argc) {
ShowHelpAndExit("-codec");
}
std::string codecName = std::string(argv[i]);
std::transform(codecName.begin(), codecName.end(), codecName.begin(),
[](unsigned char c) { return std::tolower(c); });
if (codecMap.find(codecName) != codecMap.end()) {
codec = codecMap[codecName];
} else {
std::cout << "Codec name not found in the map." << std::endl;
exit(1);
}
continue;
}
ShowHelpAndExit(argv[i]);
}
}
/**
* @brief Function to create cuda context and initialize decoder
*/
OptimizedNvDecoder *InitOptimizedNvDecoder(int i, const CUdevice &cuDevice, CUcontext &cuContext, bool bSingle,
bool bHost, cudaVideoCodec codec, CUVIDDECODECAPS decodecaps) {
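// In multi-context mode each thread creates its own CUDA context; in single-context mode all threads share the context created in InitializeContext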
if (!bSingle) {
ck(cuCtxCreate(&cuContext, 0, cuDevice));
}
OptimizedNvDecoder *sessionObject = new OptimizedNvDecoder(cuContext, !bHost, codec, decodecaps);
sessionObject->setDecoderSessionID(i);
return sessionObject;
}
/**
* @brief Function to decode a video in a thread and measure the latency
*/
double DecodeVideo(size_t i, const std::vector<OptimizedNvDecoder *> &vDec, const char *szInFilePath, int *pnFrame,
std::exception_ptr &ex) {
try {
OptimizedNvDecoder *pDec = vDec[i];
auto start = std::chrono::high_resolution_clock::now();
DecProc(pDec, szInFilePath, pnFrame, ex);
auto end = std::chrono::high_resolution_clock::now();
auto elapsedTime = std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
std::cout << "Decode finished --"
<< " duration:" << elapsedTime << " frames:" << *pnFrame << std::endl;
return elapsedTime / 1000.0f;
} catch (const std::exception &e) {
std::cerr << "Exception in deocding: " << e.what() << std::endl;
return 0;
}
}
/**
* @brief Function to read the video paths from a file
*/
std::vector<std::string> ReadMultipleVideoFiles(const std::string &filepath) {
std::ifstream file(filepath);
if (!file) {
std::cerr << "Error opening the file." << std::endl;
exit(1);
}
std::string line;
std::vector<std::string> tokens;
while (std::getline(file, line)) {
tokens.push_back(line);
}
file.close();
return tokens;
}
/**
* @brief Function to get the decoder capability
*/
void GetDefaultDecoderCaps(CUVIDDECODECAPS &decodecaps, cudaVideoCodec codec) {
memset(&decodecaps, 0, sizeof(decodecaps));
decodecaps.eCodecType = codec;
decodecaps.eChromaFormat = cudaVideoChromaFormat_420;
decodecaps.nBitDepthMinus8 = 0;
NVDEC_API_CALL(cuvidGetDecoderCaps(&decodecaps));
}
/**
* @brief Function to initialize the cuda device, cuda context, query the decoder capability and create decoder for
* each thread
*/
void InitializeContext(std::vector<OptimizedNvDecoder *> &vDec, int iGpu, int nThread, bool bSingle, bool bHost,
cudaVideoCodec codec) {
ck(cuInit(0));
int nGpu = 0;
ck(cuDeviceGetCount(&nGpu));
if (iGpu < 0 || iGpu >= nGpu) {
std::cout << "GPU ordinal out of range. Should be within [" << 0 << ", " << nGpu - 1 << "]" << std::endl;
exit(1);
}
CUdevice cuDevice = 0;
ck(cuDeviceGet(&cuDevice, iGpu));
char szDeviceName[80];
ck(cuDeviceGetName(szDeviceName, sizeof(szDeviceName), cuDevice));
std::cout << "GPU in use: " << szDeviceName << std::endl;
CUcontext cuContext = NULL;
ck(cuCtxCreate(&cuContext, 0, cuDevice));
CUVIDDECODECAPS decodecaps;
GetDefaultDecoderCaps(decodecaps, codec);
ThreadPool threadPool(nThread);
std::vector<std::future<OptimizedNvDecoder *>> futures;
for (int i = 0; i < nThread; i++) {
futures.push_back(
threadPool.enqueue(InitOptimizedNvDecoder, cuDevice, cuContext, bSingle, bHost, codec, decodecaps));
}
for (auto &future : futures) {
vDec.push_back(future.get()); // Retrieve the results from each task
}
}
/**
* @brief Function to write the latency and FPS data of each video to a file
*/
void WriteRawData(std::vector<OptimizedNvDecoder *> &vDec, int nThread, const std::vector<double> &data,
std::vector<int> &frames, std::string filename) {
// Open the output file stream
std::ofstream outputFile(filename);
outputFile << "Frame Latency" << std::endl;
for (int i = 0; i < nThread; i++) {
for (const auto &tuple : vDec[i]->GetFrameLatency()) {
int frame = std::get<0>(tuple);
double latency = std::get<1>(tuple);
outputFile << "Frame: " << frame << ", Latency: " << latency << std::endl;
}
}
outputFile << "Video Latency" << std::endl;
for (int i = 0; i < data.size(); i++) {
outputFile << data[i] << std::endl;
}
outputFile << "Video FPS" << std::endl;
for (int i = 0; i < data.size(); i++) {
outputFile << frames[i] / data[i] << std::endl;
}
// Close the file stream
outputFile.close();
}
/**
* @brief Function to calculate the statistical metrics
*/
std::tuple<double, double, double, double, double, double, double, double>
CalMetrics(const std::vector<double> &originData) {
std::vector<double> data = originData;
double sum = std::accumulate(data.begin(), data.end(), 0.0);
double mean = sum / data.size();
double min = *std::min_element(data.begin(), data.end());
double max = *std::max_element(data.begin(), data.end());
std::sort(data.begin(), data.end());
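// Percentiles are read by index from the sorted copy, e.g. p99 = data[floor(0.99 * n)]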
double p50 = data[data.size() / 2];
double p90 = data[static_cast<size_t>(data.size() * 0.9)];
double p95 = data[static_cast<size_t>(data.size() * 0.95)];
double p99 = data[static_cast<size_t>(data.size() * 0.99)];
return std::make_tuple(sum, mean, min, max, p50, p90, p95, p99);
}
/**
* @brief Function to generate the total file list for the given total number of videos.
* If fewer videos are listed than the requested total, the list is repeated;
* if more are listed, it is truncated.
*/
std::vector<std::string> GenerateTotalFileList(const std::string &inputFilesListPath, int nTotalVideo,
const char *szInFilePath) {
std::vector<std::string> files;
if (inputFilesListPath.size() != 0) {
auto videofiles = ReadMultipleVideoFiles(inputFilesListPath);
int smallerSize = videofiles.size();
if (nTotalVideo > smallerSize) {
int numIterations = nTotalVideo / smallerSize;
for (int i = 0; i < numIterations; i++) {
files.insert(files.end(), videofiles.begin(), videofiles.end());
}
int remainingElements = nTotalVideo - (numIterations * smallerSize);
files.insert(files.end(), videofiles.begin(), videofiles.begin() + remainingElements);
} else {
files = std::vector<std::string>(videofiles.begin(), videofiles.begin() + nTotalVideo);
}
std::cout << "Multifile mode - " << nTotalVideo << "videos will be decoded" << std::endl;
} else {
for (int i = 0; i < nTotalVideo; i++) {
files.push_back(std::string(szInFilePath));
}
}
return files;
}
/**
* @brief Function to run the decoding tasks in parallel with thread pool to decode all the videos and record the total
* latency and the total number of frames
*/
float run(std::vector<OptimizedNvDecoder *> &vDec, int nThread, std::vector<std::string> &files,
std::vector<int> &vnFrame, std::vector<std::exception_ptr> &vExceptionPtrs, int *nTotalFrames,
std::vector<double> &vnLatency, std::vector<double> &frLatency, std::vector<double> &vnFPS) {
std::vector<std::future<double>> decodeLatencyFutures;
ThreadPool threadPool(nThread);
// Enqueue the video decoding task into thread pool
auto start = std::chrono::high_resolution_clock::now();
for (int i = 0; i < files.size(); i++) {
auto filePath = files[i].c_str();
CheckInputFile(filePath);
decodeLatencyFutures.push_back(
threadPool.enqueue(DecodeVideo, vDec, filePath, &vnFrame[i], std::ref(vExceptionPtrs[i])));
}
// Wait until decoding tasks finished
for (int i = 0; i < files.size(); i++) {
auto decodeLatency = decodeLatencyFutures[i].get();
vnLatency.push_back(decodeLatency);
*nTotalFrames += vnFrame[i];
}
auto elapsedTime =
(std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::high_resolution_clock::now() - start)
.count()) /
1000.0f;
for (int i = 0; i < nThread; i++) {
for (const auto &tuple : vDec[i]->GetFrameLatency()) {
int frame = std::get<0>(tuple);
double latency = std::get<1>(tuple);
if (frame > 0) {
frLatency.push_back(latency / frame);
}
}
}
for (int i = 0; i < vnLatency.size(); i++) {
if (vnLatency[i] != 0) {
vnFPS.push_back(vnFrame[i] / vnLatency[i]);
}
}
// Record the total time
return elapsedTime;
}
int main(int argc, char **argv) {
char szInFilePath[256] = "";
int iGpu = 0;
int nThread = 5;
int nTotalVideo = 100;
bool bSingle = false;
bool bHost = false;
std::string inputFilesListPath = "";
std::string outputFilePath = "";
cudaVideoCodec codec = cudaVideoCodec_H264;
try {
// Parse the command line arguments
ParseCommandLine(argc, argv, szInFilePath, iGpu, nThread, nTotalVideo, bSingle, bHost, inputFilesListPath,
outputFilePath, codec);
auto files = GenerateTotalFileList(inputFilesListPath, nTotalVideo, szInFilePath);
// Size the per-video exception slots only after -total has been parsed
std::vector<std::exception_ptr> vExceptionPtrs(nTotalVideo);
// Initialize and prepare the decoder context for each thread
std::vector<OptimizedNvDecoder *> vDec;
InitializeContext(vDec, iGpu, nThread, bSingle, bHost, codec);
// Decode all video with thread pool
std::vector<int> vnFrame(nTotalVideo);
int nTotalFrames = 0;
std::vector<double> vnLatency;
std::vector<double> frLatency;
std::vector<double> videoFPS;
auto elapsedTime =
run(vDec, nThread, files, vnFrame, vExceptionPtrs, &nTotalFrames, vnLatency, frLatency, videoFPS);
// Calculate and output the raw data into file and metrics into stdout
double sum, mean, min, max, p50, p90, p95, p99;
std::tie(sum, mean, min, max, p50, p90, p95, p99) = CalMetrics(vnLatency);
std::cout << "Total Frames Decoded=" << nTotalFrames << " FPS=" << nTotalFrames / elapsedTime << std::endl;
std::cout << "Mean Latency for each video=" << mean * 1000 << " P50 Latency=" << p50 * 1000
<< " P90 Latency=" << p90 * 1000 << " P95 Latency=" << p95 * 1000 << " P99 Latency=" << p99 * 1000
<< "ms" << std::endl;
std::tie(sum, mean, min, max, p50, p90, p95, p99) = CalMetrics(videoFPS);
std::cout << "Mean FPS for each video=" << mean << " P50 FPS=" << p50 << " P90 FPS=" << p90
<< " P95 FPS=" << p95 << " P99 FPS=" << p99 << std::endl;
std::tie(sum, mean, min, max, p50, p90, p95, p99) = CalMetrics(frLatency);
std::cout << "Mean Latency for each frame=" << mean * 1000 << " P50 Latency=" << p50 * 1000
<< " P90 Latency=" << p90 * 1000 << " P95 Latency=" << p95 * 1000 << " P99 Latency=" << p99 * 1000
<< "ms" << std::endl;
if (outputFilePath.size() != 0) {
WriteRawData(vDec, nThread, vnLatency, vnFrame, outputFilePath);
}
// Deinitialization
for (int i = 0; i < nThread; i++) {
delete (vDec[i]);
}
for (int i = 0; i < nTotalVideo; i++) {
if (vExceptionPtrs[i]) {
std::rethrow_exception(vExceptionPtrs[i]);
}
}
} catch (const std::exception &ex) {
std::cout << ex.what();
exit(1);
}
return 0;
}

CMakeLists.txt
View file

@@ -0,0 +1,117 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
cmake_minimum_required(VERSION 3.18)
project(cuda_decode_performance)
find_package(CUDA QUIET)
if(CUDA_FOUND)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(THIRD_PARTY_SAMPLE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../../../../third_party/Video_Codec_SDK/Samples)
set(NVCODEC_PUBLIC_INTERFACE_DIR ${THIRD_PARTY_SAMPLE_DIR}/../Interface)
set(NVCODEC_UTILS_DIR ${THIRD_PARTY_SAMPLE_DIR}/Utils)
set(NV_CODEC_DIR ${THIRD_PARTY_SAMPLE_DIR}/NvCodec)
set(NV_DEC_DIR ${THIRD_PARTY_SAMPLE_DIR}/NvCodec/NvDecoder)
if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
find_package(PkgConfig REQUIRED)
pkg_check_modules(PC_AVCODEC REQUIRED IMPORTED_TARGET libavcodec)
pkg_check_modules(PC_AVFORMAT REQUIRED IMPORTED_TARGET libavformat)
pkg_check_modules(PC_AVUTIL REQUIRED IMPORTED_TARGET libavutil)
pkg_check_modules(PC_SWRESAMPLE REQUIRED IMPORTED_TARGET libswresample)
set(NV_FFMPEG_HDRS ${PC_AVCODEC_INCLUDE_DIRS})
find_library(AVCODEC_LIBRARY NAMES avcodec
HINTS
${PC_AVCODEC_LIBDIR}
${PC_AVCODEC_LIBRARY_DIRS}
)
find_library(AVFORMAT_LIBRARY NAMES avformat
HINTS
${PC_AVFORMAT_LIBDIR}
${PC_AVFORMAT_LIBRARY_DIRS}
)
find_library(AVUTIL_LIBRARY NAMES avutil
HINTS
${PC_AVUTIL_LIBDIR}
${PC_AVUTIL_LIBRARY_DIRS}
)
find_library(SWRESAMPLE_LIBRARY NAMES swresample
HINTS
${PC_SWRESAMPLE_LIBDIR}
${PC_SWRESAMPLE_LIBRARY_DIRS}
)
set(AVCODEC_LIB ${AVCODEC_LIBRARY})
set(AVFORMAT_LIB ${AVFORMAT_LIBRARY})
set(AVUTIL_LIB ${AVUTIL_LIBRARY})
set(SWRESAMPLE_LIB ${SWRESAMPLE_LIBRARY})
endif()
set(APP_SOURCES
${CMAKE_CURRENT_SOURCE_DIR}/AppDecPerf.cpp
)
set(NV_DEC_SOURCES
${NV_DEC_DIR}/NvDecoder.cpp
${CMAKE_CURRENT_SOURCE_DIR}/OptimizedNvDecoder.cpp
)
set(NV_DEC_HDRS
${NV_DEC_DIR}/NvDecoder.h
${NVCODEC_PUBLIC_INTERFACE_DIR}/cuviddec.h
${NVCODEC_PUBLIC_INTERFACE_DIR}/nvcuvid.h
${NVCODEC_UTILS_DIR}/NvCodecUtils.h
${NVCODEC_UTILS_DIR}/FFmpegDemuxer.h
${CMAKE_CURRENT_SOURCE_DIR}/ThreadPoolUtils.h
${CMAKE_CURRENT_SOURCE_DIR}/OptimizedNvDecoder.h
)
source_group( "headers" FILES ${NV_DEC_HDRS} )
source_group( "sources" FILES ${APP_SOURCES} ${NV_DEC_SOURCES})
set(CMAKE_LIBRARY_PATH "${CUDA_TOOLKIT_ROOT_DIR}/lib64/stubs;${CUDA_TOOLKIT_ROOT_DIR}/lib/stubs;${CUDA_TOOLKIT_ROOT_DIR}/lib64;${CUDA_TOOLKIT_ROOT_DIR}/lib;${CMAKE_LIBRARY_PATH}")
find_package(CUDA)
set(CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER})
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-gencode arch=compute_50,code=\"sm_50,compute_50\")
if ( CMAKE_COMPILER_IS_GNUCC )
if(NOT "${CUDA_NVCC_FLAGS}" MATCHES "-std=c\\+\\+11" )
list(APPEND CUDA_NVCC_FLAGS -std=c++11)
endif()
endif()
# If the unversioned libnvcuvid.so is missing, create a symlink to the driver's versioned library
if (NOT EXISTS "/usr/local/lib/libnvcuvid.so" )
execute_process(
COMMAND sudo ln -s /usr/lib/x86_64-linux-gnu/libnvcuvid.so.1 /usr/local/lib/libnvcuvid.so
RESULT_VARIABLE result
)
if(result)
message(FATAL_ERROR "Failed to create symbolic link for nvcuvid lib: ${result}")
endif()
endif ()
find_library(CUVID_LIB nvcuvid
HINTS
"/usr/local/lib/"
"${CMAKE_CURRENT_SOURCE_DIR}/../../../../third_party/Video_Codec_SDK/Lib/linux/stubs/x86_64/"
)
cuda_add_executable(${PROJECT_NAME} ${APP_SOURCES} ${NV_DEC_SOURCES} ${NV_DEC_HDRS})
set_target_properties(${PROJECT_NAME} PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
target_include_directories(${PROJECT_NAME} PUBLIC ${CUDA_INCLUDE_DIRS}
${NVCODEC_PUBLIC_INTERFACE_DIR}
${NVCODEC_UTILS_DIR}
${NV_CODEC_DIR}
${NV_APPDEC_COMMON_DIR}
${NV_FFMPEG_HDRS}
${THIRD_PARTY_SAMPLE_DIR}
)
target_link_libraries(${PROJECT_NAME} ${CUDA_CUDA_LIBRARY} ${CMAKE_DL_LIBS} ${CUVID_LIB} ${AVCODEC_LIB}
${AVFORMAT_LIB} ${AVUTIL_LIB} ${SWRESAMPLE_LIB})
install(TARGETS ${PROJECT_NAME} RUNTIME DESTINATION bin LIBRARY DESTINATION lib)
endif()

OptimizedNvDecoder.cpp
View file

@@ -0,0 +1,263 @@
// Copyright(c) Microsoft Corporation.
// Licensed under the MIT License.
#include <cmath>
#include "OptimizedNvDecoder.h"
int OptimizedNvDecoder::Decode(const uint8_t *pData, int nSize, int nFlags, int64_t nTimestamp) {
m_nDecodedFrame = 0;
m_nDecodedFrameReturned = 0;
CUVIDSOURCEDATAPACKET packet = {0};
packet.payload = pData;
packet.payload_size = nSize;
packet.flags = nFlags | CUVID_PKT_TIMESTAMP;
packet.timestamp = nTimestamp;
if (!pData || nSize == 0) {
packet.flags |= CUVID_PKT_ENDOFSTREAM;
}
auto start = std::chrono::high_resolution_clock::now();
NVDEC_API_CALL(cuvidParseVideoData(m_hParser, &packet));
int64_t elapsedTime =
std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::high_resolution_clock::now() - start)
.count();
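// Record (frames decoded by this call, cuvidParseVideoData latency in seconds)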
frameLatency.push_back(std::make_tuple(m_nDecodedFrame, elapsedTime / 1000.0f / 1000.0f));
return m_nDecodedFrame;
}
OptimizedNvDecoder::OptimizedNvDecoder(CUcontext &cuContext, bool bUseDeviceFrame, cudaVideoCodec eCodec,
CUVIDDECODECAPS decodecaps, bool bLowLatency, bool bDeviceFramePitched,
const Rect *pCropRect, const Dim *pResizeDim, bool extract_user_SEI_Message,
int maxWidth, int maxHeight, unsigned int clkRate, bool force_zero_latency) {
m_cuContext = cuContext;
m_bUseDeviceFrame = bUseDeviceFrame;
m_eCodec = eCodec;
m_bDeviceFramePitched = bDeviceFramePitched;
m_bExtractSEIMessage = extract_user_SEI_Message;
m_nMaxWidth = maxWidth;
m_nMaxHeight = maxHeight;
m_bForce_zero_latency = force_zero_latency;
if (pCropRect)
m_cropRect = *pCropRect;
if (pResizeDim)
m_resizeDim = *pResizeDim;
CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext));
NVDEC_API_CALL(cuvidCtxLockCreate(&m_ctxLock, cuContext));
ck(cuStreamCreate(&m_cuvidStream, CU_STREAM_DEFAULT));
decoderSessionID = 0;
if (m_bExtractSEIMessage) {
m_fpSEI = fopen("sei_message.txt", "wb");
m_pCurrSEIMessage = new CUVIDSEIMESSAGEINFO;
memset(&m_SEIMessagesDisplayOrder, 0, sizeof(m_SEIMessagesDisplayOrder));
}
CUVIDPARSERPARAMS videoParserParameters = {};
videoParserParameters.CodecType = eCodec;
videoParserParameters.ulMaxNumDecodeSurfaces = 1;
videoParserParameters.ulClockRate = clkRate;
videoParserParameters.ulMaxDisplayDelay = bLowLatency ? 0 : 1;
videoParserParameters.pUserData = this;
videoParserParameters.pfnSequenceCallback = HandleVideoSequenceProc;
videoParserParameters.pfnDecodePicture = HandlePictureDecodeProc;
videoParserParameters.pfnDisplayPicture = m_bForce_zero_latency ? NULL : HandlePictureDisplayProc;
videoParserParameters.pfnGetOperatingPoint = HandleOperatingPointProc;
videoParserParameters.pfnGetSEIMsg = m_bExtractSEIMessage ? HandleSEIMessagesProc : NULL;
NVDEC_API_CALL(cuvidCreateVideoParser(&m_hParser, &videoParserParameters));
// Reuse the decoder caps queried beforehand instead of calling cuvidGetDecoderCaps per session
m_decodecaps = decodecaps;
CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
}
int OptimizedNvDecoder::HandleVideoSequence(CUVIDEOFORMAT *pVideoFormat) {
START_TIMER
m_videoInfo.str("");
m_videoInfo.clear();
m_videoInfo << "Video Input Information" << std::endl
<< "\tCodec : " << GetVideoCodecString(pVideoFormat->codec) << std::endl
<< "\tFrame rate : " << pVideoFormat->frame_rate.numerator << "/"
<< pVideoFormat->frame_rate.denominator << " = "
<< 1.0 * pVideoFormat->frame_rate.numerator / pVideoFormat->frame_rate.denominator << " fps"
<< std::endl
<< "\tSequence : " << (pVideoFormat->progressive_sequence ? "Progressive" : "Interlaced")
<< std::endl
<< "\tCoded size : [" << pVideoFormat->coded_width << ", " << pVideoFormat->coded_height << "]"
<< std::endl
<< "\tDisplay area : [" << pVideoFormat->display_area.left << ", " << pVideoFormat->display_area.top
<< ", " << pVideoFormat->display_area.right << ", " << pVideoFormat->display_area.bottom << "]"
<< std::endl
<< "\tChroma : " << GetVideoChromaFormatString(pVideoFormat->chroma_format) << std::endl
<< "\tBit depth : " << pVideoFormat->bit_depth_luma_minus8 + 8;
m_videoInfo << std::endl;
int nDecodeSurface = pVideoFormat->min_num_decode_surfaces;
// Re-query cuvidGetDecoderCaps only when the video codec, chroma format, or bit depth changes
if (m_decodecaps.eCodecType != pVideoFormat->codec || m_decodecaps.eChromaFormat != pVideoFormat->chroma_format ||
m_decodecaps.nBitDepthMinus8 != pVideoFormat->bit_depth_luma_minus8) {
m_decodecaps.eCodecType = pVideoFormat->codec;
m_decodecaps.eChromaFormat = pVideoFormat->chroma_format;
m_decodecaps.nBitDepthMinus8 = pVideoFormat->bit_depth_luma_minus8;
CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext));
NVDEC_API_CALL(cuvidGetDecoderCaps(&m_decodecaps));
CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
}
if (!m_decodecaps.bIsSupported) {
NVDEC_THROW_ERROR("Codec not supported on this GPU", CUDA_ERROR_NOT_SUPPORTED);
return nDecodeSurface;
}
if ((pVideoFormat->coded_width > m_decodecaps.nMaxWidth) ||
(pVideoFormat->coded_height > m_decodecaps.nMaxHeight)) {
std::ostringstream errorString;
errorString << std::endl
<< "Resolution : " << pVideoFormat->coded_width << "x" << pVideoFormat->coded_height
<< std::endl
<< "Max Supported (wxh) : " << m_decodecaps.nMaxWidth << "x" << m_decodecaps.nMaxHeight << std::endl
<< "Resolution not supported on this GPU";
const std::string cErr = errorString.str();
NVDEC_THROW_ERROR(cErr, CUDA_ERROR_NOT_SUPPORTED);
return nDecodeSurface;
}
if ((pVideoFormat->coded_width >> 4) * (pVideoFormat->coded_height >> 4) > m_decodecaps.nMaxMBCount) {
std::ostringstream errorString;
errorString << std::endl
<< "MBCount : " << (pVideoFormat->coded_width >> 4) * (pVideoFormat->coded_height >> 4)
<< std::endl
<< "Max Supported mbcnt : " << m_decodecaps.nMaxMBCount << std::endl
<< "MBCount not supported on this GPU";
NVDEC_THROW_ERROR(errorString.str(), CUDA_ERROR_NOT_SUPPORTED);
return nDecodeSurface;
}
if (m_nWidth && m_nLumaHeight && m_nChromaHeight) {
// cuvidCreateDecoder() has been called before, and now there's possible config change
return ReconfigureDecoder(pVideoFormat);
}
// eCodec has been set in the constructor (for parser). Here it's set again for potential correction
m_eCodec = pVideoFormat->codec;
m_eChromaFormat = pVideoFormat->chroma_format;
m_nBitDepthMinus8 = pVideoFormat->bit_depth_luma_minus8;
m_nBPP = m_nBitDepthMinus8 > 0 ? 2 : 1;
// Set the output surface format same as chroma format
if (m_eChromaFormat == cudaVideoChromaFormat_420 || m_eChromaFormat == cudaVideoChromaFormat_Monochrome)
m_eOutputFormat =
pVideoFormat->bit_depth_luma_minus8 ? cudaVideoSurfaceFormat_P016 : cudaVideoSurfaceFormat_NV12;
else if (m_eChromaFormat == cudaVideoChromaFormat_444)
m_eOutputFormat =
pVideoFormat->bit_depth_luma_minus8 ? cudaVideoSurfaceFormat_YUV444_16Bit : cudaVideoSurfaceFormat_YUV444;
else if (m_eChromaFormat == cudaVideoChromaFormat_422)
m_eOutputFormat = cudaVideoSurfaceFormat_NV12; // no 4:2:2 output format supported yet so make 420 default
// Check if the output format is supported. If not, check fallback options
if (!(m_decodecaps.nOutputFormatMask & (1 << m_eOutputFormat))) {
if (m_decodecaps.nOutputFormatMask & (1 << cudaVideoSurfaceFormat_NV12))
m_eOutputFormat = cudaVideoSurfaceFormat_NV12;
else if (m_decodecaps.nOutputFormatMask & (1 << cudaVideoSurfaceFormat_P016))
m_eOutputFormat = cudaVideoSurfaceFormat_P016;
else if (m_decodecaps.nOutputFormatMask & (1 << cudaVideoSurfaceFormat_YUV444))
m_eOutputFormat = cudaVideoSurfaceFormat_YUV444;
else if (m_decodecaps.nOutputFormatMask & (1 << cudaVideoSurfaceFormat_YUV444_16Bit))
m_eOutputFormat = cudaVideoSurfaceFormat_YUV444_16Bit;
else
NVDEC_THROW_ERROR("No supported output format found", CUDA_ERROR_NOT_SUPPORTED);
}
m_videoFormat = *pVideoFormat;
CUVIDDECODECREATEINFO videoDecodeCreateInfo = {0};
videoDecodeCreateInfo.CodecType = pVideoFormat->codec;
videoDecodeCreateInfo.ChromaFormat = pVideoFormat->chroma_format;
videoDecodeCreateInfo.OutputFormat = m_eOutputFormat;
videoDecodeCreateInfo.bitDepthMinus8 = pVideoFormat->bit_depth_luma_minus8;
if (pVideoFormat->progressive_sequence)
videoDecodeCreateInfo.DeinterlaceMode = cudaVideoDeinterlaceMode_Weave;
else
videoDecodeCreateInfo.DeinterlaceMode = cudaVideoDeinterlaceMode_Adaptive;
videoDecodeCreateInfo.ulNumOutputSurfaces = 2;
// With PreferCUVID, JPEG is still decoded by CUDA while video is decoded by NVDEC hardware
videoDecodeCreateInfo.ulCreationFlags = cudaVideoCreate_PreferCUVID;
videoDecodeCreateInfo.ulNumDecodeSurfaces = nDecodeSurface;
videoDecodeCreateInfo.vidLock = m_ctxLock;
videoDecodeCreateInfo.ulWidth = pVideoFormat->coded_width;
videoDecodeCreateInfo.ulHeight = pVideoFormat->coded_height;
// AV1 has max width/height of sequence in sequence header
if (pVideoFormat->codec == cudaVideoCodec_AV1 && pVideoFormat->seqhdr_data_length > 0) {
CUVIDEOFORMATEX *vidFormatEx = (CUVIDEOFORMATEX *)pVideoFormat;
if (m_nMaxWidth < pVideoFormat->coded_width) {
m_nMaxWidth = vidFormatEx->av1.max_width;
}
if (m_nMaxHeight < pVideoFormat->coded_height) {
m_nMaxHeight = vidFormatEx->av1.max_height;
}
}
if (m_nMaxWidth < (int)pVideoFormat->coded_width)
m_nMaxWidth = pVideoFormat->coded_width;
if (m_nMaxHeight < (int)pVideoFormat->coded_height)
m_nMaxHeight = pVideoFormat->coded_height;
videoDecodeCreateInfo.ulMaxWidth = m_nMaxWidth;
videoDecodeCreateInfo.ulMaxHeight = m_nMaxHeight;
if (!(m_cropRect.r && m_cropRect.b) && !(m_resizeDim.w && m_resizeDim.h)) {
m_nWidth = pVideoFormat->display_area.right - pVideoFormat->display_area.left;
m_nLumaHeight = pVideoFormat->display_area.bottom - pVideoFormat->display_area.top;
videoDecodeCreateInfo.ulTargetWidth = pVideoFormat->coded_width;
videoDecodeCreateInfo.ulTargetHeight = pVideoFormat->coded_height;
} else {
if (m_resizeDim.w && m_resizeDim.h) {
videoDecodeCreateInfo.display_area.left = pVideoFormat->display_area.left;
videoDecodeCreateInfo.display_area.top = pVideoFormat->display_area.top;
videoDecodeCreateInfo.display_area.right = pVideoFormat->display_area.right;
videoDecodeCreateInfo.display_area.bottom = pVideoFormat->display_area.bottom;
m_nWidth = m_resizeDim.w;
m_nLumaHeight = m_resizeDim.h;
}
if (m_cropRect.r && m_cropRect.b) {
videoDecodeCreateInfo.display_area.left = m_cropRect.l;
videoDecodeCreateInfo.display_area.top = m_cropRect.t;
videoDecodeCreateInfo.display_area.right = m_cropRect.r;
videoDecodeCreateInfo.display_area.bottom = m_cropRect.b;
m_nWidth = m_cropRect.r - m_cropRect.l;
m_nLumaHeight = m_cropRect.b - m_cropRect.t;
}
videoDecodeCreateInfo.ulTargetWidth = m_nWidth;
videoDecodeCreateInfo.ulTargetHeight = m_nLumaHeight;
}
m_nChromaHeight = (int)(ceil(m_nLumaHeight * GetChromaHeightFactor(m_eOutputFormat)));
m_nNumChromaPlanes = GetChromaPlaneCount(m_eOutputFormat);
m_nSurfaceHeight = videoDecodeCreateInfo.ulTargetHeight;
m_nSurfaceWidth = videoDecodeCreateInfo.ulTargetWidth;
m_displayRect.b = videoDecodeCreateInfo.display_area.bottom;
m_displayRect.t = videoDecodeCreateInfo.display_area.top;
m_displayRect.l = videoDecodeCreateInfo.display_area.left;
m_displayRect.r = videoDecodeCreateInfo.display_area.right;
m_videoInfo << "Video Decoding Params:" << std::endl
<< "\tNum Surfaces : " << videoDecodeCreateInfo.ulNumDecodeSurfaces << std::endl
<< "\tCrop : [" << videoDecodeCreateInfo.display_area.left << ", "
<< videoDecodeCreateInfo.display_area.top << ", " << videoDecodeCreateInfo.display_area.right << ", "
<< videoDecodeCreateInfo.display_area.bottom << "]" << std::endl
<< "\tResize : " << videoDecodeCreateInfo.ulTargetWidth << "x"
<< videoDecodeCreateInfo.ulTargetHeight << std::endl
<< "\tDeinterlace : "
<< std::vector<const char *>{"Weave", "Bob", "Adaptive"}[videoDecodeCreateInfo.DeinterlaceMode];
m_videoInfo << std::endl;
CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext));
NVDEC_API_CALL(cuvidCreateDecoder(&m_hDecoder, &videoDecodeCreateInfo));
CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
STOP_TIMER("Session Initialization Time: ");
NvDecoder::addDecoderSessionOverHead(getDecoderSessionID(), elapsedTime);
return nDecodeSurface;
}

OptimizedNvDecoder.h
View file

@@ -0,0 +1,52 @@
// Copyright(c) Microsoft Corporation.
// Licensed under the MIT License.
#include "NvDecoder/NvDecoder.h"
// This class derives from NvDecoder and optimizes away the repeated cuvidGetDecoderCaps overhead
class OptimizedNvDecoder : public NvDecoder {
public:
OptimizedNvDecoder() {}
/**
* @brief This function is used to initialize the decoder session.
* Application must call this function to initialize the decoder, before
* starting to decode any frames.
* The only difference from the original constructor is the new member m_decodecaps;
* the rest is identical to the original function, refer to NvDecoder.cpp in the NVIDIA Video Codec SDK.
*/
OptimizedNvDecoder(CUcontext &cuContext, bool bUseDeviceFrame, cudaVideoCodec eCodec, CUVIDDECODECAPS decodecaps,
bool bLowLatency = false, bool bDeviceFramePitched = false, const Rect *pCropRect = NULL,
const Dim *pResizeDim = NULL, bool extract_user_SEI_Message = false, int maxWidth = 0,
int maxHeight = 0, unsigned int clkRate = 1000, bool force_zero_latency = false);
/**
* @brief This function overrides the original Decode function to record the latency at frame level.
*/
int Decode(const uint8_t *pData, int nSize, int nFlags = 0, int64_t nTimestamp = 0);
/**
* @brief This function returns the frameLatency vector.
*/
std::vector<std::tuple<int, double>> &GetFrameLatency() { return frameLatency; }
protected:
/**
* @brief Callback function to be registered for getting a callback when decoding of sequence starts
*/
static int CUDAAPI HandleVideoSequenceProc(void *pUserData, CUVIDEOFORMAT *pVideoFormat) {
if (pUserData == nullptr) {
throw std::runtime_error("pUserData is nullptr");
}
return ((OptimizedNvDecoder *)pUserData)->HandleVideoSequence(pVideoFormat);
}
/**
* @brief Define the new handler invoked when decoding of a sequence starts.
* The only change is to re-query the decoder caps when the video codec or format changes;
* the rest is identical to the original function, refer to NvDecoder.cpp in the NVIDIA Video Codec SDK.
*/
int HandleVideoSequence(CUVIDEOFORMAT *pVideoFormat);
CUVIDDECODECAPS m_decodecaps;
std::vector<std::tuple<int, double>> frameLatency;
};
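For context, a minimal sketch of the intended usage pattern (hypothetical helper functions; assumes a current CUDA context and the NVDEC headers): the decoder caps are queried once up front and the same struct is handed to every session, keeping cuvidGetDecoderCaps off the per-session path.

#include <cstring>
#include "OptimizedNvDecoder.h"

// Hypothetical helpers sketching the caps-reuse pattern used by AppDecPerf.cpp.
void QueryH264Caps(CUVIDDECODECAPS &caps) {
    memset(&caps, 0, sizeof(caps));
    caps.eCodecType = cudaVideoCodec_H264;
    caps.eChromaFormat = cudaVideoChromaFormat_420;
    caps.nBitDepthMinus8 = 0;
    NVDEC_API_CALL(cuvidGetDecoderCaps(&caps)); // one-time query; needs a current CUDA context
}

OptimizedNvDecoder *CreateH264Session(CUcontext &cuContext, const CUVIDDECODECAPS &caps) {
    // Every session reuses the caps instead of re-querying them in its constructor.
    return new OptimizedNvDecoder(cuContext, /*bUseDeviceFrame=*/true, cudaVideoCodec_H264, caps);
}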

ThreadPoolUtils.h
View file

@@ -0,0 +1,99 @@
// Copyright(c) Microsoft Corporation.
// Licensed under the MIT License.
#include <condition_variable>
#include <functional>
#include <future>
#include <mutex>
#include <queue>
#include <thread>
#include <vector>
// ThreadPool is a simple thread pool implementation. Tasks are enqueued with custom arguments and are
// invoked as task(thread_index, args...), so every task receives the index of the worker thread running it.
class ThreadPool {
public:
/**
* @brief Construct a new ThreadPool object with the given number of threads.
*/
ThreadPool(size_t numThreads) {
for (size_t i = 0; i < numThreads; ++i) {
threads.emplace_back(&ThreadPool::worker, this, i);
}
}
/**
* @brief Destroy the ThreadPool object and join all threads.
*/
~ThreadPool() {
{
std::unique_lock<std::mutex> lock(mutex);
stop = true;
}
cv.notify_all();
for (auto &thread : threads) {
thread.join();
}
}
/**
* @brief TaskWrapper wraps a callable so that it can be invoked as task(thread_index, args...),
* delivering its result through a std::packaged_task.
*/
template <typename R, typename F, typename... Args> struct TaskWrapper {
std::shared_ptr<std::packaged_task<R(size_t)>> task;
template <typename Callable, typename... CallableArgs> TaskWrapper(Callable &&f, CallableArgs &&...args) {
task = std::make_shared<std::packaged_task<R(size_t)>>(
[f, args...](size_t threadIdx) mutable { return f(threadIdx, args...); });
}
void operator()(size_t threadIdx) { (*task)(threadIdx); }
};
/**
* @brief Enqueues a task with custom arguments and returns a future that yields the task's result when finished.
*/
template <typename F, typename... Args>
auto enqueue(F &&f, Args &&...args) -> std::future<typename std::result_of<F(size_t, Args...)>::type> {
using ReturnType = typename std::result_of<F(size_t, Args...)>::type;
TaskWrapper<ReturnType, F, Args...> wrapper(std::forward<F>(f), std::forward<Args>(args)...);
std::future<ReturnType> res = wrapper.task->get_future();
{
std::unique_lock<std::mutex> lock(mutex);
tasks.emplace(std::move(wrapper));
}
cv.notify_one();
return res;
}
private:
/**
* @brief The worker loop for each thread: dequeues tasks and executes them with this worker's thread index.
*/
void worker(size_t threadIdx) {
while (true) {
std::function<void(size_t)> task;
{
std::unique_lock<std::mutex> lock(mutex);
cv.wait(lock, [this] { return stop || !tasks.empty(); });
if (stop && tasks.empty()) {
return;
}
task = tasks.front();
tasks.pop();
}
task(threadIdx);
}
}
std::vector<std::thread> threads;
std::queue<std::function<void(size_t)>> tasks;
std::mutex mutex;
std::condition_variable cv;
bool stop = false;
};
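A minimal usage sketch (hypothetical driver code, assuming this header is saved as ThreadPoolUtils.h): the pool invokes every callable as task(thread_index, args...), which is how AppDecPerf.cpp maps each decoding task onto a per-thread decoder.

#include <iostream>
#include "ThreadPoolUtils.h"

int main() {
    ThreadPool pool(4); // spawns four worker threads
    // The first parameter of the callable receives the index of the worker that runs it.
    auto fut = pool.enqueue([](size_t threadIdx, int x) { return static_cast<int>(threadIdx) + x; }, 10);
    std::cout << "result: " << fut.get() << std::endl; // blocks until the task has run
    return 0; // ~ThreadPool() joins all workers
}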

1173
third_party/Video_Codec_SDK/Interface/cuviddec.h vendored Normal file

File diff suppressed because of its large size.

486
third_party/Video_Codec_SDK/Interface/nvcuvid.h vendored Normal file
View file

@@ -0,0 +1,486 @@
/*
* This copyright notice applies to this header file only:
*
* Copyright (c) 2010-2023 NVIDIA Corporation
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the software, and to permit persons to whom the
* software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/********************************************************************************************************************/
//! \file nvcuvid.h
//! NVDECODE API provides video decoding interface to NVIDIA GPU devices.
//! \date 2015-2022
//! This file contains the interface constants, structure definitions and function prototypes.
/********************************************************************************************************************/
#if !defined(__NVCUVID_H__)
#define __NVCUVID_H__
#include "cuviddec.h"
#if defined(__cplusplus)
extern "C" {
#endif /* __cplusplus */
#define MAX_CLOCK_TS 3
/***********************************************/
//!
//! High-level helper APIs for video sources
//!
/***********************************************/
typedef void *CUvideosource;
typedef void *CUvideoparser;
typedef long long CUvideotimestamp;
/************************************************************************/
//! \enum cudaVideoState
//! Video source state enums
//! Used in cuvidSetVideoSourceState and cuvidGetVideoSourceState APIs
/************************************************************************/
typedef enum {
cudaVideoState_Error = -1, /**< Error state (invalid source) */
cudaVideoState_Stopped = 0, /**< Source is stopped (or reached end-of-stream) */
cudaVideoState_Started = 1 /**< Source is running and delivering data */
} cudaVideoState;
/************************************************************************/
//! \enum cudaAudioCodec
//! Audio compression enums
//! Used in CUAUDIOFORMAT structure
/************************************************************************/
typedef enum {
cudaAudioCodec_MPEG1 = 0, /**< MPEG-1 Audio */
cudaAudioCodec_MPEG2, /**< MPEG-2 Audio */
cudaAudioCodec_MP3, /**< MPEG-1 Layer III Audio */
cudaAudioCodec_AC3, /**< Dolby Digital (AC3) Audio */
cudaAudioCodec_LPCM, /**< PCM Audio */
cudaAudioCodec_AAC, /**< AAC Audio */
} cudaAudioCodec;
/************************************************************************/
//! \ingroup STRUCTS
//! \struct HEVCTIMECODESET
//! Used to store Time code extracted from Time code SEI in HEVC codec
/************************************************************************/
typedef struct _HEVCTIMECODESET {
unsigned int time_offset_value;
unsigned short n_frames;
unsigned char clock_timestamp_flag;
unsigned char units_field_based_flag;
unsigned char counting_type;
unsigned char full_timestamp_flag;
unsigned char discontinuity_flag;
unsigned char cnt_dropped_flag;
unsigned char seconds_value;
unsigned char minutes_value;
unsigned char hours_value;
unsigned char seconds_flag;
unsigned char minutes_flag;
unsigned char hours_flag;
unsigned char time_offset_length;
unsigned char reserved;
} HEVCTIMECODESET;
/************************************************************************/
//! \ingroup STRUCTS
//! \struct HEVCSEITIMECODE
//! Used to extract Time code SEI in HEVC codec
/************************************************************************/
typedef struct _HEVCSEITIMECODE {
HEVCTIMECODESET time_code_set[MAX_CLOCK_TS];
unsigned char num_clock_ts;
} HEVCSEITIMECODE;
/**********************************************************************************/
//! \ingroup STRUCTS
//! \struct CUSEIMESSAGE;
//! Used in CUVIDSEIMESSAGEINFO structure
/**********************************************************************************/
typedef struct _CUSEIMESSAGE {
unsigned char sei_message_type; /**< OUT: SEI Message Type */
unsigned char reserved[3];
unsigned int sei_message_size; /**< OUT: SEI Message Size */
} CUSEIMESSAGE;
/************************************************************************************************/
//! \ingroup STRUCTS
//! \struct CUVIDEOFORMAT
//! Video format
//! Used in cuvidGetSourceVideoFormat API
/************************************************************************************************/
typedef struct {
cudaVideoCodec codec; /**< OUT: Compression format */
/**
* OUT: frame rate = numerator / denominator (for example: 30000/1001)
*/
struct {
/**< OUT: frame rate numerator (0 = unspecified or variable frame rate) */
unsigned int numerator;
/**< OUT: frame rate denominator (0 = unspecified or variable frame rate) */
unsigned int denominator;
} frame_rate;
unsigned char progressive_sequence; /**< OUT: 0=interlaced, 1=progressive */
unsigned char bit_depth_luma_minus8; /**< OUT: high bit depth luma. E.g, 2 for 10-bitdepth, 4 for 12-bitdepth */
unsigned char bit_depth_chroma_minus8; /**< OUT: high bit depth chroma. E.g, 2 for 10-bitdepth, 4 for 12-bitdepth */
unsigned char min_num_decode_surfaces; /**< OUT: Minimum number of decode surfaces to be allocated for correct
decoding. The client can send this value in ulNumDecodeSurfaces
(in CUVIDDECODECREATEINFO structure).
This guarantees correct functionality and optimal video memory
usage but not necessarily the best performance, which depends on
the design of the overall application. The optimal number of
decode surfaces (in terms of performance and memory utilization)
should be decided by experimentation for each application, but it
cannot go below min_num_decode_surfaces.
If this value is used for ulNumDecodeSurfaces then it must be
returned to parser during sequence callback. */
unsigned int coded_width; /**< OUT: coded frame width in pixels */
unsigned int coded_height; /**< OUT: coded frame height in pixels */
/**
* area of the frame that should be displayed
* typical example:
* coded_width = 1920, coded_height = 1088
* display_area = { 0,0,1920,1080 }
*/
struct {
int left; /**< OUT: left position of display rect */
int top; /**< OUT: top position of display rect */
int right; /**< OUT: right position of display rect */
int bottom; /**< OUT: bottom position of display rect */
} display_area;
cudaVideoChromaFormat chroma_format; /**< OUT: Chroma format */
unsigned int bitrate; /**< OUT: video bitrate (bps, 0=unknown) */
/**
* OUT: Display Aspect Ratio = x:y (4:3, 16:9, etc)
*/
struct {
int x;
int y;
} display_aspect_ratio;
/**
* Video Signal Description
* Refer section E.2.1 (VUI parameters semantics) of H264 spec file
*/
struct {
unsigned char video_format : 3; /**< OUT: 0-Component, 1-PAL, 2-NTSC, 3-SECAM, 4-MAC, 5-Unspecified */
unsigned char video_full_range_flag : 1; /**< OUT: indicates the black level and luma and chroma range */
unsigned char reserved_zero_bits : 4; /**< Reserved bits */
unsigned char color_primaries; /**< OUT: chromaticity coordinates of source primaries */
unsigned char
transfer_characteristics; /**< OUT: opto-electronic transfer characteristic of the source picture */
unsigned char matrix_coefficients; /**< OUT: used in deriving luma and chroma signals from RGB primaries */
} video_signal_description;
unsigned int seqhdr_data_length; /**< OUT: Additional bytes following (CUVIDEOFORMATEX) */
} CUVIDEOFORMAT;
/****************************************************************/
//! \ingroup STRUCTS
//! \struct CUVIDOPERATINGPOINTINFO
//! Operating point information of scalable bitstream
/****************************************************************/
typedef struct {
cudaVideoCodec codec;
union {
struct {
unsigned char operating_points_cnt;
unsigned char reserved24_bits[3];
unsigned short operating_points_idc[32];
} av1;
unsigned char CodecReserved[1024];
};
} CUVIDOPERATINGPOINTINFO;
/**********************************************************************************/
//! \ingroup STRUCTS
//! \struct CUVIDSEIMESSAGEINFO
//! Used in cuvidParseVideoData API with PFNVIDSEIMSGCALLBACK pfnGetSEIMsg
/**********************************************************************************/
typedef struct _CUVIDSEIMESSAGEINFO {
void *pSEIData; /**< OUT: SEI Message Data */
CUSEIMESSAGE *pSEIMessage; /**< OUT: SEI Message Info */
unsigned int sei_message_count; /**< OUT: SEI Message Count */
unsigned int picIdx; /**< OUT: SEI Message Pic Index */
} CUVIDSEIMESSAGEINFO;
/****************************************************************/
//! \ingroup STRUCTS
//! \struct CUVIDAV1SEQHDR
//! AV1 specific sequence header information
/****************************************************************/
typedef struct {
unsigned int max_width;
unsigned int max_height;
unsigned char reserved[1016];
} CUVIDAV1SEQHDR;
/****************************************************************/
//! \ingroup STRUCTS
//! \struct CUVIDEOFORMATEX
//! Video format including raw sequence header information
//! Used in cuvidGetSourceVideoFormat API
/****************************************************************/
typedef struct {
CUVIDEOFORMAT format; /**< OUT: CUVIDEOFORMAT structure */
union {
CUVIDAV1SEQHDR av1;
unsigned char raw_seqhdr_data[1024]; /**< OUT: Sequence header data */
};
} CUVIDEOFORMATEX;
/****************************************************************/
//! \ingroup STRUCTS
//! \struct CUAUDIOFORMAT
//! Audio formats
//! Used in cuvidGetSourceAudioFormat API
/****************************************************************/
typedef struct {
cudaAudioCodec codec; /**< OUT: Compression format */
unsigned int channels; /**< OUT: number of audio channels */
unsigned int samplespersec; /**< OUT: sampling frequency */
unsigned int bitrate; /**< OUT: For uncompressed, can also be used to determine bits per sample */
unsigned int reserved1; /**< Reserved for future use */
unsigned int reserved2; /**< Reserved for future use */
} CUAUDIOFORMAT;
/***************************************************************/
//! \enum CUvideopacketflags
//! Data packet flags
//! Used in CUVIDSOURCEDATAPACKET structure
/***************************************************************/
typedef enum {
CUVID_PKT_ENDOFSTREAM = 0x01, /**< Set when this is the last packet for this stream */
CUVID_PKT_TIMESTAMP = 0x02, /**< Timestamp is valid */
CUVID_PKT_DISCONTINUITY = 0x04, /**< Set when a discontinuity has to be signalled */
CUVID_PKT_ENDOFPICTURE = 0x08, /**< Set when the packet contains exactly one frame or one field */
CUVID_PKT_NOTIFY_EOS = 0x10, /**< If this flag is set along with CUVID_PKT_ENDOFSTREAM, an additional (dummy)
display callback will be invoked with null value of CUVIDPARSERDISPINFO which
should be interpreted as end of the stream. */
} CUvideopacketflags;
/*****************************************************************************/
//! \ingroup STRUCTS
//! \struct CUVIDSOURCEDATAPACKET
//! Data Packet
//! Used in cuvidParseVideoData API
//! IN for cuvidParseVideoData
/*****************************************************************************/
typedef struct _CUVIDSOURCEDATAPACKET {
unsigned long flags; /**< IN: Combination of CUVID_PKT_XXX flags */
unsigned long payload_size; /**< IN: number of bytes in the payload (may be zero if EOS flag is set) */
const unsigned char *payload; /**< IN: Pointer to packet payload data (may be NULL if EOS flag is set) */
CUvideotimestamp timestamp; /**< IN: Presentation time stamp (10MHz clock), only valid if
CUVID_PKT_TIMESTAMP flag is set */
} CUVIDSOURCEDATAPACKET;
// Callback for packet delivery
typedef int(CUDAAPI *PFNVIDSOURCECALLBACK)(void *, CUVIDSOURCEDATAPACKET *);
/**************************************************************************************************************************/
//! \ingroup STRUCTS
//! \struct CUVIDSOURCEPARAMS
//! Describes parameters needed in cuvidCreateVideoSource API
//! NVDECODE API is intended for HW accelerated video decoding so CUvideosource doesn't have audio demuxer for all
//! supported containers. It's recommended to clients to use their own or third party demuxer if audio support is
//! needed.
/**************************************************************************************************************************/
typedef struct _CUVIDSOURCEPARAMS {
unsigned int ulClockRate; /**< IN: Time stamp units in Hz (0=default=10000000Hz) */
unsigned int bAnnexb : 1; /**< IN: AV1 annexB stream */
unsigned int uReserved : 31; /**< Reserved for future use - set to zero */
unsigned int uReserved1[6]; /**< Reserved for future use - set to zero */
void *pUserData; /**< IN: User private data passed in to the data handlers */
PFNVIDSOURCECALLBACK pfnVideoDataHandler; /**< IN: Called to deliver video packets */
PFNVIDSOURCECALLBACK pfnAudioDataHandler; /**< IN: Called to deliver audio packets. */
void *pvReserved2[8]; /**< Reserved for future use - set to NULL */
} CUVIDSOURCEPARAMS;
/**********************************************/
//! \ingroup ENUMS
//! \enum CUvideosourceformat_flags
//! CUvideosourceformat_flags
//! Used in cuvidGetSourceVideoFormat API
/**********************************************/
typedef enum {
CUVID_FMT_EXTFORMATINFO = 0x100 /**< Return extended format structure (CUVIDEOFORMATEX) */
} CUvideosourceformat_flags;
#if !defined(__APPLE__)
/***************************************************************************************************************************/
//! \ingroup FUNCTS
//! \fn CUresult CUDAAPI cuvidCreateVideoSource(CUvideosource *pObj, const char *pszFileName, CUVIDSOURCEPARAMS
//! *pParams) Create CUvideosource object. CUvideosource spawns demultiplexer thread that provides two callbacks:
//! pfnVideoDataHandler() and pfnAudioDataHandler()
//! NVDECODE API is intended for HW accelerated video decoding so CUvideosource doesn't have audio demuxer for all
//! supported containers. It's recommended to clients to use their own or third party demuxer if audio support is
//! needed.
/***************************************************************************************************************************/
CUresult CUDAAPI cuvidCreateVideoSource(CUvideosource *pObj, const char *pszFileName, CUVIDSOURCEPARAMS *pParams);
/***************************************************************************************************************************/
//! \ingroup FUNCTS
//! \fn CUresult CUDAAPI cuvidCreateVideoSourceW(CUvideosource *pObj, const wchar_t *pwszFileName, CUVIDSOURCEPARAMS
//! *pParams) Create video source
/***************************************************************************************************************************/
CUresult CUDAAPI cuvidCreateVideoSourceW(CUvideosource *pObj, const wchar_t *pwszFileName, CUVIDSOURCEPARAMS *pParams);
/********************************************************************/
//! \ingroup FUNCTS
//! \fn CUresult CUDAAPI cuvidDestroyVideoSource(CUvideosource obj)
//! Destroy video source
/********************************************************************/
CUresult CUDAAPI cuvidDestroyVideoSource(CUvideosource obj);
/******************************************************************************************/
//! \ingroup FUNCTS
//! \fn CUresult CUDAAPI cuvidSetVideoSourceState(CUvideosource obj, cudaVideoState state)
//! Set video source state to:
//! cudaVideoState_Started - to signal the source to run and deliver data
//! cudaVideoState_Stopped - to stop the source from delivering the data
//! cudaVideoState_Error - invalid source
/******************************************************************************************/
CUresult CUDAAPI cuvidSetVideoSourceState(CUvideosource obj, cudaVideoState state);
/******************************************************************************************/
//! \ingroup FUNCTS
//! \fn cudaVideoState CUDAAPI cuvidGetVideoSourceState(CUvideosource obj)
//! Get video source state
//! Returns:
//! cudaVideoState_Started - if Source is running and delivering data
//! cudaVideoState_Stopped - if Source is stopped or reached end-of-stream
//! cudaVideoState_Error - if Source is in error state
/******************************************************************************************/
cudaVideoState CUDAAPI cuvidGetVideoSourceState(CUvideosource obj);
/******************************************************************************************************************/
//! \ingroup FUNCTS
//! \fn CUresult CUDAAPI cuvidGetSourceVideoFormat(CUvideosource obj, CUVIDEOFORMAT *pvidfmt, unsigned int flags)
//! Gets video source format in pvidfmt, flags is set to combination of CUvideosourceformat_flags as per requirement
/******************************************************************************************************************/
CUresult CUDAAPI cuvidGetSourceVideoFormat(CUvideosource obj, CUVIDEOFORMAT *pvidfmt, unsigned int flags);
/**************************************************************************************************************************/
//! \ingroup FUNCTS
//! \fn CUresult CUDAAPI cuvidGetSourceAudioFormat(CUvideosource obj, CUAUDIOFORMAT *paudfmt, unsigned int flags)
//! Get audio source format
//! NVDECODE API is intended for HW accelerated video decoding so CUvideosource doesn't have audio demuxer for all
//! supported containers. It's recommended to clients to use their own or third party demuxer if audio support is
//! needed.
/**************************************************************************************************************************/
CUresult CUDAAPI cuvidGetSourceAudioFormat(CUvideosource obj, CUAUDIOFORMAT *paudfmt, unsigned int flags);
#endif
/**********************************************************************************/
//! \ingroup STRUCTS
//! \struct CUVIDPARSERDISPINFO
//! Used in cuvidParseVideoData API with PFNVIDDISPLAYCALLBACK pfnDisplayPicture
/**********************************************************************************/
typedef struct _CUVIDPARSERDISPINFO {
int picture_index; /**< OUT: Index of the current picture */
int progressive_frame; /**< OUT: 1 if progressive frame; 0 otherwise */
int top_field_first; /**< OUT: 1 if top field is displayed first; 0 otherwise */
int repeat_first_field; /**< OUT: Number of additional fields (1=ivtc, 2=frame doubling, 4=frame tripling,
-1=unpaired field) */
CUvideotimestamp timestamp; /**< OUT: Presentation time stamp */
} CUVIDPARSERDISPINFO;
/***********************************************************************************************************************/
//! Parser callbacks
//! The parser calls these synchronously from within cuvidParseVideoData(), whenever there is a sequence change or a
//! picture is ready to be decoded and/or displayed. The first argument of each function is the "void *pUserData"
//! member of structure CUVIDPARSERPARAMS. Return values from these callbacks are interpreted as below; if a callback
//! returns failure, cuvidParseVideoData() propagates it to the application. The parser picks a default operating
//! point of 0 and an outputAllLayers flag of 0 if PFNVIDOPPOINTCALLBACK is not set or returns -1 or an invalid
//! operating point.
//! PFNVIDSEQUENCECALLBACK : 0: fail, 1: succeeded, > 1: override dpb size of parser (set by
//!                          CUVIDPARSERPARAMS::ulMaxNumDecodeSurfaces while creating parser)
//! PFNVIDDECODECALLBACK   : 0: fail, >=1: succeeded
//! PFNVIDDISPLAYCALLBACK  : 0: fail, >=1: succeeded
//! PFNVIDOPPOINTCALLBACK  : <0: fail, >=0: succeeded (bit 0-9: OperatingPoint, bit 10: outputAllLayers,
//!                          bit 11-30: reserved)
//! PFNVIDSEIMSGCALLBACK   : 0: fail, >=1: succeeded
/***********************************************************************************************************************/
typedef int(CUDAAPI *PFNVIDSEQUENCECALLBACK)(void *, CUVIDEOFORMAT *);
typedef int(CUDAAPI *PFNVIDDECODECALLBACK)(void *, CUVIDPICPARAMS *);
typedef int(CUDAAPI *PFNVIDDISPLAYCALLBACK)(void *, CUVIDPARSERDISPINFO *);
typedef int(CUDAAPI *PFNVIDOPPOINTCALLBACK)(void *, CUVIDOPERATINGPOINTINFO *);
typedef int(CUDAAPI *PFNVIDSEIMSGCALLBACK)(void *, CUVIDSEIMESSAGEINFO *);
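/***********************************************************************************************************************/
//! Illustrative sketch (benchmark addition, not an SDK declaration): minimal callbacks that follow the return-value
//! conventions documented above. The Example* names are hypothetical placeholders; the real implementations used by
//! this benchmark live in NvDecoder.cpp further below.
/***********************************************************************************************************************/
static int CUDAAPI ExampleSequenceCallback(void *pUserData, CUVIDEOFORMAT *pFormat) {
    // Returning a value > 1 overrides the parser's DPB size (CUVIDPARSERPARAMS::ulMaxNumDecodeSurfaces);
    // 1 keeps the current size and 0 reports failure.
    return pFormat->min_num_decode_surfaces > 1 ? pFormat->min_num_decode_surfaces : 1;
}
static int CUDAAPI ExampleDecodeCallback(void *pUserData, CUVIDPICPARAMS *pPicParams) {
    return 1; // >= 1: succeeded (a real callback would submit pPicParams via cuvidDecodePicture here)
}
static int CUDAAPI ExampleDisplayCallback(void *pUserData, CUVIDPARSERDISPINFO *pDispInfo) {
    return 1; // >= 1: succeeded (pDispInfo arrives in display order)
}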
/**************************************/
//! \ingroup STRUCTS
//! \struct CUVIDPARSERPARAMS
//! Used in cuvidCreateVideoParser API
/**************************************/
typedef struct _CUVIDPARSERPARAMS {
cudaVideoCodec CodecType; /**< IN: cudaVideoCodec_XXX */
unsigned int ulMaxNumDecodeSurfaces; /**< IN: Max # of decode surfaces (parser will cycle through these) */
unsigned int ulClockRate; /**< IN: Timestamp units in Hz (0=default=10000000Hz) */
    unsigned int ulErrorThreshold;       /**< IN: % Error threshold (0-100) for calling pfnDecodePicture (100=always
                                              call pfnDecodePicture even if picture bitstream is fully corrupted) */
unsigned int ulMaxDisplayDelay; /**< IN: Max display queue delay (improves pipelining of decode with display)
0=no delay (recommended values: 2..4) */
unsigned int bAnnexb : 1; /**< IN: AV1 annexB stream */
unsigned int uReserved : 31; /**< Reserved for future use - set to zero */
unsigned int uReserved1[4]; /**< IN: Reserved for future use - set to 0 */
void *pUserData; /**< IN: User data for callbacks */
PFNVIDSEQUENCECALLBACK
pfnSequenceCallback; /**< IN: Called before decoding frames and/or whenever there is a fmt change */
PFNVIDDECODECALLBACK pfnDecodePicture; /**< IN: Called when a picture is ready to be decoded (decode order) */
PFNVIDDISPLAYCALLBACK
pfnDisplayPicture; /**< IN: Called whenever a picture is ready to be displayed (display order) */
PFNVIDOPPOINTCALLBACK pfnGetOperatingPoint; /**< IN: Called from AV1 sequence header to get operating point of a AV1
scalable bitstream */
PFNVIDSEIMSGCALLBACK pfnGetSEIMsg; /**< IN: Called when all SEI messages are parsed for particular frame */
void *pvReserved2[5]; /**< Reserved for future use - set to NULL */
CUVIDEOFORMATEX *pExtVideoInfo; /**< IN: [Optional] sequence header data from system layer */
} CUVIDPARSERPARAMS;
/************************************************************************************************/
//! \ingroup FUNCTS
//! \fn CUresult CUDAAPI cuvidCreateVideoParser(CUvideoparser *pObj, CUVIDPARSERPARAMS *pParams)
//! Create video parser object and initialize
/************************************************************************************************/
CUresult CUDAAPI cuvidCreateVideoParser(CUvideoparser *pObj, CUVIDPARSERPARAMS *pParams);
/************************************************************************************************/
//! \ingroup FUNCTS
//! \fn CUresult CUDAAPI cuvidParseVideoData(CUvideoparser obj, CUVIDSOURCEDATAPACKET *pPacket)
//! Parse the video data from source data packet in pPacket
//! Extracts parameter sets like SPS, PPS, bitstream etc. from pPacket and
//! calls back pfnDecodePicture with CUVIDPICPARAMS data for kicking off HW decoding
//! calls back pfnSequenceCallback with CUVIDEOFORMAT data for initial sequence header or when
//! the decoder encounters a video format change
//! calls back pfnDisplayPicture with CUVIDPARSERDISPINFO data to display a video frame
/************************************************************************************************/
CUresult CUDAAPI cuvidParseVideoData(CUvideoparser obj, CUVIDSOURCEDATAPACKET *pPacket);
/************************************************************************************************/
//! \ingroup FUNCTS
//! \fn CUresult CUDAAPI cuvidDestroyVideoParser(CUvideoparser obj)
//! Destroy the video parser
/************************************************************************************************/
CUresult CUDAAPI cuvidDestroyVideoParser(CUvideoparser obj);
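/************************************************************************************************/
//! Illustrative sketch (benchmark addition, not an SDK declaration): the typical lifecycle of the
//! three functions above, reusing the hypothetical Example* callbacks defined earlier in this
//! header. ExampleParseStream itself is a placeholder, not an SDK entry point.
/************************************************************************************************/
static CUresult ExampleParseStream(const unsigned char *pData, unsigned long nBytes) {
    CUVIDPARSERPARAMS params = {cudaVideoCodec_H264}; // CodecType: assumed elementary stream type
    CUVIDSOURCEDATAPACKET packet = {0};
    CUvideoparser parser = NULL;
    CUresult err;
    params.ulMaxNumDecodeSurfaces = 1; // corrected later by the sequence callback's return value
    params.pfnSequenceCallback = ExampleSequenceCallback;
    params.pfnDecodePicture = ExampleDecodeCallback;
    params.pfnDisplayPicture = ExampleDisplayCallback;
    err = cuvidCreateVideoParser(&parser, &params);
    if (err != CUDA_SUCCESS)
        return err;
    packet.payload = pData;
    packet.payload_size = nBytes;
    packet.flags = CUVID_PKT_ENDOFSTREAM;       // single-shot: flush the parser after this packet
    err = cuvidParseVideoData(parser, &packet); // triggers the callbacks synchronously
    cuvidDestroyVideoParser(parser);
    return err;
}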
/**********************************************************************************************/
#if defined(__cplusplus)
}
#endif /* __cplusplus */
#endif // __NVCUVID_H__

Binary data
third_party/Video_Codec_SDK/Lib/linux/stubs/x86_64/libnvcuvid.so vendored Normal file

Binary file not shown.

View file

@ -0,0 +1,709 @@
/*
* This copyright notice applies to this header file only:
*
* Copyright (c) 2010-2023 NVIDIA Corporation
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the software, and to permit persons to whom the
* software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include <algorithm>
#include <chrono>
#include <cmath>
#include <iostream>
#include "../../../Interface/nvcuvid.h"
#include "NvDecoder/NvDecoder.h"
std::map<int, int64_t> NvDecoder::sessionOverHead = {{0, 0}, {1, 0}};
/**
* @brief This function is used to get codec string from codec id
*/
const char *NvDecoder::GetCodecString(cudaVideoCodec eCodec) { return GetVideoCodecString(eCodec); }
/* Called when the parser encounters sequence header for AV1 SVC content
* return value interpretation:
* < 0 : fail, >=0: succeeded (bit 0-9: currOperatingPoint, bit 10-10: bDispAllLayer, bit 11-30: reserved, must be
* set 0)
*/
int NvDecoder::GetOperatingPoint(CUVIDOPERATINGPOINTINFO *pOPInfo) {
if (pOPInfo->codec == cudaVideoCodec_AV1) {
if (pOPInfo->av1.operating_points_cnt > 1) {
// clip has SVC enabled
if (m_nOperatingPoint >= pOPInfo->av1.operating_points_cnt)
m_nOperatingPoint = 0;
printf("AV1 SVC clip: operating point count %d ", pOPInfo->av1.operating_points_cnt);
printf("Selected operating point: %d, IDC 0x%x bOutputAllLayers %d\n", m_nOperatingPoint,
pOPInfo->av1.operating_points_idc[m_nOperatingPoint], m_bDispAllLayers);
return (m_nOperatingPoint | (m_bDispAllLayers << 10));
}
}
return -1;
}
/* Return values from HandleVideoSequence() are interpreted as:
* 0: fail, 1: succeeded, > 1: override dpb size of parser (set by CUVIDPARSERPARAMS::ulMaxNumDecodeSurfaces while
* creating parser)
*/
int NvDecoder::HandleVideoSequence(CUVIDEOFORMAT *pVideoFormat) {
START_TIMER
m_videoInfo.str("");
m_videoInfo.clear();
m_videoInfo << "Video Input Information" << std::endl
<< "\tCodec : " << GetVideoCodecString(pVideoFormat->codec) << std::endl
<< "\tFrame rate : " << pVideoFormat->frame_rate.numerator << "/"
<< pVideoFormat->frame_rate.denominator << " = "
<< 1.0 * pVideoFormat->frame_rate.numerator / pVideoFormat->frame_rate.denominator << " fps"
<< std::endl
<< "\tSequence : " << (pVideoFormat->progressive_sequence ? "Progressive" : "Interlaced")
<< std::endl
<< "\tCoded size : [" << pVideoFormat->coded_width << ", " << pVideoFormat->coded_height << "]"
<< std::endl
<< "\tDisplay area : [" << pVideoFormat->display_area.left << ", " << pVideoFormat->display_area.top
<< ", " << pVideoFormat->display_area.right << ", " << pVideoFormat->display_area.bottom << "]"
<< std::endl
<< "\tChroma : " << GetVideoChromaFormatString(pVideoFormat->chroma_format) << std::endl
<< "\tBit depth : " << pVideoFormat->bit_depth_luma_minus8 + 8;
m_videoInfo << std::endl;
int nDecodeSurface = pVideoFormat->min_num_decode_surfaces;
CUVIDDECODECAPS decodecaps;
memset(&decodecaps, 0, sizeof(decodecaps));
decodecaps.eCodecType = pVideoFormat->codec;
decodecaps.eChromaFormat = pVideoFormat->chroma_format;
decodecaps.nBitDepthMinus8 = pVideoFormat->bit_depth_luma_minus8;
CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext));
NVDEC_API_CALL(cuvidGetDecoderCaps(&decodecaps));
CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
if (!decodecaps.bIsSupported) {
NVDEC_THROW_ERROR("Codec not supported on this GPU", CUDA_ERROR_NOT_SUPPORTED);
return nDecodeSurface;
}
if ((pVideoFormat->coded_width > decodecaps.nMaxWidth) || (pVideoFormat->coded_height > decodecaps.nMaxHeight)) {
std::ostringstream errorString;
errorString << std::endl
<< "Resolution : " << pVideoFormat->coded_width << "x" << pVideoFormat->coded_height
<< std::endl
<< "Max Supported (wxh) : " << decodecaps.nMaxWidth << "x" << decodecaps.nMaxHeight << std::endl
<< "Resolution not supported on this GPU";
const std::string cErr = errorString.str();
NVDEC_THROW_ERROR(cErr, CUDA_ERROR_NOT_SUPPORTED);
return nDecodeSurface;
}
if ((pVideoFormat->coded_width >> 4) * (pVideoFormat->coded_height >> 4) > decodecaps.nMaxMBCount) {
std::ostringstream errorString;
errorString << std::endl
<< "MBCount : " << (pVideoFormat->coded_width >> 4) * (pVideoFormat->coded_height >> 4)
<< std::endl
<< "Max Supported mbcnt : " << decodecaps.nMaxMBCount << std::endl
<< "MBCount not supported on this GPU";
const std::string cErr = errorString.str();
NVDEC_THROW_ERROR(cErr, CUDA_ERROR_NOT_SUPPORTED);
return nDecodeSurface;
}
if (m_nWidth && m_nLumaHeight && m_nChromaHeight) {
// cuvidCreateDecoder() has been called before, and now there's possible config change
return ReconfigureDecoder(pVideoFormat);
}
// eCodec has been set in the constructor (for parser). Here it's set again for potential correction
m_eCodec = pVideoFormat->codec;
m_eChromaFormat = pVideoFormat->chroma_format;
m_nBitDepthMinus8 = pVideoFormat->bit_depth_luma_minus8;
m_nBPP = m_nBitDepthMinus8 > 0 ? 2 : 1;
// Set the output surface format same as chroma format
    if (m_eChromaFormat == cudaVideoChromaFormat_420 || m_eChromaFormat == cudaVideoChromaFormat_Monochrome)
m_eOutputFormat =
pVideoFormat->bit_depth_luma_minus8 ? cudaVideoSurfaceFormat_P016 : cudaVideoSurfaceFormat_NV12;
else if (m_eChromaFormat == cudaVideoChromaFormat_444)
m_eOutputFormat =
pVideoFormat->bit_depth_luma_minus8 ? cudaVideoSurfaceFormat_YUV444_16Bit : cudaVideoSurfaceFormat_YUV444;
else if (m_eChromaFormat == cudaVideoChromaFormat_422)
m_eOutputFormat = cudaVideoSurfaceFormat_NV12; // no 4:2:2 output format supported yet so make 420 default
    // Check if the output format is supported. If not, check fallback options
if (!(decodecaps.nOutputFormatMask & (1 << m_eOutputFormat))) {
if (decodecaps.nOutputFormatMask & (1 << cudaVideoSurfaceFormat_NV12))
m_eOutputFormat = cudaVideoSurfaceFormat_NV12;
else if (decodecaps.nOutputFormatMask & (1 << cudaVideoSurfaceFormat_P016))
m_eOutputFormat = cudaVideoSurfaceFormat_P016;
else if (decodecaps.nOutputFormatMask & (1 << cudaVideoSurfaceFormat_YUV444))
m_eOutputFormat = cudaVideoSurfaceFormat_YUV444;
else if (decodecaps.nOutputFormatMask & (1 << cudaVideoSurfaceFormat_YUV444_16Bit))
m_eOutputFormat = cudaVideoSurfaceFormat_YUV444_16Bit;
else
NVDEC_THROW_ERROR("No supported output format found", CUDA_ERROR_NOT_SUPPORTED);
}
m_videoFormat = *pVideoFormat;
CUVIDDECODECREATEINFO videoDecodeCreateInfo = {0};
videoDecodeCreateInfo.CodecType = pVideoFormat->codec;
videoDecodeCreateInfo.ChromaFormat = pVideoFormat->chroma_format;
videoDecodeCreateInfo.OutputFormat = m_eOutputFormat;
videoDecodeCreateInfo.bitDepthMinus8 = pVideoFormat->bit_depth_luma_minus8;
if (pVideoFormat->progressive_sequence)
videoDecodeCreateInfo.DeinterlaceMode = cudaVideoDeinterlaceMode_Weave;
else
videoDecodeCreateInfo.DeinterlaceMode = cudaVideoDeinterlaceMode_Adaptive;
videoDecodeCreateInfo.ulNumOutputSurfaces = 2;
// With PreferCUVID, JPEG is still decoded by CUDA while video is decoded by NVDEC hardware
videoDecodeCreateInfo.ulCreationFlags = cudaVideoCreate_PreferCUVID;
videoDecodeCreateInfo.ulNumDecodeSurfaces = nDecodeSurface;
videoDecodeCreateInfo.vidLock = m_ctxLock;
videoDecodeCreateInfo.ulWidth = pVideoFormat->coded_width;
videoDecodeCreateInfo.ulHeight = pVideoFormat->coded_height;
// AV1 has max width/height of sequence in sequence header
if (pVideoFormat->codec == cudaVideoCodec_AV1 && pVideoFormat->seqhdr_data_length > 0) {
        // don't overwrite if it is already set from cmdline or reconfig.txt
if (!(m_nMaxWidth > pVideoFormat->coded_width || m_nMaxHeight > pVideoFormat->coded_height)) {
CUVIDEOFORMATEX *vidFormatEx = (CUVIDEOFORMATEX *)pVideoFormat;
m_nMaxWidth = vidFormatEx->av1.max_width;
m_nMaxHeight = vidFormatEx->av1.max_height;
}
}
if (m_nMaxWidth < (int)pVideoFormat->coded_width)
m_nMaxWidth = pVideoFormat->coded_width;
if (m_nMaxHeight < (int)pVideoFormat->coded_height)
m_nMaxHeight = pVideoFormat->coded_height;
videoDecodeCreateInfo.ulMaxWidth = m_nMaxWidth;
videoDecodeCreateInfo.ulMaxHeight = m_nMaxHeight;
if (!(m_cropRect.r && m_cropRect.b) && !(m_resizeDim.w && m_resizeDim.h)) {
m_nWidth = pVideoFormat->display_area.right - pVideoFormat->display_area.left;
m_nLumaHeight = pVideoFormat->display_area.bottom - pVideoFormat->display_area.top;
videoDecodeCreateInfo.ulTargetWidth = pVideoFormat->coded_width;
videoDecodeCreateInfo.ulTargetHeight = pVideoFormat->coded_height;
} else {
if (m_resizeDim.w && m_resizeDim.h) {
videoDecodeCreateInfo.display_area.left = pVideoFormat->display_area.left;
videoDecodeCreateInfo.display_area.top = pVideoFormat->display_area.top;
videoDecodeCreateInfo.display_area.right = pVideoFormat->display_area.right;
videoDecodeCreateInfo.display_area.bottom = pVideoFormat->display_area.bottom;
m_nWidth = m_resizeDim.w;
m_nLumaHeight = m_resizeDim.h;
}
if (m_cropRect.r && m_cropRect.b) {
videoDecodeCreateInfo.display_area.left = m_cropRect.l;
videoDecodeCreateInfo.display_area.top = m_cropRect.t;
videoDecodeCreateInfo.display_area.right = m_cropRect.r;
videoDecodeCreateInfo.display_area.bottom = m_cropRect.b;
m_nWidth = m_cropRect.r - m_cropRect.l;
m_nLumaHeight = m_cropRect.b - m_cropRect.t;
}
videoDecodeCreateInfo.ulTargetWidth = m_nWidth;
videoDecodeCreateInfo.ulTargetHeight = m_nLumaHeight;
}
m_nChromaHeight = (int)(ceil(m_nLumaHeight * GetChromaHeightFactor(m_eOutputFormat)));
m_nNumChromaPlanes = GetChromaPlaneCount(m_eOutputFormat);
m_nSurfaceHeight = videoDecodeCreateInfo.ulTargetHeight;
m_nSurfaceWidth = videoDecodeCreateInfo.ulTargetWidth;
m_displayRect.b = videoDecodeCreateInfo.display_area.bottom;
m_displayRect.t = videoDecodeCreateInfo.display_area.top;
m_displayRect.l = videoDecodeCreateInfo.display_area.left;
m_displayRect.r = videoDecodeCreateInfo.display_area.right;
m_videoInfo << "Video Decoding Params:" << std::endl
<< "\tNum Surfaces : " << videoDecodeCreateInfo.ulNumDecodeSurfaces << std::endl
<< "\tCrop : [" << videoDecodeCreateInfo.display_area.left << ", "
<< videoDecodeCreateInfo.display_area.top << ", " << videoDecodeCreateInfo.display_area.right << ", "
<< videoDecodeCreateInfo.display_area.bottom << "]" << std::endl
<< "\tResize : " << videoDecodeCreateInfo.ulTargetWidth << "x"
<< videoDecodeCreateInfo.ulTargetHeight << std::endl
<< "\tDeinterlace : "
<< std::vector<const char *>{"Weave", "Bob", "Adaptive"}[videoDecodeCreateInfo.DeinterlaceMode];
m_videoInfo << std::endl;
CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext));
NVDEC_API_CALL(cuvidCreateDecoder(&m_hDecoder, &videoDecodeCreateInfo));
CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
STOP_TIMER("Session Initialization Time: ");
NvDecoder::addDecoderSessionOverHead(getDecoderSessionID(), elapsedTime);
return nDecodeSurface;
}
int NvDecoder::ReconfigureDecoder(CUVIDEOFORMAT *pVideoFormat) {
if (pVideoFormat->bit_depth_luma_minus8 != m_videoFormat.bit_depth_luma_minus8 ||
pVideoFormat->bit_depth_chroma_minus8 != m_videoFormat.bit_depth_chroma_minus8) {
NVDEC_THROW_ERROR("Reconfigure Not supported for bit depth change", CUDA_ERROR_NOT_SUPPORTED);
}
if (pVideoFormat->chroma_format != m_videoFormat.chroma_format) {
NVDEC_THROW_ERROR("Reconfigure Not supported for chroma format change", CUDA_ERROR_NOT_SUPPORTED);
}
bool bDecodeResChange = !(pVideoFormat->coded_width == m_videoFormat.coded_width &&
pVideoFormat->coded_height == m_videoFormat.coded_height);
bool bDisplayRectChange = !(pVideoFormat->display_area.bottom == m_videoFormat.display_area.bottom &&
pVideoFormat->display_area.top == m_videoFormat.display_area.top &&
pVideoFormat->display_area.left == m_videoFormat.display_area.left &&
pVideoFormat->display_area.right == m_videoFormat.display_area.right);
int nDecodeSurface = pVideoFormat->min_num_decode_surfaces;
if ((pVideoFormat->coded_width > m_nMaxWidth) || (pVideoFormat->coded_height > m_nMaxHeight)) {
// For VP9, let driver handle the change if new width/height > maxwidth/maxheight
if ((m_eCodec != cudaVideoCodec_VP9) || m_bReconfigExternal) {
NVDEC_THROW_ERROR("Reconfigure Not supported when width/height > maxwidth/maxheight",
CUDA_ERROR_NOT_SUPPORTED);
}
return 1;
}
if (!bDecodeResChange && !m_bReconfigExtPPChange) {
// if the coded_width/coded_height hasn't changed but display resolution has changed, then need to update
// width/height for correct output without cropping. Example : 1920x1080 vs 1920x1088
if (bDisplayRectChange) {
m_nWidth = pVideoFormat->display_area.right - pVideoFormat->display_area.left;
m_nLumaHeight = pVideoFormat->display_area.bottom - pVideoFormat->display_area.top;
m_nChromaHeight = (int)ceil(m_nLumaHeight * GetChromaHeightFactor(m_eOutputFormat));
m_nNumChromaPlanes = GetChromaPlaneCount(m_eOutputFormat);
}
// no need for reconfigureDecoder(). Just return
return 1;
}
CUVIDRECONFIGUREDECODERINFO reconfigParams = {0};
reconfigParams.ulWidth = m_videoFormat.coded_width = pVideoFormat->coded_width;
reconfigParams.ulHeight = m_videoFormat.coded_height = pVideoFormat->coded_height;
    // Don't change the display rect; get scaled output from the decoder. This helps the display app present frames
    // smoothly
reconfigParams.display_area.bottom = m_displayRect.b;
reconfigParams.display_area.top = m_displayRect.t;
reconfigParams.display_area.left = m_displayRect.l;
reconfigParams.display_area.right = m_displayRect.r;
reconfigParams.ulTargetWidth = m_nSurfaceWidth;
reconfigParams.ulTargetHeight = m_nSurfaceHeight;
    // If external reconfigure is called along with a resolution change, do a full reconfigure params update even if
    // the post-processing params have not changed
if ((m_bReconfigExternal && bDecodeResChange) || m_bReconfigExtPPChange) {
// update display rect and target resolution if requested explicitly
m_bReconfigExternal = false;
m_bReconfigExtPPChange = false;
m_videoFormat = *pVideoFormat;
if (!(m_cropRect.r && m_cropRect.b) && !(m_resizeDim.w && m_resizeDim.h)) {
m_nWidth = pVideoFormat->display_area.right - pVideoFormat->display_area.left;
m_nLumaHeight = pVideoFormat->display_area.bottom - pVideoFormat->display_area.top;
reconfigParams.ulTargetWidth = pVideoFormat->coded_width;
reconfigParams.ulTargetHeight = pVideoFormat->coded_height;
} else {
if (m_resizeDim.w && m_resizeDim.h) {
reconfigParams.display_area.left = pVideoFormat->display_area.left;
reconfigParams.display_area.top = pVideoFormat->display_area.top;
reconfigParams.display_area.right = pVideoFormat->display_area.right;
reconfigParams.display_area.bottom = pVideoFormat->display_area.bottom;
m_nWidth = m_resizeDim.w;
m_nLumaHeight = m_resizeDim.h;
}
if (m_cropRect.r && m_cropRect.b) {
reconfigParams.display_area.left = m_cropRect.l;
reconfigParams.display_area.top = m_cropRect.t;
reconfigParams.display_area.right = m_cropRect.r;
reconfigParams.display_area.bottom = m_cropRect.b;
m_nWidth = m_cropRect.r - m_cropRect.l;
m_nLumaHeight = m_cropRect.b - m_cropRect.t;
}
reconfigParams.ulTargetWidth = m_nWidth;
reconfigParams.ulTargetHeight = m_nLumaHeight;
}
m_nChromaHeight = (int)ceil(m_nLumaHeight * GetChromaHeightFactor(m_eOutputFormat));
m_nNumChromaPlanes = GetChromaPlaneCount(m_eOutputFormat);
m_nSurfaceHeight = reconfigParams.ulTargetHeight;
m_nSurfaceWidth = reconfigParams.ulTargetWidth;
m_displayRect.b = reconfigParams.display_area.bottom;
m_displayRect.t = reconfigParams.display_area.top;
m_displayRect.l = reconfigParams.display_area.left;
m_displayRect.r = reconfigParams.display_area.right;
}
reconfigParams.ulNumDecodeSurfaces = nDecodeSurface;
START_TIMER
CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext));
NVDEC_API_CALL(cuvidReconfigureDecoder(m_hDecoder, &reconfigParams));
CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
STOP_TIMER("Session Reconfigure Time: ");
return nDecodeSurface;
}
int NvDecoder::setReconfigParams(const Rect *pCropRect, const Dim *pResizeDim) {
m_bReconfigExternal = true;
m_bReconfigExtPPChange = false;
if (pCropRect) {
if (!((pCropRect->t == m_cropRect.t) && (pCropRect->l == m_cropRect.l) && (pCropRect->b == m_cropRect.b) &&
(pCropRect->r == m_cropRect.r))) {
m_bReconfigExtPPChange = true;
m_cropRect = *pCropRect;
}
}
if (pResizeDim) {
if (!((pResizeDim->w == m_resizeDim.w) && (pResizeDim->h == m_resizeDim.h))) {
m_bReconfigExtPPChange = true;
m_resizeDim = *pResizeDim;
}
}
// Clear existing output buffers of different size
uint8_t *pFrame = NULL;
while (!m_vpFrame.empty()) {
pFrame = m_vpFrame.back();
m_vpFrame.pop_back();
if (m_bUseDeviceFrame) {
CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext));
CUDA_DRVAPI_CALL(cuMemFree((CUdeviceptr)pFrame));
CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
} else {
            delete[] pFrame;
}
}
return 1;
}
/* Return values from HandlePictureDecode() are interpreted as:
* 0: fail, >=1: succeeded
*/
int NvDecoder::HandlePictureDecode(CUVIDPICPARAMS *pPicParams) {
if (!m_hDecoder) {
NVDEC_THROW_ERROR("Decoder not initialized.", CUDA_ERROR_NOT_INITIALIZED);
return false;
}
m_nPicNumInDecodeOrder[pPicParams->CurrPicIdx] = m_nDecodePicCnt++;
CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext));
NVDEC_API_CALL(cuvidDecodePicture(m_hDecoder, pPicParams));
if (m_bForce_zero_latency && ((!pPicParams->field_pic_flag) || (pPicParams->second_field))) {
CUVIDPARSERDISPINFO dispInfo;
memset(&dispInfo, 0, sizeof(dispInfo));
dispInfo.picture_index = pPicParams->CurrPicIdx;
dispInfo.progressive_frame = !pPicParams->field_pic_flag;
dispInfo.top_field_first = pPicParams->bottom_field_flag ^ 1;
HandlePictureDisplay(&dispInfo);
}
CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
return 1;
}
/* Return values from HandlePictureDisplay() are interpreted as:
* 0: fail, >=1: succeeded
*/
int NvDecoder::HandlePictureDisplay(CUVIDPARSERDISPINFO *pDispInfo) {
CUVIDPROCPARAMS videoProcessingParameters = {};
videoProcessingParameters.progressive_frame = pDispInfo->progressive_frame;
videoProcessingParameters.second_field = pDispInfo->repeat_first_field + 1;
videoProcessingParameters.top_field_first = pDispInfo->top_field_first;
videoProcessingParameters.unpaired_field = pDispInfo->repeat_first_field < 0;
videoProcessingParameters.output_stream = m_cuvidStream;
if (m_bExtractSEIMessage) {
if (m_SEIMessagesDisplayOrder[pDispInfo->picture_index].pSEIData) {
// Write SEI Message
uint8_t *seiBuffer = (uint8_t *)(m_SEIMessagesDisplayOrder[pDispInfo->picture_index].pSEIData);
uint32_t seiNumMessages = m_SEIMessagesDisplayOrder[pDispInfo->picture_index].sei_message_count;
CUSEIMESSAGE *seiMessagesInfo = m_SEIMessagesDisplayOrder[pDispInfo->picture_index].pSEIMessage;
if (m_fpSEI) {
for (uint32_t i = 0; i < seiNumMessages; i++) {
                    if (m_eCodec == cudaVideoCodec_H264 || m_eCodec == cudaVideoCodec_H264_SVC ||
                        m_eCodec == cudaVideoCodec_H264_MVC || m_eCodec == cudaVideoCodec_HEVC) {
switch (seiMessagesInfo[i].sei_message_type) {
case SEI_TYPE_TIME_CODE: {
HEVCSEITIMECODE *timecode = (HEVCSEITIMECODE *)seiBuffer;
fwrite(timecode, sizeof(HEVCSEITIMECODE), 1, m_fpSEI);
} break;
case SEI_TYPE_USER_DATA_UNREGISTERED: {
fwrite(seiBuffer, seiMessagesInfo[i].sei_message_size, 1, m_fpSEI);
} break;
}
}
if (m_eCodec == cudaVideoCodec_AV1) {
fwrite(seiBuffer, seiMessagesInfo[i].sei_message_size, 1, m_fpSEI);
}
seiBuffer += seiMessagesInfo[i].sei_message_size;
}
}
free(m_SEIMessagesDisplayOrder[pDispInfo->picture_index].pSEIData);
free(m_SEIMessagesDisplayOrder[pDispInfo->picture_index].pSEIMessage);
}
}
CUdeviceptr dpSrcFrame = 0;
unsigned int nSrcPitch = 0;
CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext));
NVDEC_API_CALL(
cuvidMapVideoFrame(m_hDecoder, pDispInfo->picture_index, &dpSrcFrame, &nSrcPitch, &videoProcessingParameters));
CUVIDGETDECODESTATUS DecodeStatus;
memset(&DecodeStatus, 0, sizeof(DecodeStatus));
CUresult result = cuvidGetDecodeStatus(m_hDecoder, pDispInfo->picture_index, &DecodeStatus);
if (result == CUDA_SUCCESS && (DecodeStatus.decodeStatus == cuvidDecodeStatus_Error ||
DecodeStatus.decodeStatus == cuvidDecodeStatus_Error_Concealed)) {
printf("Decode Error occurred for picture %d\n", m_nPicNumInDecodeOrder[pDispInfo->picture_index]);
}
uint8_t *pDecodedFrame = nullptr;
{
std::lock_guard<std::mutex> lock(m_mtxVPFrame);
if ((unsigned)++m_nDecodedFrame > m_vpFrame.size()) {
// Not enough frames in stock
m_nFrameAlloc++;
uint8_t *pFrame = NULL;
if (m_bUseDeviceFrame) {
if (m_bDeviceFramePitched) {
CUDA_DRVAPI_CALL(cuMemAllocPitch((CUdeviceptr *)&pFrame, &m_nDeviceFramePitch, GetWidth() * m_nBPP,
m_nLumaHeight + (m_nChromaHeight * m_nNumChromaPlanes), 16));
} else {
CUDA_DRVAPI_CALL(cuMemAlloc((CUdeviceptr *)&pFrame, GetFrameSize()));
}
} else {
pFrame = new uint8_t[GetFrameSize()];
}
m_vpFrame.push_back(pFrame);
}
pDecodedFrame = m_vpFrame[m_nDecodedFrame - 1];
}
// Copy luma plane
CUDA_MEMCPY2D m = {0};
m.srcMemoryType = CU_MEMORYTYPE_DEVICE;
m.srcDevice = dpSrcFrame;
m.srcPitch = nSrcPitch;
m.dstMemoryType = m_bUseDeviceFrame ? CU_MEMORYTYPE_DEVICE : CU_MEMORYTYPE_HOST;
m.dstDevice = (CUdeviceptr)(m.dstHost = pDecodedFrame);
m.dstPitch = m_nDeviceFramePitch ? m_nDeviceFramePitch : GetWidth() * m_nBPP;
m.WidthInBytes = GetWidth() * m_nBPP;
m.Height = m_nLumaHeight;
CUDA_DRVAPI_CALL(cuMemcpy2DAsync(&m, m_cuvidStream));
// Copy chroma plane
// NVDEC output has luma height aligned by 2. Adjust chroma offset by aligning height
m.srcDevice = (CUdeviceptr)((uint8_t *)dpSrcFrame + m.srcPitch * ((m_nSurfaceHeight + 1) & ~1));
m.dstDevice = (CUdeviceptr)(m.dstHost = pDecodedFrame + m.dstPitch * m_nLumaHeight);
m.Height = m_nChromaHeight;
CUDA_DRVAPI_CALL(cuMemcpy2DAsync(&m, m_cuvidStream));
if (m_nNumChromaPlanes == 2) {
m.srcDevice = (CUdeviceptr)((uint8_t *)dpSrcFrame + m.srcPitch * ((m_nSurfaceHeight + 1) & ~1) * 2);
m.dstDevice = (CUdeviceptr)(m.dstHost = pDecodedFrame + m.dstPitch * m_nLumaHeight * 2);
m.Height = m_nChromaHeight;
CUDA_DRVAPI_CALL(cuMemcpy2DAsync(&m, m_cuvidStream));
}
CUDA_DRVAPI_CALL(cuStreamSynchronize(m_cuvidStream));
CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
if ((int)m_vTimestamp.size() < m_nDecodedFrame) {
m_vTimestamp.resize(m_vpFrame.size());
}
m_vTimestamp[m_nDecodedFrame - 1] = pDispInfo->timestamp;
NVDEC_API_CALL(cuvidUnmapVideoFrame(m_hDecoder, dpSrcFrame));
return 1;
}
int NvDecoder::GetSEIMessage(CUVIDSEIMESSAGEINFO *pSEIMessageInfo) {
uint32_t seiNumMessages = pSEIMessageInfo->sei_message_count;
CUSEIMESSAGE *seiMessagesInfo = pSEIMessageInfo->pSEIMessage;
size_t totalSEIBufferSize = 0;
if ((pSEIMessageInfo->picIdx < 0) || (pSEIMessageInfo->picIdx >= MAX_FRM_CNT)) {
printf("Invalid picture index (%d)\n", pSEIMessageInfo->picIdx);
return 0;
}
for (uint32_t i = 0; i < seiNumMessages; i++) {
totalSEIBufferSize += seiMessagesInfo[i].sei_message_size;
}
if (!m_pCurrSEIMessage) {
printf("Out of Memory, Allocation failed for m_pCurrSEIMessage\n");
return 0;
}
m_pCurrSEIMessage->pSEIData = malloc(totalSEIBufferSize);
if (!m_pCurrSEIMessage->pSEIData) {
printf("Out of Memory, Allocation failed for SEI Buffer\n");
return 0;
}
memcpy(m_pCurrSEIMessage->pSEIData, pSEIMessageInfo->pSEIData, totalSEIBufferSize);
m_pCurrSEIMessage->pSEIMessage = (CUSEIMESSAGE *)malloc(sizeof(CUSEIMESSAGE) * seiNumMessages);
if (!m_pCurrSEIMessage->pSEIMessage) {
free(m_pCurrSEIMessage->pSEIData);
m_pCurrSEIMessage->pSEIData = NULL;
return 0;
}
memcpy(m_pCurrSEIMessage->pSEIMessage, pSEIMessageInfo->pSEIMessage, sizeof(CUSEIMESSAGE) * seiNumMessages);
m_pCurrSEIMessage->sei_message_count = pSEIMessageInfo->sei_message_count;
m_SEIMessagesDisplayOrder[pSEIMessageInfo->picIdx] = *m_pCurrSEIMessage;
return 1;
}
NvDecoder::NvDecoder(CUcontext cuContext, bool bUseDeviceFrame, cudaVideoCodec eCodec, bool bLowLatency,
bool bDeviceFramePitched, const Rect *pCropRect, const Dim *pResizeDim,
bool extract_user_SEI_Message, int maxWidth, int maxHeight, unsigned int clkRate,
bool force_zero_latency)
: m_cuContext(cuContext), m_bUseDeviceFrame(bUseDeviceFrame), m_eCodec(eCodec),
m_bDeviceFramePitched(bDeviceFramePitched), m_bExtractSEIMessage(extract_user_SEI_Message), m_nMaxWidth(maxWidth),
m_nMaxHeight(maxHeight), m_bForce_zero_latency(force_zero_latency) {
if (pCropRect)
m_cropRect = *pCropRect;
if (pResizeDim)
m_resizeDim = *pResizeDim;
NVDEC_API_CALL(cuvidCtxLockCreate(&m_ctxLock, cuContext));
ck(cuStreamCreate(&m_cuvidStream, CU_STREAM_DEFAULT));
decoderSessionID = 0;
if (m_bExtractSEIMessage) {
m_fpSEI = fopen("sei_message.txt", "wb");
m_pCurrSEIMessage = new CUVIDSEIMESSAGEINFO;
memset(&m_SEIMessagesDisplayOrder, 0, sizeof(m_SEIMessagesDisplayOrder));
}
CUVIDPARSERPARAMS videoParserParameters = {};
videoParserParameters.CodecType = eCodec;
videoParserParameters.ulMaxNumDecodeSurfaces = 1;
videoParserParameters.ulClockRate = clkRate;
videoParserParameters.ulMaxDisplayDelay = bLowLatency ? 0 : 1;
videoParserParameters.pUserData = this;
videoParserParameters.pfnSequenceCallback = HandleVideoSequenceProc;
videoParserParameters.pfnDecodePicture = HandlePictureDecodeProc;
videoParserParameters.pfnDisplayPicture = m_bForce_zero_latency ? NULL : HandlePictureDisplayProc;
videoParserParameters.pfnGetOperatingPoint = HandleOperatingPointProc;
videoParserParameters.pfnGetSEIMsg = m_bExtractSEIMessage ? HandleSEIMessagesProc : NULL;
NVDEC_API_CALL(cuvidCreateVideoParser(&m_hParser, &videoParserParameters));
}
NvDecoder::~NvDecoder() {
START_TIMER
if (m_pCurrSEIMessage) {
delete m_pCurrSEIMessage;
m_pCurrSEIMessage = NULL;
}
if (m_fpSEI) {
fclose(m_fpSEI);
m_fpSEI = NULL;
}
if (m_hParser) {
cuvidDestroyVideoParser(m_hParser);
}
cuCtxPushCurrent(m_cuContext);
if (m_hDecoder) {
cuvidDestroyDecoder(m_hDecoder);
}
std::lock_guard<std::mutex> lock(m_mtxVPFrame);
for (uint8_t *pFrame : m_vpFrame) {
if (m_bUseDeviceFrame) {
cuMemFree((CUdeviceptr)pFrame);
} else {
delete[] pFrame;
}
}
cuCtxPopCurrent(NULL);
cuvidCtxLockDestroy(m_ctxLock);
STOP_TIMER("Session Deinitialization Time: ");
NvDecoder::addDecoderSessionOverHead(getDecoderSessionID(), elapsedTime);
}
int NvDecoder::Decode(const uint8_t *pData, int nSize, int nFlags, int64_t nTimestamp) {
m_nDecodedFrame = 0;
m_nDecodedFrameReturned = 0;
CUVIDSOURCEDATAPACKET packet = {0};
packet.payload = pData;
packet.payload_size = nSize;
packet.flags = nFlags | CUVID_PKT_TIMESTAMP;
packet.timestamp = nTimestamp;
if (!pData || nSize == 0) {
packet.flags |= CUVID_PKT_ENDOFSTREAM;
}
NVDEC_API_CALL(cuvidParseVideoData(m_hParser, &packet));
return m_nDecodedFrame;
}
uint8_t *NvDecoder::GetFrame(int64_t *pTimestamp) {
if (m_nDecodedFrame > 0) {
std::lock_guard<std::mutex> lock(m_mtxVPFrame);
m_nDecodedFrame--;
if (pTimestamp)
*pTimestamp = m_vTimestamp[m_nDecodedFrameReturned];
return m_vpFrame[m_nDecodedFrameReturned++];
}
return NULL;
}
uint8_t *NvDecoder::GetLockedFrame(int64_t *pTimestamp) {
uint8_t *pFrame;
uint64_t timestamp;
if (m_nDecodedFrame > 0) {
std::lock_guard<std::mutex> lock(m_mtxVPFrame);
m_nDecodedFrame--;
pFrame = m_vpFrame[0];
m_vpFrame.erase(m_vpFrame.begin(), m_vpFrame.begin() + 1);
timestamp = m_vTimestamp[0];
m_vTimestamp.erase(m_vTimestamp.begin(), m_vTimestamp.begin() + 1);
if (pTimestamp)
*pTimestamp = timestamp;
return pFrame;
}
return NULL;
}
void NvDecoder::UnlockFrame(uint8_t **pFrame) {
std::lock_guard<std::mutex> lock(m_mtxVPFrame);
m_vpFrame.insert(m_vpFrame.end(), &pFrame[0], &pFrame[1]);
// add a dummy entry for timestamp
uint64_t timestamp[2] = {0};
m_vTimestamp.insert(m_vTimestamp.end(), &timestamp[0], &timestamp[1]);
}
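/* Illustrative sketch (benchmark addition, not part of the original NVIDIA sample): the intended consumption pattern
 * for this class. Decode() reports how many frames became ready, and all of them should be fetched with GetFrame()
 * before the next Decode() call, since that call resets the internal frame counters. DecodeWholeFile is a hypothetical
 * helper; it assumes FFmpegDemuxer.h (and its FFmpeg2NvCodecId() mapping helper) is on the include path.
 */
#include "FFmpegDemuxer.h"
static int DecodeWholeFile(CUcontext cuContext, const char *szInFilePath) {
    FFmpegDemuxer demuxer(szInFilePath);
    NvDecoder dec(cuContext, false /*host frames*/, FFmpeg2NvCodecId(demuxer.GetVideoCodec()));
    int nFrameTotal = 0, nVideoBytes = 0, nFrameReturned = 0;
    uint8_t *pVideo = NULL;
    do {
        demuxer.Demux(&pVideo, &nVideoBytes);
        nFrameReturned = dec.Decode(pVideo, nVideoBytes); // an empty packet flushes the decoder
        for (int i = 0; i < nFrameReturned; i++) {
            int64_t pts = 0;
            uint8_t *pFrame = dec.GetFrame(&pts); // drain every ready frame before the next Decode()
            (void)pFrame;                         // a real consumer would write or verify the frame here
        }
        nFrameTotal += nFrameReturned;
    } while (nVideoBytes);
    return nFrameTotal;
}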

View file

@ -0,0 +1,528 @@
/*
* This copyright notice applies to this header file only:
*
* Copyright (c) 2010-2023 NVIDIA Corporation
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the software, and to permit persons to whom the
* software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include "../../../Interface/nvcuvid.h"
#include "../Utils/NvCodecUtils.h"
#include <assert.h>
#include <iostream>
#include <map>
#include <mutex>
#include <sstream>
#include <stdint.h>
#include <string.h>
#include <string>
#include <vector>
#define MAX_FRM_CNT 32
typedef enum { SEI_TYPE_TIME_CODE = 136, SEI_TYPE_USER_DATA_UNREGISTERED = 5 } SEI_H264_HEVC_PAYLOAD_TYPE;
/**
* @brief Exception class for error reporting from the decode API.
*/
class NVDECException : public std::exception {
public:
NVDECException(const std::string &errorStr, const CUresult errorCode)
: m_errorString(errorStr), m_errorCode(errorCode) {}
virtual ~NVDECException() throw() {}
virtual const char *what() const throw() { return m_errorString.c_str(); }
CUresult getErrorCode() const { return m_errorCode; }
const std::string &getErrorString() const { return m_errorString; }
static NVDECException makeNVDECException(const std::string &errorStr, const CUresult errorCode,
const std::string &functionName, const std::string &fileName, int lineNo);
private:
std::string m_errorString;
CUresult m_errorCode;
};
inline NVDECException NVDECException::makeNVDECException(const std::string &errorStr, const CUresult errorCode,
const std::string &functionName, const std::string &fileName,
int lineNo) {
std::ostringstream errorLog;
errorLog << functionName << " : " << errorStr << " at " << fileName << ":" << lineNo << std::endl;
NVDECException exception(errorLog.str(), errorCode);
return exception;
}
#define NVDEC_THROW_ERROR(errorStr, errorCode) \
do { \
throw NVDECException::makeNVDECException(errorStr, errorCode, __FUNCTION__, __FILE__, __LINE__); \
} while (0)
#define NVDEC_API_CALL(cuvidAPI) \
do { \
CUresult errorCode = cuvidAPI; \
if (errorCode != CUDA_SUCCESS) { \
std::ostringstream errorLog; \
errorLog << #cuvidAPI << " returned error " << errorCode; \
throw NVDECException::makeNVDECException(errorLog.str(), errorCode, __FUNCTION__, __FILE__, __LINE__); \
} \
} while (0)
struct Rect {
int l, t, r, b;
};
struct Dim {
int w, h;
};
#define START_TIMER auto start = std::chrono::high_resolution_clock::now();
#define STOP_TIMER(print_message) \
int64_t elapsedTime = \
std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::high_resolution_clock::now() - start) \
.count(); \
std::cout << print_message << elapsedTime << " ms " << std::endl;
#define CUDA_DRVAPI_CALL(call) \
do { \
CUresult err__ = call; \
if (err__ != CUDA_SUCCESS) { \
const char *szErrName = NULL; \
cuGetErrorName(err__, &szErrName); \
std::ostringstream errorLog; \
errorLog << "CUDA driver API error " << szErrName; \
throw NVDECException::makeNVDECException(errorLog.str(), err__, __FUNCTION__, __FILE__, __LINE__); \
} \
} while (0)
static const char *GetVideoCodecString(cudaVideoCodec eCodec) {
static struct {
cudaVideoCodec eCodec;
const char *name;
} aCodecName[] = {
{cudaVideoCodec_MPEG1, "MPEG-1"},
{cudaVideoCodec_MPEG2, "MPEG-2"},
{cudaVideoCodec_MPEG4, "MPEG-4 (ASP)"},
{cudaVideoCodec_VC1, "VC-1/WMV"},
{cudaVideoCodec_H264, "AVC/H.264"},
{cudaVideoCodec_JPEG, "M-JPEG"},
{cudaVideoCodec_H264_SVC, "H.264/SVC"},
{cudaVideoCodec_H264_MVC, "H.264/MVC"},
{cudaVideoCodec_HEVC, "H.265/HEVC"},
{cudaVideoCodec_VP8, "VP8"},
{cudaVideoCodec_VP9, "VP9"},
{cudaVideoCodec_AV1, "AV1"},
{cudaVideoCodec_NumCodecs, "Invalid"},
{cudaVideoCodec_YUV420, "YUV 4:2:0"},
{cudaVideoCodec_YV12, "YV12 4:2:0"},
{cudaVideoCodec_NV12, "NV12 4:2:0"},
{cudaVideoCodec_YUYV, "YUYV 4:2:2"},
{cudaVideoCodec_UYVY, "UYVY 4:2:2"},
};
if (eCodec >= 0 && eCodec <= cudaVideoCodec_NumCodecs) {
return aCodecName[eCodec].name;
}
for (int i = cudaVideoCodec_NumCodecs + 1; i < sizeof(aCodecName) / sizeof(aCodecName[0]); i++) {
if (eCodec == aCodecName[i].eCodec) {
            return aCodecName[i].name;
}
}
return "Unknown";
}
static const char *GetVideoChromaFormatString(cudaVideoChromaFormat eChromaFormat) {
static struct {
cudaVideoChromaFormat eChromaFormat;
const char *name;
} aChromaFormatName[] = {
{cudaVideoChromaFormat_Monochrome, "YUV 400 (Monochrome)"},
{cudaVideoChromaFormat_420, "YUV 420"},
{cudaVideoChromaFormat_422, "YUV 422"},
{cudaVideoChromaFormat_444, "YUV 444"},
};
if (eChromaFormat >= 0 && eChromaFormat < sizeof(aChromaFormatName) / sizeof(aChromaFormatName[0])) {
return aChromaFormatName[eChromaFormat].name;
}
return "Unknown";
}
static float GetChromaHeightFactor(cudaVideoSurfaceFormat eSurfaceFormat) {
float factor = 0.5;
switch (eSurfaceFormat) {
case cudaVideoSurfaceFormat_NV12:
case cudaVideoSurfaceFormat_P016:
factor = 0.5;
break;
case cudaVideoSurfaceFormat_YUV444:
case cudaVideoSurfaceFormat_YUV444_16Bit:
factor = 1.0;
break;
}
return factor;
}
static int GetChromaPlaneCount(cudaVideoSurfaceFormat eSurfaceFormat) {
int numPlane = 1;
switch (eSurfaceFormat) {
case cudaVideoSurfaceFormat_NV12:
case cudaVideoSurfaceFormat_P016:
numPlane = 1;
break;
case cudaVideoSurfaceFormat_YUV444:
case cudaVideoSurfaceFormat_YUV444_16Bit:
numPlane = 2;
break;
}
return numPlane;
}
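/**
 * @brief Illustrative sketch (benchmark addition, not part of the original header): how the two helpers above combine
 * into a frame size, mirroring NvDecoder::GetFrameSize() below. ExampleFrameSizeBytes is a hypothetical helper.
 */
#include <cmath>
static inline size_t ExampleFrameSizeBytes(cudaVideoSurfaceFormat eSurfaceFormat, int nWidth, int nLumaHeight,
                                           int nBPP) {
    // Total bytes = width * (luma height + chroma height * chroma plane count) * bytes per pixel
    int nChromaHeight = (int)std::ceil(nLumaHeight * GetChromaHeightFactor(eSurfaceFormat));
    return (size_t)nWidth * (nLumaHeight + nChromaHeight * GetChromaPlaneCount(eSurfaceFormat)) * nBPP;
}
// e.g. 1920x1080 NV12 (8-bit): 1920 * (1080 + 540 * 1) * 1 = 3,110,400 bytes per frame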
/**
* @brief Base class for decoder interface.
*/
class NvDecoder {
public:
NvDecoder() {}
/**
* @brief This function is used to initialize the decoder session.
* Application must call this function to initialize the decoder, before
* starting to decode any frames.
*/
NvDecoder(CUcontext cuContext, bool bUseDeviceFrame, cudaVideoCodec eCodec, bool bLowLatency = false,
bool bDeviceFramePitched = false, const Rect *pCropRect = NULL, const Dim *pResizeDim = NULL,
bool extract_user_SEI_Message = false, int maxWidth = 0, int maxHeight = 0, unsigned int clkRate = 1000,
bool force_zero_latency = false);
~NvDecoder();
/**
* @brief This function is used to get the current CUDA context.
*/
CUcontext GetContext() { return m_cuContext; }
/**
* @brief This function is used to get the output frame width.
     * NV12/P016 output format width is 2-byte aligned because of the U and V interleave.
*/
int GetWidth() {
assert(m_nWidth);
return (m_eOutputFormat == cudaVideoSurfaceFormat_NV12 || m_eOutputFormat == cudaVideoSurfaceFormat_P016)
? (m_nWidth + 1) & ~1
: m_nWidth;
}
/**
* @brief This function is used to get the actual decode width
*/
int GetDecodeWidth() {
assert(m_nWidth);
return m_nWidth;
}
/**
* @brief This function is used to get the output frame height (Luma height).
*/
int GetHeight() {
assert(m_nLumaHeight);
return m_nLumaHeight;
}
/**
* @brief This function is used to get the current chroma height.
*/
int GetChromaHeight() {
assert(m_nChromaHeight);
return m_nChromaHeight;
}
/**
* @brief This function is used to get the number of chroma planes.
*/
int GetNumChromaPlanes() {
assert(m_nNumChromaPlanes);
return m_nNumChromaPlanes;
}
/**
* @brief This function is used to get the current frame size based on pixel format.
*/
int GetFrameSize() {
assert(m_nWidth);
return GetWidth() * (m_nLumaHeight + (m_nChromaHeight * m_nNumChromaPlanes)) * m_nBPP;
}
/**
* @brief This function is used to get the current frame Luma plane size.
*/
int GetLumaPlaneSize() {
assert(m_nWidth);
return GetWidth() * m_nLumaHeight * m_nBPP;
}
/**
* @brief This function is used to get the current frame chroma plane size.
*/
int GetChromaPlaneSize() {
assert(m_nWidth);
return GetWidth() * (m_nChromaHeight * m_nNumChromaPlanes) * m_nBPP;
}
/**
* @brief This function is used to get the pitch of the device buffer holding the decoded frame.
*/
int GetDeviceFramePitch() {
assert(m_nWidth);
return m_nDeviceFramePitch ? (int)m_nDeviceFramePitch : GetWidth() * m_nBPP;
}
/**
* @brief This function is used to get the bit depth associated with the pixel format.
*/
int GetBitDepth() {
assert(m_nWidth);
return m_nBitDepthMinus8 + 8;
}
/**
* @brief This function is used to get the bytes used per pixel.
*/
int GetBPP() {
assert(m_nWidth);
return m_nBPP;
}
/**
* @brief This function is used to get the YUV chroma format
*/
cudaVideoSurfaceFormat GetOutputFormat() { return m_eOutputFormat; }
/**
* @brief This function is used to get information about the video stream (codec, display parameters etc)
*/
CUVIDEOFORMAT GetVideoFormatInfo() {
assert(m_nWidth);
return m_videoFormat;
}
/**
* @brief This function is used to get codec string from codec id
*/
const char *GetCodecString(cudaVideoCodec eCodec);
/**
* @brief This function is used to print information about the video stream
*/
std::string GetVideoInfo() const { return m_videoInfo.str(); }
/**
* @brief This function decodes a frame and returns the number of frames that are available for
* display. All frames that are available for display should be read before making a subsequent decode call.
* @param pData - pointer to the data buffer that is to be decoded
* @param nSize - size of the data buffer in bytes
* @param nFlags - CUvideopacketflags for setting decode options
* @param nTimestamp - presentation timestamp
*/
int Decode(const uint8_t *pData, int nSize, int nFlags = 0, int64_t nTimestamp = 0);
/**
* @brief This function returns a decoded frame and timestamp. This function should be called in a loop for
* fetching all the frames that are available for display.
*/
uint8_t *GetFrame(int64_t *pTimestamp = nullptr);
/**
     * @brief This function decodes a frame and returns the locked frame buffers.
     * This makes the buffers available for use by the application without the buffers
     * getting overwritten, even if subsequent decode calls are made. The frame buffers
     * remain locked until UnlockFrame() is called.
*/
uint8_t *GetLockedFrame(int64_t *pTimestamp = nullptr);
/**
* @brief This function unlocks the frame buffer and makes the frame buffers available for write again
     * @param pFrame - pointer to the frame buffer that is to be unlocked
*/
void UnlockFrame(uint8_t **pFrame);
/**
* @brief This function allows app to set decoder reconfig params
* @param pCropRect - cropping rectangle coordinates
* @param pResizeDim - width and height of resized output
*/
int setReconfigParams(const Rect *pCropRect, const Dim *pResizeDim);
/**
* @brief This function allows app to set operating point for AV1 SVC clips
* @param opPoint - operating point of an AV1 scalable bitstream
* @param bDispAllLayers - Output all decoded frames of an AV1 scalable bitstream
*/
void SetOperatingPoint(const uint32_t opPoint, const bool bDispAllLayers) {
m_nOperatingPoint = opPoint;
m_bDispAllLayers = bDispAllLayers;
}
// start a timer
void startTimer() { m_stDecode_time.Start(); }
// stop the timer
double stopTimer() { return m_stDecode_time.Stop(); }
void setDecoderSessionID(int sessionID) { decoderSessionID = sessionID; }
int getDecoderSessionID() { return decoderSessionID; }
// Session overhead refers to decoder initialization and deinitialization time
static void addDecoderSessionOverHead(int sessionID, int64_t duration) { sessionOverHead[sessionID] += duration; }
static int64_t getDecoderSessionOverHead(int sessionID) { return sessionOverHead[sessionID]; }
protected:
int decoderSessionID; // Decoder session identifier. Used to gather session level stats.
static std::map<int, int64_t> sessionOverHead; // Records session overhead of initialization+deinitialization time.
                                                   // Format is (session id, duration)
/**
* @brief Callback function to be registered for getting a callback when decoding of sequence starts
*/
static int CUDAAPI HandleVideoSequenceProc(void *pUserData, CUVIDEOFORMAT *pVideoFormat) {
return ((NvDecoder *)pUserData)->HandleVideoSequence(pVideoFormat);
}
/**
     * @brief Callback function to be registered for getting a callback when a picture is ready to be decoded
*/
static int CUDAAPI HandlePictureDecodeProc(void *pUserData, CUVIDPICPARAMS *pPicParams) {
return ((NvDecoder *)pUserData)->HandlePictureDecode(pPicParams);
}
/**
* @brief Callback function to be registered for getting a callback when a decoded frame is available for display
*/
static int CUDAAPI HandlePictureDisplayProc(void *pUserData, CUVIDPARSERDISPINFO *pDispInfo) {
return ((NvDecoder *)pUserData)->HandlePictureDisplay(pDispInfo);
}
/**
     * @brief Callback function to be registered for getting a callback to get the operating point when an AV1 SVC
     * sequence header starts.
*/
static int CUDAAPI HandleOperatingPointProc(void *pUserData, CUVIDOPERATINGPOINTINFO *pOPInfo) {
return ((NvDecoder *)pUserData)->GetOperatingPoint(pOPInfo);
}
/**
* @brief Callback function to be registered for getting a callback when all the unregistered user SEI Messages
* are parsed for a frame.
*/
static int CUDAAPI HandleSEIMessagesProc(void *pUserData, CUVIDSEIMESSAGEINFO *pSEIMessageInfo) {
return ((NvDecoder *)pUserData)->GetSEIMessage(pSEIMessageInfo);
}
/**
* @brief This function gets called when a sequence is ready to be decoded. The function also gets called
       when there is a format change
*/
int HandleVideoSequence(CUVIDEOFORMAT *pVideoFormat);
/**
* @brief This function gets called when a picture is ready to be decoded. cuvidDecodePicture is called from this
* function to decode the picture
*/
int HandlePictureDecode(CUVIDPICPARAMS *pPicParams);
/**
* @brief This function gets called after a picture is decoded and available for display. Frames are fetched and
stored in internal buffer
*/
int HandlePictureDisplay(CUVIDPARSERDISPINFO *pDispInfo);
/**
     * @brief This function gets called when an AV1 sequence encounters more than one operating point
*/
int GetOperatingPoint(CUVIDOPERATINGPOINTINFO *pOPInfo);
/**
* @brief This function gets called when all unregistered user SEI messages are parsed for a frame
*/
int GetSEIMessage(CUVIDSEIMESSAGEINFO *pSEIMessageInfo);
/**
     * @brief This function reconfigures the decoder if there is a change in sequence params.
*/
int ReconfigureDecoder(CUVIDEOFORMAT *pVideoFormat);
public:
CUcontext m_cuContext = NULL;
CUvideoctxlock m_ctxLock;
CUvideoparser m_hParser = NULL;
CUvideodecoder m_hDecoder = NULL;
bool m_bUseDeviceFrame;
// dimension of the output
unsigned int m_nWidth = 0, m_nLumaHeight = 0, m_nChromaHeight = 0;
unsigned int m_nNumChromaPlanes = 0;
// height of the mapped surface
int m_nSurfaceHeight = 0;
int m_nSurfaceWidth = 0;
cudaVideoCodec m_eCodec = cudaVideoCodec_NumCodecs;
cudaVideoChromaFormat m_eChromaFormat = cudaVideoChromaFormat_420;
cudaVideoSurfaceFormat m_eOutputFormat = cudaVideoSurfaceFormat_NV12;
int m_nBitDepthMinus8 = 0;
int m_nBPP = 1;
CUVIDEOFORMAT m_videoFormat = {};
Rect m_displayRect = {};
// stock of frames
std::vector<uint8_t *> m_vpFrame;
// timestamps of decoded frames
std::vector<int64_t> m_vTimestamp;
int m_nDecodedFrame = 0, m_nDecodedFrameReturned = 0;
int m_nDecodePicCnt = 0, m_nPicNumInDecodeOrder[MAX_FRM_CNT];
CUVIDSEIMESSAGEINFO *m_pCurrSEIMessage = NULL;
CUVIDSEIMESSAGEINFO m_SEIMessagesDisplayOrder[MAX_FRM_CNT];
FILE *m_fpSEI = NULL;
bool m_bEndDecodeDone = false;
std::mutex m_mtxVPFrame;
int m_nFrameAlloc = 0;
CUstream m_cuvidStream = 0;
bool m_bDeviceFramePitched = false;
size_t m_nDeviceFramePitch = 0;
Rect m_cropRect = {};
Dim m_resizeDim = {};
std::ostringstream m_videoInfo;
unsigned int m_nMaxWidth = 0, m_nMaxHeight = 0;
bool m_bReconfigExternal = false;
bool m_bReconfigExtPPChange = false;
StopWatch m_stDecode_time;
unsigned int m_nOperatingPoint = 0;
bool m_bDispAllLayers = false;
// In H.264, there is an inherent display latency for video contents
// which do not have num_reorder_frames=0 in the VUI. This applies to
// All-Intra and IPPP sequences as well. If the user wants zero display
// latency for All-Intra and IPPP sequences, the below flag will enable
// the display callback immediately after the decode callback.
bool m_bForce_zero_latency = false;
bool m_bExtractSEIMessage = false;
};
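/* Illustrative sketch (benchmark addition, not part of the original header): the locked-frame pattern documented
 * above. GetLockedFrame() removes a buffer from the decoder's internal pool so later Decode() calls cannot overwrite
 * it, and UnlockFrame() hands it back. ConsumeLockedFrames is a hypothetical helper, not a sample API.
 */
static inline void ConsumeLockedFrames(NvDecoder &dec, int nFrameReturned) {
    for (int i = 0; i < nFrameReturned; i++) {
        int64_t pts = 0;
        uint8_t *pFrame = dec.GetLockedFrame(&pts);
        if (!pFrame)
            break;
        // ... safe to keep using pFrame across subsequent Decode() calls ...
        dec.UnlockFrame(&pFrame); // return the buffer to the decoder's pool when done
    }
}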

379
third_party/Video_Codec_SDK/Samples/Utils/FFmpegDemuxer.h vendored Normal file
View file

@ -0,0 +1,379 @@
/*
* This copyright notice applies to this header file only:
*
* Copyright (c) 2010-2023 NVIDIA Corporation
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the software, and to permit persons to whom the
* software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
extern "C" {
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavformat/avio.h>
/* Explicitly include bsf.h when building against FFmpeg 4.3 (libavcodec 58.91.100) or later for backward compatibility
 */
#if LIBAVCODEC_VERSION_INT >= 3824484 // 3824484 == AV_VERSION_INT(58, 91, 100)
#include <libavcodec/bsf.h>
#endif
}
#include "NvCodecUtils.h"
#include "nvcuvid.h"
//---------------------------------------------------------------------------
//! \file FFmpegDemuxer.h
//! \brief Provides functionality for stream demuxing
//!
//! This header file is used by Decode/Transcode apps to demux input video clips before decoding frames from it.
//---------------------------------------------------------------------------
/**
* @brief libavformat wrapper class. Retrieves the elementary encoded stream from the container format.
*/
class FFmpegDemuxer {
private:
AVFormatContext *fmtc = NULL;
AVIOContext *avioc = NULL;
AVPacket *pkt = NULL; /*!< AVPacket stores compressed data typically exported by demuxers and then passed as input
to decoders */
AVPacket *pktFiltered = NULL;
AVBSFContext *bsfc = NULL;
int iVideoStream;
bool bMp4H264, bMp4HEVC, bMp4MPEG4;
AVCodecID eVideoCodec;
AVPixelFormat eChromaFormat;
int nWidth, nHeight, nBitDepth, nBPP, nChromaHeight;
double timeBase = 0.0;
int64_t userTimeScale = 0;
uint8_t *pDataWithHeader = NULL;
unsigned int frameCount = 0;
public:
class DataProvider {
public:
virtual ~DataProvider() {}
virtual int GetData(uint8_t *pBuf, int nBuf) = 0;
};
private:
/**
* @brief Private constructor to initialize libavformat resources.
* @param fmtc - Pointer to AVFormatContext allocated inside avformat_open_input()
*/
FFmpegDemuxer(AVFormatContext *fmtc, int64_t timeScale = 1000 /*Hz*/) : fmtc(fmtc) {
if (!fmtc) {
LOG(ERROR) << "No AVFormatContext provided.";
return;
}
// Allocate the AVPackets and initialize to default values
pkt = av_packet_alloc();
pktFiltered = av_packet_alloc();
if (!pkt || !pktFiltered) {
LOG(ERROR) << "AVPacket allocation failed";
return;
}
LOG(INFO) << "Media format: " << fmtc->iformat->long_name << " (" << fmtc->iformat->name << ")";
ck(avformat_find_stream_info(fmtc, NULL));
iVideoStream = av_find_best_stream(fmtc, AVMEDIA_TYPE_VIDEO, -1, -1, NULL, 0);
if (iVideoStream < 0) {
LOG(ERROR) << "FFmpeg error: " << __FILE__ << " " << __LINE__ << " "
<< "Could not find stream in input file";
av_packet_free(&pkt);
av_packet_free(&pktFiltered);
return;
}
// fmtc->streams[iVideoStream]->need_parsing = AVSTREAM_PARSE_NONE;
eVideoCodec = fmtc->streams[iVideoStream]->codecpar->codec_id;
nWidth = fmtc->streams[iVideoStream]->codecpar->width;
nHeight = fmtc->streams[iVideoStream]->codecpar->height;
eChromaFormat = (AVPixelFormat)fmtc->streams[iVideoStream]->codecpar->format;
AVRational rTimeBase = fmtc->streams[iVideoStream]->time_base;
timeBase = av_q2d(rTimeBase);
userTimeScale = timeScale;
// Set bit depth, chroma height, bits per pixel based on eChromaFormat of input
switch (eChromaFormat) {
case AV_PIX_FMT_YUV420P10LE:
case AV_PIX_FMT_GRAY10LE: // monochrome is treated as 420 with chroma filled with 0x0
nBitDepth = 10;
nChromaHeight = (nHeight + 1) >> 1;
nBPP = 2;
break;
case AV_PIX_FMT_YUV420P12LE:
nBitDepth = 12;
nChromaHeight = (nHeight + 1) >> 1;
nBPP = 2;
break;
case AV_PIX_FMT_YUV444P10LE:
nBitDepth = 10;
nChromaHeight = nHeight << 1;
nBPP = 2;
break;
case AV_PIX_FMT_YUV444P12LE:
nBitDepth = 12;
nChromaHeight = nHeight << 1;
nBPP = 2;
break;
case AV_PIX_FMT_YUV444P:
nBitDepth = 8;
nChromaHeight = nHeight << 1;
nBPP = 1;
break;
case AV_PIX_FMT_YUV420P:
case AV_PIX_FMT_YUVJ420P:
case AV_PIX_FMT_YUVJ422P: // jpeg decoder output is subsampled to NV12 for 422/444 so treat it as 420
case AV_PIX_FMT_YUVJ444P: // jpeg decoder output is subsampled to NV12 for 422/444 so treat it as 420
case AV_PIX_FMT_GRAY8: // monochrome is treated as 420 with chroma filled with 0x0
nBitDepth = 8;
nChromaHeight = (nHeight + 1) >> 1;
nBPP = 1;
break;
default:
LOG(WARNING) << "ChromaFormat not recognized. Assuming 420";
eChromaFormat = AV_PIX_FMT_YUV420P;
nBitDepth = 8;
nChromaHeight = (nHeight + 1) >> 1;
nBPP = 1;
}
bMp4H264 = eVideoCodec == AV_CODEC_ID_H264 && (!strcmp(fmtc->iformat->long_name, "QuickTime / MOV") ||
!strcmp(fmtc->iformat->long_name, "FLV (Flash Video)") ||
!strcmp(fmtc->iformat->long_name, "Matroska / WebM"));
bMp4HEVC = eVideoCodec == AV_CODEC_ID_HEVC && (!strcmp(fmtc->iformat->long_name, "QuickTime / MOV") ||
!strcmp(fmtc->iformat->long_name, "FLV (Flash Video)") ||
!strcmp(fmtc->iformat->long_name, "Matroska / WebM"));
bMp4MPEG4 = eVideoCodec == AV_CODEC_ID_MPEG4 && (!strcmp(fmtc->iformat->long_name, "QuickTime / MOV") ||
!strcmp(fmtc->iformat->long_name, "FLV (Flash Video)") ||
!strcmp(fmtc->iformat->long_name, "Matroska / WebM"));
// Initialize bitstream filter and its required resources
if (bMp4H264) {
const AVBitStreamFilter *bsf = av_bsf_get_by_name("h264_mp4toannexb");
if (!bsf) {
LOG(ERROR) << "FFmpeg error: " << __FILE__ << " " << __LINE__ << " "
<< "av_bsf_get_by_name() failed";
av_packet_free(&pkt);
av_packet_free(&pktFiltered);
return;
}
ck(av_bsf_alloc(bsf, &bsfc));
avcodec_parameters_copy(bsfc->par_in, fmtc->streams[iVideoStream]->codecpar);
ck(av_bsf_init(bsfc));
}
if (bMp4HEVC) {
const AVBitStreamFilter *bsf = av_bsf_get_by_name("hevc_mp4toannexb");
if (!bsf) {
LOG(ERROR) << "FFmpeg error: " << __FILE__ << " " << __LINE__ << " "
<< "av_bsf_get_by_name() failed";
av_packet_free(&pkt);
av_packet_free(&pktFiltered);
return;
}
ck(av_bsf_alloc(bsf, &bsfc));
avcodec_parameters_copy(bsfc->par_in, fmtc->streams[iVideoStream]->codecpar);
ck(av_bsf_init(bsfc));
}
}
AVFormatContext *CreateFormatContext(DataProvider *pDataProvider) {
AVFormatContext *ctx = NULL;
if (!(ctx = avformat_alloc_context())) {
LOG(ERROR) << "FFmpeg error: " << __FILE__ << " " << __LINE__;
return NULL;
}
uint8_t *avioc_buffer = NULL;
int avioc_buffer_size = 8 * 1024 * 1024;
avioc_buffer = (uint8_t *)av_malloc(avioc_buffer_size);
if (!avioc_buffer) {
LOG(ERROR) << "FFmpeg error: " << __FILE__ << " " << __LINE__;
return NULL;
}
avioc = avio_alloc_context(avioc_buffer, avioc_buffer_size, 0, pDataProvider, &ReadPacket, NULL, NULL);
if (!avioc) {
LOG(ERROR) << "FFmpeg error: " << __FILE__ << " " << __LINE__;
return NULL;
}
ctx->pb = avioc;
ck(avformat_open_input(&ctx, NULL, NULL, NULL));
return ctx;
}
/**
* @brief Allocate and return AVFormatContext*.
* @param szFilePath - Filepath pointing to input stream.
* @return Pointer to AVFormatContext
*/
AVFormatContext *CreateFormatContext(const char *szFilePath) {
avformat_network_init();
AVFormatContext *ctx = NULL;
ck(avformat_open_input(&ctx, szFilePath, NULL, NULL));
return ctx;
}
public:
FFmpegDemuxer(const char *szFilePath, int64_t timescale = 1000 /*Hz*/)
: FFmpegDemuxer(CreateFormatContext(szFilePath), timescale) {}
FFmpegDemuxer(DataProvider *pDataProvider) : FFmpegDemuxer(CreateFormatContext(pDataProvider)) { avioc = fmtc->pb; }
~FFmpegDemuxer() {
if (!fmtc) {
return;
}
if (pkt) {
av_packet_free(&pkt);
}
if (pktFiltered) {
av_packet_free(&pktFiltered);
}
if (bsfc) {
av_bsf_free(&bsfc);
}
avformat_close_input(&fmtc);
if (avioc) {
av_freep(&avioc->buffer);
av_freep(&avioc);
}
if (pDataWithHeader) {
av_free(pDataWithHeader);
}
}
AVCodecID GetVideoCodec() { return eVideoCodec; }
AVPixelFormat GetChromaFormat() { return eChromaFormat; }
int GetWidth() { return nWidth; }
int GetHeight() { return nHeight; }
int GetBitDepth() { return nBitDepth; }
int GetFrameSize() { return nWidth * (nHeight + nChromaHeight) * nBPP; }
bool Demux(uint8_t **ppVideo, int *pnVideoBytes, int64_t *pts = NULL) {
if (!fmtc) {
return false;
}
*pnVideoBytes = 0;
if (pkt->data) {
av_packet_unref(pkt);
}
int e = 0;
while ((e = av_read_frame(fmtc, pkt)) >= 0 && pkt->stream_index != iVideoStream) {
av_packet_unref(pkt);
}
if (e < 0) {
return false;
}
if (bMp4H264 || bMp4HEVC) {
if (pktFiltered->data) {
av_packet_unref(pktFiltered);
}
ck(av_bsf_send_packet(bsfc, pkt));
ck(av_bsf_receive_packet(bsfc, pktFiltered));
*ppVideo = pktFiltered->data;
*pnVideoBytes = pktFiltered->size;
if (pts)
*pts = (int64_t)(pktFiltered->pts * userTimeScale * timeBase);
} else {
if (bMp4MPEG4 && (frameCount == 0)) {
int extraDataSize = fmtc->streams[iVideoStream]->codecpar->extradata_size;
if (extraDataSize > 0) {
// extradata already carries the 00 00 01 start code, so drop the duplicate 3-byte start code from the packet
pDataWithHeader = (uint8_t *)av_malloc(extraDataSize + pkt->size - 3 * sizeof(uint8_t));
if (!pDataWithHeader) {
LOG(ERROR) << "FFmpeg error: " << __FILE__ << " " << __LINE__;
return false;
}
memcpy(pDataWithHeader, fmtc->streams[iVideoStream]->codecpar->extradata, extraDataSize);
memcpy(pDataWithHeader + extraDataSize, pkt->data + 3, pkt->size - 3 * sizeof(uint8_t));
*ppVideo = pDataWithHeader;
*pnVideoBytes = extraDataSize + pkt->size - 3 * sizeof(uint8_t);
}
} else {
*ppVideo = pkt->data;
*pnVideoBytes = pkt->size;
}
if (pts)
*pts = (int64_t)(pkt->pts * userTimeScale * timeBase);
}
frameCount++;
return true;
}
static int ReadPacket(void *opaque, uint8_t *pBuf, int nBuf) {
return ((DataProvider *)opaque)->GetData(pBuf, nBuf);
}
};
inline cudaVideoCodec FFmpeg2NvCodecId(AVCodecID id) {
switch (id) {
case AV_CODEC_ID_MPEG1VIDEO:
return cudaVideoCodec_MPEG1;
case AV_CODEC_ID_MPEG2VIDEO:
return cudaVideoCodec_MPEG2;
case AV_CODEC_ID_MPEG4:
return cudaVideoCodec_MPEG4;
case AV_CODEC_ID_WMV3:
case AV_CODEC_ID_VC1:
return cudaVideoCodec_VC1;
case AV_CODEC_ID_H264:
return cudaVideoCodec_H264;
case AV_CODEC_ID_HEVC:
return cudaVideoCodec_HEVC;
case AV_CODEC_ID_VP8:
return cudaVideoCodec_VP8;
case AV_CODEC_ID_VP9:
return cudaVideoCodec_VP9;
case AV_CODEC_ID_MJPEG:
return cudaVideoCodec_JPEG;
case AV_CODEC_ID_AV1:
return cudaVideoCodec_AV1;
default:
return cudaVideoCodec_NumCodecs;
}
}
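The demuxer and the codec-id mapping above are all a caller needs to drive a decode loop: construct the demuxer from a file path, map its codec to the NVDEC enum, then pull packets until Demux() returns false. A minimal sketch of that call pattern (DecodePacket is a hypothetical consumer; everything else is taken from this header):

// Minimal demux loop built on FFmpegDemuxer and FFmpeg2NvCodecId.
void DemuxAll(const char *szInFilePath) {
    FFmpegDemuxer demuxer(szInFilePath);
    cudaVideoCodec eCodec = FFmpeg2NvCodecId(demuxer.GetVideoCodec());
    LOG(INFO) << "Input " << demuxer.GetWidth() << "x" << demuxer.GetHeight() << ", NVDEC codec enum " << eCodec;
    uint8_t *pVideo = NULL;
    int nVideoBytes = 0;
    int64_t pts = 0;
    while (demuxer.Demux(&pVideo, &nVideoBytes, &pts)) {
        // DecodePacket(pVideo, nVideoBytes, pts); // hypothetical: feed NVDEC here
    }
}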

third_party/Video_Codec_SDK/Samples/Utils/FFmpegStreamer.h vendored Normal file (148 lines added)

@@ -0,0 +1,148 @@
/*
* This copyright notice applies to this header file only:
*
* Copyright (c) 2010-2023 NVIDIA Corporation
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the software, and to permit persons to whom the
* software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include <mutex>
#include <thread>
extern "C" {
#include <libavformat/avformat.h>
#include <libavutil/opt.h>
#include <libswresample/swresample.h>
};
#include "Logger.h"
using namespace std;
extern simplelogger::Logger *logger;
static string AvErrorToString(int av_error_code) {
const auto buf_size = 1024U;
char *err_string = (char *)calloc(buf_size, sizeof(*err_string));
if (!err_string) {
return string();
}
if (0 != av_strerror(av_error_code, err_string, buf_size - 1)) {
free(err_string);
stringstream ss;
ss << "Unknown error with code " << av_error_code;
return ss.str();
}
string str(err_string);
free(err_string);
return str;
}
class FFmpegStreamer {
private:
AVFormatContext *oc = NULL;
AVStream *vs = NULL;
int nFps = 0;
public:
FFmpegStreamer(AVCodecID eCodecId, int nWidth, int nHeight, int nFps, const char *szInFilePath) : nFps(nFps) {
avformat_network_init();
int ret = 0;
if ((eCodecId == AV_CODEC_ID_H264) || (eCodecId == AV_CODEC_ID_HEVC))
ret = avformat_alloc_output_context2(&oc, NULL, "mpegts", NULL);
else if (eCodecId == AV_CODEC_ID_AV1)
ret = avformat_alloc_output_context2(&oc, NULL, "ivf", NULL);
    if (ret < 0 || !oc) { // oc is still NULL when the codec is not H.264/HEVC/AV1
LOG(ERROR) << "FFmpeg: failed to allocate an AVFormatContext. Error message: " << AvErrorToString(ret);
return;
}
oc->url = av_strdup(szInFilePath);
LOG(INFO) << "Streaming destination: " << oc->url;
// Add video stream to oc
vs = avformat_new_stream(oc, NULL);
if (!vs) {
LOG(ERROR) << "FFMPEG: Could not alloc video stream";
return;
}
vs->id = 0;
// Set video parameters
AVCodecParameters *vpar = vs->codecpar;
vpar->codec_id = eCodecId;
vpar->codec_type = AVMEDIA_TYPE_VIDEO;
vpar->width = nWidth;
vpar->height = nHeight;
// Everything is ready. Now open the output stream.
if (avio_open(&oc->pb, oc->url, AVIO_FLAG_WRITE) < 0) {
LOG(ERROR) << "FFMPEG: Could not open " << oc->url;
return;
}
// Write the container header
if (avformat_write_header(oc, NULL)) {
LOG(ERROR) << "FFMPEG: avformat_write_header error!";
return;
}
}
~FFmpegStreamer() {
if (oc) {
av_write_trailer(oc);
avio_close(oc->pb);
avformat_free_context(oc);
}
}
bool Stream(uint8_t *pData, int nBytes, int nPts) {
AVPacket *pkt = av_packet_alloc();
if (!pkt) {
LOG(ERROR) << "AVPacket allocation failed !";
return false;
}
    pkt->pts = av_rescale_q(nPts, AVRational{1, nFps}, vs->time_base);
    // No B-frames, so the decode timestamp can simply track the presentation timestamp
    pkt->dts = pkt->pts;
pkt->stream_index = vs->index;
pkt->data = pData;
pkt->size = nBytes;
    // An Annex-B start code followed by an H.264 SPS NAL unit (0x67) marks a keyframe
    if (!memcmp(pData, "\x00\x00\x00\x01\x67", 5)) {
        pkt->flags |= AV_PKT_FLAG_KEY;
    }
// Write the compressed frame into the output
    int ret = av_write_frame(oc, pkt);
    av_write_frame(oc, NULL); // a NULL packet flushes the muxer's internal buffers
if (ret < 0) {
LOG(ERROR) << "FFMPEG: Error while writing video frame";
}
av_packet_free(&pkt);
return true;
}
};
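The streamer above is fed one encoded frame at a time through Stream(). A minimal sketch of the intended call pattern, assuming a hypothetical GetNextEncodedFrame() that yields Annex-B frames from an encoder or demuxer:

// Hypothetical frame source; only FFmpegStreamer's API is taken from this header.
bool GetNextEncodedFrame(uint8_t **ppData, int *pnBytes);

void StreamH264(const char *szOutPath, int nWidth, int nHeight, int nFps) {
    FFmpegStreamer streamer(AV_CODEC_ID_H264, nWidth, nHeight, nFps, szOutPath);
    uint8_t *pData = NULL;
    int nBytes = 0;
    for (int nPts = 0; GetNextEncodedFrame(&pData, &nBytes); nPts++) {
        streamer.Stream(pData, nBytes, nPts);
    }
}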

third_party/Video_Codec_SDK/Samples/Utils/Logger.h vendored Normal file (235 lines added)

@@ -0,0 +1,235 @@
/*
* This copyright notice applies to this header file only:
*
* Copyright (c) 2010-2023 NVIDIA Corporation
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the software, and to permit persons to whom the
* software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include <fstream>
#include <iostream>
#include <mutex>
#include <sstream>
#include <string>
#include <time.h>
#ifdef _WIN32
#include <windows.h>
#include <winsock.h>
#pragma comment(lib, "ws2_32.lib")
#undef ERROR
#else
#include <arpa/inet.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <unistd.h>
#define SOCKET int
#define INVALID_SOCKET -1
#endif
enum LogLevel { TRACE, INFO, WARNING, ERROR, FATAL };
namespace simplelogger {
class Logger {
public:
Logger(LogLevel level, bool bPrintTimeStamp) : level(level), bPrintTimeStamp(bPrintTimeStamp) {}
virtual ~Logger() {}
virtual std::ostream &GetStream() = 0;
virtual void FlushStream() {}
bool ShouldLogFor(LogLevel l) { return l >= level; }
char *GetLead(LogLevel l, const char *szFile, int nLine, const char *szFunc) {
if (l < TRACE || l > FATAL) {
sprintf(szLead, "[?????] ");
return szLead;
}
const char *szLevels[] = {"TRACE", "INFO", "WARN", "ERROR", "FATAL"};
if (bPrintTimeStamp) {
time_t t = time(NULL);
struct tm *ptm = localtime(&t);
sprintf(szLead, "[%-5s][%02d:%02d:%02d] ", szLevels[l], ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
} else {
sprintf(szLead, "[%-5s] ", szLevels[l]);
}
return szLead;
}
void EnterCriticalSection() { mtx.lock(); }
void LeaveCriticalSection() { mtx.unlock(); }
private:
LogLevel level;
char szLead[80];
bool bPrintTimeStamp;
std::mutex mtx;
};
class LoggerFactory {
public:
static Logger *CreateFileLogger(std::string strFilePath, LogLevel level = INFO, bool bPrintTimeStamp = true) {
return new FileLogger(strFilePath, level, bPrintTimeStamp);
}
static Logger *CreateConsoleLogger(LogLevel level = INFO, bool bPrintTimeStamp = true) {
return new ConsoleLogger(level, bPrintTimeStamp);
}
static Logger *CreateUdpLogger(char *szHost, unsigned uPort, LogLevel level = INFO, bool bPrintTimeStamp = true) {
return new UdpLogger(szHost, uPort, level, bPrintTimeStamp);
}
private:
LoggerFactory() {}
class FileLogger : public Logger {
public:
FileLogger(std::string strFilePath, LogLevel level, bool bPrintTimeStamp) : Logger(level, bPrintTimeStamp) {
pFileOut = new std::ofstream();
pFileOut->open(strFilePath.c_str());
}
        ~FileLogger() {
            pFileOut->close();
            delete pFileOut; // release the stream allocated in the constructor
        }
std::ostream &GetStream() { return *pFileOut; }
private:
std::ofstream *pFileOut;
};
class ConsoleLogger : public Logger {
public:
ConsoleLogger(LogLevel level, bool bPrintTimeStamp) : Logger(level, bPrintTimeStamp) {}
std::ostream &GetStream() { return std::cout; }
};
class UdpLogger : public Logger {
private:
class UdpOstream : public std::ostream {
public:
UdpOstream(char *szHost, unsigned short uPort) : std::ostream(&sb), socket(INVALID_SOCKET) {
#ifdef _WIN32
WSADATA w;
if (WSAStartup(0x0101, &w) != 0) {
fprintf(stderr, "WSAStartup() failed.\n");
return;
}
#endif
socket = ::socket(AF_INET, SOCK_DGRAM, 0);
if (socket == INVALID_SOCKET) {
#ifdef _WIN32
WSACleanup();
#endif
fprintf(stderr, "socket() failed.\n");
return;
}
#ifdef _WIN32
unsigned int b1, b2, b3, b4;
sscanf(szHost, "%u.%u.%u.%u", &b1, &b2, &b3, &b4);
struct in_addr addr = {(unsigned char)b1, (unsigned char)b2, (unsigned char)b3, (unsigned char)b4};
#else
struct in_addr addr = {inet_addr(szHost)};
#endif
struct sockaddr_in s = {AF_INET, htons(uPort), addr};
server = s;
}
~UdpOstream() throw() {
if (socket == INVALID_SOCKET) {
return;
}
#ifdef _WIN32
closesocket(socket);
WSACleanup();
#else
close(socket);
#endif
}
void Flush() {
if (sendto(socket, sb.str().c_str(), (int)sb.str().length() + 1, 0, (struct sockaddr *)&server,
(int)sizeof(sockaddr_in)) == -1) {
fprintf(stderr, "sendto() failed.\n");
}
sb.str("");
}
private:
std::stringbuf sb;
SOCKET socket;
struct sockaddr_in server;
};
public:
UdpLogger(char *szHost, unsigned uPort, LogLevel level, bool bPrintTimeStamp)
: Logger(level, bPrintTimeStamp), udpOut(szHost, (unsigned short)uPort) {}
UdpOstream &GetStream() { return udpOut; }
virtual void FlushStream() { udpOut.Flush(); }
private:
UdpOstream udpOut;
};
};
class LogTransaction {
public:
LogTransaction(Logger *pLogger, LogLevel level, const char *szFile, const int nLine, const char *szFunc)
: pLogger(pLogger), level(level) {
if (!pLogger) {
std::cout << "[-----] ";
return;
}
if (!pLogger->ShouldLogFor(level)) {
return;
}
pLogger->EnterCriticalSection();
pLogger->GetStream() << pLogger->GetLead(level, szFile, nLine, szFunc);
}
~LogTransaction() {
if (!pLogger) {
std::cout << std::endl;
return;
}
if (!pLogger->ShouldLogFor(level)) {
return;
}
pLogger->GetStream() << std::endl;
pLogger->FlushStream();
pLogger->LeaveCriticalSection();
if (level == FATAL) {
exit(1);
}
}
std::ostream &GetStream() {
if (!pLogger) {
return std::cout;
}
if (!pLogger->ShouldLogFor(level)) {
return ossNull;
}
return pLogger->GetStream();
}
private:
Logger *pLogger;
LogLevel level;
std::ostringstream ossNull;
};
} // namespace simplelogger
extern simplelogger::Logger *logger;
#define LOG(level) simplelogger::LogTransaction(logger, level, __FILE__, __LINE__, __FUNCTION__).GetStream()
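Every translation unit that uses LOG() must see the extern declaration above, and the program must provide exactly one definition of the global logger. A minimal sketch of defining and using it:

// One global logger definition, consumed by the LOG() macro above.
simplelogger::Logger *logger = simplelogger::LoggerFactory::CreateConsoleLogger();

void Example() {
    LOG(INFO) << "decode session started";
    LOG(WARNING) << "dropping malformed packet";
}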

third_party/Video_Codec_SDK/Samples/Utils/NvCodecUtils.h vendored Normal file (547 lines added)

@@ -0,0 +1,547 @@
/*
* This copyright notice applies to this header file only:
*
* Copyright (c) 2010-2023 NVIDIA Corporation
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the software, and to permit persons to whom the
* software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
//---------------------------------------------------------------------------
//! \file NvCodecUtils.h
//! \brief Miscellaneous classes and error checking functions.
//!
//! Used by the Transcode/Encode sample apps for reading input files, multithreading, performance measurement and
//! colorspace conversion while decoding.
//---------------------------------------------------------------------------
#pragma once
#include "Logger.h"
#include <assert.h>
#include <chrono>
#include <condition_variable>
#include <iomanip>
#include <ios>
#include <list>
#include <sstream>
#include <stdint.h>
#include <string.h>
#include <sys/stat.h>
#include <thread>
#include <vector>
extern simplelogger::Logger *logger;
#ifdef __cuda_cuda_h__
inline bool check(CUresult e, int iLine, const char *szFile) {
if (e != CUDA_SUCCESS) {
const char *szErrName = NULL;
cuGetErrorName(e, &szErrName);
LOG(FATAL) << "CUDA driver API error " << szErrName << " at line " << iLine << " in file " << szFile;
return false;
}
return true;
}
#endif
#ifdef __CUDA_RUNTIME_H__
inline bool check(cudaError_t e, int iLine, const char *szFile) {
if (e != cudaSuccess) {
LOG(FATAL) << "CUDA runtime API error " << cudaGetErrorName(e) << " at line " << iLine << " in file " << szFile;
return false;
}
return true;
}
#endif
#ifdef _NV_ENCODEAPI_H_
inline bool check(NVENCSTATUS e, int iLine, const char *szFile) {
const char *aszErrName[] = {
"NV_ENC_SUCCESS",
"NV_ENC_ERR_NO_ENCODE_DEVICE",
"NV_ENC_ERR_UNSUPPORTED_DEVICE",
"NV_ENC_ERR_INVALID_ENCODERDEVICE",
"NV_ENC_ERR_INVALID_DEVICE",
"NV_ENC_ERR_DEVICE_NOT_EXIST",
"NV_ENC_ERR_INVALID_PTR",
"NV_ENC_ERR_INVALID_EVENT",
"NV_ENC_ERR_INVALID_PARAM",
"NV_ENC_ERR_INVALID_CALL",
"NV_ENC_ERR_OUT_OF_MEMORY",
"NV_ENC_ERR_ENCODER_NOT_INITIALIZED",
"NV_ENC_ERR_UNSUPPORTED_PARAM",
"NV_ENC_ERR_LOCK_BUSY",
"NV_ENC_ERR_NOT_ENOUGH_BUFFER",
"NV_ENC_ERR_INVALID_VERSION",
"NV_ENC_ERR_MAP_FAILED",
"NV_ENC_ERR_NEED_MORE_INPUT",
"NV_ENC_ERR_ENCODER_BUSY",
"NV_ENC_ERR_EVENT_NOT_REGISTERED",
"NV_ENC_ERR_GENERIC",
"NV_ENC_ERR_INCOMPATIBLE_CLIENT_KEY",
"NV_ENC_ERR_UNIMPLEMENTED",
"NV_ENC_ERR_RESOURCE_REGISTER_FAILED",
"NV_ENC_ERR_RESOURCE_NOT_REGISTERED",
"NV_ENC_ERR_RESOURCE_NOT_MAPPED",
};
if (e != NV_ENC_SUCCESS) {
LOG(FATAL) << "NVENC error " << aszErrName[e] << " at line " << iLine << " in file " << szFile;
return false;
}
return true;
}
#endif
#ifdef _WINERROR_
inline bool check(HRESULT e, int iLine, const char *szFile) {
if (e != S_OK) {
std::stringstream stream;
stream << std::hex << std::uppercase << e;
LOG(FATAL) << "HRESULT error 0x" << stream.str() << " at line " << iLine << " in file " << szFile;
return false;
}
return true;
}
#endif
#if defined(__gl_h_) || defined(__GL_H__)
inline bool check(GLenum e, int iLine, const char *szFile) {
if (e != 0) {
LOG(ERROR) << "GLenum error " << e << " at line " << iLine << " in file " << szFile;
return false;
}
return true;
}
#endif
inline bool check(int e, int iLine, const char *szFile) {
if (e < 0) {
LOG(ERROR) << "General error " << e << " at line " << iLine << " in file " << szFile;
return false;
}
return true;
}
#define ck(call) check(call, __LINE__, __FILE__)
#define MAKE_FOURCC(ch0, ch1, ch2, ch3) \
((uint32_t)(uint8_t)(ch0) | ((uint32_t)(uint8_t)(ch1) << 8) | ((uint32_t)(uint8_t)(ch2) << 16) | \
((uint32_t)(uint8_t)(ch3) << 24))
/**
* @brief Wrapper class around std::thread
*/
class NvThread {
public:
NvThread() = default;
NvThread(const NvThread &) = delete;
NvThread &operator=(const NvThread &other) = delete;
NvThread(std::thread &&thread) : t(std::move(thread)) {}
NvThread(NvThread &&thread) : t(std::move(thread.t)) {}
NvThread &operator=(NvThread &&other) {
t = std::move(other.t);
return *this;
}
~NvThread() { join(); }
void join() {
if (t.joinable()) {
t.join();
}
}
private:
std::thread t;
};
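// A minimal usage sketch: the wrapper joins automatically when it goes out of
// scope, so a per-session worker needs no explicit join() on every exit path.
//   NvThread worker(std::thread([] { /* per-session decode loop */ }));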
#ifndef _WIN32
#define _stricmp strcasecmp
#define _stat64 stat64
#endif
/**
 * @brief Utility class that preloads a file into a memory buffer. Helps keep file I/O out of the encode/decode loop
 * during performance tests.
*/
class BufferedFileReader {
public:
/**
* @brief Constructor function to allocate appropriate memory and copy file contents into it
*/
BufferedFileReader(const char *szFileName, bool bPartial = false) {
struct _stat64 st;
if (_stat64(szFileName, &st) != 0) {
return;
}
nSize = st.st_size;
while (nSize) {
try {
pBuf = new uint8_t[(size_t)nSize];
if (nSize != st.st_size) {
LOG(WARNING) << "File is too large - only " << std::setprecision(4) << 100.0 * nSize / st.st_size
<< "% is loaded";
}
break;
            } catch (const std::bad_alloc &) {
                if (!bPartial) {
                    LOG(ERROR) << "Failed to allocate memory in BufferedFileReader";
                    return;
                }
                nSize = (uint64_t)(nSize * 0.9); // retry with a smaller buffer; keep the size 64-bit to avoid truncating large files
            }
}
std::ifstream fpIn(szFileName, std::ifstream::in | std::ifstream::binary);
if (!fpIn) {
LOG(ERROR) << "Unable to open input file: " << szFileName;
return;
}
std::streamsize nRead = fpIn.read(reinterpret_cast<char *>(pBuf), nSize).gcount();
fpIn.close();
assert(nRead == nSize);
}
~BufferedFileReader() {
if (pBuf) {
delete[] pBuf;
}
}
bool GetBuffer(uint8_t **ppBuf, uint64_t *pnSize) {
if (!pBuf) {
return false;
}
*ppBuf = pBuf;
*pnSize = nSize;
return true;
}
private:
uint8_t *pBuf = NULL;
uint64_t nSize = 0;
};
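// A minimal usage sketch, assuming szInFilePath names an existing bitstream;
// preloading keeps file I/O out of the timed decode loop.
//   BufferedFileReader reader(szInFilePath);
//   uint8_t *pBuf = NULL;
//   uint64_t nBufSize = 0;
//   if (!reader.GetBuffer(&pBuf, &nBufSize)) { /* open or allocation failed */ }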
/**
* @brief Template class to facilitate color space conversion
*/
template <typename T> class YuvConverter {
public:
YuvConverter(int nWidth, int nHeight) : nWidth(nWidth), nHeight(nHeight) {
pQuad = new T[((nWidth + 1) / 2) * ((nHeight + 1) / 2)];
}
~YuvConverter() { delete[] pQuad; }
void PlanarToUVInterleaved(T *pFrame, int nPitch = 0) {
if (nPitch == 0) {
nPitch = nWidth;
}
// sizes of source surface plane
int nSizePlaneY = nPitch * nHeight;
int nSizePlaneU = ((nPitch + 1) / 2) * ((nHeight + 1) / 2);
int nSizePlaneV = nSizePlaneU;
T *puv = pFrame + nSizePlaneY;
if (nPitch == nWidth) {
memcpy(pQuad, puv, nSizePlaneU * sizeof(T));
} else {
for (int i = 0; i < (nHeight + 1) / 2; i++) {
memcpy(pQuad + ((nWidth + 1) / 2) * i, puv + ((nPitch + 1) / 2) * i, ((nWidth + 1) / 2) * sizeof(T));
}
}
T *pv = puv + nSizePlaneU;
for (int y = 0; y < (nHeight + 1) / 2; y++) {
for (int x = 0; x < (nWidth + 1) / 2; x++) {
puv[y * nPitch + x * 2] = pQuad[y * ((nWidth + 1) / 2) + x];
puv[y * nPitch + x * 2 + 1] = pv[y * ((nPitch + 1) / 2) + x];
}
}
}
void UVInterleavedToPlanar(T *pFrame, int nPitch = 0) {
if (nPitch == 0) {
nPitch = nWidth;
}
// sizes of source surface plane
int nSizePlaneY = nPitch * nHeight;
int nSizePlaneU = ((nPitch + 1) / 2) * ((nHeight + 1) / 2);
int nSizePlaneV = nSizePlaneU;
T *puv = pFrame + nSizePlaneY, *pu = puv, *pv = puv + nSizePlaneU;
// split chroma from interleave to planar
for (int y = 0; y < (nHeight + 1) / 2; y++) {
for (int x = 0; x < (nWidth + 1) / 2; x++) {
pu[y * ((nPitch + 1) / 2) + x] = puv[y * nPitch + x * 2];
pQuad[y * ((nWidth + 1) / 2) + x] = puv[y * nPitch + x * 2 + 1];
}
}
if (nPitch == nWidth) {
memcpy(pv, pQuad, nSizePlaneV * sizeof(T));
} else {
for (int i = 0; i < (nHeight + 1) / 2; i++) {
memcpy(pv + ((nPitch + 1) / 2) * i, pQuad + ((nWidth + 1) / 2) * i, ((nWidth + 1) / 2) * sizeof(T));
}
}
}
private:
T *pQuad;
int nWidth, nHeight;
};
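// A minimal usage sketch, assuming pFrame holds a decoded planar YUV 4:2:0
// frame of nWidth x nHeight; both conversions operate in place:
//   YuvConverter<uint8_t> converter(nWidth, nHeight);
//   converter.PlanarToUVInterleaved(pFrame); // I420 -> NV12
//   converter.UVInterleavedToPlanar(pFrame); // NV12 -> I420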
/**
 * @brief Class for writing IVF file and frame headers for the AV1 codec
*/
class IVFUtils {
public:
void WriteFileHeader(std::vector<uint8_t> &vPacket, uint32_t nFourCC, uint32_t nWidth, uint32_t nHeight,
uint32_t nFrameRateNum, uint32_t nFrameRateDen, uint32_t nFrameCnt) {
char header[32];
header[0] = 'D';
header[1] = 'K';
header[2] = 'I';
header[3] = 'F';
mem_put_le16(header + 4, 0); // version
mem_put_le16(header + 6, 32); // header size
mem_put_le32(header + 8, nFourCC); // fourcc
mem_put_le16(header + 12, nWidth); // width
mem_put_le16(header + 14, nHeight); // height
mem_put_le32(header + 16, nFrameRateNum); // rate
mem_put_le32(header + 20, nFrameRateDen); // scale
mem_put_le32(header + 24, nFrameCnt); // length
mem_put_le32(header + 28, 0); // unused
vPacket.insert(vPacket.end(), &header[0], &header[32]);
}
void WriteFrameHeader(std::vector<uint8_t> &vPacket, size_t nFrameSize, int64_t pts) {
char header[12];
mem_put_le32(header, (int)nFrameSize);
mem_put_le32(header + 4, (int)(pts & 0xFFFFFFFF));
mem_put_le32(header + 8, (int)(pts >> 32));
vPacket.insert(vPacket.end(), &header[0], &header[12]);
}
private:
static inline void mem_put_le32(void *vmem, int val) {
unsigned char *mem = (unsigned char *)vmem;
mem[0] = (unsigned char)((val >> 0) & 0xff);
mem[1] = (unsigned char)((val >> 8) & 0xff);
mem[2] = (unsigned char)((val >> 16) & 0xff);
mem[3] = (unsigned char)((val >> 24) & 0xff);
}
static inline void mem_put_le16(void *vmem, int val) {
unsigned char *mem = (unsigned char *)vmem;
mem[0] = (unsigned char)((val >> 0) & 0xff);
mem[1] = (unsigned char)((val >> 8) & 0xff);
}
};
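// A minimal usage sketch, assuming an AV1 stream at 30 fps ('AV01' is the IVF
// FourCC for AV1) with nFrames frames of nWidth x nHeight:
//   std::vector<uint8_t> vPacket;
//   IVFUtils ivf;
//   ivf.WriteFileHeader(vPacket, MAKE_FOURCC('A', 'V', '0', '1'), nWidth, nHeight, 30, 1, nFrames);
//   ivf.WriteFrameHeader(vPacket, nFrameSize, pts); // once per frame, before the payload bytes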
/**
 * @brief Utility class to measure the elapsed time, in seconds, of a block of executed code
*/
class StopWatch {
public:
void Start() { t0 = std::chrono::high_resolution_clock::now(); }
double Stop() {
return std::chrono::duration_cast<std::chrono::nanoseconds>(
std::chrono::high_resolution_clock::now().time_since_epoch() - t0.time_since_epoch())
.count() /
1.0e9;
}
private:
std::chrono::high_resolution_clock::time_point t0;
};
template <typename T> class ConcurrentQueue {
public:
ConcurrentQueue() {}
ConcurrentQueue(size_t size) : maxSize(size) {}
ConcurrentQueue(const ConcurrentQueue &) = delete;
ConcurrentQueue &operator=(const ConcurrentQueue &) = delete;
void setSize(size_t s) { maxSize = s; }
void push_back(const T &value) {
// Do not use a std::lock_guard here. We will need to explicitly
// unlock before notify_one as the other waiting thread will
// automatically try to acquire mutex once it wakes up
// (which will happen on notify_one)
std::unique_lock<std::mutex> lock(m_mutex);
auto wasEmpty = m_List.empty();
while (full()) {
m_cond.wait(lock);
}
m_List.push_back(value);
if (wasEmpty && !m_List.empty()) {
lock.unlock();
m_cond.notify_one();
}
}
T pop_front() {
std::unique_lock<std::mutex> lock(m_mutex);
while (m_List.empty()) {
m_cond.wait(lock);
}
auto wasFull = full();
T data = std::move(m_List.front());
m_List.pop_front();
if (wasFull && !full()) {
lock.unlock();
m_cond.notify_one();
}
return data;
}
T front() {
std::unique_lock<std::mutex> lock(m_mutex);
while (m_List.empty()) {
m_cond.wait(lock);
}
return m_List.front();
}
size_t size() {
std::unique_lock<std::mutex> lock(m_mutex);
return m_List.size();
}
bool empty() {
std::unique_lock<std::mutex> lock(m_mutex);
return m_List.empty();
}
void clear() {
std::unique_lock<std::mutex> lock(m_mutex);
m_List.clear();
}
private:
bool full() {
if (maxSize > 0 && m_List.size() == maxSize)
return true;
return false;
}
private:
std::list<T> m_List;
std::mutex m_mutex;
std::condition_variable m_cond;
size_t maxSize;
};
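// A minimal usage sketch, assuming one demux thread producing and one decode
// thread consuming; with a bound set, push_back() blocks while the queue is
// full and pop_front() blocks while it is empty:
//   ConcurrentQueue<std::vector<uint8_t>> packetQueue;
//   packetQueue.setSize(8);
//   // producer thread: packetQueue.push_back(packet);
//   // consumer thread: std::vector<uint8_t> packet = packetQueue.pop_front();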
inline void CheckInputFile(const char *szInFilePath) {
std::ifstream fpIn(szInFilePath, std::ios::in | std::ios::binary);
if (fpIn.fail()) {
std::ostringstream err;
err << "Unable to open input file: " << szInFilePath << std::endl;
throw std::invalid_argument(err.str());
}
}
inline void ValidateResolution(int nWidth, int nHeight) {
if (nWidth <= 0 || nHeight <= 0) {
std::ostringstream err;
err << "Please specify positive non zero resolution as -s WxH. Current resolution is " << nWidth << "x"
<< nHeight << std::endl;
throw std::invalid_argument(err.str());
}
}
template <class COLOR32>
void Nv12ToColor32(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight,
int iMatrix = 0);
template <class COLOR64>
void Nv12ToColor64(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight,
int iMatrix = 0);
template <class COLOR32>
void P016ToColor32(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight,
int iMatrix = 4);
template <class COLOR64>
void P016ToColor64(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight,
int iMatrix = 4);
template <class COLOR32>
void YUV444ToColor32(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight,
int iMatrix = 0);
template <class COLOR64>
void YUV444ToColor64(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight,
int iMatrix = 0);
template <class COLOR32>
void YUV444P16ToColor32(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight,
int iMatrix = 4);
template <class COLOR64>
void YUV444P16ToColor64(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight,
int iMatrix = 4);
template <class COLOR32>
void Nv12ToColorPlanar(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight,
int iMatrix = 0);
template <class COLOR32>
void P016ToColorPlanar(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight,
int iMatrix = 4);
template <class COLOR32>
void YUV444ToColorPlanar(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight,
int iMatrix = 0);
template <class COLOR32>
void YUV444P16ToColorPlanar(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight,
int iMatrix = 4);
void Bgra64ToP016(uint8_t *dpBgra, int nBgraPitch, uint8_t *dpP016, int nP016Pitch, int nWidth, int nHeight,
int iMatrix = 4);
void ConvertUInt8ToUInt16(uint8_t *dpUInt8, uint16_t *dpUInt16, int nSrcPitch, int nDestPitch, int nWidth, int nHeight);
void ConvertUInt16ToUInt8(uint16_t *dpUInt16, uint8_t *dpUInt8, int nSrcPitch, int nDestPitch, int nWidth, int nHeight);
void ResizeNv12(unsigned char *dpDstNv12, int nDstPitch, int nDstWidth, int nDstHeight, unsigned char *dpSrcNv12,
int nSrcPitch, int nSrcWidth, int nSrcHeight, unsigned char *dpDstNv12UV = nullptr);
void ResizeP016(unsigned char *dpDstP016, int nDstPitch, int nDstWidth, int nDstHeight, unsigned char *dpSrcP016,
int nSrcPitch, int nSrcWidth, int nSrcHeight, unsigned char *dpDstP016UV = nullptr);
void ScaleYUV420(unsigned char *dpDstY, unsigned char *dpDstU, unsigned char *dpDstV, int nDstPitch,
int nDstChromaPitch, int nDstWidth, int nDstHeight, unsigned char *dpSrcY, unsigned char *dpSrcU,
unsigned char *dpSrcV, int nSrcPitch, int nSrcChromaPitch, int nSrcWidth, int nSrcHeight,
bool bSemiplanar);
#ifdef __cuda_cuda_h__
void ComputeCRC(uint8_t *pBuffer, uint32_t *crcValue, CUstream_st *outputCUStream);
#endif
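As a closing illustration, the error-checking macro, the logger and the stop watch above compose into the skeleton of a timed run. A minimal sketch, assuming only this header and the CUDA driver API (the timed region itself is left hypothetical):

#include <cuda.h>
#include "NvCodecUtils.h"

simplelogger::Logger *logger = simplelogger::LoggerFactory::CreateConsoleLogger();

int main() {
    ck(cuInit(0)); // ck() logs FATAL (which exits) on CUDA driver errors
    CUdevice cuDevice = 0;
    ck(cuDeviceGet(&cuDevice, 0));
    StopWatch watch;
    watch.Start();
    // ... timed region: the demux/decode loop would run here ...
    LOG(INFO) << "Elapsed: " << watch.Stop() << " s";
    return 0;
}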