Benchmarks: micro benchmark - source code for evaluating NVDEC decoding performance (#560)
**Description**

Source code for evaluating NVDEC decoding performance.

Co-authored-by: yukirora <yuting.jiang@microsoft.com>
This commit is contained in:

Parent: 6c0205cece
Commit: 27a10811af
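For reference, the benchmark built from this commit (the CMake project below names the binary cuda_decode_performance) can be invoked along these lines, per the options parsed in AppDecPerf.cpp; the file names here are illustrative placeholders, not part of the commit:

    ./cuda_decode_performance -i sample.h264 -codec h264 -gpu 0 -thread 5 -total 100 -o raw_data.txt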
@@ -11,7 +11,7 @@ pool:
 container:
   image: nvcr.io/nvidia/pytorch:20.12-py3
-  options: '-v /var/run/docker.sock:/var/run/docker.sock -v /usr/bin/docker:/usr/bin/docker'
+  options: '-v /var/run/docker.sock:/var/run/docker.sock -v /usr/bin/docker:/usr/bin/docker -v /usr/bin/sudo:/usr/bin/sudo -v /usr/lib/sudo/:/usr/lib/sudo/'

 steps:
 - script: |
@@ -21,6 +21,8 @@ steps:
     python3 -m pip install --upgrade pip setuptools==65.7
     python3 -m pip install .[test,nvworker]
     make postinstall
+    sudo DEBIAN_FRONTEND=noninteractive apt-get update
+    sudo DEBIAN_FRONTEND=noninteractive apt-get install -y ffmpeg libavcodec-dev libavformat-dev libavutil-dev libswresample-dev
   displayName: Install dependencies
 - script: |
     python3 setup.py lint
@@ -31,7 +33,7 @@ steps:
 - script: |
     SB_MICRO_PATH=$PWD python3 setup.py test
   displayName: Run unit tests
-  timeoutInMinutes: 15
+  timeoutInMinutes: 30
 - script: |
     bash <(curl -s https://codecov.io/bash) -cF cuda-unit-test
   displayName: Report coverage results

@@ -49,6 +49,10 @@ jobs:
     steps:
     - name: Checkout
       uses: actions/checkout@v3
+    - name: Install Dependency
+      run: |
+        DEBIAN_FRONTEND=noninteractive apt-get update
+        DEBIAN_FRONTEND=noninteractive apt-get install -y ffmpeg libavcodec-dev libavformat-dev libavutil-dev libswresample-dev sudo
     - name: Initialize CodeQL
       uses: github/codeql-action/init@v2
       with:

@@ -9,9 +9,6 @@ __pycache__/
 *.py[cod]
 *$py.class

-# C extensions
-*.so
-
 # Distribution / packaging
 .Python
 build/

@@ -26,13 +26,18 @@ RUN apt-get update && \
     build-essential \
     curl \
     dmidecode \
+    ffmpeg \
     git \
     iproute2 \
     jq \
     libaio-dev \
+    libavcodec-dev \
+    libavformat-dev \
+    libavutil-dev \
     libcap2 \
     libnuma-dev \
     libpci-dev \
+    libswresample-dev \
     libtinfo5 \
     libtool \
     lshw \

@@ -25,14 +25,19 @@ RUN apt-get update && \
     build-essential \
     curl \
     dmidecode \
+    ffmpeg \
     git \
     iproute2 \
     jq \
     libaio-dev \
+    libavcodec-dev \
+    libavformat-dev \
+    libavutil-dev \
     libboost-program-options-dev \
     libcap2 \
     libnuma-dev \
     libpci-dev \
+    libswresample-dev \
     libtinfo5 \
     libtool \
     lshw \

@@ -0,0 +1,454 @@
// Copyright(c) Microsoft Corporation.
// Licensed under the MIT License.

#include <algorithm>
#include <chrono>
#include <cuda.h>
#include <cudaProfiler.h>
#include <fstream>
#include <iostream>
#include <memory>
#include <numeric>
#include <stdio.h>
#include <string.h>
#include <string>
#include <thread>

#include "../Utils/FFmpegDemuxer.h"
#include "../Utils/NvCodecUtils.h"
#include "OptimizedNvDecoder.h"
#include "ThreadPoolUtils.h"

// Define the logger that the third-party utils need
simplelogger::Logger *logger = simplelogger::LoggerFactory::CreateConsoleLogger();

// Define the codec map
std::map<std::string, cudaVideoCodec_enum> codecMap = {
    {"mpeg1", cudaVideoCodec_MPEG1}, {"mpeg2", cudaVideoCodec_MPEG2}, {"mpeg4", cudaVideoCodec_MPEG4},
    {"vc1", cudaVideoCodec_VC1},     {"h264", cudaVideoCodec_H264},   {"jpeg", cudaVideoCodec_JPEG},
    {"h264_svc", cudaVideoCodec_H264_SVC}, {"h264_mvc", cudaVideoCodec_H264_MVC}, {"hevc", cudaVideoCodec_HEVC},
    {"vp8", cudaVideoCodec_VP8},     {"vp9", cudaVideoCodec_VP9},     {"av1", cudaVideoCodec_AV1}};

/**
 * @brief Function to decode a video file using the OptimizedNvDecoder interface
 * @param pDec - Handle to OptimizedNvDecoder
 * @param szInFilePath - Path of the video file to demux and decode
 * @param pnFrame - Variable to record the number of frames decoded
 * @param ex - Stores current exception in case of failure
 */
void DecProc(OptimizedNvDecoder *pDec, const char *szInFilePath, int *pnFrame, std::exception_ptr &ex) {
    try {
        std::unique_ptr<FFmpegDemuxer> demuxer(new FFmpegDemuxer(szInFilePath));
        int nVideoBytes = 0, nFrameReturned = 0, nFrame = 0;
        uint8_t *pVideo = NULL, *pFrame = NULL;
        do {
            // Demux video from file using FFmpegDemuxer
            demuxer->Demux(&pVideo, &nVideoBytes);
            // Decode the video frame from demuxed packet
            nFrameReturned = pDec->Decode(pVideo, nVideoBytes);
            if (!nFrame && nFrameReturned)
                LOG(INFO) << pDec->GetVideoInfo();
            nFrame += nFrameReturned;
        } while (nVideoBytes);
        *pnFrame = nFrame;
    } catch (std::exception &) {
        ex = std::current_exception();
    }
}

/**
 * @brief Function to show help message and exit
 */
void ShowHelpAndExit(const char *szBadOption = NULL) {
    std::ostringstream oss;
    bool bThrowError = false;
    if (szBadOption) {
        bThrowError = true;
        oss << "Error parsing \"" << szBadOption << "\"" << std::endl;
    }
    oss << "Options:" << std::endl
        << "-i           Input file path. No default value. One of -i and -multi_input is required." << std::endl
        << "-o           Output file path for raw data. No default value. Optional." << std::endl
        << "-gpu         Ordinal of the GPU to use. Default 0. Optional." << std::endl
        << "-thread      Number of decoding threads. Default 5. Optional." << std::endl
        << "-total       Total number of videos to test. Default 100. Optional." << std::endl
        << "-single      (No value) Use a single CUDA context for every thread. Default is multi-context, one context "
           "per thread."
        << std::endl
        << "-host        (No value) Copy frames to host memory. Default is device memory." << std::endl
        << "-multi_input A file that lists the paths of multiple videos, one per line." << std::endl
        << "-codec       The codec of the videos to test. Default H264." << std::endl;
    if (bThrowError) {
        throw std::invalid_argument(oss.str());
    } else {
        std::cout << oss.str();
        exit(0);
    }
}

/**
 * @brief Function to parse command-line arguments
 */
void ParseCommandLine(int argc, char *argv[], char *szInputFileName, int &iGpu, int &nThread, int &nTotalVideo,
                      bool &bSingle, bool &bHost, std::string &inputFilesListPath, std::string &outputFile,
                      cudaVideoCodec &codec) {
    for (int i = 1; i < argc; i++) {
        if (!_stricmp(argv[i], "-h")) {
            ShowHelpAndExit();
        }
        if (!_stricmp(argv[i], "-i")) {
            if (++i == argc) {
                ShowHelpAndExit("-i");
            }
            sprintf(szInputFileName, "%s", argv[i]);
            continue;
        }
        if (!_stricmp(argv[i], "-o")) {
            if (++i == argc) {
                ShowHelpAndExit("-o");
            }
            outputFile = std::string(argv[i]);
            continue;
        }
        if (!_stricmp(argv[i], "-gpu")) {
            if (++i == argc) {
                ShowHelpAndExit("-gpu");
            }
            iGpu = atoi(argv[i]);
            continue;
        }
        if (!_stricmp(argv[i], "-thread")) {
            if (++i == argc) {
                ShowHelpAndExit("-thread");
            }
            nThread = atoi(argv[i]);
            continue;
        }
        if (!_stricmp(argv[i], "-total")) {
            if (++i == argc) {
                ShowHelpAndExit("-total");
            }
            nTotalVideo = atoi(argv[i]);
            continue;
        }
        if (!_stricmp(argv[i], "-multi_input")) {
            if (++i == argc) {
                ShowHelpAndExit("-multi_input");
            }
            inputFilesListPath = std::string(argv[i]);
            continue;
        }
        if (!_stricmp(argv[i], "-single")) {
            bSingle = true;
            continue;
        }
        if (!_stricmp(argv[i], "-host")) {
            bHost = true;
            continue;
        }
        if (!_stricmp(argv[i], "-codec")) {
            if (++i == argc) {
                ShowHelpAndExit("-codec");
            }
            std::string codecName = std::string(argv[i]);
            std::transform(codecName.begin(), codecName.end(), codecName.begin(),
                           [](unsigned char c) { return std::tolower(c); });
            if (codecMap.find(codecName) != codecMap.end()) {
                codec = codecMap[codecName];
            } else {
                std::cout << "Codec name not found in the map." << std::endl;
                exit(1);
            }
            continue;
        }
        ShowHelpAndExit(argv[i]);
    }
}

/**
 * @brief Function to create a CUDA context and initialize the decoder
 */
OptimizedNvDecoder *InitOptimizedNvDecoder(int i, const CUdevice &cuDevice, CUcontext &cuContext, bool bSingle,
                                           bool bHost, cudaVideoCodec codec, CUVIDDECODECAPS decodecaps) {
    if (!bSingle) {
        ck(cuCtxCreate(&cuContext, 0, cuDevice));
    }
    OptimizedNvDecoder *sessionObject = new OptimizedNvDecoder(cuContext, !bHost, codec, decodecaps);
    sessionObject->setDecoderSessionID(i);
    return sessionObject;
}

/**
 * @brief Function to decode a video in a thread and measure the latency
 */
double DecodeVideo(size_t i, const std::vector<OptimizedNvDecoder *> &vDec, const char *szInFilePath, int *pnFrame,
                   std::exception_ptr &ex) {
    try {
        OptimizedNvDecoder *pDec = vDec[i];
        auto start = std::chrono::high_resolution_clock::now();
        DecProc(pDec, szInFilePath, pnFrame, ex);
        auto end = std::chrono::high_resolution_clock::now();
        auto elapsedTime = std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
        std::cout << "Decode finished --"
                  << " duration:" << elapsedTime << " frames:" << *pnFrame << std::endl;
        return elapsedTime / 1000.0f;
    } catch (const std::exception &e) {
        std::cerr << "Exception in decoding: " << e.what() << std::endl;
        return 0;
    }
}

/**
 * @brief Function to read the video paths from a file
 */
std::vector<std::string> ReadMultipleVideoFiles(const std::string &filepath) {
    std::ifstream file(filepath);
    if (!file) {
        std::cerr << "Error opening the file." << std::endl;
        exit(1);
    }
    std::string line;
    std::vector<std::string> tokens;
    while (std::getline(file, line)) {
        tokens.push_back(line);
    }
    file.close();
    return tokens;
}

/**
 * @brief Function to query the default decoder capabilities
 */
void GetDefaultDecoderCaps(CUVIDDECODECAPS &decodecaps, cudaVideoCodec codec) {
    memset(&decodecaps, 0, sizeof(decodecaps));
    decodecaps.eCodecType = codec;
    decodecaps.eChromaFormat = cudaVideoChromaFormat_420;
    decodecaps.nBitDepthMinus8 = 0;
    NVDEC_API_CALL(cuvidGetDecoderCaps(&decodecaps));
}

/**
 * @brief Function to initialize the CUDA device and context, query the decoder capabilities, and create a decoder
 * for each thread
 */
void InitializeContext(std::vector<OptimizedNvDecoder *> &vDec, int iGpu, int nThread, bool bSingle, bool bHost,
                       cudaVideoCodec codec) {
    ck(cuInit(0));
    int nGpu = 0;
    ck(cuDeviceGetCount(&nGpu));
    if (iGpu < 0 || iGpu >= nGpu) {
        std::cout << "GPU ordinal out of range. Should be within [" << 0 << ", " << nGpu - 1 << "]" << std::endl;
        exit(1);
    }
    CUdevice cuDevice = 0;
    ck(cuDeviceGet(&cuDevice, iGpu));
    char szDeviceName[80];
    ck(cuDeviceGetName(szDeviceName, sizeof(szDeviceName), cuDevice));
    std::cout << "GPU in use: " << szDeviceName << std::endl;

    CUcontext cuContext = NULL;
    ck(cuCtxCreate(&cuContext, 0, cuDevice));

    CUVIDDECODECAPS decodecaps;
    GetDefaultDecoderCaps(decodecaps, codec);

    ThreadPool threadPool(nThread);
    std::vector<std::future<OptimizedNvDecoder *>> futures;
    for (int i = 0; i < nThread; i++) {
        futures.push_back(
            threadPool.enqueue(InitOptimizedNvDecoder, cuDevice, cuContext, bSingle, bHost, codec, decodecaps));
    }
    for (auto &future : futures) {
        vDec.push_back(future.get()); // Retrieve the results from each task
    }
}

/**
 * @brief Function to write the latency and FPS data of each video to a file
 */
void WriteRawData(std::vector<OptimizedNvDecoder *> &vDec, int nThread, const std::vector<double> &data,
                  std::vector<int> &frames, std::string filename) {
    // Open the output file stream
    std::ofstream outputFile(filename);
    outputFile << "Frame Latency" << std::endl;
    for (int i = 0; i < nThread; i++) {
        for (const auto &tuple : vDec[i]->GetFrameLatency()) {
            int frame = std::get<0>(tuple);
            double latency = std::get<1>(tuple);
            outputFile << "Frame: " << frame << ", Latency: " << latency << std::endl;
        }
    }
    outputFile << "Video Latency" << std::endl;
    for (int i = 0; i < data.size(); i++) {
        outputFile << data[i] << std::endl;
    }
    outputFile << "Video FPS" << std::endl;
    for (int i = 0; i < data.size(); i++) {
        outputFile << frames[i] / data[i] << std::endl;
    }

    // Close the file stream
    outputFile.close();
}

/**
 * @brief Function to calculate the statistical metrics
 */
std::tuple<double, double, double, double, double, double, double, double>
CalMetrics(const std::vector<double> &originData) {
    std::vector<double> data = originData;
    double sum = std::accumulate(data.begin(), data.end(), 0.0);
    double mean = sum / data.size();
    double min = *std::min_element(data.begin(), data.end());
    double max = *std::max_element(data.begin(), data.end());
    std::sort(data.begin(), data.end());
    double p50 = data[data.size() / 2];
    double p90 = data[static_cast<size_t>(data.size() * 0.9)];
    double p95 = data[static_cast<size_t>(data.size() * 0.95)];
    double p99 = data[static_cast<size_t>(data.size() * 0.99)];
    return std::make_tuple(sum, mean, min, max, p50, p90, p95, p99);
}
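A quick worked example of the percentile indexing above (a hypothetical snippet, not part of the commit; it relies on the includes already in this file):

    // Hypothetical check of CalMetrics' percentile indexing.
    std::vector<double> samples(100);
    std::iota(samples.begin(), samples.end(), 1.0); // sorted samples 1.0 .. 100.0
    double sum, mean, min, max, p50, p90, p95, p99;
    std::tie(sum, mean, min, max, p50, p90, p95, p99) = CalMetrics(samples);
    // sum == 5050, mean == 50.5, min == 1, max == 100;
    // p50 == samples[50] == 51, p90 == samples[90] == 91, p95 == 96, p99 == 100.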

/**
 * @brief Function to generate the total file list for the given total number of videos.
 * If the list has fewer entries than the requested total, the list is repeated.
 * If it has more, the list is truncated.
 */
std::vector<std::string> GenerateTotalFileList(const std::string &inputFilesListPath, int nTotalVideo,
                                               const char *szInFilePath) {
    std::vector<std::string> files;
    if (inputFilesListPath.size() != 0) {
        auto videofiles = ReadMultipleVideoFiles(inputFilesListPath);
        int smallerSize = videofiles.size();

        if (nTotalVideo > smallerSize) {
            int numIterations = nTotalVideo / smallerSize;

            for (int i = 0; i < numIterations; i++) {
                files.insert(files.end(), videofiles.begin(), videofiles.end());
            }

            int remainingElements = nTotalVideo - (numIterations * smallerSize);
            files.insert(files.end(), videofiles.begin(), videofiles.begin() + remainingElements);
        } else {
            files = std::vector<std::string>(videofiles.begin(), videofiles.begin() + nTotalVideo);
        }

        std::cout << "Multifile mode - " << nTotalVideo << " videos will be decoded" << std::endl;
    } else {
        for (int i = 0; i < nTotalVideo; i++) {
            files.push_back(std::string(szInFilePath));
        }
    }
    return files;
}

/**
 * @brief Function to run the decoding tasks in parallel with a thread pool to decode all the videos and record the
 * total latency and the total number of frames
 */
float run(std::vector<OptimizedNvDecoder *> &vDec, int nThread, std::vector<std::string> &files,
          std::vector<int> &vnFrame, std::vector<std::exception_ptr> &vExceptionPtrs, int *nTotalFrames,
          std::vector<double> &vnLatency, std::vector<double> &frLatency, std::vector<double> &vnFPS) {
    std::vector<std::future<double>> decodeLatencyFutures;
    ThreadPool threadPool(nThread);
    // Enqueue the video decoding tasks into the thread pool
    auto start = std::chrono::high_resolution_clock::now();
    for (int i = 0; i < files.size(); i++) {
        auto filePath = files[i].c_str();
        CheckInputFile(filePath);
        decodeLatencyFutures.push_back(
            threadPool.enqueue(DecodeVideo, vDec, filePath, &vnFrame[i], std::ref(vExceptionPtrs[i])));
    }
    // Wait until the decoding tasks finish
    for (int i = 0; i < files.size(); i++) {
        auto decodeLatency = decodeLatencyFutures[i].get();
        vnLatency.push_back(decodeLatency);
        *nTotalFrames += vnFrame[i];
    }
    auto elapsedTime =
        (std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::high_resolution_clock::now() - start)
             .count()) /
        1000.0f;
    for (int i = 0; i < nThread; i++) {
        for (const auto &tuple : vDec[i]->GetFrameLatency()) {
            int frame = std::get<0>(tuple);
            double latency = std::get<1>(tuple);
            if (frame > 0) {
                frLatency.push_back(latency / frame);
            }
        }
    }
    for (int i = 0; i < vnLatency.size(); i++) {
        if (vnLatency[i] != 0) {
            vnFPS.push_back(vnFrame[i] / vnLatency[i]);
        }
    }

    // Record the total time
    return elapsedTime;
}

int main(int argc, char **argv) {
    char szInFilePath[256] = "";
    int iGpu = 0;
    int nThread = 5;
    int nTotalVideo = 100;
    bool bSingle = false;
    bool bHost = false;
    std::string inputFilesListPath = "";
    std::string outputFilePath = "";
    std::vector<std::exception_ptr> vExceptionPtrs;
    cudaVideoCodec codec = cudaVideoCodec_H264;
    try {
        // Parse the command line arguments
        ParseCommandLine(argc, argv, szInFilePath, iGpu, nThread, nTotalVideo, bSingle, bHost, inputFilesListPath,
                         outputFilePath, codec);
        // Size the per-video exception slots after -total is known (sizing before parsing would overflow
        // whenever -total exceeds the default of 100)
        vExceptionPtrs.resize(nTotalVideo);
        auto files = GenerateTotalFileList(inputFilesListPath, nTotalVideo, szInFilePath);

        // Initialize and prepare the decoder context for each thread
        std::vector<OptimizedNvDecoder *> vDec;
        InitializeContext(vDec, iGpu, nThread, bSingle, bHost, codec);

        // Decode all videos with the thread pool
        std::vector<int> vnFrame(nTotalVideo);
        int nTotalFrames = 0;
        std::vector<double> vnLatency;
        std::vector<double> frLatency;
        std::vector<double> videoFPS;
        auto elapsedTime =
            run(vDec, nThread, files, vnFrame, vExceptionPtrs, &nTotalFrames, vnLatency, frLatency, videoFPS);

        // Calculate the metrics, writing raw data to a file and metrics to stdout
        double sum, mean, min, max, p50, p90, p95, p99;
        std::tie(sum, mean, min, max, p50, p90, p95, p99) = CalMetrics(vnLatency);
        std::cout << "Total Frames Decoded=" << nTotalFrames << " FPS=" << nTotalFrames / elapsedTime << std::endl;
        std::cout << "Mean Latency for each video=" << mean * 1000 << " P50 Latency=" << p50 * 1000
                  << " P90 Latency=" << p90 * 1000 << " P95 Latency=" << p95 * 1000 << " P99 Latency=" << p99 * 1000
                  << "ms" << std::endl;

        std::tie(sum, mean, min, max, p50, p90, p95, p99) = CalMetrics(videoFPS);
        std::cout << "Mean FPS for each video=" << mean << " P50 FPS=" << p50 << " P90 FPS=" << p90
                  << " P95 FPS=" << p95 << " P99 FPS=" << p99 << std::endl;
        std::tie(sum, mean, min, max, p50, p90, p95, p99) = CalMetrics(frLatency);
        std::cout << "Mean Latency for each frame=" << mean * 1000 << " P50 Latency=" << p50 * 1000
                  << " P90 Latency=" << p90 * 1000 << " P95 Latency=" << p95 * 1000 << " P99 Latency=" << p99 * 1000
                  << "ms" << std::endl;
        if (outputFilePath.size() != 0) {
            WriteRawData(vDec, nThread, vnLatency, vnFrame, outputFilePath);
        }
        // Deinitialization
        for (int i = 0; i < nThread; i++) {
            delete (vDec[i]);
        }
        for (size_t i = 0; i < vExceptionPtrs.size(); i++) {
            if (vExceptionPtrs[i]) {
                std::rethrow_exception(vExceptionPtrs[i]);
            }
        }
    } catch (const std::exception &ex) {
        std::cout << ex.what();
        exit(1);
    }
    return 0;
}

@@ -0,0 +1,117 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

cmake_minimum_required(VERSION 3.18)
project(cuda_decode_performance)

find_package(CUDA QUIET)
if(CUDA_FOUND)
    set(CMAKE_CXX_STANDARD 17)
    set(CMAKE_CXX_STANDARD_REQUIRED ON)

    set(THIRD_PARTY_SAMPLE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../../../../third_party/Video_Codec_SDK/Samples)
    set(NVCODEC_PUBLIC_INTERFACE_DIR ${THIRD_PARTY_SAMPLE_DIR}/../Interface)
    set(NVCODEC_UTILS_DIR ${THIRD_PARTY_SAMPLE_DIR}/Utils)
    set(NV_CODEC_DIR ${THIRD_PARTY_SAMPLE_DIR}/NvCodec)
    set(NV_DEC_DIR ${THIRD_PARTY_SAMPLE_DIR}/NvCodec/NvDecoder)

    if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
        find_package(PkgConfig REQUIRED)
        pkg_check_modules(PC_AVCODEC REQUIRED IMPORTED_TARGET libavcodec)
        pkg_check_modules(PC_AVFORMAT REQUIRED IMPORTED_TARGET libavformat)
        pkg_check_modules(PC_AVUTIL REQUIRED IMPORTED_TARGET libavutil)
        pkg_check_modules(PC_SWRESAMPLE REQUIRED IMPORTED_TARGET libswresample)

        set(NV_FFMPEG_HDRS ${PC_AVCODEC_INCLUDE_DIRS})
        find_library(AVCODEC_LIBRARY NAMES avcodec
            HINTS
            ${PC_AVCODEC_LIBDIR}
            ${PC_AVCODEC_LIBRARY_DIRS}
        )
        find_library(AVFORMAT_LIBRARY NAMES avformat
            HINTS
            ${PC_AVFORMAT_LIBDIR}
            ${PC_AVFORMAT_LIBRARY_DIRS}
        )
        find_library(AVUTIL_LIBRARY NAMES avutil
            HINTS
            ${PC_AVUTIL_LIBDIR}
            ${PC_AVUTIL_LIBRARY_DIRS}
        )
        find_library(SWRESAMPLE_LIBRARY NAMES swresample
            HINTS
            ${PC_SWRESAMPLE_LIBDIR}
            ${PC_SWRESAMPLE_LIBRARY_DIRS}
        )
        set(AVCODEC_LIB ${AVCODEC_LIBRARY})
        set(AVFORMAT_LIB ${AVFORMAT_LIBRARY})
        set(AVUTIL_LIB ${AVUTIL_LIBRARY})
        set(SWRESAMPLE_LIB ${SWRESAMPLE_LIBRARY})
    endif()

    set(APP_SOURCES
        ${CMAKE_CURRENT_SOURCE_DIR}/AppDecPerf.cpp
    )

    set(NV_DEC_SOURCES
        ${NV_DEC_DIR}/NvDecoder.cpp
        ${CMAKE_CURRENT_SOURCE_DIR}/OptimizedNvDecoder.cpp
    )

    set(NV_DEC_HDRS
        ${NV_DEC_DIR}/NvDecoder.h
        ${NVCODEC_PUBLIC_INTERFACE_DIR}/cuviddec.h
        ${NVCODEC_PUBLIC_INTERFACE_DIR}/nvcuvid.h
        ${NVCODEC_UTILS_DIR}/NvCodecUtils.h
        ${NVCODEC_UTILS_DIR}/FFmpegDemuxer.h
        ${CMAKE_CURRENT_SOURCE_DIR}/ThreadPoolUtils.h
        ${CMAKE_CURRENT_SOURCE_DIR}/OptimizedNvDecoder.h
    )

    source_group("headers" FILES ${NV_DEC_HDRS})
    source_group("sources" FILES ${APP_SOURCES} ${NV_DEC_SOURCES})

    set(CMAKE_LIBRARY_PATH "${CUDA_TOOLKIT_ROOT_DIR}/lib64/stubs;${CUDA_TOOLKIT_ROOT_DIR}/lib/stubs;${CUDA_TOOLKIT_ROOT_DIR}/lib64;${CUDA_TOOLKIT_ROOT_DIR}/lib;${CMAKE_LIBRARY_PATH}")
    find_package(CUDA)
    set(CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER})
    set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-gencode arch=compute_50,code=\"sm_50,compute_50\")
    if(CMAKE_COMPILER_IS_GNUCC)
        if(NOT "${CUDA_NVCC_FLAGS}" MATCHES "-std=c\\+\\+11")
            list(APPEND CUDA_NVCC_FLAGS -std=c++11)
        endif()
    endif()

    # Check if the file exists
    if(NOT EXISTS "/usr/local/lib/libnvcuvid.so")
        execute_process(
            COMMAND sudo ln -s /usr/lib/x86_64-linux-gnu/libnvcuvid.so.1 /usr/local/lib/libnvcuvid.so
            RESULT_VARIABLE result
        )
        if(result)
            message(FATAL_ERROR "Failed to create symbolic link for nvcuvid lib: ${result}")
        endif()
    endif()

    find_library(CUVID_LIB nvcuvid
        HINTS
        "/usr/local/lib/"
        "${CMAKE_CURRENT_SOURCE_DIR}/../../../../third_party/Video_Codec_SDK/Lib/linux/stubs/x86_64/"
    )

    cuda_add_executable(${PROJECT_NAME} ${APP_SOURCES} ${NV_DEC_SOURCES} ${NV_DEC_HDRS})

    set_target_properties(${PROJECT_NAME} PROPERTIES CUDA_SEPARABLE_COMPILATION ON)

    target_include_directories(${PROJECT_NAME} PUBLIC ${CUDA_INCLUDE_DIRS}
        ${NVCODEC_PUBLIC_INTERFACE_DIR}
        ${NVCODEC_UTILS_DIR}
        ${NV_CODEC_DIR}
        ${NV_APPDEC_COMMON_DIR}
        ${NV_FFMPEG_HDRS}
        ${THIRD_PARTY_SAMPLE_DIR}
    )

    target_link_libraries(${PROJECT_NAME} ${CUDA_CUDA_LIBRARY} ${CMAKE_DL_LIBS} ${CUVID_LIB} ${AVCODEC_LIB}
                          ${AVFORMAT_LIB} ${AVUTIL_LIB} ${SWRESAMPLE_LIB})

    install(TARGETS ${PROJECT_NAME} RUNTIME DESTINATION bin LIBRARY DESTINATION lib)
endif()

@@ -0,0 +1,263 @@
// Copyright(c) Microsoft Corporation.
// Licensed under the MIT License.

#include <cmath>

#include "OptimizedNvDecoder.h"

int OptimizedNvDecoder::Decode(const uint8_t *pData, int nSize, int nFlags, int64_t nTimestamp) {
    m_nDecodedFrame = 0;
    m_nDecodedFrameReturned = 0;
    CUVIDSOURCEDATAPACKET packet = {0};
    packet.payload = pData;
    packet.payload_size = nSize;
    packet.flags = nFlags | CUVID_PKT_TIMESTAMP;
    packet.timestamp = nTimestamp;
    if (!pData || nSize == 0) {
        packet.flags |= CUVID_PKT_ENDOFSTREAM;
    }
    auto start = std::chrono::high_resolution_clock::now();
    NVDEC_API_CALL(cuvidParseVideoData(m_hParser, &packet));
    int64_t elapsedTime =
        std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::high_resolution_clock::now() - start)
            .count();
    frameLatency.push_back(std::make_tuple(m_nDecodedFrame, elapsedTime / 1000.0f / 1000.0f));
    return m_nDecodedFrame;
}

OptimizedNvDecoder::OptimizedNvDecoder(CUcontext &cuContext, bool bUseDeviceFrame, cudaVideoCodec eCodec,
                                       CUVIDDECODECAPS decodecaps, bool bLowLatency, bool bDeviceFramePitched,
                                       const Rect *pCropRect, const Dim *pResizeDim, bool extract_user_SEI_Message,
                                       int maxWidth, int maxHeight, unsigned int clkRate, bool force_zero_latency) {
    m_cuContext = cuContext;
    m_bUseDeviceFrame = bUseDeviceFrame;
    m_eCodec = eCodec;
    m_bDeviceFramePitched = bDeviceFramePitched;
    m_bExtractSEIMessage = extract_user_SEI_Message;
    m_nMaxWidth = maxWidth;
    m_nMaxHeight = maxHeight;
    m_bForce_zero_latency = force_zero_latency;
    if (pCropRect)
        m_cropRect = *pCropRect;
    if (pResizeDim)
        m_resizeDim = *pResizeDim;

    CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext));
    NVDEC_API_CALL(cuvidCtxLockCreate(&m_ctxLock, cuContext));

    ck(cuStreamCreate(&m_cuvidStream, CU_STREAM_DEFAULT));

    decoderSessionID = 0;

    if (m_bExtractSEIMessage) {
        m_fpSEI = fopen("sei_message.txt", "wb");
        m_pCurrSEIMessage = new CUVIDSEIMESSAGEINFO;
        memset(&m_SEIMessagesDisplayOrder, 0, sizeof(m_SEIMessagesDisplayOrder));
    }
    CUVIDPARSERPARAMS videoParserParameters = {};
    videoParserParameters.CodecType = eCodec;
    videoParserParameters.ulMaxNumDecodeSurfaces = 1;
    videoParserParameters.ulClockRate = clkRate;
    videoParserParameters.ulMaxDisplayDelay = bLowLatency ? 0 : 1;
    videoParserParameters.pUserData = this;
    videoParserParameters.pfnSequenceCallback = HandleVideoSequenceProc;
    videoParserParameters.pfnDecodePicture = HandlePictureDecodeProc;
    videoParserParameters.pfnDisplayPicture = m_bForce_zero_latency ? NULL : HandlePictureDisplayProc;
    videoParserParameters.pfnGetOperatingPoint = HandleOperatingPointProc;
    videoParserParameters.pfnGetSEIMsg = m_bExtractSEIMessage ? HandleSEIMessagesProc : NULL;
    NVDEC_API_CALL(cuvidCreateVideoParser(&m_hParser, &videoParserParameters));
    // Reuse the decoder caps queried before
    m_decodecaps = decodecaps;
    CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
}

int OptimizedNvDecoder::HandleVideoSequence(CUVIDEOFORMAT *pVideoFormat) {
    START_TIMER
    m_videoInfo.str("");
    m_videoInfo.clear();
    m_videoInfo << "Video Input Information" << std::endl
                << "\tCodec : " << GetVideoCodecString(pVideoFormat->codec) << std::endl
                << "\tFrame rate : " << pVideoFormat->frame_rate.numerator << "/"
                << pVideoFormat->frame_rate.denominator << " = "
                << 1.0 * pVideoFormat->frame_rate.numerator / pVideoFormat->frame_rate.denominator << " fps"
                << std::endl
                << "\tSequence : " << (pVideoFormat->progressive_sequence ? "Progressive" : "Interlaced")
                << std::endl
                << "\tCoded size : [" << pVideoFormat->coded_width << ", " << pVideoFormat->coded_height << "]"
                << std::endl
                << "\tDisplay area : [" << pVideoFormat->display_area.left << ", " << pVideoFormat->display_area.top
                << ", " << pVideoFormat->display_area.right << ", " << pVideoFormat->display_area.bottom << "]"
                << std::endl
                << "\tChroma : " << GetVideoChromaFormatString(pVideoFormat->chroma_format) << std::endl
                << "\tBit depth : " << pVideoFormat->bit_depth_luma_minus8 + 8;
    m_videoInfo << std::endl;

    int nDecodeSurface = pVideoFormat->min_num_decode_surfaces;

    // Re-query the decoder caps with cuvidGetDecoderCaps only when the video codec or format changes
    if (m_decodecaps.eCodecType != pVideoFormat->codec || m_decodecaps.eChromaFormat != pVideoFormat->chroma_format ||
        m_decodecaps.nBitDepthMinus8 != pVideoFormat->bit_depth_luma_minus8) {
        m_decodecaps.eCodecType = pVideoFormat->codec;
        m_decodecaps.eChromaFormat = pVideoFormat->chroma_format;
        m_decodecaps.nBitDepthMinus8 = pVideoFormat->bit_depth_luma_minus8;

        CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext));
        NVDEC_API_CALL(cuvidGetDecoderCaps(&m_decodecaps));
        CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
    }

    if (!m_decodecaps.bIsSupported) {
        NVDEC_THROW_ERROR("Codec not supported on this GPU", CUDA_ERROR_NOT_SUPPORTED);
        return nDecodeSurface;
    }

    if ((pVideoFormat->coded_width > m_decodecaps.nMaxWidth) ||
        (pVideoFormat->coded_height > m_decodecaps.nMaxHeight)) {

        std::ostringstream errorString;
        errorString << std::endl
                    << "Resolution : " << pVideoFormat->coded_width << "x" << pVideoFormat->coded_height
                    << std::endl
                    << "Max Supported (wxh) : " << m_decodecaps.nMaxWidth << "x" << m_decodecaps.nMaxHeight << std::endl
                    << "Resolution not supported on this GPU";

        const std::string cErr = errorString.str();
        NVDEC_THROW_ERROR(cErr, CUDA_ERROR_NOT_SUPPORTED);
        return nDecodeSurface;
    }
    if ((pVideoFormat->coded_width >> 4) * (pVideoFormat->coded_height >> 4) > m_decodecaps.nMaxMBCount) {

        std::ostringstream errorString;
        errorString << std::endl
                    << "MBCount : " << (pVideoFormat->coded_width >> 4) * (pVideoFormat->coded_height >> 4)
                    << std::endl
                    << "Max Supported mbcnt : " << m_decodecaps.nMaxMBCount << std::endl
                    << "MBCount not supported on this GPU";
        NVDEC_THROW_ERROR(errorString.str(), CUDA_ERROR_NOT_SUPPORTED);
        return nDecodeSurface;
    }

    if (m_nWidth && m_nLumaHeight && m_nChromaHeight) {

        // cuvidCreateDecoder() has been called before, and now there's a possible config change
        return ReconfigureDecoder(pVideoFormat);
    }

    // eCodec has been set in the constructor (for parser). Here it's set again for potential correction
    m_eCodec = pVideoFormat->codec;
    m_eChromaFormat = pVideoFormat->chroma_format;
    m_nBitDepthMinus8 = pVideoFormat->bit_depth_luma_minus8;
    m_nBPP = m_nBitDepthMinus8 > 0 ? 2 : 1;

    // Set the output surface format to match the chroma format
    if (m_eChromaFormat == cudaVideoChromaFormat_420 || m_eChromaFormat == cudaVideoChromaFormat_Monochrome)
        m_eOutputFormat =
            pVideoFormat->bit_depth_luma_minus8 ? cudaVideoSurfaceFormat_P016 : cudaVideoSurfaceFormat_NV12;
    else if (m_eChromaFormat == cudaVideoChromaFormat_444)
        m_eOutputFormat =
            pVideoFormat->bit_depth_luma_minus8 ? cudaVideoSurfaceFormat_YUV444_16Bit : cudaVideoSurfaceFormat_YUV444;
    else if (m_eChromaFormat == cudaVideoChromaFormat_422)
        m_eOutputFormat = cudaVideoSurfaceFormat_NV12; // no 4:2:2 output format supported yet so make 420 default

    // Check if the output format is supported. If not, check fallback options
    if (!(m_decodecaps.nOutputFormatMask & (1 << m_eOutputFormat))) {
        if (m_decodecaps.nOutputFormatMask & (1 << cudaVideoSurfaceFormat_NV12))
            m_eOutputFormat = cudaVideoSurfaceFormat_NV12;
        else if (m_decodecaps.nOutputFormatMask & (1 << cudaVideoSurfaceFormat_P016))
            m_eOutputFormat = cudaVideoSurfaceFormat_P016;
        else if (m_decodecaps.nOutputFormatMask & (1 << cudaVideoSurfaceFormat_YUV444))
            m_eOutputFormat = cudaVideoSurfaceFormat_YUV444;
        else if (m_decodecaps.nOutputFormatMask & (1 << cudaVideoSurfaceFormat_YUV444_16Bit))
            m_eOutputFormat = cudaVideoSurfaceFormat_YUV444_16Bit;
        else
            NVDEC_THROW_ERROR("No supported output format found", CUDA_ERROR_NOT_SUPPORTED);
    }
    m_videoFormat = *pVideoFormat;

    CUVIDDECODECREATEINFO videoDecodeCreateInfo = {0};
    videoDecodeCreateInfo.CodecType = pVideoFormat->codec;
    videoDecodeCreateInfo.ChromaFormat = pVideoFormat->chroma_format;
    videoDecodeCreateInfo.OutputFormat = m_eOutputFormat;
    videoDecodeCreateInfo.bitDepthMinus8 = pVideoFormat->bit_depth_luma_minus8;
    if (pVideoFormat->progressive_sequence)
        videoDecodeCreateInfo.DeinterlaceMode = cudaVideoDeinterlaceMode_Weave;
    else
        videoDecodeCreateInfo.DeinterlaceMode = cudaVideoDeinterlaceMode_Adaptive;
    videoDecodeCreateInfo.ulNumOutputSurfaces = 2;
    // With PreferCUVID, JPEG is still decoded by CUDA while video is decoded by NVDEC hardware
    videoDecodeCreateInfo.ulCreationFlags = cudaVideoCreate_PreferCUVID;
    videoDecodeCreateInfo.ulNumDecodeSurfaces = nDecodeSurface;
    videoDecodeCreateInfo.vidLock = m_ctxLock;
    videoDecodeCreateInfo.ulWidth = pVideoFormat->coded_width;
    videoDecodeCreateInfo.ulHeight = pVideoFormat->coded_height;
    // AV1 has max width/height of sequence in sequence header
    if (pVideoFormat->codec == cudaVideoCodec_AV1 && pVideoFormat->seqhdr_data_length > 0) {
        CUVIDEOFORMATEX *vidFormatEx = (CUVIDEOFORMATEX *)pVideoFormat;
        if (m_nMaxWidth < pVideoFormat->coded_width) {
            m_nMaxWidth = vidFormatEx->av1.max_width;
        }
        if (m_nMaxHeight < pVideoFormat->coded_height) {
            m_nMaxHeight = vidFormatEx->av1.max_height;
        }
    }
    if (m_nMaxWidth < (int)pVideoFormat->coded_width)
        m_nMaxWidth = pVideoFormat->coded_width;
    if (m_nMaxHeight < (int)pVideoFormat->coded_height)
        m_nMaxHeight = pVideoFormat->coded_height;
    videoDecodeCreateInfo.ulMaxWidth = m_nMaxWidth;
    videoDecodeCreateInfo.ulMaxHeight = m_nMaxHeight;

    if (!(m_cropRect.r && m_cropRect.b) && !(m_resizeDim.w && m_resizeDim.h)) {
        m_nWidth = pVideoFormat->display_area.right - pVideoFormat->display_area.left;
        m_nLumaHeight = pVideoFormat->display_area.bottom - pVideoFormat->display_area.top;
        videoDecodeCreateInfo.ulTargetWidth = pVideoFormat->coded_width;
        videoDecodeCreateInfo.ulTargetHeight = pVideoFormat->coded_height;
    } else {
        if (m_resizeDim.w && m_resizeDim.h) {
            videoDecodeCreateInfo.display_area.left = pVideoFormat->display_area.left;
            videoDecodeCreateInfo.display_area.top = pVideoFormat->display_area.top;
            videoDecodeCreateInfo.display_area.right = pVideoFormat->display_area.right;
            videoDecodeCreateInfo.display_area.bottom = pVideoFormat->display_area.bottom;
            m_nWidth = m_resizeDim.w;
            m_nLumaHeight = m_resizeDim.h;
        }

        if (m_cropRect.r && m_cropRect.b) {
            videoDecodeCreateInfo.display_area.left = m_cropRect.l;
            videoDecodeCreateInfo.display_area.top = m_cropRect.t;
            videoDecodeCreateInfo.display_area.right = m_cropRect.r;
            videoDecodeCreateInfo.display_area.bottom = m_cropRect.b;
            m_nWidth = m_cropRect.r - m_cropRect.l;
            m_nLumaHeight = m_cropRect.b - m_cropRect.t;
        }
        videoDecodeCreateInfo.ulTargetWidth = m_nWidth;
        videoDecodeCreateInfo.ulTargetHeight = m_nLumaHeight;
    }

    m_nChromaHeight = (int)(ceil(m_nLumaHeight * GetChromaHeightFactor(m_eOutputFormat)));
    m_nNumChromaPlanes = GetChromaPlaneCount(m_eOutputFormat);
    m_nSurfaceHeight = videoDecodeCreateInfo.ulTargetHeight;
    m_nSurfaceWidth = videoDecodeCreateInfo.ulTargetWidth;
    m_displayRect.b = videoDecodeCreateInfo.display_area.bottom;
    m_displayRect.t = videoDecodeCreateInfo.display_area.top;
    m_displayRect.l = videoDecodeCreateInfo.display_area.left;
    m_displayRect.r = videoDecodeCreateInfo.display_area.right;

    m_videoInfo << "Video Decoding Params:" << std::endl
                << "\tNum Surfaces : " << videoDecodeCreateInfo.ulNumDecodeSurfaces << std::endl
                << "\tCrop : [" << videoDecodeCreateInfo.display_area.left << ", "
                << videoDecodeCreateInfo.display_area.top << ", " << videoDecodeCreateInfo.display_area.right << ", "
                << videoDecodeCreateInfo.display_area.bottom << "]" << std::endl
                << "\tResize : " << videoDecodeCreateInfo.ulTargetWidth << "x"
                << videoDecodeCreateInfo.ulTargetHeight << std::endl
                << "\tDeinterlace : "
                << std::vector<const char *>{"Weave", "Bob", "Adaptive"}[videoDecodeCreateInfo.DeinterlaceMode];
    m_videoInfo << std::endl;

    CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext));
    NVDEC_API_CALL(cuvidCreateDecoder(&m_hDecoder, &videoDecodeCreateInfo));
    CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
    STOP_TIMER("Session Initialization Time: ");
    NvDecoder::addDecoderSessionOverHead(getDecoderSessionID(), elapsedTime);
    return nDecodeSurface;
}

@@ -0,0 +1,52 @@
// Copyright(c) Microsoft Corporation.
// Licensed under the MIT License.

#include "NvDecoder/NvDecoder.h"

// This class is derived from the NvDecoder class and is used to avoid repeated cuvidGetDecoderCaps overhead
class OptimizedNvDecoder : public NvDecoder {

  public:
    OptimizedNvDecoder() {}
    /**
     * @brief This function is used to initialize the decoder session.
     * The application must call this function to initialize the decoder before
     * starting to decode any frames.
     * The only difference from the original function is the new member m_decodecaps.
     * The rest is the same as the original function; refer to NvDecoder.cpp in the NVIDIA Video Codec SDK.
     */
    OptimizedNvDecoder(CUcontext &cuContext, bool bUseDeviceFrame, cudaVideoCodec eCodec, CUVIDDECODECAPS decodecaps,
                       bool bLowLatency = false, bool bDeviceFramePitched = false, const Rect *pCropRect = NULL,
                       const Dim *pResizeDim = NULL, bool extract_user_SEI_Message = false, int maxWidth = 0,
                       int maxHeight = 0, unsigned int clkRate = 1000, bool force_zero_latency = false);

    /**
     * @brief This function overrides the original Decode function to record per-frame latency.
     */
    int Decode(const uint8_t *pData, int nSize, int nFlags = 0, int64_t nTimestamp = 0);
    /**
     * @brief This function is used to get the frameLatency vector.
     */
    std::vector<std::tuple<int, double>> &GetFrameLatency() { return frameLatency; }

  protected:
    /**
     * @brief Callback function to be registered for getting a callback when decoding of a sequence starts
     */
    static int CUDAAPI HandleVideoSequenceProc(void *pUserData, CUVIDEOFORMAT *pVideoFormat) {
        if (pUserData == nullptr) {
            throw std::runtime_error("pUserData is nullptr");
        }
        return ((OptimizedNvDecoder *)pUserData)->HandleVideoSequence(pVideoFormat);
    }
    /**
     * @brief Define the new handler for when decoding of a sequence starts.
     * The only change is to re-query the decoder caps when the video codec or format changes.
     * The rest is the same as the original function; refer to NvDecoder.cpp in the NVIDIA Video Codec SDK.
     */
    int HandleVideoSequence(CUVIDEOFORMAT *pVideoFormat);

    CUVIDDECODECAPS m_decodecaps;

    std::vector<std::tuple<int, double>> frameLatency;
};
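For context, inferred from InitializeContext in AppDecPerf.cpp above: the capability struct is queried once per process via GetDefaultDecoderCaps and handed to every OptimizedNvDecoder, so each decoder session skips its own cuvidGetDecoderCaps call unless HandleVideoSequence observes a change in codec, chroma format, or bit depth.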
@@ -0,0 +1,99 @@
// Copyright(c) Microsoft Corporation.
// Licensed under the MIT License.

#include <condition_variable>
#include <functional>
#include <future>
#include <mutex>
#include <queue>
#include <thread>
#include <vector>

// ThreadPool is a simple thread pool implementation that supports enqueueing a task that receives the index of the
// thread it runs on plus custom arguments, i.e. task(thread_index, *args).
class ThreadPool {
  public:
    /**
     * @brief Construct a new ThreadPool object with the given number of threads.
     */
    ThreadPool(size_t numThreads) {
        for (size_t i = 0; i < numThreads; ++i) {
            threads.emplace_back(&ThreadPool::worker, this, i);
        }
    }
    /**
     * @brief Destroy the ThreadPool object and join all threads.
     */
    ~ThreadPool() {
        {
            std::unique_lock<std::mutex> lock(mutex);
            stop = true;
        }
        cv.notify_all();

        for (auto &thread : threads) {
            thread.join();
        }
    }
    /**
     * @brief TaskWrapper wraps a task so that it can be invoked with the index of the thread to use and custom
     * arguments, like task(thread_index, *args).
     */
    template <typename R, typename F, typename... Args> struct TaskWrapper {
        std::shared_ptr<std::packaged_task<R(size_t)>> task;

        template <typename Callable, typename... CallableArgs> TaskWrapper(Callable &&f, CallableArgs &&...args) {
            task = std::make_shared<std::packaged_task<R(size_t)>>(
                [f, args...](size_t threadIdx) mutable { return f(threadIdx, args...); });
        }

        void operator()(size_t threadIdx) { (*task)(threadIdx); }
    };
    /**
     * @brief Enqueue a task with custom arguments and return a future that yields the task's result when it finishes.
     */
    template <typename F, typename... Args>
    auto enqueue(F &&f, Args &&...args) -> std::future<typename std::result_of<F(size_t, Args...)>::type> {
        using ReturnType = typename std::result_of<F(size_t, Args...)>::type;

        TaskWrapper<ReturnType, F, Args...> wrapper(std::forward<F>(f), std::forward<Args>(args)...);
        std::future<ReturnType> res = wrapper.task->get_future();

        {
            std::unique_lock<std::mutex> lock(mutex);
            tasks.emplace(std::move(wrapper));
        }
        cv.notify_one();

        return res;
    }

  private:
    /**
     * @brief The worker function that dequeues tasks and executes them, passing along its thread index.
     */
    void worker(size_t threadIdx) {
        while (true) {
            std::function<void(size_t)> task;
            {
                std::unique_lock<std::mutex> lock(mutex);
                cv.wait(lock, [this] { return stop || !tasks.empty(); });

                if (stop && tasks.empty()) {
                    return;
                }

                task = tasks.front();
                tasks.pop();
            }

            task(threadIdx);
        }
    }

    std::vector<std::thread> threads;
    std::queue<std::function<void(size_t)>> tasks;
    std::mutex mutex;
    std::condition_variable cv;
    bool stop = false;
};
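A minimal usage sketch of this pool (hypothetical, not part of the commit): the enqueued callable receives the index of the worker thread that runs it as its first argument, which is how AppDecPerf.cpp binds each task to a per-thread decoder (vDec[threadIdx]).

    // Hypothetical example; assumes <iostream> is also available.
    ThreadPool pool(4);
    std::vector<std::future<int>> results;
    for (int v = 0; v < 8; v++) {
        // The size_t parameter is supplied by the pool's worker loop, not the caller.
        results.push_back(pool.enqueue(
            [](size_t threadIdx, int value) { return static_cast<int>(threadIdx) * 100 + value; }, v));
    }
    for (auto &r : results)
        std::cout << r.get() << std::endl; // each future yields its task's result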
(The diff for one file is not shown because of its large size.)
@ -0,0 +1,486 @@
|
|||
/*
|
||||
* This copyright notice applies to this header file only:
|
||||
*
|
||||
* Copyright (c) 2010-2023 NVIDIA Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the software, and to permit persons to whom the
|
||||
* software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/********************************************************************************************************************/
|
||||
//! \file nvcuvid.h
|
||||
//! NVDECODE API provides video decoding interface to NVIDIA GPU devices.
|
||||
//! \date 2015-2022
|
||||
//! This file contains the interface constants, structure definitions and function prototypes.
|
||||
/********************************************************************************************************************/
|
||||
|
||||
#if !defined(__NVCUVID_H__)
|
||||
#define __NVCUVID_H__
|
||||
|
||||
#include "cuviddec.h"
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif /* __cplusplus */
|
||||
|
||||
#define MAX_CLOCK_TS 3
|
||||
|
||||
/***********************************************/
|
||||
//!
|
||||
//! High-level helper APIs for video sources
|
||||
//!
|
||||
/***********************************************/
|
||||
|
||||
typedef void *CUvideosource;
|
||||
typedef void *CUvideoparser;
|
||||
typedef long long CUvideotimestamp;
|
||||
|
||||
/************************************************************************/
|
||||
//! \enum cudaVideoState
|
||||
//! Video source state enums
|
||||
//! Used in cuvidSetVideoSourceState and cuvidGetVideoSourceState APIs
|
||||
/************************************************************************/
|
||||
typedef enum {
|
||||
cudaVideoState_Error = -1, /**< Error state (invalid source) */
|
||||
cudaVideoState_Stopped = 0, /**< Source is stopped (or reached end-of-stream) */
|
||||
cudaVideoState_Started = 1 /**< Source is running and delivering data */
|
||||
} cudaVideoState;
|
||||
|
||||
/************************************************************************/
|
||||
//! \enum cudaAudioCodec
|
||||
//! Audio compression enums
|
||||
//! Used in CUAUDIOFORMAT structure
|
||||
/************************************************************************/
|
||||
typedef enum {
|
||||
cudaAudioCodec_MPEG1 = 0, /**< MPEG-1 Audio */
|
||||
cudaAudioCodec_MPEG2, /**< MPEG-2 Audio */
|
||||
cudaAudioCodec_MP3, /**< MPEG-1 Layer III Audio */
|
||||
cudaAudioCodec_AC3, /**< Dolby Digital (AC3) Audio */
|
||||
cudaAudioCodec_LPCM, /**< PCM Audio */
|
||||
cudaAudioCodec_AAC, /**< AAC Audio */
|
||||
} cudaAudioCodec;
|
||||
|
||||
/************************************************************************/
|
||||
//! \ingroup STRUCTS
|
||||
//! \struct HEVCTIMECODESET
|
||||
//! Used to store Time code extracted from Time code SEI in HEVC codec
|
||||
/************************************************************************/
|
||||
typedef struct _HEVCTIMECODESET {
|
||||
unsigned int time_offset_value;
|
||||
unsigned short n_frames;
|
||||
unsigned char clock_timestamp_flag;
|
||||
unsigned char units_field_based_flag;
|
||||
unsigned char counting_type;
|
||||
unsigned char full_timestamp_flag;
|
||||
unsigned char discontinuity_flag;
|
||||
unsigned char cnt_dropped_flag;
|
||||
unsigned char seconds_value;
|
||||
unsigned char minutes_value;
|
||||
unsigned char hours_value;
|
||||
unsigned char seconds_flag;
|
||||
unsigned char minutes_flag;
|
||||
unsigned char hours_flag;
|
||||
unsigned char time_offset_length;
|
||||
unsigned char reserved;
|
||||
} HEVCTIMECODESET;
|
||||
|
||||
/************************************************************************/
|
||||
//! \ingroup STRUCTS
|
||||
//! \struct HEVCSEITIMECODE
|
||||
//! Used to extract Time code SEI in HEVC codec
|
||||
/************************************************************************/
|
||||
typedef struct _HEVCSEITIMECODE {
|
||||
HEVCTIMECODESET time_code_set[MAX_CLOCK_TS];
|
||||
unsigned char num_clock_ts;
|
||||
} HEVCSEITIMECODE;
|
||||
|
||||
/**********************************************************************************/
|
||||
//! \ingroup STRUCTS
|
||||
//! \struct CUSEIMESSAGE;
|
||||
//! Used in CUVIDSEIMESSAGEINFO structure
|
||||
/**********************************************************************************/
|
||||
typedef struct _CUSEIMESSAGE {
|
||||
unsigned char sei_message_type; /**< OUT: SEI Message Type */
|
||||
unsigned char reserved[3];
|
||||
unsigned int sei_message_size; /**< OUT: SEI Message Size */
|
||||
} CUSEIMESSAGE;
|
||||
|
||||
/************************************************************************************************/
|
||||
//! \ingroup STRUCTS
|
||||
//! \struct CUVIDEOFORMAT
|
||||
//! Video format
|
||||
//! Used in cuvidGetSourceVideoFormat API
|
||||
/************************************************************************************************/
|
||||
typedef struct {
|
||||
cudaVideoCodec codec; /**< OUT: Compression format */
|
||||
/**
|
||||
* OUT: frame rate = numerator / denominator (for example: 30000/1001)
|
||||
*/
|
||||
struct {
|
||||
/**< OUT: frame rate numerator (0 = unspecified or variable frame rate) */
|
||||
unsigned int numerator;
|
||||
/**< OUT: frame rate denominator (0 = unspecified or variable frame rate) */
|
||||
unsigned int denominator;
|
||||
} frame_rate;
|
||||
unsigned char progressive_sequence; /**< OUT: 0=interlaced, 1=progressive */
|
||||
unsigned char bit_depth_luma_minus8; /**< OUT: high bit depth luma. E.g, 2 for 10-bitdepth, 4 for 12-bitdepth */
|
||||
unsigned char bit_depth_chroma_minus8; /**< OUT: high bit depth chroma. E.g, 2 for 10-bitdepth, 4 for 12-bitdepth */
|
||||
unsigned char min_num_decode_surfaces; /**< OUT: Minimum number of decode surfaces to be allocated for correct
|
||||
decoding. The client can send this value in ulNumDecodeSurfaces
|
||||
(in CUVIDDECODECREATEINFO structure).
|
||||
This guarantees correct functionality and optimal video memory
|
||||
usage but not necessarily the best performance, which depends on
|
||||
the design of the overall application. The optimal number of
|
||||
decode surfaces (in terms of performance and memory utilization)
|
||||
should be decided by experimentation for each application, but it
|
||||
cannot go below min_num_decode_surfaces.
|
||||
If this value is used for ulNumDecodeSurfaces then it must be
|
||||
returned to parser during sequence callback. */
|
||||
unsigned int coded_width; /**< OUT: coded frame width in pixels */
|
||||
unsigned int coded_height; /**< OUT: coded frame height in pixels */
|
||||
/**
|
||||
* area of the frame that should be displayed
|
||||
* typical example:
|
||||
* coded_width = 1920, coded_height = 1088
|
||||
* display_area = { 0,0,1920,1080 }
|
||||
*/
|
||||
struct {
|
||||
int left; /**< OUT: left position of display rect */
|
||||
int top; /**< OUT: top position of display rect */
|
||||
int right; /**< OUT: right position of display rect */
|
||||
int bottom; /**< OUT: bottom position of display rect */
|
||||
} display_area;
|
||||
cudaVideoChromaFormat chroma_format; /**< OUT: Chroma format */
|
||||
unsigned int bitrate; /**< OUT: video bitrate (bps, 0=unknown) */
|
||||
/**
     * OUT: Display Aspect Ratio = x:y (4:3, 16:9, etc)
     */
    struct {
        int x;
        int y;
    } display_aspect_ratio;
    /**
     * Video Signal Description
     * Refer section E.2.1 (VUI parameters semantics) of H264 spec file
     */
    struct {
        unsigned char video_format : 3;          /**< OUT: 0-Component, 1-PAL, 2-NTSC, 3-SECAM, 4-MAC, 5-Unspecified */
        unsigned char video_full_range_flag : 1; /**< OUT: indicates the black level and luma and chroma range */
        unsigned char reserved_zero_bits : 4;    /**< Reserved bits */
        unsigned char color_primaries;           /**< OUT: chromaticity coordinates of source primaries */
        unsigned char transfer_characteristics;  /**< OUT: opto-electronic transfer characteristic of the source picture */
        unsigned char matrix_coefficients;       /**< OUT: used in deriving luma and chroma signals from RGB primaries */
    } video_signal_description;
    unsigned int seqhdr_data_length; /**< OUT: Additional bytes following (CUVIDEOFORMATEX) */
} CUVIDEOFORMAT;

/****************************************************************/
//! \ingroup STRUCTS
//! \struct CUVIDOPERATINGPOINTINFO
//! Operating point information of scalable bitstream
/****************************************************************/
typedef struct {
    cudaVideoCodec codec;
    union {
        struct {
            unsigned char operating_points_cnt;
            unsigned char reserved24_bits[3];
            unsigned short operating_points_idc[32];
        } av1;
        unsigned char CodecReserved[1024];
    };
} CUVIDOPERATINGPOINTINFO;

/**********************************************************************************/
//! \ingroup STRUCTS
//! \struct CUVIDSEIMESSAGEINFO
//! Used in cuvidParseVideoData API with PFNVIDSEIMSGCALLBACK pfnGetSEIMsg
/**********************************************************************************/
typedef struct _CUVIDSEIMESSAGEINFO {
    void *pSEIData;                 /**< OUT: SEI Message Data */
    CUSEIMESSAGE *pSEIMessage;      /**< OUT: SEI Message Info */
    unsigned int sei_message_count; /**< OUT: SEI Message Count */
    unsigned int picIdx;            /**< OUT: SEI Message Pic Index */
} CUVIDSEIMESSAGEINFO;

/****************************************************************/
//! \ingroup STRUCTS
//! \struct CUVIDAV1SEQHDR
//! AV1 specific sequence header information
/****************************************************************/
typedef struct {
    unsigned int max_width;
    unsigned int max_height;
    unsigned char reserved[1016];
} CUVIDAV1SEQHDR;

/****************************************************************/
//! \ingroup STRUCTS
//! \struct CUVIDEOFORMATEX
//! Video format including raw sequence header information
//! Used in cuvidGetSourceVideoFormat API
/****************************************************************/
typedef struct {
    CUVIDEOFORMAT format; /**< OUT: CUVIDEOFORMAT structure */
    union {
        CUVIDAV1SEQHDR av1;
        unsigned char raw_seqhdr_data[1024]; /**< OUT: Sequence header data */
    };
} CUVIDEOFORMATEX;

/****************************************************************/
//! \ingroup STRUCTS
//! \struct CUAUDIOFORMAT
//! Audio formats
//! Used in cuvidGetSourceAudioFormat API
/****************************************************************/
typedef struct {
    cudaAudioCodec codec;       /**< OUT: Compression format */
    unsigned int channels;      /**< OUT: number of audio channels */
    unsigned int samplespersec; /**< OUT: sampling frequency */
    unsigned int bitrate;       /**< OUT: For uncompressed, can also be used to determine bits per sample */
    unsigned int reserved1;     /**< Reserved for future use */
    unsigned int reserved2;     /**< Reserved for future use */
} CUAUDIOFORMAT;

/***************************************************************/
//! \enum CUvideopacketflags
//! Data packet flags
//! Used in CUVIDSOURCEDATAPACKET structure
/***************************************************************/
typedef enum {
    CUVID_PKT_ENDOFSTREAM = 0x01,   /**< Set when this is the last packet for this stream */
    CUVID_PKT_TIMESTAMP = 0x02,     /**< Timestamp is valid */
    CUVID_PKT_DISCONTINUITY = 0x04, /**< Set when a discontinuity has to be signalled */
    CUVID_PKT_ENDOFPICTURE = 0x08,  /**< Set when the packet contains exactly one frame or one field */
    CUVID_PKT_NOTIFY_EOS = 0x10,    /**< If this flag is set along with CUVID_PKT_ENDOFSTREAM, an additional (dummy)
                                         display callback will be invoked with null value of CUVIDPARSERDISPINFO which
                                         should be interpreted as end of the stream. */
} CUvideopacketflags;

/*****************************************************************************/
//! \ingroup STRUCTS
//! \struct CUVIDSOURCEDATAPACKET
//! Data Packet
//! Used in cuvidParseVideoData API
//! IN for cuvidParseVideoData
/*****************************************************************************/
typedef struct _CUVIDSOURCEDATAPACKET {
    unsigned long flags;          /**< IN: Combination of CUVID_PKT_XXX flags */
    unsigned long payload_size;   /**< IN: number of bytes in the payload (may be zero if EOS flag is set) */
    const unsigned char *payload; /**< IN: Pointer to packet payload data (may be NULL if EOS flag is set) */
    CUvideotimestamp timestamp;   /**< IN: Presentation time stamp (10MHz clock), only valid if
                                       CUVID_PKT_TIMESTAMP flag is set */
} CUVIDSOURCEDATAPACKET;

// Callback for packet delivery
typedef int(CUDAAPI *PFNVIDSOURCECALLBACK)(void *, CUVIDSOURCEDATAPACKET *);
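// Illustrative sketch (hypothetical helper, not an SDK API): building the
// end-of-stream packet described above. The payload may be NULL and the
// payload_size zero when CUVID_PKT_ENDOFSTREAM is set.
static inline CUVIDSOURCEDATAPACKET ExampleMakeEosPacket(void) {
    CUVIDSOURCEDATAPACKET packet;
    packet.flags = CUVID_PKT_ENDOFSTREAM; /* last packet for this stream */
    packet.payload = NULL;                /* allowed to be NULL with EOS */
    packet.payload_size = 0;              /* allowed to be zero with EOS */
    packet.timestamp = 0;                 /* ignored without CUVID_PKT_TIMESTAMP */
    return packet;
}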
/**************************************************************************************************************************/
//! \ingroup STRUCTS
//! \struct CUVIDSOURCEPARAMS
//! Describes parameters needed in cuvidCreateVideoSource API
//! NVDECODE API is intended for HW accelerated video decoding so CUvideosource doesn't have audio demuxer for all
//! supported containers. It's recommended to clients to use their own or third party demuxer if audio support is
//! needed.
/**************************************************************************************************************************/
typedef struct _CUVIDSOURCEPARAMS {
    unsigned int ulClockRate;    /**< IN: Time stamp units in Hz (0=default=10000000Hz) */
    unsigned int bAnnexb : 1;    /**< IN: AV1 annexB stream */
    unsigned int uReserved : 31; /**< Reserved for future use - set to zero */
    unsigned int uReserved1[6];  /**< Reserved for future use - set to zero */
    void *pUserData;             /**< IN: User private data passed in to the data handlers */
    PFNVIDSOURCECALLBACK pfnVideoDataHandler; /**< IN: Called to deliver video packets */
    PFNVIDSOURCECALLBACK pfnAudioDataHandler; /**< IN: Called to deliver audio packets. */
    void *pvReserved2[8];        /**< Reserved for future use - set to NULL */
} CUVIDSOURCEPARAMS;

/**********************************************/
//! \ingroup ENUMS
//! \enum CUvideosourceformat_flags
//! CUvideosourceformat_flags
//! Used in cuvidGetSourceVideoFormat API
/**********************************************/
typedef enum {
    CUVID_FMT_EXTFORMATINFO = 0x100 /**< Return extended format structure (CUVIDEOFORMATEX) */
} CUvideosourceformat_flags;

#if !defined(__APPLE__)
/***************************************************************************************************************************/
//! \ingroup FUNCTS
//! \fn CUresult CUDAAPI cuvidCreateVideoSource(CUvideosource *pObj, const char *pszFileName, CUVIDSOURCEPARAMS *pParams)
//! Create CUvideosource object. CUvideosource spawns demultiplexer thread that provides two callbacks:
//! pfnVideoDataHandler() and pfnAudioDataHandler()
//! NVDECODE API is intended for HW accelerated video decoding so CUvideosource doesn't have audio demuxer for all
//! supported containers. It's recommended to clients to use their own or third party demuxer if audio support is
//! needed.
/***************************************************************************************************************************/
CUresult CUDAAPI cuvidCreateVideoSource(CUvideosource *pObj, const char *pszFileName, CUVIDSOURCEPARAMS *pParams);

/***************************************************************************************************************************/
//! \ingroup FUNCTS
//! \fn CUresult CUDAAPI cuvidCreateVideoSourceW(CUvideosource *pObj, const wchar_t *pwszFileName, CUVIDSOURCEPARAMS *pParams)
//! Create video source
/***************************************************************************************************************************/
CUresult CUDAAPI cuvidCreateVideoSourceW(CUvideosource *pObj, const wchar_t *pwszFileName, CUVIDSOURCEPARAMS *pParams);

/********************************************************************/
//! \ingroup FUNCTS
//! \fn CUresult CUDAAPI cuvidDestroyVideoSource(CUvideosource obj)
//! Destroy video source
/********************************************************************/
CUresult CUDAAPI cuvidDestroyVideoSource(CUvideosource obj);

/******************************************************************************************/
//! \ingroup FUNCTS
//! \fn CUresult CUDAAPI cuvidSetVideoSourceState(CUvideosource obj, cudaVideoState state)
//! Set video source state to:
//!   cudaVideoState_Started - to signal the source to run and deliver data
//!   cudaVideoState_Stopped - to stop the source from delivering the data
//!   cudaVideoState_Error   - invalid source
/******************************************************************************************/
CUresult CUDAAPI cuvidSetVideoSourceState(CUvideosource obj, cudaVideoState state);

/******************************************************************************************/
//! \ingroup FUNCTS
//! \fn cudaVideoState CUDAAPI cuvidGetVideoSourceState(CUvideosource obj)
//! Get video source state
//! Returns:
//!   cudaVideoState_Started - if Source is running and delivering data
//!   cudaVideoState_Stopped - if Source is stopped or reached end-of-stream
//!   cudaVideoState_Error   - if Source is in error state
/******************************************************************************************/
cudaVideoState CUDAAPI cuvidGetVideoSourceState(CUvideosource obj);
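// Illustrative sketch (hypothetical helper, not an SDK API): driving the source
// state machine documented above. While the state is cudaVideoState_Started, the
// demultiplexer thread pushes packets to the registered handlers; a real client
// would block on its own synchronization primitive rather than polling.
static inline int ExampleRunSource(CUvideosource src) {
    if (cuvidSetVideoSourceState(src, cudaVideoState_Started) != CUDA_SUCCESS)
        return -1; /* could not start packet delivery */
    while (cuvidGetVideoSourceState(src) == cudaVideoState_Started) {
        /* poll until the source stops at end-of-stream or errors out */
    }
    return (cuvidGetVideoSourceState(src) == cudaVideoState_Error) ? -1 : 0;
}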
/******************************************************************************************************************/
//! \ingroup FUNCTS
//! \fn CUresult CUDAAPI cuvidGetSourceVideoFormat(CUvideosource obj, CUVIDEOFORMAT *pvidfmt, unsigned int flags)
//! Gets video source format in pvidfmt, flags is set to combination of CUvideosourceformat_flags as per requirement
/******************************************************************************************************************/
CUresult CUDAAPI cuvidGetSourceVideoFormat(CUvideosource obj, CUVIDEOFORMAT *pvidfmt, unsigned int flags);

/**************************************************************************************************************************/
//! \ingroup FUNCTS
//! \fn CUresult CUDAAPI cuvidGetSourceAudioFormat(CUvideosource obj, CUAUDIOFORMAT *paudfmt, unsigned int flags)
//! Get audio source format
//! NVDECODE API is intended for HW accelerated video decoding so CUvideosource doesn't have audio demuxer for all
//! supported containers. It's recommended to clients to use their own or third party demuxer if audio support is
//! needed.
/**************************************************************************************************************************/
CUresult CUDAAPI cuvidGetSourceAudioFormat(CUvideosource obj, CUAUDIOFORMAT *paudfmt, unsigned int flags);

#endif

/**********************************************************************************/
//! \ingroup STRUCTS
//! \struct CUVIDPARSERDISPINFO
//! Used in cuvidParseVideoData API with PFNVIDDISPLAYCALLBACK pfnDisplayPicture
/**********************************************************************************/
typedef struct _CUVIDPARSERDISPINFO {
    int picture_index;          /**< OUT: Index of the current picture */
    int progressive_frame;      /**< OUT: 1 if progressive frame; 0 otherwise */
    int top_field_first;        /**< OUT: 1 if top field is displayed first; 0 otherwise */
    int repeat_first_field;     /**< OUT: Number of additional fields (1=ivtc, 2=frame doubling, 4=frame tripling,
                                     -1=unpaired field) */
    CUvideotimestamp timestamp; /**< OUT: Presentation time stamp */
} CUVIDPARSERDISPINFO;

/***********************************************************************************************************************/
//! Parser callbacks
//! The parser will call these synchronously from within cuvidParseVideoData(), whenever there is a sequence change or
//! a picture is ready to be decoded and/or displayed. The first argument of each function is the "void *pUserData"
//! member of structure CUVIDPARSERPARAMS. Return values from these callbacks are interpreted as below. If a callback
//! returns failure, it will be propagated by cuvidParseVideoData() to the application. The parser picks the default
//! operating point as 0 and the outputAllLayers flag as 0 if PFNVIDOPPOINTCALLBACK is not set, or if its return value
//! is -1 or an invalid operating point.
//! PFNVIDSEQUENCECALLBACK : 0: fail, 1: succeeded, > 1: override dpb size of parser (set by
//!                          CUVIDPARSERPARAMS::ulMaxNumDecodeSurfaces while creating parser)
//! PFNVIDDECODECALLBACK   : 0: fail, >=1: succeeded
//! PFNVIDDISPLAYCALLBACK  : 0: fail, >=1: succeeded
//! PFNVIDOPPOINTCALLBACK  : <0: fail, >=0: succeeded (bit 0-9: OperatingPoint, bit 10-10: outputAllLayers,
//!                          bit 11-30: reserved)
//! PFNVIDSEIMSGCALLBACK   : 0: fail, >=1: succeeded
/***********************************************************************************************************************/
typedef int(CUDAAPI *PFNVIDSEQUENCECALLBACK)(void *, CUVIDEOFORMAT *);
typedef int(CUDAAPI *PFNVIDDECODECALLBACK)(void *, CUVIDPICPARAMS *);
typedef int(CUDAAPI *PFNVIDDISPLAYCALLBACK)(void *, CUVIDPARSERDISPINFO *);
typedef int(CUDAAPI *PFNVIDOPPOINTCALLBACK)(void *, CUVIDOPERATINGPOINTINFO *);
typedef int(CUDAAPI *PFNVIDSEIMSGCALLBACK)(void *, CUVIDSEIMESSAGEINFO *);
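// Illustrative sketch (hypothetical callbacks, not SDK APIs): the smallest
// implementations that honor the return-value contract documented above.
static int CUDAAPI ExampleSequenceCb(void *pUserData, CUVIDEOFORMAT *pFormat) {
    (void)pUserData;
    return pFormat->min_num_decode_surfaces; /* > 1 overrides the parser's DPB size */
}
static int CUDAAPI ExampleDecodeCb(void *pUserData, CUVIDPICPARAMS *pPicParams) {
    (void)pUserData;
    (void)pPicParams; /* a real client kicks off cuvidDecodePicture() here */
    return 1;         /* >= 1: success; 0 makes cuvidParseVideoData() fail */
}
static int CUDAAPI ExampleDisplayCb(void *pUserData, CUVIDPARSERDISPINFO *pDispInfo) {
    (void)pUserData;
    (void)pDispInfo; /* a real client maps and consumes the frame here */
    return 1;        /* >= 1: success */
}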
/**************************************/
//! \ingroup STRUCTS
//! \struct CUVIDPARSERPARAMS
//! Used in cuvidCreateVideoParser API
/**************************************/
typedef struct _CUVIDPARSERPARAMS {
    cudaVideoCodec CodecType;            /**< IN: cudaVideoCodec_XXX */
    unsigned int ulMaxNumDecodeSurfaces; /**< IN: Max # of decode surfaces (parser will cycle through these) */
    unsigned int ulClockRate;            /**< IN: Timestamp units in Hz (0=default=10000000Hz) */
    unsigned int ulErrorThreshold;       /**< IN: % Error threshold (0-100) for calling pfnDecodePicture (100=always
                                              call pfnDecodePicture even if picture bitstream is fully corrupted) */
    unsigned int ulMaxDisplayDelay;      /**< IN: Max display queue delay (improves pipelining of decode with display)
                                              0=no delay (recommended values: 2..4) */
    unsigned int bAnnexb : 1;            /**< IN: AV1 annexB stream */
    unsigned int uReserved : 31;         /**< Reserved for future use - set to zero */
    unsigned int uReserved1[4];          /**< IN: Reserved for future use - set to 0 */
    void *pUserData;                     /**< IN: User data for callbacks */
    PFNVIDSEQUENCECALLBACK pfnSequenceCallback; /**< IN: Called before decoding frames and/or whenever there is a
                                                     fmt change */
    PFNVIDDECODECALLBACK pfnDecodePicture;      /**< IN: Called when a picture is ready to be decoded (decode order) */
    PFNVIDDISPLAYCALLBACK pfnDisplayPicture;    /**< IN: Called whenever a picture is ready to be displayed
                                                     (display order) */
    PFNVIDOPPOINTCALLBACK pfnGetOperatingPoint; /**< IN: Called from AV1 sequence header to get operating point of an
                                                     AV1 scalable bitstream */
    PFNVIDSEIMSGCALLBACK pfnGetSEIMsg;          /**< IN: Called when all SEI messages are parsed for particular frame */
    void *pvReserved2[5];                /**< Reserved for future use - set to NULL */
    CUVIDEOFORMATEX *pExtVideoInfo;      /**< IN: [Optional] sequence header data from system layer */
} CUVIDPARSERPARAMS;

/************************************************************************************************/
//! \ingroup FUNCTS
//! \fn CUresult CUDAAPI cuvidCreateVideoParser(CUvideoparser *pObj, CUVIDPARSERPARAMS *pParams)
//! Create video parser object and initialize
/************************************************************************************************/
CUresult CUDAAPI cuvidCreateVideoParser(CUvideoparser *pObj, CUVIDPARSERPARAMS *pParams);
/************************************************************************************************/
//! \ingroup FUNCTS
//! \fn CUresult CUDAAPI cuvidParseVideoData(CUvideoparser obj, CUVIDSOURCEDATAPACKET *pPacket)
//! Parse the video data from source data packet in pPacket
//! Extracts parameter sets like SPS, PPS, bitstream etc. from pPacket and
//! calls back pfnDecodePicture with CUVIDPICPARAMS data for kicking off HW decoding,
//! calls back pfnSequenceCallback with CUVIDEOFORMAT data for the initial sequence header or when
//! the decoder encounters a video format change, and
//! calls back pfnDisplayPicture with CUVIDPARSERDISPINFO data to display a video frame
/************************************************************************************************/
CUresult CUDAAPI cuvidParseVideoData(CUvideoparser obj, CUVIDSOURCEDATAPACKET *pPacket);

/************************************************************************************************/
//! \ingroup FUNCTS
//! \fn CUresult CUDAAPI cuvidDestroyVideoParser(CUvideoparser obj)
//! Destroy the video parser
/************************************************************************************************/
CUresult CUDAAPI cuvidDestroyVideoParser(CUvideoparser obj);
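// Illustrative sketch (hypothetical helper, not an SDK API): creating a parser
// wired to the example callbacks above, pushing one packet, and tearing down.
// The codec choice (H.264) is an assumption for the example.
static int ExampleParseOnePacket(const unsigned char *pData, unsigned long nBytes) {
    CUVIDPARSERPARAMS params = {0};
    CUvideoparser parser = NULL;
    CUVIDSOURCEDATAPACKET packet = {0};
    params.CodecType = cudaVideoCodec_H264;
    params.ulMaxNumDecodeSurfaces = 1; /* raised later via the sequence callback */
    params.ulMaxDisplayDelay = 1;      /* small delay improves decode/display pipelining */
    params.pfnSequenceCallback = ExampleSequenceCb;
    params.pfnDecodePicture = ExampleDecodeCb;
    params.pfnDisplayPicture = ExampleDisplayCb;
    if (cuvidCreateVideoParser(&parser, &params) != CUDA_SUCCESS)
        return 0;
    packet.payload = pData;
    packet.payload_size = nBytes;
    cuvidParseVideoData(parser, &packet); /* callbacks fire synchronously in here */
    cuvidDestroyVideoParser(parser);
    return 1;
}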
/**********************************************************************************************/

#if defined(__cplusplus)
}
#endif /* __cplusplus */

#endif // __NVCUVID_H__
Binary data (new file, vendored): third_party/Video_Codec_SDK/Lib/linux/stubs/x86_64/libnvcuvid.so
Binary file not shown.

@@ -0,0 +1,709 @@
/*
 * This copyright notice applies to this header file only:
 *
 * Copyright (c) 2010-2023 NVIDIA Corporation
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use,
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the software, and to permit persons to whom the
 * software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
#include <algorithm>
#include <chrono>
#include <cmath>
#include <iostream>

#include "../../../Interface/nvcuvid.h"
#include "NvDecoder/NvDecoder.h"

std::map<int, int64_t> NvDecoder::sessionOverHead = {{0, 0}, {1, 0}};

/**
 * @brief This function is used to get codec string from codec id
 */
const char *NvDecoder::GetCodecString(cudaVideoCodec eCodec) { return GetVideoCodecString(eCodec); }

/* Called when the parser encounters sequence header for AV1 SVC content
 * return value interpretation:
 *   < 0 : fail, >=0: succeeded (bit 0-9: currOperatingPoint, bit 10-10: bDispAllLayer, bit 11-30: reserved, must be
 *   set 0)
 */
int NvDecoder::GetOperatingPoint(CUVIDOPERATINGPOINTINFO *pOPInfo) {
    if (pOPInfo->codec == cudaVideoCodec_AV1) {
        if (pOPInfo->av1.operating_points_cnt > 1) {
            // clip has SVC enabled
            if (m_nOperatingPoint >= pOPInfo->av1.operating_points_cnt)
                m_nOperatingPoint = 0;

            printf("AV1 SVC clip: operating point count %d ", pOPInfo->av1.operating_points_cnt);
            printf("Selected operating point: %d, IDC 0x%x bOutputAllLayers %d\n", m_nOperatingPoint,
                   pOPInfo->av1.operating_points_idc[m_nOperatingPoint], m_bDispAllLayers);
            return (m_nOperatingPoint | (m_bDispAllLayers << 10));
        }
    }
    return -1;
}
/* Return values from HandleVideoSequence() are interpreted as:
 *   0: fail, 1: succeeded, > 1: override dpb size of parser (set by CUVIDPARSERPARAMS::ulMaxNumDecodeSurfaces while
 *   creating parser)
 */
int NvDecoder::HandleVideoSequence(CUVIDEOFORMAT *pVideoFormat) {
    START_TIMER
    m_videoInfo.str("");
    m_videoInfo.clear();
    m_videoInfo << "Video Input Information" << std::endl
                << "\tCodec : " << GetVideoCodecString(pVideoFormat->codec) << std::endl
                << "\tFrame rate : " << pVideoFormat->frame_rate.numerator << "/"
                << pVideoFormat->frame_rate.denominator << " = "
                << 1.0 * pVideoFormat->frame_rate.numerator / pVideoFormat->frame_rate.denominator << " fps"
                << std::endl
                << "\tSequence : " << (pVideoFormat->progressive_sequence ? "Progressive" : "Interlaced")
                << std::endl
                << "\tCoded size : [" << pVideoFormat->coded_width << ", " << pVideoFormat->coded_height << "]"
                << std::endl
                << "\tDisplay area : [" << pVideoFormat->display_area.left << ", " << pVideoFormat->display_area.top
                << ", " << pVideoFormat->display_area.right << ", " << pVideoFormat->display_area.bottom << "]"
                << std::endl
                << "\tChroma : " << GetVideoChromaFormatString(pVideoFormat->chroma_format) << std::endl
                << "\tBit depth : " << pVideoFormat->bit_depth_luma_minus8 + 8;
    m_videoInfo << std::endl;

    int nDecodeSurface = pVideoFormat->min_num_decode_surfaces;

    CUVIDDECODECAPS decodecaps;
    memset(&decodecaps, 0, sizeof(decodecaps));

    decodecaps.eCodecType = pVideoFormat->codec;
    decodecaps.eChromaFormat = pVideoFormat->chroma_format;
    decodecaps.nBitDepthMinus8 = pVideoFormat->bit_depth_luma_minus8;

    CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext));
    NVDEC_API_CALL(cuvidGetDecoderCaps(&decodecaps));
    CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));

    if (!decodecaps.bIsSupported) {
        NVDEC_THROW_ERROR("Codec not supported on this GPU", CUDA_ERROR_NOT_SUPPORTED);
        return nDecodeSurface;
    }

    if ((pVideoFormat->coded_width > decodecaps.nMaxWidth) || (pVideoFormat->coded_height > decodecaps.nMaxHeight)) {
        std::ostringstream errorString;
        errorString << std::endl
                    << "Resolution : " << pVideoFormat->coded_width << "x" << pVideoFormat->coded_height
                    << std::endl
                    << "Max Supported (wxh) : " << decodecaps.nMaxWidth << "x" << decodecaps.nMaxHeight << std::endl
                    << "Resolution not supported on this GPU";

        const std::string cErr = errorString.str();
        NVDEC_THROW_ERROR(cErr, CUDA_ERROR_NOT_SUPPORTED);
        return nDecodeSurface;
    }

    if ((pVideoFormat->coded_width >> 4) * (pVideoFormat->coded_height >> 4) > decodecaps.nMaxMBCount) {
        std::ostringstream errorString;
        errorString << std::endl
                    << "MBCount : " << (pVideoFormat->coded_width >> 4) * (pVideoFormat->coded_height >> 4)
                    << std::endl
                    << "Max Supported mbcnt : " << decodecaps.nMaxMBCount << std::endl
                    << "MBCount not supported on this GPU";

        const std::string cErr = errorString.str();
        NVDEC_THROW_ERROR(cErr, CUDA_ERROR_NOT_SUPPORTED);
        return nDecodeSurface;
    }

    if (m_nWidth && m_nLumaHeight && m_nChromaHeight) {
        // cuvidCreateDecoder() has been called before, and now there's possible config change
        return ReconfigureDecoder(pVideoFormat);
    }

    // eCodec has been set in the constructor (for parser). Here it's set again for potential correction
    m_eCodec = pVideoFormat->codec;
    m_eChromaFormat = pVideoFormat->chroma_format;
    m_nBitDepthMinus8 = pVideoFormat->bit_depth_luma_minus8;
    m_nBPP = m_nBitDepthMinus8 > 0 ? 2 : 1;

    // Set the output surface format same as chroma format
    if (m_eChromaFormat == cudaVideoChromaFormat_420 || m_eChromaFormat == cudaVideoChromaFormat_Monochrome)
        m_eOutputFormat =
            pVideoFormat->bit_depth_luma_minus8 ? cudaVideoSurfaceFormat_P016 : cudaVideoSurfaceFormat_NV12;
    else if (m_eChromaFormat == cudaVideoChromaFormat_444)
        m_eOutputFormat =
            pVideoFormat->bit_depth_luma_minus8 ? cudaVideoSurfaceFormat_YUV444_16Bit : cudaVideoSurfaceFormat_YUV444;
    else if (m_eChromaFormat == cudaVideoChromaFormat_422)
        m_eOutputFormat = cudaVideoSurfaceFormat_NV12; // no 4:2:2 output format supported yet so make 420 default

    // Check if output format supported. If not, check fallback options
    if (!(decodecaps.nOutputFormatMask & (1 << m_eOutputFormat))) {
        if (decodecaps.nOutputFormatMask & (1 << cudaVideoSurfaceFormat_NV12))
            m_eOutputFormat = cudaVideoSurfaceFormat_NV12;
        else if (decodecaps.nOutputFormatMask & (1 << cudaVideoSurfaceFormat_P016))
            m_eOutputFormat = cudaVideoSurfaceFormat_P016;
        else if (decodecaps.nOutputFormatMask & (1 << cudaVideoSurfaceFormat_YUV444))
            m_eOutputFormat = cudaVideoSurfaceFormat_YUV444;
        else if (decodecaps.nOutputFormatMask & (1 << cudaVideoSurfaceFormat_YUV444_16Bit))
            m_eOutputFormat = cudaVideoSurfaceFormat_YUV444_16Bit;
        else
            NVDEC_THROW_ERROR("No supported output format found", CUDA_ERROR_NOT_SUPPORTED);
    }
    m_videoFormat = *pVideoFormat;

    CUVIDDECODECREATEINFO videoDecodeCreateInfo = {0};
    videoDecodeCreateInfo.CodecType = pVideoFormat->codec;
    videoDecodeCreateInfo.ChromaFormat = pVideoFormat->chroma_format;
    videoDecodeCreateInfo.OutputFormat = m_eOutputFormat;
    videoDecodeCreateInfo.bitDepthMinus8 = pVideoFormat->bit_depth_luma_minus8;
    if (pVideoFormat->progressive_sequence)
        videoDecodeCreateInfo.DeinterlaceMode = cudaVideoDeinterlaceMode_Weave;
    else
        videoDecodeCreateInfo.DeinterlaceMode = cudaVideoDeinterlaceMode_Adaptive;
    videoDecodeCreateInfo.ulNumOutputSurfaces = 2;
    // With PreferCUVID, JPEG is still decoded by CUDA while video is decoded by NVDEC hardware
    videoDecodeCreateInfo.ulCreationFlags = cudaVideoCreate_PreferCUVID;
    videoDecodeCreateInfo.ulNumDecodeSurfaces = nDecodeSurface;
    videoDecodeCreateInfo.vidLock = m_ctxLock;
    videoDecodeCreateInfo.ulWidth = pVideoFormat->coded_width;
    videoDecodeCreateInfo.ulHeight = pVideoFormat->coded_height;
    // AV1 has max width/height of sequence in sequence header
    if (pVideoFormat->codec == cudaVideoCodec_AV1 && pVideoFormat->seqhdr_data_length > 0) {
        // don't overwrite if it is already set from cmdline or reconfig.txt
        if (!(m_nMaxWidth > pVideoFormat->coded_width || m_nMaxHeight > pVideoFormat->coded_height)) {
            CUVIDEOFORMATEX *vidFormatEx = (CUVIDEOFORMATEX *)pVideoFormat;
            m_nMaxWidth = vidFormatEx->av1.max_width;
            m_nMaxHeight = vidFormatEx->av1.max_height;
        }
    }
    if (m_nMaxWidth < (int)pVideoFormat->coded_width)
        m_nMaxWidth = pVideoFormat->coded_width;
    if (m_nMaxHeight < (int)pVideoFormat->coded_height)
        m_nMaxHeight = pVideoFormat->coded_height;
    videoDecodeCreateInfo.ulMaxWidth = m_nMaxWidth;
    videoDecodeCreateInfo.ulMaxHeight = m_nMaxHeight;

    if (!(m_cropRect.r && m_cropRect.b) && !(m_resizeDim.w && m_resizeDim.h)) {
        m_nWidth = pVideoFormat->display_area.right - pVideoFormat->display_area.left;
        m_nLumaHeight = pVideoFormat->display_area.bottom - pVideoFormat->display_area.top;
        videoDecodeCreateInfo.ulTargetWidth = pVideoFormat->coded_width;
        videoDecodeCreateInfo.ulTargetHeight = pVideoFormat->coded_height;
    } else {
        if (m_resizeDim.w && m_resizeDim.h) {
            videoDecodeCreateInfo.display_area.left = pVideoFormat->display_area.left;
            videoDecodeCreateInfo.display_area.top = pVideoFormat->display_area.top;
            videoDecodeCreateInfo.display_area.right = pVideoFormat->display_area.right;
            videoDecodeCreateInfo.display_area.bottom = pVideoFormat->display_area.bottom;
            m_nWidth = m_resizeDim.w;
            m_nLumaHeight = m_resizeDim.h;
        }

        if (m_cropRect.r && m_cropRect.b) {
            videoDecodeCreateInfo.display_area.left = m_cropRect.l;
            videoDecodeCreateInfo.display_area.top = m_cropRect.t;
            videoDecodeCreateInfo.display_area.right = m_cropRect.r;
            videoDecodeCreateInfo.display_area.bottom = m_cropRect.b;
            m_nWidth = m_cropRect.r - m_cropRect.l;
            m_nLumaHeight = m_cropRect.b - m_cropRect.t;
        }
        videoDecodeCreateInfo.ulTargetWidth = m_nWidth;
        videoDecodeCreateInfo.ulTargetHeight = m_nLumaHeight;
    }

    m_nChromaHeight = (int)(ceil(m_nLumaHeight * GetChromaHeightFactor(m_eOutputFormat)));
    m_nNumChromaPlanes = GetChromaPlaneCount(m_eOutputFormat);
    m_nSurfaceHeight = videoDecodeCreateInfo.ulTargetHeight;
    m_nSurfaceWidth = videoDecodeCreateInfo.ulTargetWidth;
    m_displayRect.b = videoDecodeCreateInfo.display_area.bottom;
    m_displayRect.t = videoDecodeCreateInfo.display_area.top;
    m_displayRect.l = videoDecodeCreateInfo.display_area.left;
    m_displayRect.r = videoDecodeCreateInfo.display_area.right;

    m_videoInfo << "Video Decoding Params:" << std::endl
                << "\tNum Surfaces : " << videoDecodeCreateInfo.ulNumDecodeSurfaces << std::endl
                << "\tCrop : [" << videoDecodeCreateInfo.display_area.left << ", "
                << videoDecodeCreateInfo.display_area.top << ", " << videoDecodeCreateInfo.display_area.right << ", "
                << videoDecodeCreateInfo.display_area.bottom << "]" << std::endl
                << "\tResize : " << videoDecodeCreateInfo.ulTargetWidth << "x"
                << videoDecodeCreateInfo.ulTargetHeight << std::endl
                << "\tDeinterlace : "
                << std::vector<const char *>{"Weave", "Bob", "Adaptive"}[videoDecodeCreateInfo.DeinterlaceMode];
    m_videoInfo << std::endl;

    CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext));
    NVDEC_API_CALL(cuvidCreateDecoder(&m_hDecoder, &videoDecodeCreateInfo));
    CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
    STOP_TIMER("Session Initialization Time: ");
    NvDecoder::addDecoderSessionOverHead(getDecoderSessionID(), elapsedTime);
    return nDecodeSurface;
}
int NvDecoder::ReconfigureDecoder(CUVIDEOFORMAT *pVideoFormat) {
    if (pVideoFormat->bit_depth_luma_minus8 != m_videoFormat.bit_depth_luma_minus8 ||
        pVideoFormat->bit_depth_chroma_minus8 != m_videoFormat.bit_depth_chroma_minus8) {
        NVDEC_THROW_ERROR("Reconfigure Not supported for bit depth change", CUDA_ERROR_NOT_SUPPORTED);
    }

    if (pVideoFormat->chroma_format != m_videoFormat.chroma_format) {
        NVDEC_THROW_ERROR("Reconfigure Not supported for chroma format change", CUDA_ERROR_NOT_SUPPORTED);
    }

    bool bDecodeResChange = !(pVideoFormat->coded_width == m_videoFormat.coded_width &&
                              pVideoFormat->coded_height == m_videoFormat.coded_height);
    bool bDisplayRectChange = !(pVideoFormat->display_area.bottom == m_videoFormat.display_area.bottom &&
                                pVideoFormat->display_area.top == m_videoFormat.display_area.top &&
                                pVideoFormat->display_area.left == m_videoFormat.display_area.left &&
                                pVideoFormat->display_area.right == m_videoFormat.display_area.right);

    int nDecodeSurface = pVideoFormat->min_num_decode_surfaces;

    if ((pVideoFormat->coded_width > m_nMaxWidth) || (pVideoFormat->coded_height > m_nMaxHeight)) {
        // For VP9, let driver handle the change if new width/height > maxwidth/maxheight
        if ((m_eCodec != cudaVideoCodec_VP9) || m_bReconfigExternal) {
            NVDEC_THROW_ERROR("Reconfigure Not supported when width/height > maxwidth/maxheight",
                              CUDA_ERROR_NOT_SUPPORTED);
        }
        return 1;
    }

    if (!bDecodeResChange && !m_bReconfigExtPPChange) {
        // if the coded_width/coded_height hasn't changed but display resolution has changed, then need to update
        // width/height for correct output without cropping. Example: 1920x1080 vs 1920x1088
        if (bDisplayRectChange) {
            m_nWidth = pVideoFormat->display_area.right - pVideoFormat->display_area.left;
            m_nLumaHeight = pVideoFormat->display_area.bottom - pVideoFormat->display_area.top;
            m_nChromaHeight = (int)ceil(m_nLumaHeight * GetChromaHeightFactor(m_eOutputFormat));
            m_nNumChromaPlanes = GetChromaPlaneCount(m_eOutputFormat);
        }

        // no need for reconfigureDecoder(). Just return
        return 1;
    }

    CUVIDRECONFIGUREDECODERINFO reconfigParams = {0};

    reconfigParams.ulWidth = m_videoFormat.coded_width = pVideoFormat->coded_width;
    reconfigParams.ulHeight = m_videoFormat.coded_height = pVideoFormat->coded_height;

    // Don't change the display rect; get scaled output from the decoder. This helps the display
    // application present frames smoothly
    reconfigParams.display_area.bottom = m_displayRect.b;
    reconfigParams.display_area.top = m_displayRect.t;
    reconfigParams.display_area.left = m_displayRect.l;
    reconfigParams.display_area.right = m_displayRect.r;
    reconfigParams.ulTargetWidth = m_nSurfaceWidth;
    reconfigParams.ulTargetHeight = m_nSurfaceHeight;

    // If external reconfigure is called along with resolution change, do a full reconfigure
    // params update even if the post-processing params have not changed
    if ((m_bReconfigExternal && bDecodeResChange) || m_bReconfigExtPPChange) {
        // update display rect and target resolution if requested explicitly
        m_bReconfigExternal = false;
        m_bReconfigExtPPChange = false;
        m_videoFormat = *pVideoFormat;
        if (!(m_cropRect.r && m_cropRect.b) && !(m_resizeDim.w && m_resizeDim.h)) {
            m_nWidth = pVideoFormat->display_area.right - pVideoFormat->display_area.left;
            m_nLumaHeight = pVideoFormat->display_area.bottom - pVideoFormat->display_area.top;
            reconfigParams.ulTargetWidth = pVideoFormat->coded_width;
            reconfigParams.ulTargetHeight = pVideoFormat->coded_height;
        } else {
            if (m_resizeDim.w && m_resizeDim.h) {
                reconfigParams.display_area.left = pVideoFormat->display_area.left;
                reconfigParams.display_area.top = pVideoFormat->display_area.top;
                reconfigParams.display_area.right = pVideoFormat->display_area.right;
                reconfigParams.display_area.bottom = pVideoFormat->display_area.bottom;
                m_nWidth = m_resizeDim.w;
                m_nLumaHeight = m_resizeDim.h;
            }

            if (m_cropRect.r && m_cropRect.b) {
                reconfigParams.display_area.left = m_cropRect.l;
                reconfigParams.display_area.top = m_cropRect.t;
                reconfigParams.display_area.right = m_cropRect.r;
                reconfigParams.display_area.bottom = m_cropRect.b;
                m_nWidth = m_cropRect.r - m_cropRect.l;
                m_nLumaHeight = m_cropRect.b - m_cropRect.t;
            }
            reconfigParams.ulTargetWidth = m_nWidth;
            reconfigParams.ulTargetHeight = m_nLumaHeight;
        }

        m_nChromaHeight = (int)ceil(m_nLumaHeight * GetChromaHeightFactor(m_eOutputFormat));
        m_nNumChromaPlanes = GetChromaPlaneCount(m_eOutputFormat);
        m_nSurfaceHeight = reconfigParams.ulTargetHeight;
        m_nSurfaceWidth = reconfigParams.ulTargetWidth;
        m_displayRect.b = reconfigParams.display_area.bottom;
        m_displayRect.t = reconfigParams.display_area.top;
        m_displayRect.l = reconfigParams.display_area.left;
        m_displayRect.r = reconfigParams.display_area.right;
    }

    reconfigParams.ulNumDecodeSurfaces = nDecodeSurface;

    START_TIMER
    CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext));
    NVDEC_API_CALL(cuvidReconfigureDecoder(m_hDecoder, &reconfigParams));
    CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
    STOP_TIMER("Session Reconfigure Time: ");

    return nDecodeSurface;
}
int NvDecoder::setReconfigParams(const Rect *pCropRect, const Dim *pResizeDim) {
    m_bReconfigExternal = true;
    m_bReconfigExtPPChange = false;
    if (pCropRect) {
        if (!((pCropRect->t == m_cropRect.t) && (pCropRect->l == m_cropRect.l) && (pCropRect->b == m_cropRect.b) &&
              (pCropRect->r == m_cropRect.r))) {
            m_bReconfigExtPPChange = true;
            m_cropRect = *pCropRect;
        }
    }
    if (pResizeDim) {
        if (!((pResizeDim->w == m_resizeDim.w) && (pResizeDim->h == m_resizeDim.h))) {
            m_bReconfigExtPPChange = true;
            m_resizeDim = *pResizeDim;
        }
    }

    // Clear existing output buffers of different size
    uint8_t *pFrame = NULL;
    while (!m_vpFrame.empty()) {
        pFrame = m_vpFrame.back();
        m_vpFrame.pop_back();
        if (m_bUseDeviceFrame) {
            CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext));
            CUDA_DRVAPI_CALL(cuMemFree((CUdeviceptr)pFrame));
            CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
        } else {
            delete[] pFrame; // host frames are allocated with new[], so array delete is required
        }
    }

    return 1;
}
/* Return values from HandlePictureDecode() are interpreted as:
 *   0: fail, >=1: succeeded
 */
int NvDecoder::HandlePictureDecode(CUVIDPICPARAMS *pPicParams) {
    if (!m_hDecoder) {
        NVDEC_THROW_ERROR("Decoder not initialized.", CUDA_ERROR_NOT_INITIALIZED);
        return 0;
    }
    m_nPicNumInDecodeOrder[pPicParams->CurrPicIdx] = m_nDecodePicCnt++;
    CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext));
    NVDEC_API_CALL(cuvidDecodePicture(m_hDecoder, pPicParams));
    if (m_bForce_zero_latency && ((!pPicParams->field_pic_flag) || (pPicParams->second_field))) {
        CUVIDPARSERDISPINFO dispInfo;
        memset(&dispInfo, 0, sizeof(dispInfo));
        dispInfo.picture_index = pPicParams->CurrPicIdx;
        dispInfo.progressive_frame = !pPicParams->field_pic_flag;
        dispInfo.top_field_first = pPicParams->bottom_field_flag ^ 1;
        HandlePictureDisplay(&dispInfo);
    }
    CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
    return 1;
}
/* Return values from HandlePictureDisplay() are interpreted as:
 *   0: fail, >=1: succeeded
 */
int NvDecoder::HandlePictureDisplay(CUVIDPARSERDISPINFO *pDispInfo) {
    CUVIDPROCPARAMS videoProcessingParameters = {};
    videoProcessingParameters.progressive_frame = pDispInfo->progressive_frame;
    videoProcessingParameters.second_field = pDispInfo->repeat_first_field + 1;
    videoProcessingParameters.top_field_first = pDispInfo->top_field_first;
    videoProcessingParameters.unpaired_field = pDispInfo->repeat_first_field < 0;
    videoProcessingParameters.output_stream = m_cuvidStream;

    if (m_bExtractSEIMessage) {
        if (m_SEIMessagesDisplayOrder[pDispInfo->picture_index].pSEIData) {
            // Write SEI Message
            uint8_t *seiBuffer = (uint8_t *)(m_SEIMessagesDisplayOrder[pDispInfo->picture_index].pSEIData);
            uint32_t seiNumMessages = m_SEIMessagesDisplayOrder[pDispInfo->picture_index].sei_message_count;
            CUSEIMESSAGE *seiMessagesInfo = m_SEIMessagesDisplayOrder[pDispInfo->picture_index].pSEIMessage;
            if (m_fpSEI) {
                for (uint32_t i = 0; i < seiNumMessages; i++) {
                    if (m_eCodec == cudaVideoCodec_H264 || m_eCodec == cudaVideoCodec_H264_SVC ||
                        m_eCodec == cudaVideoCodec_H264_MVC || m_eCodec == cudaVideoCodec_HEVC) {
                        switch (seiMessagesInfo[i].sei_message_type) {
                        case SEI_TYPE_TIME_CODE: {
                            HEVCSEITIMECODE *timecode = (HEVCSEITIMECODE *)seiBuffer;
                            fwrite(timecode, sizeof(HEVCSEITIMECODE), 1, m_fpSEI);
                        } break;
                        case SEI_TYPE_USER_DATA_UNREGISTERED: {
                            fwrite(seiBuffer, seiMessagesInfo[i].sei_message_size, 1, m_fpSEI);
                        } break;
                        }
                    }
                    if (m_eCodec == cudaVideoCodec_AV1) {
                        fwrite(seiBuffer, seiMessagesInfo[i].sei_message_size, 1, m_fpSEI);
                    }
                    seiBuffer += seiMessagesInfo[i].sei_message_size;
                }
            }
            free(m_SEIMessagesDisplayOrder[pDispInfo->picture_index].pSEIData);
            free(m_SEIMessagesDisplayOrder[pDispInfo->picture_index].pSEIMessage);
        }
    }

    CUdeviceptr dpSrcFrame = 0;
    unsigned int nSrcPitch = 0;
    CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext));
    NVDEC_API_CALL(
        cuvidMapVideoFrame(m_hDecoder, pDispInfo->picture_index, &dpSrcFrame, &nSrcPitch, &videoProcessingParameters));

    CUVIDGETDECODESTATUS DecodeStatus;
    memset(&DecodeStatus, 0, sizeof(DecodeStatus));
    CUresult result = cuvidGetDecodeStatus(m_hDecoder, pDispInfo->picture_index, &DecodeStatus);
    if (result == CUDA_SUCCESS && (DecodeStatus.decodeStatus == cuvidDecodeStatus_Error ||
                                   DecodeStatus.decodeStatus == cuvidDecodeStatus_Error_Concealed)) {
        printf("Decode Error occurred for picture %d\n", m_nPicNumInDecodeOrder[pDispInfo->picture_index]);
    }

    uint8_t *pDecodedFrame = nullptr;
    {
        std::lock_guard<std::mutex> lock(m_mtxVPFrame);
        if ((unsigned)++m_nDecodedFrame > m_vpFrame.size()) {
            // Not enough frames in stock
            m_nFrameAlloc++;
            uint8_t *pFrame = NULL;
            if (m_bUseDeviceFrame) {
                if (m_bDeviceFramePitched) {
                    CUDA_DRVAPI_CALL(cuMemAllocPitch((CUdeviceptr *)&pFrame, &m_nDeviceFramePitch, GetWidth() * m_nBPP,
                                                     m_nLumaHeight + (m_nChromaHeight * m_nNumChromaPlanes), 16));
                } else {
                    CUDA_DRVAPI_CALL(cuMemAlloc((CUdeviceptr *)&pFrame, GetFrameSize()));
                }
            } else {
                pFrame = new uint8_t[GetFrameSize()];
            }
            m_vpFrame.push_back(pFrame);
        }
        pDecodedFrame = m_vpFrame[m_nDecodedFrame - 1];
    }

    // Copy luma plane
    CUDA_MEMCPY2D m = {0};
    m.srcMemoryType = CU_MEMORYTYPE_DEVICE;
    m.srcDevice = dpSrcFrame;
    m.srcPitch = nSrcPitch;
    m.dstMemoryType = m_bUseDeviceFrame ? CU_MEMORYTYPE_DEVICE : CU_MEMORYTYPE_HOST;
    m.dstDevice = (CUdeviceptr)(m.dstHost = pDecodedFrame);
    m.dstPitch = m_nDeviceFramePitch ? m_nDeviceFramePitch : GetWidth() * m_nBPP;
    m.WidthInBytes = GetWidth() * m_nBPP;
    m.Height = m_nLumaHeight;
    CUDA_DRVAPI_CALL(cuMemcpy2DAsync(&m, m_cuvidStream));

    // Copy chroma plane
    // NVDEC output has luma height aligned by 2. Adjust chroma offset by aligning height
    m.srcDevice = (CUdeviceptr)((uint8_t *)dpSrcFrame + m.srcPitch * ((m_nSurfaceHeight + 1) & ~1));
    m.dstDevice = (CUdeviceptr)(m.dstHost = pDecodedFrame + m.dstPitch * m_nLumaHeight);
    m.Height = m_nChromaHeight;
    CUDA_DRVAPI_CALL(cuMemcpy2DAsync(&m, m_cuvidStream));

    if (m_nNumChromaPlanes == 2) {
        m.srcDevice = (CUdeviceptr)((uint8_t *)dpSrcFrame + m.srcPitch * ((m_nSurfaceHeight + 1) & ~1) * 2);
        m.dstDevice = (CUdeviceptr)(m.dstHost = pDecodedFrame + m.dstPitch * m_nLumaHeight * 2);
        m.Height = m_nChromaHeight;
        CUDA_DRVAPI_CALL(cuMemcpy2DAsync(&m, m_cuvidStream));
    }
    CUDA_DRVAPI_CALL(cuStreamSynchronize(m_cuvidStream));
    CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));

    if ((int)m_vTimestamp.size() < m_nDecodedFrame) {
        m_vTimestamp.resize(m_vpFrame.size());
    }
    m_vTimestamp[m_nDecodedFrame - 1] = pDispInfo->timestamp;

    NVDEC_API_CALL(cuvidUnmapVideoFrame(m_hDecoder, dpSrcFrame));
    return 1;
}
int NvDecoder::GetSEIMessage(CUVIDSEIMESSAGEINFO *pSEIMessageInfo) {
    uint32_t seiNumMessages = pSEIMessageInfo->sei_message_count;
    CUSEIMESSAGE *seiMessagesInfo = pSEIMessageInfo->pSEIMessage;
    size_t totalSEIBufferSize = 0;
    if ((pSEIMessageInfo->picIdx < 0) || (pSEIMessageInfo->picIdx >= MAX_FRM_CNT)) {
        printf("Invalid picture index (%d)\n", pSEIMessageInfo->picIdx);
        return 0;
    }
    for (uint32_t i = 0; i < seiNumMessages; i++) {
        totalSEIBufferSize += seiMessagesInfo[i].sei_message_size;
    }
    if (!m_pCurrSEIMessage) {
        printf("Out of Memory, Allocation failed for m_pCurrSEIMessage\n");
        return 0;
    }
    m_pCurrSEIMessage->pSEIData = malloc(totalSEIBufferSize);
    if (!m_pCurrSEIMessage->pSEIData) {
        printf("Out of Memory, Allocation failed for SEI Buffer\n");
        return 0;
    }
    memcpy(m_pCurrSEIMessage->pSEIData, pSEIMessageInfo->pSEIData, totalSEIBufferSize);
    m_pCurrSEIMessage->pSEIMessage = (CUSEIMESSAGE *)malloc(sizeof(CUSEIMESSAGE) * seiNumMessages);
    if (!m_pCurrSEIMessage->pSEIMessage) {
        free(m_pCurrSEIMessage->pSEIData);
        m_pCurrSEIMessage->pSEIData = NULL;
        return 0;
    }
    memcpy(m_pCurrSEIMessage->pSEIMessage, pSEIMessageInfo->pSEIMessage, sizeof(CUSEIMESSAGE) * seiNumMessages);
    m_pCurrSEIMessage->sei_message_count = pSEIMessageInfo->sei_message_count;
    m_SEIMessagesDisplayOrder[pSEIMessageInfo->picIdx] = *m_pCurrSEIMessage;
    return 1;
}
NvDecoder::NvDecoder(CUcontext cuContext, bool bUseDeviceFrame, cudaVideoCodec eCodec, bool bLowLatency,
                     bool bDeviceFramePitched, const Rect *pCropRect, const Dim *pResizeDim,
                     bool extract_user_SEI_Message, int maxWidth, int maxHeight, unsigned int clkRate,
                     bool force_zero_latency)
    : m_cuContext(cuContext), m_bUseDeviceFrame(bUseDeviceFrame), m_eCodec(eCodec),
      m_bDeviceFramePitched(bDeviceFramePitched), m_bExtractSEIMessage(extract_user_SEI_Message),
      m_nMaxWidth(maxWidth), m_nMaxHeight(maxHeight), m_bForce_zero_latency(force_zero_latency) {
    if (pCropRect)
        m_cropRect = *pCropRect;
    if (pResizeDim)
        m_resizeDim = *pResizeDim;

    NVDEC_API_CALL(cuvidCtxLockCreate(&m_ctxLock, cuContext));

    ck(cuStreamCreate(&m_cuvidStream, CU_STREAM_DEFAULT));

    decoderSessionID = 0;

    if (m_bExtractSEIMessage) {
        m_fpSEI = fopen("sei_message.txt", "wb");
        m_pCurrSEIMessage = new CUVIDSEIMESSAGEINFO;
        memset(&m_SEIMessagesDisplayOrder, 0, sizeof(m_SEIMessagesDisplayOrder));
    }
    CUVIDPARSERPARAMS videoParserParameters = {};
    videoParserParameters.CodecType = eCodec;
    videoParserParameters.ulMaxNumDecodeSurfaces = 1;
    videoParserParameters.ulClockRate = clkRate;
    videoParserParameters.ulMaxDisplayDelay = bLowLatency ? 0 : 1;
    videoParserParameters.pUserData = this;
    videoParserParameters.pfnSequenceCallback = HandleVideoSequenceProc;
    videoParserParameters.pfnDecodePicture = HandlePictureDecodeProc;
    videoParserParameters.pfnDisplayPicture = m_bForce_zero_latency ? NULL : HandlePictureDisplayProc;
    videoParserParameters.pfnGetOperatingPoint = HandleOperatingPointProc;
    videoParserParameters.pfnGetSEIMsg = m_bExtractSEIMessage ? HandleSEIMessagesProc : NULL;
    NVDEC_API_CALL(cuvidCreateVideoParser(&m_hParser, &videoParserParameters));
}

NvDecoder::~NvDecoder() {

    START_TIMER

    if (m_pCurrSEIMessage) {
        delete m_pCurrSEIMessage;
        m_pCurrSEIMessage = NULL;
    }

    if (m_fpSEI) {
        fclose(m_fpSEI);
        m_fpSEI = NULL;
    }

    if (m_hParser) {
        cuvidDestroyVideoParser(m_hParser);
    }
    cuCtxPushCurrent(m_cuContext);
    if (m_hDecoder) {
        cuvidDestroyDecoder(m_hDecoder);
    }

    std::lock_guard<std::mutex> lock(m_mtxVPFrame);

    for (uint8_t *pFrame : m_vpFrame) {
        if (m_bUseDeviceFrame) {
            cuMemFree((CUdeviceptr)pFrame);
        } else {
            delete[] pFrame;
        }
    }
    cuCtxPopCurrent(NULL);

    cuvidCtxLockDestroy(m_ctxLock);

    STOP_TIMER("Session Deinitialization Time: ");

    NvDecoder::addDecoderSessionOverHead(getDecoderSessionID(), elapsedTime);
}
int NvDecoder::Decode(const uint8_t *pData, int nSize, int nFlags, int64_t nTimestamp) {
    m_nDecodedFrame = 0;
    m_nDecodedFrameReturned = 0;
    CUVIDSOURCEDATAPACKET packet = {0};
    packet.payload = pData;
    packet.payload_size = nSize;
    packet.flags = nFlags | CUVID_PKT_TIMESTAMP;
    packet.timestamp = nTimestamp;
    if (!pData || nSize == 0) {
        packet.flags |= CUVID_PKT_ENDOFSTREAM;
    }
    NVDEC_API_CALL(cuvidParseVideoData(m_hParser, &packet));

    return m_nDecodedFrame;
}

uint8_t *NvDecoder::GetFrame(int64_t *pTimestamp) {
    if (m_nDecodedFrame > 0) {
        std::lock_guard<std::mutex> lock(m_mtxVPFrame);
        m_nDecodedFrame--;
        if (pTimestamp)
            *pTimestamp = m_vTimestamp[m_nDecodedFrameReturned];
        return m_vpFrame[m_nDecodedFrameReturned++];
    }

    return NULL;
}
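// Illustrative usage sketch (hypothetical caller code, not part of this file;
// `demuxer` stands in for any frame source such as the FFmpegDemuxer in ../Utils):
// Decode() returns how many frames became ready, and each GetFrame() hands back
// one decoded frame that stays valid until the next Decode() call.
//
//     NvDecoder dec(cuContext, /*bUseDeviceFrame=*/false, cudaVideoCodec_H264);
//     uint8_t *pVideo = nullptr;
//     int nVideoBytes = 0;
//     do {
//         demuxer.Demux(&pVideo, &nVideoBytes);
//         int nFrameReturned = dec.Decode(pVideo, nVideoBytes, 0, 0);
//         for (int i = 0; i < nFrameReturned; i++) {
//             uint8_t *pFrame = dec.GetFrame(nullptr); // GetFrameSize() bytes of NV12/P016/YUV444
//             // consume pFrame here
//         }
//     } while (nVideoBytes); // a zero-size packet flushes the decoder at end of stream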
uint8_t *NvDecoder::GetLockedFrame(int64_t *pTimestamp) {
    uint8_t *pFrame;
    uint64_t timestamp;
    if (m_nDecodedFrame > 0) {
        std::lock_guard<std::mutex> lock(m_mtxVPFrame);
        m_nDecodedFrame--;
        pFrame = m_vpFrame[0];
        m_vpFrame.erase(m_vpFrame.begin(), m_vpFrame.begin() + 1);

        timestamp = m_vTimestamp[0];
        m_vTimestamp.erase(m_vTimestamp.begin(), m_vTimestamp.begin() + 1);

        if (pTimestamp)
            *pTimestamp = timestamp;

        return pFrame;
    }

    return NULL;
}

void NvDecoder::UnlockFrame(uint8_t **pFrame) {
    std::lock_guard<std::mutex> lock(m_mtxVPFrame);
    m_vpFrame.insert(m_vpFrame.end(), &pFrame[0], &pFrame[1]);

    // add a dummy entry for timestamp
    uint64_t timestamp[2] = {0};
    m_vTimestamp.insert(m_vTimestamp.end(), &timestamp[0], &timestamp[1]);
}
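// Illustrative usage sketch (hypothetical caller code, not part of this file):
// GetLockedFrame() pops the oldest decoded frame and transfers buffer ownership
// to the caller, so the buffer survives later Decode() calls until handed back:
//
//     int64_t pts = 0;
//     uint8_t *pFrame = dec.GetLockedFrame(&pts); // caller now owns the buffer
//     // ... consume pFrame asynchronously, even across further Decode() calls ...
//     dec.UnlockFrame(&pFrame);                   // return the buffer for reuse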
@@ -0,0 +1,528 @@
/*
 * This copyright notice applies to this header file only:
 *
 * Copyright (c) 2010-2023 NVIDIA Corporation
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use,
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the software, and to permit persons to whom the
 * software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
#pragma once

#include "../../../Interface/nvcuvid.h"
#include "../Utils/NvCodecUtils.h"
#include <assert.h>
#include <iostream>
#include <map>
#include <mutex>
#include <sstream>
#include <stdint.h>
#include <string.h>
#include <string>
#include <vector>

#define MAX_FRM_CNT 32

typedef enum { SEI_TYPE_TIME_CODE = 136, SEI_TYPE_USER_DATA_UNREGISTERED = 5 } SEI_H264_HEVC_PAYLOAD_TYPE;

/**
 * @brief Exception class for error reporting from the decode API.
 */
class NVDECException : public std::exception {
  public:
    NVDECException(const std::string &errorStr, const CUresult errorCode)
        : m_errorString(errorStr), m_errorCode(errorCode) {}

    virtual ~NVDECException() throw() {}
    virtual const char *what() const throw() { return m_errorString.c_str(); }
    CUresult getErrorCode() const { return m_errorCode; }
    const std::string &getErrorString() const { return m_errorString; }
    static NVDECException makeNVDECException(const std::string &errorStr, const CUresult errorCode,
                                             const std::string &functionName, const std::string &fileName, int lineNo);

  private:
    std::string m_errorString;
    CUresult m_errorCode;
};

inline NVDECException NVDECException::makeNVDECException(const std::string &errorStr, const CUresult errorCode,
                                                         const std::string &functionName, const std::string &fileName,
                                                         int lineNo) {
    std::ostringstream errorLog;
    errorLog << functionName << " : " << errorStr << " at " << fileName << ":" << lineNo << std::endl;
    NVDECException exception(errorLog.str(), errorCode);
    return exception;
}

#define NVDEC_THROW_ERROR(errorStr, errorCode)                                                                         \
    do {                                                                                                               \
        throw NVDECException::makeNVDECException(errorStr, errorCode, __FUNCTION__, __FILE__, __LINE__);               \
    } while (0)

#define NVDEC_API_CALL(cuvidAPI)                                                                                       \
    do {                                                                                                               \
        CUresult errorCode = cuvidAPI;                                                                                 \
        if (errorCode != CUDA_SUCCESS) {                                                                               \
            std::ostringstream errorLog;                                                                               \
            errorLog << #cuvidAPI << " returned error " << errorCode;                                                  \
            throw NVDECException::makeNVDECException(errorLog.str(), errorCode, __FUNCTION__, __FILE__, __LINE__);     \
        }                                                                                                              \
    } while (0)
struct Rect {
    int l, t, r, b;
};

struct Dim {
    int w, h;
};

#define START_TIMER auto start = std::chrono::high_resolution_clock::now();

#define STOP_TIMER(print_message)                                                                                      \
    int64_t elapsedTime =                                                                                              \
        std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::high_resolution_clock::now() - start)      \
            .count();                                                                                                  \
    std::cout << print_message << elapsedTime << " ms " << std::endl;

#define CUDA_DRVAPI_CALL(call)                                                                                         \
    do {                                                                                                               \
        CUresult err__ = call;                                                                                         \
        if (err__ != CUDA_SUCCESS) {                                                                                   \
            const char *szErrName = NULL;                                                                              \
            cuGetErrorName(err__, &szErrName);                                                                         \
            std::ostringstream errorLog;                                                                               \
            errorLog << "CUDA driver API error " << szErrName;                                                         \
            throw NVDECException::makeNVDECException(errorLog.str(), err__, __FUNCTION__, __FILE__, __LINE__);         \
        }                                                                                                              \
    } while (0)
static const char *GetVideoCodecString(cudaVideoCodec eCodec) {
    static struct {
        cudaVideoCodec eCodec;
        const char *name;
    } aCodecName[] = {
        {cudaVideoCodec_MPEG1, "MPEG-1"},
        {cudaVideoCodec_MPEG2, "MPEG-2"},
        {cudaVideoCodec_MPEG4, "MPEG-4 (ASP)"},
        {cudaVideoCodec_VC1, "VC-1/WMV"},
        {cudaVideoCodec_H264, "AVC/H.264"},
        {cudaVideoCodec_JPEG, "M-JPEG"},
        {cudaVideoCodec_H264_SVC, "H.264/SVC"},
        {cudaVideoCodec_H264_MVC, "H.264/MVC"},
        {cudaVideoCodec_HEVC, "H.265/HEVC"},
        {cudaVideoCodec_VP8, "VP8"},
        {cudaVideoCodec_VP9, "VP9"},
        {cudaVideoCodec_AV1, "AV1"},
        {cudaVideoCodec_NumCodecs, "Invalid"},
        {cudaVideoCodec_YUV420, "YUV 4:2:0"},
        {cudaVideoCodec_YV12, "YV12 4:2:0"},
        {cudaVideoCodec_NV12, "NV12 4:2:0"},
        {cudaVideoCodec_YUYV, "YUYV 4:2:2"},
        {cudaVideoCodec_UYVY, "UYVY 4:2:2"},
    };

    if (eCodec >= 0 && eCodec <= cudaVideoCodec_NumCodecs) {
        return aCodecName[eCodec].name;
    }
    for (int i = cudaVideoCodec_NumCodecs + 1; i < sizeof(aCodecName) / sizeof(aCodecName[0]); i++) {
        if (eCodec == aCodecName[i].eCodec) {
            return aCodecName[i].name; // index with i: uncompressed-codec enum values are FourCCs, not table indices
        }
    }
    return "Unknown";
}
static const char *GetVideoChromaFormatString(cudaVideoChromaFormat eChromaFormat) {
    static struct {
        cudaVideoChromaFormat eChromaFormat;
        const char *name;
    } aChromaFormatName[] = {
        {cudaVideoChromaFormat_Monochrome, "YUV 400 (Monochrome)"},
        {cudaVideoChromaFormat_420, "YUV 420"},
        {cudaVideoChromaFormat_422, "YUV 422"},
        {cudaVideoChromaFormat_444, "YUV 444"},
    };

    if (eChromaFormat >= 0 && eChromaFormat < sizeof(aChromaFormatName) / sizeof(aChromaFormatName[0])) {
        return aChromaFormatName[eChromaFormat].name;
    }
    return "Unknown";
}

static float GetChromaHeightFactor(cudaVideoSurfaceFormat eSurfaceFormat) {
    float factor = 0.5;
    switch (eSurfaceFormat) {
    case cudaVideoSurfaceFormat_NV12:
    case cudaVideoSurfaceFormat_P016:
        factor = 0.5;
        break;
    case cudaVideoSurfaceFormat_YUV444:
    case cudaVideoSurfaceFormat_YUV444_16Bit:
        factor = 1.0;
        break;
    }

    return factor;
}

static int GetChromaPlaneCount(cudaVideoSurfaceFormat eSurfaceFormat) {
    int numPlane = 1;
    switch (eSurfaceFormat) {
    case cudaVideoSurfaceFormat_NV12:
    case cudaVideoSurfaceFormat_P016:
        numPlane = 1;
        break;
    case cudaVideoSurfaceFormat_YUV444:
    case cudaVideoSurfaceFormat_YUV444_16Bit:
        numPlane = 2;
        break;
    }

    return numPlane;
}
/**
 * @brief Base class for decoder interface.
 */
class NvDecoder {

  public:
    NvDecoder() {}
    /**
     * @brief This function is used to initialize the decoder session.
     * Application must call this function to initialize the decoder, before
     * starting to decode any frames.
     */
    NvDecoder(CUcontext cuContext, bool bUseDeviceFrame, cudaVideoCodec eCodec, bool bLowLatency = false,
              bool bDeviceFramePitched = false, const Rect *pCropRect = NULL, const Dim *pResizeDim = NULL,
              bool extract_user_SEI_Message = false, int maxWidth = 0, int maxHeight = 0, unsigned int clkRate = 1000,
              bool force_zero_latency = false);
    ~NvDecoder();

    /**
     * @brief This function is used to get the current CUDA context.
     */
    CUcontext GetContext() { return m_cuContext; }

    /**
     * @brief This function is used to get the output frame width.
     * NV12/P016 output format width is 2 byte aligned because of U and V interleave
     */
    int GetWidth() {
        assert(m_nWidth);
        return (m_eOutputFormat == cudaVideoSurfaceFormat_NV12 || m_eOutputFormat == cudaVideoSurfaceFormat_P016)
                   ? (m_nWidth + 1) & ~1
                   : m_nWidth;
    }

    /**
     * @brief This function is used to get the actual decode width
     */
    int GetDecodeWidth() {
        assert(m_nWidth);
        return m_nWidth;
    }

    /**
     * @brief This function is used to get the output frame height (Luma height).
     */
    int GetHeight() {
        assert(m_nLumaHeight);
        return m_nLumaHeight;
    }

    /**
     * @brief This function is used to get the current chroma height.
     */
    int GetChromaHeight() {
        assert(m_nChromaHeight);
        return m_nChromaHeight;
    }

    /**
     * @brief This function is used to get the number of chroma planes.
     */
    int GetNumChromaPlanes() {
        assert(m_nNumChromaPlanes);
        return m_nNumChromaPlanes;
    }

    /**
     * @brief This function is used to get the current frame size based on pixel format.
     */
    int GetFrameSize() {
        assert(m_nWidth);
        return GetWidth() * (m_nLumaHeight + (m_nChromaHeight * m_nNumChromaPlanes)) * m_nBPP;
    }

    /**
     * @brief This function is used to get the current frame Luma plane size.
     */
    int GetLumaPlaneSize() {
        assert(m_nWidth);
        return GetWidth() * m_nLumaHeight * m_nBPP;
    }

    /**
     * @brief This function is used to get the current frame chroma plane size.
     */
    int GetChromaPlaneSize() {
        assert(m_nWidth);
        return GetWidth() * (m_nChromaHeight * m_nNumChromaPlanes) * m_nBPP;
    }

    /**
     * @brief This function is used to get the pitch of the device buffer holding the decoded frame.
     */
    int GetDeviceFramePitch() {
        assert(m_nWidth);
        return m_nDeviceFramePitch ? (int)m_nDeviceFramePitch : GetWidth() * m_nBPP;
    }

    /**
     * @brief This function is used to get the bit depth associated with the pixel format.
     */
    int GetBitDepth() {
        assert(m_nWidth);
        return m_nBitDepthMinus8 + 8;
    }

    /**
     * @brief This function is used to get the bytes used per pixel.
     */
    int GetBPP() {
        assert(m_nWidth);
        return m_nBPP;
    }

    /**
     * @brief This function is used to get the YUV chroma format
     */
    cudaVideoSurfaceFormat GetOutputFormat() { return m_eOutputFormat; }

    /**
     * @brief This function is used to get information about the video stream (codec, display parameters etc)
     */
    CUVIDEOFORMAT GetVideoFormatInfo() {
        assert(m_nWidth);
        return m_videoFormat;
    }

    /**
     * @brief This function is used to get codec string from codec id
|
||||
*/
|
||||
const char *GetCodecString(cudaVideoCodec eCodec);
|
||||
|
||||
/**
|
||||
* @brief This function is used to print information about the video stream
|
||||
*/
|
||||
std::string GetVideoInfo() const { return m_videoInfo.str(); }
|
||||
|
||||
/**
|
||||
* @brief This function decodes a frame and returns the number of frames that are available for
|
||||
* display. All frames that are available for display should be read before making a subsequent decode call.
|
||||
* @param pData - pointer to the data buffer that is to be decoded
|
||||
* @param nSize - size of the data buffer in bytes
|
||||
* @param nFlags - CUvideopacketflags for setting decode options
|
||||
* @param nTimestamp - presentation timestamp
|
||||
*/
|
||||
int Decode(const uint8_t *pData, int nSize, int nFlags = 0, int64_t nTimestamp = 0);
|
||||
|
||||
/**
|
||||
* @brief This function returns a decoded frame and timestamp. This function should be called in a loop for
|
||||
* fetching all the frames that are available for display.
|
||||
*/
|
||||
uint8_t *GetFrame(int64_t *pTimestamp = nullptr);
|
||||
|
||||
/**
|
||||
* @brief This function decodes a frame and returns the locked frame buffers
|
||||
* This makes the buffers available for use by the application without the buffers
|
||||
* getting overwritten, even if subsequent decode calls are made. The frame buffers
|
||||
* remain locked, until UnlockFrame() is called
|
||||
*/
|
||||
uint8_t *GetLockedFrame(int64_t *pTimestamp = nullptr);
|
||||
|
||||
/**
|
||||
* @brief This function unlocks the frame buffer and makes the frame buffers available for write again
|
||||
* @param ppFrame - pointer to array of frames that are to be unlocked
|
||||
* @param nFrame - number of frames to be unlocked
|
||||
*/
|
||||
void UnlockFrame(uint8_t **pFrame);
|
||||
|
||||
/**
|
||||
* @brief This function allows app to set decoder reconfig params
|
||||
* @param pCropRect - cropping rectangle coordinates
|
||||
* @param pResizeDim - width and height of resized output
|
||||
*/
|
||||
int setReconfigParams(const Rect *pCropRect, const Dim *pResizeDim);
|
||||
|
||||
/**
|
||||
* @brief This function allows app to set operating point for AV1 SVC clips
|
||||
* @param opPoint - operating point of an AV1 scalable bitstream
|
||||
* @param bDispAllLayers - Output all decoded frames of an AV1 scalable bitstream
|
||||
*/
|
||||
void SetOperatingPoint(const uint32_t opPoint, const bool bDispAllLayers) {
|
||||
m_nOperatingPoint = opPoint;
|
||||
m_bDispAllLayers = bDispAllLayers;
|
||||
}
|
||||
|
||||
// start a timer
|
||||
void startTimer() { m_stDecode_time.Start(); }
|
||||
|
||||
// stop the timer
|
||||
double stopTimer() { return m_stDecode_time.Stop(); }
|
||||
|
||||
void setDecoderSessionID(int sessionID) { decoderSessionID = sessionID; }
|
||||
int getDecoderSessionID() { return decoderSessionID; }
|
||||
|
||||
// Session overhead refers to decoder initialization and deinitialization time
|
||||
static void addDecoderSessionOverHead(int sessionID, int64_t duration) { sessionOverHead[sessionID] += duration; }
|
||||
static int64_t getDecoderSessionOverHead(int sessionID) { return sessionOverHead[sessionID]; }
|
||||
|
||||
protected:
|
||||
int decoderSessionID; // Decoder session identifier. Used to gather session level stats.
|
||||
static std::map<int, int64_t> sessionOverHead; // Records session overhead of initialization+deinitialization time.
|
||||
// Format is (thread id, duration)
|
||||
|
||||
/**
|
||||
* @brief Callback function to be registered for getting a callback when decoding of sequence starts
|
||||
*/
|
||||
static int CUDAAPI HandleVideoSequenceProc(void *pUserData, CUVIDEOFORMAT *pVideoFormat) {
|
||||
return ((NvDecoder *)pUserData)->HandleVideoSequence(pVideoFormat);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Callback function to be registered for getting a callback when a decoded frame is ready to be decoded
|
||||
*/
|
||||
static int CUDAAPI HandlePictureDecodeProc(void *pUserData, CUVIDPICPARAMS *pPicParams) {
|
||||
return ((NvDecoder *)pUserData)->HandlePictureDecode(pPicParams);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Callback function to be registered for getting a callback when a decoded frame is available for display
|
||||
*/
|
||||
static int CUDAAPI HandlePictureDisplayProc(void *pUserData, CUVIDPARSERDISPINFO *pDispInfo) {
|
||||
return ((NvDecoder *)pUserData)->HandlePictureDisplay(pDispInfo);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Callback function to be registered for getting a callback to get operating point when AV1 SVC sequence
|
||||
* header start.
|
||||
*/
|
||||
static int CUDAAPI HandleOperatingPointProc(void *pUserData, CUVIDOPERATINGPOINTINFO *pOPInfo) {
|
||||
return ((NvDecoder *)pUserData)->GetOperatingPoint(pOPInfo);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Callback function to be registered for getting a callback when all the unregistered user SEI Messages
|
||||
* are parsed for a frame.
|
||||
*/
|
||||
static int CUDAAPI HandleSEIMessagesProc(void *pUserData, CUVIDSEIMESSAGEINFO *pSEIMessageInfo) {
|
||||
return ((NvDecoder *)pUserData)->GetSEIMessage(pSEIMessageInfo);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief This function gets called when a sequence is ready to be decoded. The function also gets called
|
||||
when there is format change
|
||||
*/
|
||||
int HandleVideoSequence(CUVIDEOFORMAT *pVideoFormat);
|
||||
|
||||
/**
|
||||
* @brief This function gets called when a picture is ready to be decoded. cuvidDecodePicture is called from this
|
||||
* function to decode the picture
|
||||
*/
|
||||
int HandlePictureDecode(CUVIDPICPARAMS *pPicParams);
|
||||
|
||||
/**
|
||||
* @brief This function gets called after a picture is decoded and available for display. Frames are fetched and
|
||||
stored in internal buffer
|
||||
*/
|
||||
int HandlePictureDisplay(CUVIDPARSERDISPINFO *pDispInfo);
|
||||
|
||||
/**
|
||||
* @brief This function gets called when AV1 sequence encounter more than one operating points
|
||||
*/
|
||||
int GetOperatingPoint(CUVIDOPERATINGPOINTINFO *pOPInfo);
|
||||
|
||||
/**
|
||||
* @brief This function gets called when all unregistered user SEI messages are parsed for a frame
|
||||
*/
|
||||
int GetSEIMessage(CUVIDSEIMESSAGEINFO *pSEIMessageInfo);
|
||||
|
||||
/**
|
||||
* @brief This function reconfigure decoder if there is a change in sequence params.
|
||||
*/
|
||||
int ReconfigureDecoder(CUVIDEOFORMAT *pVideoFormat);
|
||||
|
||||
public:
|
||||
CUcontext m_cuContext = NULL;
|
||||
CUvideoctxlock m_ctxLock;
|
||||
CUvideoparser m_hParser = NULL;
|
||||
CUvideodecoder m_hDecoder = NULL;
|
||||
bool m_bUseDeviceFrame;
|
||||
// dimension of the output
|
||||
unsigned int m_nWidth = 0, m_nLumaHeight = 0, m_nChromaHeight = 0;
|
||||
unsigned int m_nNumChromaPlanes = 0;
|
||||
// height of the mapped surface
|
||||
int m_nSurfaceHeight = 0;
|
||||
int m_nSurfaceWidth = 0;
|
||||
cudaVideoCodec m_eCodec = cudaVideoCodec_NumCodecs;
|
||||
cudaVideoChromaFormat m_eChromaFormat = cudaVideoChromaFormat_420;
|
||||
cudaVideoSurfaceFormat m_eOutputFormat = cudaVideoSurfaceFormat_NV12;
|
||||
int m_nBitDepthMinus8 = 0;
|
||||
int m_nBPP = 1;
|
||||
CUVIDEOFORMAT m_videoFormat = {};
|
||||
Rect m_displayRect = {};
|
||||
// stock of frames
|
||||
std::vector<uint8_t *> m_vpFrame;
|
||||
// timestamps of decoded frames
|
||||
std::vector<int64_t> m_vTimestamp;
|
||||
int m_nDecodedFrame = 0, m_nDecodedFrameReturned = 0;
|
||||
int m_nDecodePicCnt = 0, m_nPicNumInDecodeOrder[MAX_FRM_CNT];
|
||||
CUVIDSEIMESSAGEINFO *m_pCurrSEIMessage = NULL;
|
||||
CUVIDSEIMESSAGEINFO m_SEIMessagesDisplayOrder[MAX_FRM_CNT];
|
||||
FILE *m_fpSEI = NULL;
|
||||
bool m_bEndDecodeDone = false;
|
||||
std::mutex m_mtxVPFrame;
|
||||
int m_nFrameAlloc = 0;
|
||||
CUstream m_cuvidStream = 0;
|
||||
bool m_bDeviceFramePitched = false;
|
||||
size_t m_nDeviceFramePitch = 0;
|
||||
Rect m_cropRect = {};
|
||||
Dim m_resizeDim = {};
|
||||
|
||||
std::ostringstream m_videoInfo;
|
||||
unsigned int m_nMaxWidth = 0, m_nMaxHeight = 0;
|
||||
bool m_bReconfigExternal = false;
|
||||
bool m_bReconfigExtPPChange = false;
|
||||
StopWatch m_stDecode_time;
|
||||
|
||||
unsigned int m_nOperatingPoint = 0;
|
||||
bool m_bDispAllLayers = false;
|
||||
// In H.264, there is an inherent display latency for video contents
|
||||
// which do not have num_reorder_frames=0 in the VUI. This applies to
|
||||
// All-Intra and IPPP sequences as well. If the user wants zero display
|
||||
// latency for All-Intra and IPPP sequences, the below flag will enable
|
||||
// the display callback immediately after the decode callback.
|
||||
bool m_bForce_zero_latency = false;
|
||||
bool m_bExtractSEIMessage = false;
|
||||
};
|
|
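// Illustrative usage sketch (not part of the original header): a minimal decode loop,
// assuming an existing CUDA context `cuContext` and an FFmpegDemuxer `demuxer`
// (hypothetical names). Per the Decode() contract above, every frame made available
// by a Decode() call is drained with GetFrame() before the next Decode().
//
//   NvDecoder dec(cuContext, /*bUseDeviceFrame*/ true, FFmpeg2NvCodecId(demuxer.GetVideoCodec()));
//   uint8_t *pVideo = nullptr, *pFrame = nullptr;
//   int nVideoBytes = 0, nFrameReturned = 0;
//   do {
//       demuxer.Demux(&pVideo, &nVideoBytes);
//       nFrameReturned = dec.Decode(pVideo, nVideoBytes);
//       for (int i = 0; i < nFrameReturned; i++) {
//           pFrame = dec.GetFrame();
//           // consume pFrame; it points to device or host memory depending on bUseDeviceFrame
//       }
//   } while (nVideoBytes);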
@ -0,0 +1,379 @@
/*
 * This copyright notice applies to this header file only:
 *
 * Copyright (c) 2010-2023 NVIDIA Corporation
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use,
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the software, and to permit persons to whom the
 * software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
#pragma once

extern "C" {
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavformat/avio.h>
/* Explicitly include bsf.h when building against FFmpeg 4.3 (libavcodec 58.45.100) or later for backward compatibility
 */
#if LIBAVCODEC_VERSION_INT >= 3824484
#include <libavcodec/bsf.h>
#endif
}
#include "NvCodecUtils.h"
#include "nvcuvid.h"

//---------------------------------------------------------------------------
//! \file FFmpegDemuxer.h
//! \brief Provides functionality for stream demuxing
//!
//! This header file is used by Decode/Transcode apps to demux input video clips before decoding frames from them.
//---------------------------------------------------------------------------

/**
 * @brief libavformat wrapper class. Retrieves the elementary encoded stream from the container format.
 */
class FFmpegDemuxer {
  private:
    AVFormatContext *fmtc = NULL;
    AVIOContext *avioc = NULL;
    AVPacket *pkt = NULL; /*!< AVPacket stores compressed data typically exported by demuxers and then passed as input
                             to decoders */
    AVPacket *pktFiltered = NULL;
    AVBSFContext *bsfc = NULL;

    int iVideoStream;
    bool bMp4H264, bMp4HEVC, bMp4MPEG4;
    AVCodecID eVideoCodec;
    AVPixelFormat eChromaFormat;
    int nWidth, nHeight, nBitDepth, nBPP, nChromaHeight;
    double timeBase = 0.0;
    int64_t userTimeScale = 0;

    uint8_t *pDataWithHeader = NULL;

    unsigned int frameCount = 0;

  public:
    class DataProvider {
      public:
        virtual ~DataProvider() {}
        virtual int GetData(uint8_t *pBuf, int nBuf) = 0;
    };

  private:
    /**
     * @brief Private constructor to initialize libavformat resources.
     * @param fmtc - Pointer to AVFormatContext allocated inside avformat_open_input()
     */
    FFmpegDemuxer(AVFormatContext *fmtc, int64_t timeScale = 1000 /*Hz*/) : fmtc(fmtc) {
        if (!fmtc) {
            LOG(ERROR) << "No AVFormatContext provided.";
            return;
        }

        // Allocate the AVPackets and initialize to default values
        pkt = av_packet_alloc();
        pktFiltered = av_packet_alloc();
        if (!pkt || !pktFiltered) {
            LOG(ERROR) << "AVPacket allocation failed";
            return;
        }

        LOG(INFO) << "Media format: " << fmtc->iformat->long_name << " (" << fmtc->iformat->name << ")";

        ck(avformat_find_stream_info(fmtc, NULL));
        iVideoStream = av_find_best_stream(fmtc, AVMEDIA_TYPE_VIDEO, -1, -1, NULL, 0);
        if (iVideoStream < 0) {
            LOG(ERROR) << "FFmpeg error: " << __FILE__ << " " << __LINE__ << " "
                       << "Could not find stream in input file";
            av_packet_free(&pkt);
            av_packet_free(&pktFiltered);
            return;
        }

        // fmtc->streams[iVideoStream]->need_parsing = AVSTREAM_PARSE_NONE;
        eVideoCodec = fmtc->streams[iVideoStream]->codecpar->codec_id;
        nWidth = fmtc->streams[iVideoStream]->codecpar->width;
        nHeight = fmtc->streams[iVideoStream]->codecpar->height;
        eChromaFormat = (AVPixelFormat)fmtc->streams[iVideoStream]->codecpar->format;
        AVRational rTimeBase = fmtc->streams[iVideoStream]->time_base;
        timeBase = av_q2d(rTimeBase);
        userTimeScale = timeScale;

        // Set bit depth, chroma height, bits per pixel based on eChromaFormat of input
        switch (eChromaFormat) {
        case AV_PIX_FMT_YUV420P10LE:
        case AV_PIX_FMT_GRAY10LE: // monochrome is treated as 420 with chroma filled with 0x0
            nBitDepth = 10;
            nChromaHeight = (nHeight + 1) >> 1;
            nBPP = 2;
            break;
        case AV_PIX_FMT_YUV420P12LE:
            nBitDepth = 12;
            nChromaHeight = (nHeight + 1) >> 1;
            nBPP = 2;
            break;
        case AV_PIX_FMT_YUV444P10LE:
            nBitDepth = 10;
            nChromaHeight = nHeight << 1;
            nBPP = 2;
            break;
        case AV_PIX_FMT_YUV444P12LE:
            nBitDepth = 12;
            nChromaHeight = nHeight << 1;
            nBPP = 2;
            break;
        case AV_PIX_FMT_YUV444P:
            nBitDepth = 8;
            nChromaHeight = nHeight << 1;
            nBPP = 1;
            break;
        case AV_PIX_FMT_YUV420P:
        case AV_PIX_FMT_YUVJ420P:
        case AV_PIX_FMT_YUVJ422P: // jpeg decoder output is subsampled to NV12 for 422/444 so treat it as 420
        case AV_PIX_FMT_YUVJ444P: // jpeg decoder output is subsampled to NV12 for 422/444 so treat it as 420
        case AV_PIX_FMT_GRAY8:    // monochrome is treated as 420 with chroma filled with 0x0
            nBitDepth = 8;
            nChromaHeight = (nHeight + 1) >> 1;
            nBPP = 1;
            break;
        default:
            LOG(WARNING) << "ChromaFormat not recognized. Assuming 420";
            eChromaFormat = AV_PIX_FMT_YUV420P;
            nBitDepth = 8;
            nChromaHeight = (nHeight + 1) >> 1;
            nBPP = 1;
        }

        bMp4H264 = eVideoCodec == AV_CODEC_ID_H264 && (!strcmp(fmtc->iformat->long_name, "QuickTime / MOV") ||
                                                       !strcmp(fmtc->iformat->long_name, "FLV (Flash Video)") ||
                                                       !strcmp(fmtc->iformat->long_name, "Matroska / WebM"));
        bMp4HEVC = eVideoCodec == AV_CODEC_ID_HEVC && (!strcmp(fmtc->iformat->long_name, "QuickTime / MOV") ||
                                                       !strcmp(fmtc->iformat->long_name, "FLV (Flash Video)") ||
                                                       !strcmp(fmtc->iformat->long_name, "Matroska / WebM"));

        bMp4MPEG4 = eVideoCodec == AV_CODEC_ID_MPEG4 && (!strcmp(fmtc->iformat->long_name, "QuickTime / MOV") ||
                                                         !strcmp(fmtc->iformat->long_name, "FLV (Flash Video)") ||
                                                         !strcmp(fmtc->iformat->long_name, "Matroska / WebM"));

        // Initialize bitstream filter and its required resources
        if (bMp4H264) {
            const AVBitStreamFilter *bsf = av_bsf_get_by_name("h264_mp4toannexb");
            if (!bsf) {
                LOG(ERROR) << "FFmpeg error: " << __FILE__ << " " << __LINE__ << " "
                           << "av_bsf_get_by_name() failed";
                av_packet_free(&pkt);
                av_packet_free(&pktFiltered);
                return;
            }
            ck(av_bsf_alloc(bsf, &bsfc));
            avcodec_parameters_copy(bsfc->par_in, fmtc->streams[iVideoStream]->codecpar);
            ck(av_bsf_init(bsfc));
        }
        if (bMp4HEVC) {
            const AVBitStreamFilter *bsf = av_bsf_get_by_name("hevc_mp4toannexb");
            if (!bsf) {
                LOG(ERROR) << "FFmpeg error: " << __FILE__ << " " << __LINE__ << " "
                           << "av_bsf_get_by_name() failed";
                av_packet_free(&pkt);
                av_packet_free(&pktFiltered);
                return;
            }
            ck(av_bsf_alloc(bsf, &bsfc));
            avcodec_parameters_copy(bsfc->par_in, fmtc->streams[iVideoStream]->codecpar);
            ck(av_bsf_init(bsfc));
        }
    }

    AVFormatContext *CreateFormatContext(DataProvider *pDataProvider) {

        AVFormatContext *ctx = NULL;
        if (!(ctx = avformat_alloc_context())) {
            LOG(ERROR) << "FFmpeg error: " << __FILE__ << " " << __LINE__;
            return NULL;
        }

        uint8_t *avioc_buffer = NULL;
        int avioc_buffer_size = 8 * 1024 * 1024;
        avioc_buffer = (uint8_t *)av_malloc(avioc_buffer_size);
        if (!avioc_buffer) {
            LOG(ERROR) << "FFmpeg error: " << __FILE__ << " " << __LINE__;
            return NULL;
        }
        avioc = avio_alloc_context(avioc_buffer, avioc_buffer_size, 0, pDataProvider, &ReadPacket, NULL, NULL);
        if (!avioc) {
            LOG(ERROR) << "FFmpeg error: " << __FILE__ << " " << __LINE__;
            return NULL;
        }
        ctx->pb = avioc;

        ck(avformat_open_input(&ctx, NULL, NULL, NULL));
        return ctx;
    }

    /**
     * @brief Allocate and return AVFormatContext*.
     * @param szFilePath - Filepath pointing to input stream.
     * @return Pointer to AVFormatContext
     */
    AVFormatContext *CreateFormatContext(const char *szFilePath) {
        avformat_network_init();

        AVFormatContext *ctx = NULL;
        ck(avformat_open_input(&ctx, szFilePath, NULL, NULL));
        return ctx;
    }

  public:
    FFmpegDemuxer(const char *szFilePath, int64_t timescale = 1000 /*Hz*/)
        : FFmpegDemuxer(CreateFormatContext(szFilePath), timescale) {}
    FFmpegDemuxer(DataProvider *pDataProvider) : FFmpegDemuxer(CreateFormatContext(pDataProvider)) { avioc = fmtc->pb; }
    ~FFmpegDemuxer() {

        if (!fmtc) {
            return;
        }

        if (pkt) {
            av_packet_free(&pkt);
        }
        if (pktFiltered) {
            av_packet_free(&pktFiltered);
        }

        if (bsfc) {
            av_bsf_free(&bsfc);
        }

        avformat_close_input(&fmtc);

        if (avioc) {
            av_freep(&avioc->buffer);
            av_freep(&avioc);
        }

        if (pDataWithHeader) {
            av_free(pDataWithHeader);
        }
    }
    AVCodecID GetVideoCodec() { return eVideoCodec; }
    AVPixelFormat GetChromaFormat() { return eChromaFormat; }
    int GetWidth() { return nWidth; }
    int GetHeight() { return nHeight; }
    int GetBitDepth() { return nBitDepth; }
    int GetFrameSize() { return nWidth * (nHeight + nChromaHeight) * nBPP; }
    bool Demux(uint8_t **ppVideo, int *pnVideoBytes, int64_t *pts = NULL) {
        if (!fmtc) {
            return false;
        }

        *pnVideoBytes = 0;

        if (pkt->data) {
            av_packet_unref(pkt);
        }

        int e = 0;
        while ((e = av_read_frame(fmtc, pkt)) >= 0 && pkt->stream_index != iVideoStream) {
            av_packet_unref(pkt);
        }
        if (e < 0) {
            return false;
        }

        if (bMp4H264 || bMp4HEVC) {
            if (pktFiltered->data) {
                av_packet_unref(pktFiltered);
            }
            ck(av_bsf_send_packet(bsfc, pkt));
            ck(av_bsf_receive_packet(bsfc, pktFiltered));
            *ppVideo = pktFiltered->data;
            *pnVideoBytes = pktFiltered->size;
            if (pts)
                *pts = (int64_t)(pktFiltered->pts * userTimeScale * timeBase);
        } else {

            if (bMp4MPEG4 && (frameCount == 0)) {

                int extraDataSize = fmtc->streams[iVideoStream]->codecpar->extradata_size;

                if (extraDataSize > 0) {

                    // extradata contains start codes 00 00 01. Subtract its size
                    pDataWithHeader = (uint8_t *)av_malloc(extraDataSize + pkt->size - 3 * sizeof(uint8_t));

                    if (!pDataWithHeader) {
                        LOG(ERROR) << "FFmpeg error: " << __FILE__ << " " << __LINE__;
                        return false;
                    }

                    memcpy(pDataWithHeader, fmtc->streams[iVideoStream]->codecpar->extradata, extraDataSize);
                    memcpy(pDataWithHeader + extraDataSize, pkt->data + 3, pkt->size - 3 * sizeof(uint8_t));

                    *ppVideo = pDataWithHeader;
                    *pnVideoBytes = extraDataSize + pkt->size - 3 * sizeof(uint8_t);
                }

            } else {
                *ppVideo = pkt->data;
                *pnVideoBytes = pkt->size;
            }

            if (pts)
                *pts = (int64_t)(pkt->pts * userTimeScale * timeBase);
        }

        frameCount++;

        return true;
    }

    static int ReadPacket(void *opaque, uint8_t *pBuf, int nBuf) {
        return ((DataProvider *)opaque)->GetData(pBuf, nBuf);
    }
};

inline cudaVideoCodec FFmpeg2NvCodecId(AVCodecID id) {
    switch (id) {
    case AV_CODEC_ID_MPEG1VIDEO:
        return cudaVideoCodec_MPEG1;
    case AV_CODEC_ID_MPEG2VIDEO:
        return cudaVideoCodec_MPEG2;
    case AV_CODEC_ID_MPEG4:
        return cudaVideoCodec_MPEG4;
    case AV_CODEC_ID_WMV3:
    case AV_CODEC_ID_VC1:
        return cudaVideoCodec_VC1;
    case AV_CODEC_ID_H264:
        return cudaVideoCodec_H264;
    case AV_CODEC_ID_HEVC:
        return cudaVideoCodec_HEVC;
    case AV_CODEC_ID_VP8:
        return cudaVideoCodec_VP8;
    case AV_CODEC_ID_VP9:
        return cudaVideoCodec_VP9;
    case AV_CODEC_ID_MJPEG:
        return cudaVideoCodec_JPEG;
    case AV_CODEC_ID_AV1:
        return cudaVideoCodec_AV1;
    default:
        return cudaVideoCodec_NumCodecs;
    }
}
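// Illustrative usage sketch (hypothetical input path): open a clip, then map its FFmpeg
// codec id to the NVDEC enum before constructing a decoder for it.
//
//   FFmpegDemuxer demuxer("input.mp4");
//   cudaVideoCodec eCodec = FFmpeg2NvCodecId(demuxer.GetVideoCodec());
//   std::cout << demuxer.GetWidth() << "x" << demuxer.GetHeight()
//             << ", bit depth " << demuxer.GetBitDepth() << std::endl;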
@ -0,0 +1,148 @@
/*
 * This copyright notice applies to this header file only:
 *
 * Copyright (c) 2010-2023 NVIDIA Corporation
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use,
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the software, and to permit persons to whom the
 * software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
#pragma once

#include <mutex>
#include <thread>
extern "C" {
#include <libavformat/avformat.h>
#include <libavutil/opt.h>
#include <libswresample/swresample.h>
};
#include "Logger.h"

using namespace std;

extern simplelogger::Logger *logger;

static string AvErrorToString(int av_error_code) {
    const auto buf_size = 1024U;
    char *err_string = (char *)calloc(buf_size, sizeof(*err_string));
    if (!err_string) {
        return string();
    }

    if (0 != av_strerror(av_error_code, err_string, buf_size - 1)) {
        free(err_string);
        stringstream ss;
        ss << "Unknown error with code " << av_error_code;
        return ss.str();
    }

    string str(err_string);
    free(err_string);
    return str;
}

class FFmpegStreamer {
  private:
    AVFormatContext *oc = NULL;
    AVStream *vs = NULL;
    int nFps = 0;

  public:
    FFmpegStreamer(AVCodecID eCodecId, int nWidth, int nHeight, int nFps, const char *szInFilePath) : nFps(nFps) {
        avformat_network_init();

        int ret = 0;

        if ((eCodecId == AV_CODEC_ID_H264) || (eCodecId == AV_CODEC_ID_HEVC))
            ret = avformat_alloc_output_context2(&oc, NULL, "mpegts", NULL);
        else if (eCodecId == AV_CODEC_ID_AV1)
            ret = avformat_alloc_output_context2(&oc, NULL, "ivf", NULL);

        if (ret < 0) {
            LOG(ERROR) << "FFmpeg: failed to allocate an AVFormatContext. Error message: " << AvErrorToString(ret);
            return;
        }

        oc->url = av_strdup(szInFilePath);
        LOG(INFO) << "Streaming destination: " << oc->url;

        // Add video stream to oc
        vs = avformat_new_stream(oc, NULL);
        if (!vs) {
            LOG(ERROR) << "FFMPEG: Could not alloc video stream";
            return;
        }
        vs->id = 0;

        // Set video parameters
        AVCodecParameters *vpar = vs->codecpar;
        vpar->codec_id = eCodecId;
        vpar->codec_type = AVMEDIA_TYPE_VIDEO;
        vpar->width = nWidth;
        vpar->height = nHeight;

        // Everything is ready. Now open the output stream.
        if (avio_open(&oc->pb, oc->url, AVIO_FLAG_WRITE) < 0) {
            LOG(ERROR) << "FFMPEG: Could not open " << oc->url;
            return;
        }

        // Write the container header
        if (avformat_write_header(oc, NULL)) {
            LOG(ERROR) << "FFMPEG: avformat_write_header error!";
            return;
        }
    }
    ~FFmpegStreamer() {
        if (oc) {
            av_write_trailer(oc);
            avio_close(oc->pb);
            avformat_free_context(oc);
        }
    }

    bool Stream(uint8_t *pData, int nBytes, int nPts) {
        AVPacket *pkt = av_packet_alloc();
        if (!pkt) {
            LOG(ERROR) << "AVPacket allocation failed !";
            return false;
        }
        pkt->pts = av_rescale_q(nPts++, AVRational{1, nFps}, vs->time_base);
        // No B-frames
        pkt->dts = pkt->pts;
        pkt->stream_index = vs->index;
        pkt->data = pData;
        pkt->size = nBytes;

        if (!memcmp(pData, "\x00\x00\x00\x01\x67", 5)) {
            pkt->flags |= AV_PKT_FLAG_KEY;
        }

        // Write the compressed frame into the output
        int ret = av_write_frame(oc, pkt);
        av_write_frame(oc, NULL);
        if (ret < 0) {
            LOG(ERROR) << "FFMPEG: Error while writing video frame";
        }

        av_packet_free(&pkt);
        return true;
    }
};
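// Illustrative usage sketch (hypothetical destination and packet source): wrap encoded
// H.264 packets into an MPEG-TS output, using the packet index as the presentation time.
//
//   FFmpegStreamer streamer(AV_CODEC_ID_H264, 1920, 1080, 30, "out.ts");
//   for (int i = 0; i < nPackets; i++) {
//       streamer.Stream(pPacketData[i], nPacketSize[i], i);
//   }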
@ -0,0 +1,235 @@
/*
 * This copyright notice applies to this header file only:
 *
 * Copyright (c) 2010-2023 NVIDIA Corporation
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use,
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the software, and to permit persons to whom the
 * software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#pragma once

#include <fstream>
#include <iostream>
#include <mutex>
#include <sstream>
#include <string>
#include <time.h>

#ifdef _WIN32
#include <windows.h>
#include <winsock.h>

#pragma comment(lib, "ws2_32.lib")
#undef ERROR
#else
#include <arpa/inet.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <unistd.h>
#define SOCKET int
#define INVALID_SOCKET -1
#endif

enum LogLevel { TRACE, INFO, WARNING, ERROR, FATAL };

namespace simplelogger {
class Logger {
  public:
    Logger(LogLevel level, bool bPrintTimeStamp) : level(level), bPrintTimeStamp(bPrintTimeStamp) {}
    virtual ~Logger() {}
    virtual std::ostream &GetStream() = 0;
    virtual void FlushStream() {}
    bool ShouldLogFor(LogLevel l) { return l >= level; }
    char *GetLead(LogLevel l, const char *szFile, int nLine, const char *szFunc) {
        if (l < TRACE || l > FATAL) {
            sprintf(szLead, "[?????] ");
            return szLead;
        }
        const char *szLevels[] = {"TRACE", "INFO", "WARN", "ERROR", "FATAL"};
        if (bPrintTimeStamp) {
            time_t t = time(NULL);
            struct tm *ptm = localtime(&t);
            sprintf(szLead, "[%-5s][%02d:%02d:%02d] ", szLevels[l], ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
        } else {
            sprintf(szLead, "[%-5s] ", szLevels[l]);
        }
        return szLead;
    }
    void EnterCriticalSection() { mtx.lock(); }
    void LeaveCriticalSection() { mtx.unlock(); }

  private:
    LogLevel level;
    char szLead[80];
    bool bPrintTimeStamp;
    std::mutex mtx;
};

class LoggerFactory {
  public:
    static Logger *CreateFileLogger(std::string strFilePath, LogLevel level = INFO, bool bPrintTimeStamp = true) {
        return new FileLogger(strFilePath, level, bPrintTimeStamp);
    }
    static Logger *CreateConsoleLogger(LogLevel level = INFO, bool bPrintTimeStamp = true) {
        return new ConsoleLogger(level, bPrintTimeStamp);
    }
    static Logger *CreateUdpLogger(char *szHost, unsigned uPort, LogLevel level = INFO, bool bPrintTimeStamp = true) {
        return new UdpLogger(szHost, uPort, level, bPrintTimeStamp);
    }

  private:
    LoggerFactory() {}

    class FileLogger : public Logger {
      public:
        FileLogger(std::string strFilePath, LogLevel level, bool bPrintTimeStamp) : Logger(level, bPrintTimeStamp) {
            pFileOut = new std::ofstream();
            pFileOut->open(strFilePath.c_str());
        }
        ~FileLogger() { pFileOut->close(); }
        std::ostream &GetStream() { return *pFileOut; }

      private:
        std::ofstream *pFileOut;
    };

    class ConsoleLogger : public Logger {
      public:
        ConsoleLogger(LogLevel level, bool bPrintTimeStamp) : Logger(level, bPrintTimeStamp) {}
        std::ostream &GetStream() { return std::cout; }
    };

    class UdpLogger : public Logger {
      private:
        class UdpOstream : public std::ostream {
          public:
            UdpOstream(char *szHost, unsigned short uPort) : std::ostream(&sb), socket(INVALID_SOCKET) {
#ifdef _WIN32
                WSADATA w;
                if (WSAStartup(0x0101, &w) != 0) {
                    fprintf(stderr, "WSAStartup() failed.\n");
                    return;
                }
#endif
                socket = ::socket(AF_INET, SOCK_DGRAM, 0);
                if (socket == INVALID_SOCKET) {
#ifdef _WIN32
                    WSACleanup();
#endif
                    fprintf(stderr, "socket() failed.\n");
                    return;
                }
#ifdef _WIN32
                unsigned int b1, b2, b3, b4;
                sscanf(szHost, "%u.%u.%u.%u", &b1, &b2, &b3, &b4);
                struct in_addr addr = {(unsigned char)b1, (unsigned char)b2, (unsigned char)b3, (unsigned char)b4};
#else
                struct in_addr addr = {inet_addr(szHost)};
#endif
                struct sockaddr_in s = {AF_INET, htons(uPort), addr};
                server = s;
            }
            ~UdpOstream() throw() {
                if (socket == INVALID_SOCKET) {
                    return;
                }
#ifdef _WIN32
                closesocket(socket);
                WSACleanup();
#else
                close(socket);
#endif
            }
            void Flush() {
                if (sendto(socket, sb.str().c_str(), (int)sb.str().length() + 1, 0, (struct sockaddr *)&server,
                           (int)sizeof(sockaddr_in)) == -1) {
                    fprintf(stderr, "sendto() failed.\n");
                }
                sb.str("");
            }

          private:
            std::stringbuf sb;
            SOCKET socket;
            struct sockaddr_in server;
        };

      public:
        UdpLogger(char *szHost, unsigned uPort, LogLevel level, bool bPrintTimeStamp)
            : Logger(level, bPrintTimeStamp), udpOut(szHost, (unsigned short)uPort) {}
        UdpOstream &GetStream() { return udpOut; }
        virtual void FlushStream() { udpOut.Flush(); }

      private:
        UdpOstream udpOut;
    };
};

class LogTransaction {
  public:
    LogTransaction(Logger *pLogger, LogLevel level, const char *szFile, const int nLine, const char *szFunc)
        : pLogger(pLogger), level(level) {
        if (!pLogger) {
            std::cout << "[-----] ";
            return;
        }
        if (!pLogger->ShouldLogFor(level)) {
            return;
        }
        pLogger->EnterCriticalSection();
        pLogger->GetStream() << pLogger->GetLead(level, szFile, nLine, szFunc);
    }
    ~LogTransaction() {
        if (!pLogger) {
            std::cout << std::endl;
            return;
        }
        if (!pLogger->ShouldLogFor(level)) {
            return;
        }
        pLogger->GetStream() << std::endl;
        pLogger->FlushStream();
        pLogger->LeaveCriticalSection();
        if (level == FATAL) {
            exit(1);
        }
    }
    std::ostream &GetStream() {
        if (!pLogger) {
            return std::cout;
        }
        if (!pLogger->ShouldLogFor(level)) {
            return ossNull;
        }
        return pLogger->GetStream();
    }

  private:
    Logger *pLogger;
    LogLevel level;
    std::ostringstream ossNull;
};

} // namespace simplelogger

extern simplelogger::Logger *logger;
#define LOG(level) simplelogger::LogTransaction(logger, level, __FILE__, __LINE__, __FUNCTION__).GetStream()
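// Illustrative usage sketch: define the global logger once per binary, then log through
// the LOG macro (the sessionID variable is hypothetical).
//
//   simplelogger::Logger *logger = simplelogger::LoggerFactory::CreateConsoleLogger();
//   LOG(INFO) << "decoder session " << sessionID << " started";
//   LOG(ERROR) << "decode failed"; // FATAL would additionally call exit(1)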
@ -0,0 +1,547 @@
/*
 * This copyright notice applies to this header file only:
 *
 * Copyright (c) 2010-2023 NVIDIA Corporation
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use,
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the software, and to permit persons to whom the
 * software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

//---------------------------------------------------------------------------
//! \file NvCodecUtils.h
//! \brief Miscellaneous classes and error checking functions.
//!
//! Used by Transcode/Encode sample apps for reading input files, multithreading, performance measurement or
//! colorspace conversion while decoding.
//---------------------------------------------------------------------------

#pragma once
#include "Logger.h"
#include <assert.h>
#include <chrono>
#include <condition_variable>
#include <iomanip>
#include <ios>
#include <list>
#include <sstream>
#include <stdint.h>
#include <string.h>
#include <sys/stat.h>
#include <thread>
#include <vector>

extern simplelogger::Logger *logger;

#ifdef __cuda_cuda_h__
inline bool check(CUresult e, int iLine, const char *szFile) {
    if (e != CUDA_SUCCESS) {
        const char *szErrName = NULL;
        cuGetErrorName(e, &szErrName);
        LOG(FATAL) << "CUDA driver API error " << szErrName << " at line " << iLine << " in file " << szFile;
        return false;
    }
    return true;
}
#endif

#ifdef __CUDA_RUNTIME_H__
inline bool check(cudaError_t e, int iLine, const char *szFile) {
    if (e != cudaSuccess) {
        LOG(FATAL) << "CUDA runtime API error " << cudaGetErrorName(e) << " at line " << iLine << " in file " << szFile;
        return false;
    }
    return true;
}
#endif

#ifdef _NV_ENCODEAPI_H_
inline bool check(NVENCSTATUS e, int iLine, const char *szFile) {
    const char *aszErrName[] = {
        "NV_ENC_SUCCESS",
        "NV_ENC_ERR_NO_ENCODE_DEVICE",
        "NV_ENC_ERR_UNSUPPORTED_DEVICE",
        "NV_ENC_ERR_INVALID_ENCODERDEVICE",
        "NV_ENC_ERR_INVALID_DEVICE",
        "NV_ENC_ERR_DEVICE_NOT_EXIST",
        "NV_ENC_ERR_INVALID_PTR",
        "NV_ENC_ERR_INVALID_EVENT",
        "NV_ENC_ERR_INVALID_PARAM",
        "NV_ENC_ERR_INVALID_CALL",
        "NV_ENC_ERR_OUT_OF_MEMORY",
        "NV_ENC_ERR_ENCODER_NOT_INITIALIZED",
        "NV_ENC_ERR_UNSUPPORTED_PARAM",
        "NV_ENC_ERR_LOCK_BUSY",
        "NV_ENC_ERR_NOT_ENOUGH_BUFFER",
        "NV_ENC_ERR_INVALID_VERSION",
        "NV_ENC_ERR_MAP_FAILED",
        "NV_ENC_ERR_NEED_MORE_INPUT",
        "NV_ENC_ERR_ENCODER_BUSY",
        "NV_ENC_ERR_EVENT_NOT_REGISTERED",
        "NV_ENC_ERR_GENERIC",
        "NV_ENC_ERR_INCOMPATIBLE_CLIENT_KEY",
        "NV_ENC_ERR_UNIMPLEMENTED",
        "NV_ENC_ERR_RESOURCE_REGISTER_FAILED",
        "NV_ENC_ERR_RESOURCE_NOT_REGISTERED",
        "NV_ENC_ERR_RESOURCE_NOT_MAPPED",
    };
    if (e != NV_ENC_SUCCESS) {
        LOG(FATAL) << "NVENC error " << aszErrName[e] << " at line " << iLine << " in file " << szFile;
        return false;
    }
    return true;
}
#endif

#ifdef _WINERROR_
inline bool check(HRESULT e, int iLine, const char *szFile) {
    if (e != S_OK) {
        std::stringstream stream;
        stream << std::hex << std::uppercase << e;
        LOG(FATAL) << "HRESULT error 0x" << stream.str() << " at line " << iLine << " in file " << szFile;
        return false;
    }
    return true;
}
#endif

#if defined(__gl_h_) || defined(__GL_H__)
inline bool check(GLenum e, int iLine, const char *szFile) {
    if (e != 0) {
        LOG(ERROR) << "GLenum error " << e << " at line " << iLine << " in file " << szFile;
        return false;
    }
    return true;
}
#endif

inline bool check(int e, int iLine, const char *szFile) {
    if (e < 0) {
        LOG(ERROR) << "General error " << e << " at line " << iLine << " in file " << szFile;
        return false;
    }
    return true;
}

#define ck(call) check(call, __LINE__, __FILE__)
#define MAKE_FOURCC(ch0, ch1, ch2, ch3)                                                                                \
    ((uint32_t)(uint8_t)(ch0) | ((uint32_t)(uint8_t)(ch1) << 8) | ((uint32_t)(uint8_t)(ch2) << 16) |                   \
     ((uint32_t)(uint8_t)(ch3) << 24))
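// Illustrative usage sketch: ck() routes any failing status through the matching check()
// overload above, so CUDA driver and FFmpeg calls are wrapped uniformly (cuDevice and
// cuContext are hypothetical variables).
//
//   ck(cuInit(0));
//   ck(cuDeviceGet(&cuDevice, 0));
//   ck(cuCtxCreate(&cuContext, 0, cuDevice));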
/**
 * @brief Wrapper class around std::thread
 */
class NvThread {
  public:
    NvThread() = default;
    NvThread(const NvThread &) = delete;
    NvThread &operator=(const NvThread &other) = delete;

    NvThread(std::thread &&thread) : t(std::move(thread)) {}

    NvThread(NvThread &&thread) : t(std::move(thread.t)) {}

    NvThread &operator=(NvThread &&other) {
        t = std::move(other.t);
        return *this;
    }

    ~NvThread() { join(); }

    void join() {
        if (t.joinable()) {
            t.join();
        }
    }

  private:
    std::thread t;
};

#ifndef _WIN32
#define _stricmp strcasecmp
#define _stat64 stat64
#endif

/**
 * @brief Utility class to allocate buffer memory. Helps avoid I/O during the encode/decode loop in case of performance
 * tests.
 */
class BufferedFileReader {
  public:
    /**
     * @brief Constructor function to allocate appropriate memory and copy file contents into it
     */
    BufferedFileReader(const char *szFileName, bool bPartial = false) {
        struct _stat64 st;

        if (_stat64(szFileName, &st) != 0) {
            return;
        }

        nSize = st.st_size;
        while (nSize) {
            try {
                pBuf = new uint8_t[(size_t)nSize];
                if (nSize != st.st_size) {
                    LOG(WARNING) << "File is too large - only " << std::setprecision(4) << 100.0 * nSize / st.st_size
                                 << "% is loaded";
                }
                break;
            } catch (std::bad_alloc) {
                if (!bPartial) {
                    LOG(ERROR) << "Failed to allocate memory in BufferedReader";
                    return;
                }
                nSize = (uint32_t)(nSize * 0.9);
            }
        }

        std::ifstream fpIn(szFileName, std::ifstream::in | std::ifstream::binary);
        if (!fpIn) {
            LOG(ERROR) << "Unable to open input file: " << szFileName;
            return;
        }

        std::streamsize nRead = fpIn.read(reinterpret_cast<char *>(pBuf), nSize).gcount();
        fpIn.close();

        assert(nRead == nSize);
    }
    ~BufferedFileReader() {
        if (pBuf) {
            delete[] pBuf;
        }
    }
    bool GetBuffer(uint8_t **ppBuf, uint64_t *pnSize) {
        if (!pBuf) {
            return false;
        }

        *ppBuf = pBuf;
        *pnSize = nSize;
        return true;
    }

  private:
    uint8_t *pBuf = NULL;
    uint64_t nSize = 0;
};
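// Illustrative usage sketch (hypothetical file path): preload a whole bitstream so decode
// timing is not skewed by disk I/O.
//
//   BufferedFileReader reader("clip.h264");
//   uint8_t *pBuf = nullptr;
//   uint64_t nBufSize = 0;
//   if (reader.GetBuffer(&pBuf, &nBufSize)) {
//       // feed pBuf / nBufSize to the decoder
//   }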
/**
 * @brief Template class to facilitate color space conversion
 */
template <typename T> class YuvConverter {
  public:
    YuvConverter(int nWidth, int nHeight) : nWidth(nWidth), nHeight(nHeight) {
        pQuad = new T[((nWidth + 1) / 2) * ((nHeight + 1) / 2)];
    }
    ~YuvConverter() { delete[] pQuad; }
    void PlanarToUVInterleaved(T *pFrame, int nPitch = 0) {
        if (nPitch == 0) {
            nPitch = nWidth;
        }

        // sizes of source surface plane
        int nSizePlaneY = nPitch * nHeight;
        int nSizePlaneU = ((nPitch + 1) / 2) * ((nHeight + 1) / 2);
        int nSizePlaneV = nSizePlaneU;

        T *puv = pFrame + nSizePlaneY;
        if (nPitch == nWidth) {
            memcpy(pQuad, puv, nSizePlaneU * sizeof(T));
        } else {
            for (int i = 0; i < (nHeight + 1) / 2; i++) {
                memcpy(pQuad + ((nWidth + 1) / 2) * i, puv + ((nPitch + 1) / 2) * i, ((nWidth + 1) / 2) * sizeof(T));
            }
        }
        T *pv = puv + nSizePlaneU;
        for (int y = 0; y < (nHeight + 1) / 2; y++) {
            for (int x = 0; x < (nWidth + 1) / 2; x++) {
                puv[y * nPitch + x * 2] = pQuad[y * ((nWidth + 1) / 2) + x];
                puv[y * nPitch + x * 2 + 1] = pv[y * ((nPitch + 1) / 2) + x];
            }
        }
    }
    void UVInterleavedToPlanar(T *pFrame, int nPitch = 0) {
        if (nPitch == 0) {
            nPitch = nWidth;
        }

        // sizes of source surface plane
        int nSizePlaneY = nPitch * nHeight;
        int nSizePlaneU = ((nPitch + 1) / 2) * ((nHeight + 1) / 2);
        int nSizePlaneV = nSizePlaneU;

        T *puv = pFrame + nSizePlaneY, *pu = puv, *pv = puv + nSizePlaneU;

        // split chroma from interleave to planar
        for (int y = 0; y < (nHeight + 1) / 2; y++) {
            for (int x = 0; x < (nWidth + 1) / 2; x++) {
                pu[y * ((nPitch + 1) / 2) + x] = puv[y * nPitch + x * 2];
                pQuad[y * ((nWidth + 1) / 2) + x] = puv[y * nPitch + x * 2 + 1];
            }
        }
        if (nPitch == nWidth) {
            memcpy(pv, pQuad, nSizePlaneV * sizeof(T));
        } else {
            for (int i = 0; i < (nHeight + 1) / 2; i++) {
                memcpy(pv + ((nPitch + 1) / 2) * i, pQuad + ((nWidth + 1) / 2) * i, ((nWidth + 1) / 2) * sizeof(T));
            }
        }
    }

  private:
    T *pQuad;
    int nWidth, nHeight;
};
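// Illustrative usage sketch: convert an 8-bit planar YUV420 (I420) host frame to the
// semi-planar NV12 layout used by NVDEC surfaces, and back (pHostFrame is hypothetical).
//
//   YuvConverter<uint8_t> converter(nWidth, nHeight);
//   converter.PlanarToUVInterleaved(pHostFrame); // I420 -> NV12
//   converter.UVInterleavedToPlanar(pHostFrame); // NV12 -> I420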
/**
 * @brief Class for writing IVF format header for AV1 codec
 */
class IVFUtils {
  public:
    void WriteFileHeader(std::vector<uint8_t> &vPacket, uint32_t nFourCC, uint32_t nWidth, uint32_t nHeight,
                         uint32_t nFrameRateNum, uint32_t nFrameRateDen, uint32_t nFrameCnt) {
        char header[32];

        header[0] = 'D';
        header[1] = 'K';
        header[2] = 'I';
        header[3] = 'F';
        mem_put_le16(header + 4, 0);              // version
        mem_put_le16(header + 6, 32);             // header size
        mem_put_le32(header + 8, nFourCC);        // fourcc
        mem_put_le16(header + 12, nWidth);        // width
        mem_put_le16(header + 14, nHeight);       // height
        mem_put_le32(header + 16, nFrameRateNum); // rate
        mem_put_le32(header + 20, nFrameRateDen); // scale
        mem_put_le32(header + 24, nFrameCnt);     // length
        mem_put_le32(header + 28, 0);             // unused

        vPacket.insert(vPacket.end(), &header[0], &header[32]);
    }

    void WriteFrameHeader(std::vector<uint8_t> &vPacket, size_t nFrameSize, int64_t pts) {
        char header[12];
        mem_put_le32(header, (int)nFrameSize);
        mem_put_le32(header + 4, (int)(pts & 0xFFFFFFFF));
        mem_put_le32(header + 8, (int)(pts >> 32));

        vPacket.insert(vPacket.end(), &header[0], &header[12]);
    }

  private:
    static inline void mem_put_le32(void *vmem, int val) {
        unsigned char *mem = (unsigned char *)vmem;
        mem[0] = (unsigned char)((val >> 0) & 0xff);
        mem[1] = (unsigned char)((val >> 8) & 0xff);
        mem[2] = (unsigned char)((val >> 16) & 0xff);
        mem[3] = (unsigned char)((val >> 24) & 0xff);
    }

    static inline void mem_put_le16(void *vmem, int val) {
        unsigned char *mem = (unsigned char *)vmem;
        mem[0] = (unsigned char)((val >> 0) & 0xff);
        mem[1] = (unsigned char)((val >> 8) & 0xff);
    }
};
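// Illustrative usage sketch: prepend IVF file and frame headers to a raw AV1 packet
// before writing it out ('AV01' is the AV1 FourCC; a frame count of 0 is a placeholder;
// pFrameData and nFrameSize are hypothetical).
//
//   IVFUtils ivf;
//   std::vector<uint8_t> vPacket;
//   ivf.WriteFileHeader(vPacket, MAKE_FOURCC('A', 'V', '0', '1'), nWidth, nHeight, 30, 1, 0);
//   ivf.WriteFrameHeader(vPacket, nFrameSize, /*pts*/ 0);
//   vPacket.insert(vPacket.end(), pFrameData, pFrameData + nFrameSize);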
/**
 * @brief Utility class to measure elapsed time in seconds between the block of executed code
 */
class StopWatch {
  public:
    void Start() { t0 = std::chrono::high_resolution_clock::now(); }
    double Stop() {
        return std::chrono::duration_cast<std::chrono::nanoseconds>(
                   std::chrono::high_resolution_clock::now().time_since_epoch() - t0.time_since_epoch())
                   .count() /
               1.0e9;
    }

  private:
    std::chrono::high_resolution_clock::time_point t0;
};
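// Illustrative usage sketch: time a block of work in seconds, as the decode path does via
// NvDecoder::startTimer()/stopTimer() (the workload call is hypothetical).
//
//   StopWatch watch;
//   watch.Start();
//   DecodeAllFrames();
//   double elapsedSeconds = watch.Stop();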
template <typename T> class ConcurrentQueue {
  public:
    ConcurrentQueue() {}
    ConcurrentQueue(size_t size) : maxSize(size) {}
    ConcurrentQueue(const ConcurrentQueue &) = delete;
    ConcurrentQueue &operator=(const ConcurrentQueue &) = delete;

    void setSize(size_t s) { maxSize = s; }

    void push_back(const T &value) {
        // Do not use a std::lock_guard here. We will need to explicitly
        // unlock before notify_one as the other waiting thread will
        // automatically try to acquire mutex once it wakes up
        // (which will happen on notify_one)
        std::unique_lock<std::mutex> lock(m_mutex);
        auto wasEmpty = m_List.empty();

        while (full()) {
            m_cond.wait(lock);
        }

        m_List.push_back(value);
        if (wasEmpty && !m_List.empty()) {
            lock.unlock();
            m_cond.notify_one();
        }
    }

    T pop_front() {
        std::unique_lock<std::mutex> lock(m_mutex);

        while (m_List.empty()) {
            m_cond.wait(lock);
        }
        auto wasFull = full();
        T data = std::move(m_List.front());
        m_List.pop_front();

        if (wasFull && !full()) {
            lock.unlock();
            m_cond.notify_one();
        }

        return data;
    }

    T front() {
        std::unique_lock<std::mutex> lock(m_mutex);

        while (m_List.empty()) {
            m_cond.wait(lock);
        }

        return m_List.front();
    }

    size_t size() {
        std::unique_lock<std::mutex> lock(m_mutex);
        return m_List.size();
    }

    bool empty() {
        std::unique_lock<std::mutex> lock(m_mutex);
        return m_List.empty();
    }
    void clear() {
        std::unique_lock<std::mutex> lock(m_mutex);
        m_List.clear();
    }

  private:
    bool full() {
        if (maxSize > 0 && m_List.size() == maxSize)
            return true;
        return false;
    }

  private:
    std::list<T> m_List;
    std::mutex m_mutex;
    std::condition_variable m_cond;
    size_t maxSize = 0; // initialized to 0 (unbounded) so full() is well-defined when default-constructed
};
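// Illustrative usage sketch: a bounded hand-off between a producer and a consumer thread
// (ReadNextPacket/DecodePacket are hypothetical).
//
//   ConcurrentQueue<AVPacket *> q;
//   q.setSize(16); // push_back() blocks once 16 items are queued
//   std::thread producer([&] { q.push_back(ReadNextPacket()); });
//   std::thread consumer([&] { DecodePacket(q.pop_front()); });
//   producer.join();
//   consumer.join();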
inline void CheckInputFile(const char *szInFilePath) {
    std::ifstream fpIn(szInFilePath, std::ios::in | std::ios::binary);
    if (fpIn.fail()) {
        std::ostringstream err;
        err << "Unable to open input file: " << szInFilePath << std::endl;
        throw std::invalid_argument(err.str());
    }
}

inline void ValidateResolution(int nWidth, int nHeight) {
    if (nWidth <= 0 || nHeight <= 0) {
        std::ostringstream err;
        err << "Please specify positive non zero resolution as -s WxH. Current resolution is " << nWidth << "x"
            << nHeight << std::endl;
        throw std::invalid_argument(err.str());
    }
}

template <class COLOR32>
void Nv12ToColor32(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight,
                   int iMatrix = 0);
template <class COLOR64>
void Nv12ToColor64(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight,
                   int iMatrix = 0);

template <class COLOR32>
void P016ToColor32(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight,
                   int iMatrix = 4);
template <class COLOR64>
void P016ToColor64(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight,
                   int iMatrix = 4);

template <class COLOR32>
void YUV444ToColor32(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight,
                     int iMatrix = 0);
template <class COLOR64>
void YUV444ToColor64(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight,
                     int iMatrix = 0);

template <class COLOR32>
void YUV444P16ToColor32(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight,
                        int iMatrix = 4);
template <class COLOR64>
void YUV444P16ToColor64(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgra, int nBgraPitch, int nWidth, int nHeight,
                        int iMatrix = 4);

template <class COLOR32>
void Nv12ToColorPlanar(uint8_t *dpNv12, int nNv12Pitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight,
                       int iMatrix = 0);
template <class COLOR32>
void P016ToColorPlanar(uint8_t *dpP016, int nP016Pitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight,
                       int iMatrix = 4);

template <class COLOR32>
void YUV444ToColorPlanar(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight,
                         int iMatrix = 0);
template <class COLOR32>
void YUV444P16ToColorPlanar(uint8_t *dpYUV444, int nPitch, uint8_t *dpBgrp, int nBgrpPitch, int nWidth, int nHeight,
                            int iMatrix = 4);

void Bgra64ToP016(uint8_t *dpBgra, int nBgraPitch, uint8_t *dpP016, int nP016Pitch, int nWidth, int nHeight,
                  int iMatrix = 4);

void ConvertUInt8ToUInt16(uint8_t *dpUInt8, uint16_t *dpUInt16, int nSrcPitch, int nDestPitch, int nWidth, int nHeight);
void ConvertUInt16ToUInt8(uint16_t *dpUInt16, uint8_t *dpUInt8, int nSrcPitch, int nDestPitch, int nWidth, int nHeight);

void ResizeNv12(unsigned char *dpDstNv12, int nDstPitch, int nDstWidth, int nDstHeight, unsigned char *dpSrcNv12,
                int nSrcPitch, int nSrcWidth, int nSrcHeight, unsigned char *dpDstNv12UV = nullptr);
void ResizeP016(unsigned char *dpDstP016, int nDstPitch, int nDstWidth, int nDstHeight, unsigned char *dpSrcP016,
                int nSrcPitch, int nSrcWidth, int nSrcHeight, unsigned char *dpDstP016UV = nullptr);

void ScaleYUV420(unsigned char *dpDstY, unsigned char *dpDstU, unsigned char *dpDstV, int nDstPitch,
                 int nDstChromaPitch, int nDstWidth, int nDstHeight, unsigned char *dpSrcY, unsigned char *dpSrcU,
                 unsigned char *dpSrcV, int nSrcPitch, int nSrcChromaPitch, int nSrcWidth, int nSrcHeight,
                 bool bSemiplanar);

#ifdef __cuda_cuda_h__
void ComputeCRC(uint8_t *pBuffer, uint32_t *crcValue, CUstream_st *outputCUStream);
#endif