#define MAX_SUPPORTED_WIDTH 1950
#define MAX_SUPPORTED_HEIGHT 1100

#include "libavutil/hwcontext_vaapi.h"

typedef struct VAAPIDecodeContext {
    VAEntrypoint va_entrypoint;
    VAContextID va_context;

#if FF_API_STRUCT_VAAPI_CONTEXT
    struct vaapi_context *old_context;
    AVBufferRef *device_ref;
#endif

    AVHWDeviceContext *device;
    AVVAAPIDeviceContext *hwctx;

    AVHWFramesContext *frames;
    AVVAAPIFramesContext *hwfc;

    enum AVPixelFormat surface_format;
} VAAPIDecodeContext;
FFmpegReader::FFmpegReader(const std::string &path, bool inspect_reader)
    : last_frame(0), is_seeking(0), seeking_pts(0), seeking_frame(0), seek_count(0), NO_PTS_OFFSET(-99999),
      path(path), is_video_seek(true), check_interlace(false), check_fps(false), enable_seek(true), is_open(false),
      seek_audio_frame_found(0), seek_video_frame_found(0), is_duration_known(false), largest_frame_processed(0),
      video_pts(0), pFormatCtx(NULL), videoStream(-1), audioStream(-1), pCodecCtx(NULL), aCodecCtx(NULL),
      pStream(NULL), aStream(NULL), pFrame(NULL), previous_packet_location{-1, 0},

    pts_offset_seconds = NO_PTS_OFFSET;
    video_pts_seconds = NO_PTS_OFFSET;
    audio_pts_seconds = NO_PTS_OFFSET;
    if (abs(diff) <= amount)
static enum AVPixelFormat get_hw_dec_format(AVCodecContext *ctx, const enum AVPixelFormat *pix_fmts)
{
    const enum AVPixelFormat *p;

    for (p = pix_fmts; *p != AV_PIX_FMT_NONE; p++) {
        switch (*p) {
            case AV_PIX_FMT_VAAPI:
            case AV_PIX_FMT_VDPAU:
            case AV_PIX_FMT_DXVA2_VLD:
            case AV_PIX_FMT_D3D11:
#if defined(__APPLE__)
            case AV_PIX_FMT_VIDEOTOOLBOX:
            case AV_PIX_FMT_CUDA:

    return AV_PIX_FMT_NONE;
}
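
// get_hw_dec_format() above is FFmpeg's get_format negotiation callback: the
// decoder invokes it with the list of pixel formats it can produce, and the
// callback picks the hardware surface format it wants (falling through to
// AV_PIX_FMT_NONE means software decoding). A minimal sketch of how such a
// callback is wired up, assuming an already-created hw_device_ctx (this
// mirrors the assignments made later in Open(); it is not additional API):
//
//     pCodecCtx->get_format = get_hw_dec_format;
//     pCodecCtx->hw_device_ctx = av_buffer_ref(hw_device_ctx);
//     avcodec_open2(pCodecCtx, pCodec, &opts);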
int FFmpegReader::IsHardwareDecodeSupported(int codecid)
{
    switch (codecid) {
        case AV_CODEC_ID_H264:
        case AV_CODEC_ID_MPEG2VIDEO:
        case AV_CODEC_ID_VC1:
        case AV_CODEC_ID_WMV1:
        case AV_CODEC_ID_WMV2:
        case AV_CODEC_ID_WMV3:
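
// av_err2string(), used by the error handling further down, converts an
// FFmpeg error code into a readable string. FFmpeg's av_err2str() macro does
// not compile cleanly as C++, so a small helper along these lines is commonly
// defined instead (a sketch, assuming libavutil/error.h is available):
//
//     static std::string av_err2string(int errnum) {
//         char buf[AV_ERROR_MAX_STRING_SIZE] = {0};
//         av_make_error_string(buf, AV_ERROR_MAX_STRING_SIZE, errnum);
//         return std::string(buf);
//     }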
    const std::lock_guard<std::recursive_mutex> lock(getFrameMutex);

    if (avformat_open_input(&pFormatCtx, path.c_str(), NULL, NULL) != 0)
        throw InvalidFile("File could not be opened.", path);

    if (avformat_find_stream_info(pFormatCtx, NULL) < 0)

    packet_status.reset(true);

    for (unsigned int i = 0; i < pFormatCtx->nb_streams; i++) {
        if (AV_GET_CODEC_TYPE(pFormatCtx->streams[i]) == AVMEDIA_TYPE_VIDEO && videoStream < 0) {
        if (AV_GET_CODEC_TYPE(pFormatCtx->streams[i]) == AVMEDIA_TYPE_AUDIO && audioStream < 0) {

    if (videoStream == -1 && audioStream == -1)
        throw NoStreamsFound("No video or audio streams found in this file.", path);

    if (videoStream != -1) {
        pStream = pFormatCtx->streams[videoStream];

        const AVCodec *pCodec = avcodec_find_decoder(codecId);
        AVDictionary *opts = NULL;
        int retry_decode_open = 2;
        if (hw_de_on && (retry_decode_open == 2)) {
            hw_de_supported = IsHardwareDecodeSupported(pCodecCtx->codec_id);

            retry_decode_open = 0;

        if (pCodec == NULL) {
            throw InvalidCodec("A valid video codec could not be found for this file.", path);

        av_dict_set(&opts, "strict", "experimental", 0);
        int i_decoder_hw = 0;
        char *adapter_ptr = NULL;

        fprintf(stderr, "Hardware decoding device number: %d\n", adapter_num);

        pCodecCtx->get_format = get_hw_dec_format;

        if (adapter_num < 3 && adapter_num >= 0) {
            snprintf(adapter, sizeof(adapter), "/dev/dri/renderD%d", adapter_num + 128);
            adapter_ptr = adapter;

#if defined(__linux__)
            switch (i_decoder_hw) {
                    hw_de_av_device_type = AV_HWDEVICE_TYPE_VAAPI;
                    hw_de_av_device_type = AV_HWDEVICE_TYPE_CUDA;
                    hw_de_av_device_type = AV_HWDEVICE_TYPE_VDPAU;
                    hw_de_av_device_type = AV_HWDEVICE_TYPE_QSV;
                    hw_de_av_device_type = AV_HWDEVICE_TYPE_VAAPI;
#elif defined(_WIN32)
            switch (i_decoder_hw) {
                    hw_de_av_device_type = AV_HWDEVICE_TYPE_CUDA;
                    hw_de_av_device_type = AV_HWDEVICE_TYPE_DXVA2;
                    hw_de_av_device_type = AV_HWDEVICE_TYPE_D3D11VA;
                    hw_de_av_device_type = AV_HWDEVICE_TYPE_QSV;
                    hw_de_av_device_type = AV_HWDEVICE_TYPE_DXVA2;
#elif defined(__APPLE__)
            switch (i_decoder_hw) {
                    hw_de_av_device_type = AV_HWDEVICE_TYPE_VIDEOTOOLBOX;
                    hw_de_av_device_type = AV_HWDEVICE_TYPE_QSV;
                    hw_de_av_device_type = AV_HWDEVICE_TYPE_VIDEOTOOLBOX;
#endif

#if defined(__linux__)
            if (adapter_ptr != NULL && access(adapter_ptr, W_OK) == 0) {
#elif defined(_WIN32)
            if (adapter_ptr != NULL) {
#elif defined(__APPLE__)
            if (adapter_ptr != NULL) {
#else
            if (adapter_ptr != NULL) {
#endif

        hw_device_ctx = NULL;
        if (av_hwdevice_ctx_create(&hw_device_ctx, hw_de_av_device_type, adapter_ptr, NULL, 0) >= 0) {
            if (!(pCodecCtx->hw_device_ctx = av_buffer_ref(hw_device_ctx))) {
                throw InvalidCodec("Hardware device reference create failed.", path);

            throw InvalidCodec("Hardware device create failed.", path);

        pCodecCtx->thread_type &= ~FF_THREAD_FRAME;
        int avcodec_return = avcodec_open2(pCodecCtx, pCodec, &opts);
        if (avcodec_return < 0) {
            std::stringstream avcodec_error_msg;
            avcodec_error_msg << "A video codec was found, but could not be opened. Error: "
                              << av_err2string(avcodec_return);
        AVHWFramesConstraints *constraints = NULL;
        void *hwconfig = NULL;
        hwconfig = av_hwdevice_hwconfig_alloc(hw_device_ctx);

        ((AVVAAPIHWConfig *)hwconfig)->config_id = ((VAAPIDecodeContext *)(pCodecCtx->priv_data))->va_config;
        constraints = av_hwdevice_get_hwframe_constraints(hw_device_ctx, hwconfig);

        if (pCodecCtx->coded_width < constraints->min_width ||
            pCodecCtx->coded_height < constraints->min_height ||
            pCodecCtx->coded_width > constraints->max_width ||
            pCodecCtx->coded_height > constraints->max_height) {

            retry_decode_open = 1;
            av_buffer_unref(&hw_device_ctx);
            hw_device_ctx = NULL;

        ZmqLogger::Instance()->AppendDebugMethod(
            "\nDecode hardware acceleration is used\n",
            "Min width :", constraints->min_width,
            "Min Height :", constraints->min_height,
            "MaxWidth :", constraints->max_width,
            "MaxHeight :", constraints->max_height,
            "Frame width :", pCodecCtx->coded_width,
            "Frame height :", pCodecCtx->coded_height);
        retry_decode_open = 0;

        av_hwframe_constraints_free(&constraints);
        if (pCodecCtx->coded_width < 0 ||
            pCodecCtx->coded_height < 0 ||
            pCodecCtx->coded_width > max_w ||
            pCodecCtx->coded_height > max_h) {

            ZmqLogger::Instance()->AppendDebugMethod(
                "DIMENSIONS ARE TOO LARGE for hardware acceleration\n",
                "Max Width :", max_w,
                "Max Height :", max_h,
                "Frame width :", pCodecCtx->coded_width,
                "Frame height :", pCodecCtx->coded_height);

            retry_decode_open = 1;
            av_buffer_unref(&hw_device_ctx);
            hw_device_ctx = NULL;

        ZmqLogger::Instance()->AppendDebugMethod(
            "\nDecode hardware acceleration is used\n",
            "Max Width :", max_w,
            "Max Height :", max_h,
            "Frame width :", pCodecCtx->coded_width,
            "Frame height :", pCodecCtx->coded_height);
        retry_decode_open = 0;

        retry_decode_open = 0;
    } while (retry_decode_open);
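
    // Retry logic recap: retry_decode_open starts at 2 (attempt hardware
    // decoding), is set to 1 when the opened decoder fails the size
    // constraints (forcing one more pass through the loop without hardware
    // acceleration), and drops to 0 once a decoder has opened successfully.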
    if (audioStream != -1) {
        aStream = pFormatCtx->streams[audioStream];

        const AVCodec *aCodec = avcodec_find_decoder(codecId);

        if (aCodec == NULL) {
            throw InvalidCodec("A valid audio codec could not be found for this file.", path);

        AVDictionary *opts = NULL;
        av_dict_set(&opts, "strict", "experimental", 0);

        if (avcodec_open2(aCodecCtx, aCodec, &opts) < 0)
            throw InvalidCodec("An audio codec was found, but could not be opened.", path);
    AVDictionaryEntry *tag = NULL;
    while ((tag = av_dict_get(pFormatCtx->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
        QString str_key = tag->key;
        QString str_value = tag->value;
        info.metadata[str_key.toStdString()] = str_value.trimmed().toStdString();

    previous_packet_location.frame = -1;
    const std::lock_guard<std::recursive_mutex> lock(getFrameMutex);

    AVPacket *recent_packet = packet;

    int max_attempts = 128;
        "attempts", attempts);

    RemoveAVPacket(recent_packet);

    if (avcodec_is_open(pCodecCtx)) {
        avcodec_flush_buffers(pCodecCtx);

    av_buffer_unref(&hw_device_ctx);
    hw_device_ctx = NULL;

    if (avcodec_is_open(aCodecCtx)) {
        avcodec_flush_buffers(aCodecCtx);

    working_cache.Clear();

    avformat_close_input(&pFormatCtx);
    av_freep(&pFormatCtx);

    largest_frame_processed = 0;
    seek_audio_frame_found = 0;
    seek_video_frame_found = 0;
    current_video_frame = 0;
    last_video_frame.reset();
bool FFmpegReader::HasAlbumArt() {
    return pFormatCtx && videoStream >= 0 && pFormatCtx->streams[videoStream]
        && (pFormatCtx->streams[videoStream]->disposition & AV_DISPOSITION_ATTACHED_PIC);
}
void FFmpegReader::UpdateAudioInfo() {
    info.file_size = pFormatCtx->pb ? avio_size(pFormatCtx->pb) : -1;

    if (aStream->duration > 0 && aStream->duration > info.duration) {
    } else if (pFormatCtx->duration > 0 && info.duration <= 0.0f) {
        info.duration = float(pFormatCtx->duration) / AV_TIME_BASE;

    AVDictionaryEntry *tag = NULL;
    while ((tag = av_dict_get(aStream->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
        QString str_key = tag->key;
        QString str_value = tag->value;
        info.metadata[str_key.toStdString()] = str_value.trimmed().toStdString();
void FFmpegReader::UpdateVideoInfo() {
    info.file_size = pFormatCtx->pb ? avio_size(pFormatCtx->pb) : -1;

    AVRational framerate = av_guess_frame_rate(pFormatCtx, pStream, NULL);

    if (pStream->sample_aspect_ratio.num != 0) {

    if (!check_interlace) {
        check_interlace = true;

        switch (field_order) {
            case AV_FIELD_PROGRESSIVE:
            case AV_FIELD_UNKNOWN:
                check_interlace = false;

    if (info.duration <= 0.0f && pFormatCtx->duration >= 0) {
        info.duration = float(pFormatCtx->duration) / AV_TIME_BASE;

    if (info.duration <= 0.0f && pStream->duration == AV_NOPTS_VALUE && pFormatCtx->duration == AV_NOPTS_VALUE) {
        is_duration_known = false;

    is_duration_known = true;
    AVDictionaryEntry *tag = NULL;
    while ((tag = av_dict_get(pStream->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
        QString str_key = tag->key;
        QString str_value = tag->value;
        info.metadata[str_key.toStdString()] = str_value.trimmed().toStdString();

    return this->is_duration_known;
        throw ReaderClosed("The FFmpegReader is closed. Call Open() before calling this method.", path);

    if (requested_frame < 1)

        throw InvalidFile("Could not detect the duration of the video or audio stream.", path);

    const std::lock_guard<std::recursive_mutex> lock(getFrameMutex);

    int64_t diff = requested_frame - last_frame;
    if (diff >= 1 && diff <= 20) {
        frame = ReadStream(requested_frame);

        Seek(requested_frame);

        frame = ReadStream(requested_frame);
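
        // Worked example: with last_frame == 100, a request for frame 105
        // (diff == 5) just reads packets forward; a request for frame 500
        // (diff == 400) or frame 90 (diff == -10) falls through to Seek(),
        // since reading forward would be slower than seeking.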
std::shared_ptr<Frame> FFmpegReader::ReadStream(int64_t requested_frame) {
    bool check_seek = false;
    int packet_error = -1;

    CheckWorkingFrames(requested_frame);

    if (is_cache_found) {

    if (!hold_packet || !packet) {
        packet_error = GetNextPacket();
        if (packet_error < 0 && !packet) {

    check_seek = CheckSeek(false);

    if ((info.has_video && packet && packet->stream_index == videoStream) ||
        ProcessVideoPacket(requested_frame);

    if ((info.has_audio && packet && packet->stream_index == audioStream) ||
        ProcessAudioPacket(requested_frame);

    if ((!info.has_video && packet && packet->stream_index == videoStream) ||
        (!info.has_audio && packet && packet->stream_index == audioStream)) {
        if (packet->stream_index == videoStream) {
        } else if (packet->stream_index == audioStream) {
        RemoveAVPacket(packet);

    ZmqLogger::Instance()->AppendDebugMethod(
        "FFmpegReader::ReadStream (force EOF)",
        "packets_read", packet_status.packets_read(),
        "packets_decoded", packet_status.packets_decoded(),
        "packets_eof", packet_status.packets_eof,
        "video_eof", packet_status.video_eof,
        "audio_eof", packet_status.audio_eof,
        "end_of_file", packet_status.end_of_file);

        "largest_frame_processed", largest_frame_processed,
        "Working Cache Count", working_cache.Count());

    CheckWorkingFrames(requested_frame);

    std::shared_ptr<Frame> f = CreateFrame(largest_frame_processed);

    if (!frame->has_image_data) {

        frame->AddAudioSilence(samples_in_frame);

    std::shared_ptr<Frame> f = CreateFrame(largest_frame_processed);
    f->AddAudioSilence(samples_in_frame);
int FFmpegReader::GetNextPacket() {
    int found_packet = 0;
    AVPacket *next_packet;
    next_packet = new AVPacket();
    found_packet = av_read_frame(pFormatCtx, next_packet);

    RemoveAVPacket(packet);

    if (found_packet >= 0) {
        packet = next_packet;

        if (packet->stream_index == videoStream) {
        } else if (packet->stream_index == audioStream) {

    return found_packet;
}
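
// Note: allocating an AVPacket with operator new (as GetNextPacket() does)
// predates FFmpeg's av_packet_alloc() API. A modern equivalent would look
// roughly like this (a sketch, not the code used above; read_next_packet is
// a hypothetical helper name):
//
//     AVPacket *read_next_packet(AVFormatContext *fmt) {
//         AVPacket *pkt = av_packet_alloc();      // zero-initialized packet
//         if (!pkt) return nullptr;
//         if (av_read_frame(fmt, pkt) < 0) {      // EOF or read error
//             av_packet_free(&pkt);               // unref + free in one call
//             return nullptr;
//         }
//         return pkt;
//     }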
bool FFmpegReader::GetAVFrame() {
    int frameFinished = 0;

    int send_packet_err = 0;
    int64_t send_packet_pts = 0;
    if ((packet && packet->stream_index == videoStream) || !packet) {
        send_packet_err = avcodec_send_packet(pCodecCtx, packet);

        if (packet && send_packet_err >= 0) {
            send_packet_pts = GetPacketPTS();
            hold_packet = false;

        if (send_packet_err < 0 && send_packet_err != AVERROR_EOF) {
            ZmqLogger::Instance()->AppendDebugMethod(
                "FFmpegReader::GetAVFrame (send packet: Not sent [" + av_err2string(send_packet_err) + "])",
                "send_packet_err", send_packet_err,
                "send_packet_pts", send_packet_pts);

            if (send_packet_err == AVERROR(EAGAIN)) {
                ZmqLogger::Instance()->AppendDebugMethod(
                    "FFmpegReader::GetAVFrame (send packet: AVERROR(EAGAIN): user must read output with avcodec_receive_frame())",
                    "send_packet_pts", send_packet_pts);

            if (send_packet_err == AVERROR(EINVAL)) {
                ZmqLogger::Instance()->AppendDebugMethod(
                    "FFmpegReader::GetAVFrame (send packet: AVERROR(EINVAL): codec not opened, it is an encoder, or requires flush)",
                    "send_packet_pts", send_packet_pts);

            if (send_packet_err == AVERROR(ENOMEM)) {
                ZmqLogger::Instance()->AppendDebugMethod(
                    "FFmpegReader::GetAVFrame (send packet: AVERROR(ENOMEM): failed to add packet to internal queue, or legitimate decoding errors)",
                    "send_packet_pts", send_packet_pts);
    int receive_frame_err = 0;
    AVFrame *next_frame2;

    next_frame2 = next_frame;

    while (receive_frame_err >= 0) {
        receive_frame_err = avcodec_receive_frame(pCodecCtx, next_frame2);

        if (receive_frame_err != 0) {
            ZmqLogger::Instance()->AppendDebugMethod(
                "FFmpegReader::GetAVFrame (receive frame: frame not ready yet from decoder [" + av_err2string(receive_frame_err) + "])",
                "receive_frame_err", receive_frame_err,
                "send_packet_pts", send_packet_pts);

            if (receive_frame_err == AVERROR_EOF) {
                ZmqLogger::Instance()->AppendDebugMethod(
                    "FFmpegReader::GetAVFrame (receive frame: AVERROR_EOF: EOF detected from decoder, flushing buffers)",
                    "send_packet_pts", send_packet_pts);
                avcodec_flush_buffers(pCodecCtx);

            if (receive_frame_err == AVERROR(EINVAL)) {
                ZmqLogger::Instance()->AppendDebugMethod(
                    "FFmpegReader::GetAVFrame (receive frame: AVERROR(EINVAL): invalid frame received, flushing buffers)",
                    "send_packet_pts", send_packet_pts);
                avcodec_flush_buffers(pCodecCtx);

            if (receive_frame_err == AVERROR(EAGAIN)) {
                ZmqLogger::Instance()->AppendDebugMethod(
                    "FFmpegReader::GetAVFrame (receive frame: AVERROR(EAGAIN): output is not available in this state - user must try to send new input)",
                    "send_packet_pts", send_packet_pts);

            if (receive_frame_err == AVERROR_INPUT_CHANGED) {
                ZmqLogger::Instance()->AppendDebugMethod(
                    "FFmpegReader::GetAVFrame (receive frame: AVERROR_INPUT_CHANGED: current decoded frame has changed parameters with respect to first decoded frame)",
                    "send_packet_pts", send_packet_pts);

        if (next_frame2->format == hw_de_av_pix_fmt) {
            next_frame->format = AV_PIX_FMT_YUV420P;
            if ((err = av_hwframe_transfer_data(next_frame, next_frame2, 0)) < 0) {
            if ((err = av_frame_copy_props(next_frame, next_frame2)) < 0) {

        next_frame = next_frame2;

        av_image_alloc(pFrame->data, pFrame->linesize, info.width, info.height,
                       (AVPixelFormat)(pStream->codecpar->format), 1);
        av_image_copy(pFrame->data, pFrame->linesize,
                      (const uint8_t **)next_frame->data, next_frame->linesize,

        if (next_frame->pts != AV_NOPTS_VALUE) {
            video_pts = next_frame->pts;
        } else if (next_frame->pkt_dts != AV_NOPTS_VALUE) {
            video_pts = next_frame->pkt_dts;

            "FFmpegReader::GetAVFrame (Successful frame received)",
            "video_pts", video_pts,
            "send_packet_pts", send_packet_pts);

    avcodec_decode_video2(pCodecCtx, next_frame, &frameFinished, packet);

    if (frameFinished) {
        av_picture_copy((AVPicture *) pFrame, (AVPicture *) next_frame,
                        pCodecCtx->pix_fmt, info.width,

    return frameFinished;
}
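
// The send/receive calls above follow FFmpeg's decoupled decoding API
// (avcodec_send_packet / avcodec_receive_frame), with the legacy
// avcodec_decode_video2() path kept for old FFmpeg builds. For reference,
// the canonical shape of that loop (a sketch with error handling trimmed,
// not this reader's exact control flow):
//
//     int decode_all(AVCodecContext *ctx, AVPacket *pkt, AVFrame *frm) {
//         int ret = avcodec_send_packet(ctx, pkt);   // pkt == NULL flushes
//         if (ret < 0 && ret != AVERROR_EOF)
//             return ret;
//         while (ret >= 0) {
//             ret = avcodec_receive_frame(ctx, frm);
//             if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
//                 return 0;                          // need input / drained
//             if (ret < 0)
//                 return ret;                        // real decoding error
//             /* ... consume frm ... */
//             av_frame_unref(frm);
//         }
//         return 0;
//     }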
bool FFmpegReader::CheckSeek(bool is_video) {
    if ((is_video_seek && !seek_video_frame_found) || (!is_video_seek && !seek_audio_frame_found))

    int64_t max_seeked_frame = std::max(seek_audio_frame_found, seek_video_frame_found);

    if (max_seeked_frame >= seeking_frame) {
            "is_video_seek", is_video_seek,
            "max_seeked_frame", max_seeked_frame,
            "seeking_frame", seeking_frame,
            "seeking_pts", seeking_pts,
            "seek_video_frame_found", seek_video_frame_found,
            "seek_audio_frame_found", seek_audio_frame_found);

        Seek(seeking_frame - (10 * seek_count * seek_count));
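
        // The rewind distance grows quadratically with each failed attempt:
        // seek_count 1, 2, 3 re-seeks 10, 40, 90 frames before seeking_frame,
        // backing up more aggressively each time the previous seek landed
        // too far forward.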
1337 "is_video_seek", is_video_seek,
1338 "packet->pts", GetPacketPTS(),
1339 "seeking_pts", seeking_pts,
1340 "seeking_frame", seeking_frame,
1341 "seek_video_frame_found", seek_video_frame_found,
1342 "seek_audio_frame_found", seek_audio_frame_found);
void FFmpegReader::ProcessVideoPacket(int64_t requested_frame) {
    int frame_finished = GetAVFrame();

    if (!frame_finished) {
        RemoveAVFrame(pFrame);

    int64_t current_frame = ConvertVideoPTStoFrame(video_pts);

    if (!seek_video_frame_found && is_seeking)
        seek_video_frame_found = current_frame;

    working_cache.Add(CreateFrame(requested_frame));
    AVFrame *pFrameRGB = nullptr;
    uint8_t *buffer = nullptr;

    if (pFrameRGB == nullptr)
        throw OutOfMemory("Failed to allocate frame buffer", path);

    max_width = std::max(float(max_width), max_width * max_scale_x);
    max_height = std::max(float(max_height), max_height * max_scale_y);

    QSize width_size(max_width * max_scale_x,
                     round(max_width / (float(info.width) / float(info.height))));
    QSize height_size(round(max_height / (float(info.height) / float(info.width))),
                      max_height * max_scale_y);

    if (width_size.width() >= max_width && width_size.height() >= max_height) {
        max_width = std::max(max_width, width_size.width());
        max_height = std::max(max_height, width_size.height());

        max_width = std::max(max_width, height_size.width());
        max_height = std::max(max_height, height_size.height());
    float preview_ratio = 1.0;

    max_width = info.width * max_scale_x * preview_ratio;
    max_height = info.height * max_scale_y * preview_ratio;
    int original_height = height;
    if (max_width != 0 && max_height != 0 && max_width < width && max_height < height) {
        float ratio = float(width) / float(height);
        int possible_width = round(max_height * ratio);
        int possible_height = round(max_width / ratio);

        if (possible_width <= max_width) {
            width = possible_width;
            height = max_height;

            height = possible_height;

    const int bytes_per_pixel = 4;
    int buffer_size = (width * height * bytes_per_pixel) + 128;
    buffer = new unsigned char[buffer_size]();

    int scale_mode = SWS_FAST_BILINEAR;
        scale_mode = SWS_BICUBIC;

    sws_scale(img_convert_ctx, pFrame->data, pFrame->linesize, 0,
              original_height, pFrameRGB->data, pFrameRGB->linesize);
    std::shared_ptr<Frame> f = CreateFrame(current_frame);

        f->AddImage(width, height, bytes_per_pixel, QImage::Format_RGBA8888_Premultiplied, buffer);

        f->AddImage(width, height, bytes_per_pixel, QImage::Format_RGBA8888, buffer);

    working_cache.Add(f);

    last_video_frame = f;

    RemoveAVFrame(pFrame);
    sws_freeContext(img_convert_ctx);

    ZmqLogger::Instance()->AppendDebugMethod(
        "FFmpegReader::ProcessVideoPacket (After)",
        "requested_frame", requested_frame,
        "current_frame", current_frame,
        "f->number", f->number,
        "video_pts_seconds", video_pts_seconds);
void FFmpegReader::ProcessAudioPacket(int64_t requested_frame) {
    if (packet && packet->pts != AV_NOPTS_VALUE) {
        location = GetAudioPTSLocation(packet->pts);

        if (!seek_audio_frame_found && is_seeking)
            seek_audio_frame_found = location.frame;

    working_cache.Add(CreateFrame(requested_frame));

        "requested_frame", requested_frame,
        "target_frame", location.frame,
    int frame_finished = 0;

    int packet_samples = 0;

    int send_packet_err = avcodec_send_packet(aCodecCtx, packet);
    if (send_packet_err < 0 && send_packet_err != AVERROR_EOF) {

    int receive_frame_err = avcodec_receive_frame(aCodecCtx, audio_frame);
    if (receive_frame_err >= 0) {
    if (receive_frame_err == AVERROR_EOF) {
    if (receive_frame_err == AVERROR(EINVAL) || receive_frame_err == AVERROR_EOF) {
        avcodec_flush_buffers(aCodecCtx);
    if (receive_frame_err != 0) {

    int used = avcodec_decode_audio4(aCodecCtx, audio_frame, &frame_finished, packet);
    if (frame_finished) {

        audio_pts = audio_frame->pts;

        location = GetAudioPTSLocation(audio_pts);

        int plane_size = -1;
        data_size = av_samples_get_buffer_size(&plane_size,
                                               audio_frame->nb_samples,

    int pts_remaining_samples = packet_samples / info.channels;

    if (pts_remaining_samples == 0) {
            "packet_samples", packet_samples,
            "pts_remaining_samples", pts_remaining_samples);
    while (pts_remaining_samples) {
        int samples = samples_per_frame - previous_packet_location.sample_start;
        if (samples > pts_remaining_samples)
            samples = pts_remaining_samples;

        pts_remaining_samples -= samples;

        if (pts_remaining_samples > 0) {
            previous_packet_location.frame++;
1650 "packet_samples", packet_samples,
1654 "AV_SAMPLE_FMT_S16", AV_SAMPLE_FMT_S16);
1659 audio_converted->nb_samples = audio_frame->nb_samples;
1660 av_samples_alloc(audio_converted->data, audio_converted->linesize,
info.
channels, audio_frame->nb_samples, AV_SAMPLE_FMT_S16, 0);
1670 av_opt_set_int(avr,
"out_sample_fmt", AV_SAMPLE_FMT_S16, 0);
1679 audio_converted->data,
1680 audio_converted->linesize[0],
1681 audio_converted->nb_samples,
1683 audio_frame->linesize[0],
1684 audio_frame->nb_samples);
1688 audio_converted->data[0],
1689 static_cast<size_t>(audio_converted->nb_samples)
1690 * av_get_bytes_per_sample(AV_SAMPLE_FMT_S16)
1699 av_free(audio_converted->data[0]);
    int64_t starting_frame_number = -1;
    bool partial_frame = true;
    for (int channel_filter = 0; channel_filter < info.channels; channel_filter++) {
        starting_frame_number = location.frame;
        int channel_buffer_size = packet_samples / info.channels;
        float *channel_buffer = new float[channel_buffer_size];

        for (int z = 0; z < channel_buffer_size; z++)
            channel_buffer[z] = 0.0f;

        for (int sample = 0; sample < packet_samples; sample++) {
            if (channel_filter == channel) {
                channel_buffer[position] = audio_buf[sample] * (1.0f / (1 << 15));
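                // 16-bit samples span -32768..32767, so multiplying by
                // 1/(1 << 15) == 1/32768 normalizes them into the -1.0..1.0
                // float range used by Frame::AddAudio below.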
        int remaining_samples = channel_buffer_size;
        float *iterate_channel_buffer = channel_buffer;
        while (remaining_samples > 0) {
            int samples = samples_per_frame - start;
            if (samples > remaining_samples)
                samples = remaining_samples;

            std::shared_ptr<Frame> f = CreateFrame(starting_frame_number);

            if (samples_per_frame == start + samples)
                partial_frame = false;
            else
                partial_frame = true;

            f->AddAudio(true, channel_filter, start, iterate_channel_buffer,

                "frame", starting_frame_number,
                "channel", channel_filter,
                "partial_frame", partial_frame,
                "samples_per_frame", samples_per_frame);

            working_cache.Add(f);

            remaining_samples -= samples;

            if (remaining_samples > 0)
                iterate_channel_buffer += samples;

            starting_frame_number++;

        delete[] channel_buffer;
        channel_buffer = NULL;
        iterate_channel_buffer = NULL;
1808 "requested_frame", requested_frame,
1809 "starting_frame", location.
frame,
1810 "end_frame", starting_frame_number - 1,
1811 "audio_pts_seconds", audio_pts_seconds);
void FFmpegReader::Seek(int64_t requested_frame) {
    if (requested_frame < 1)
        requested_frame = 1;

    if (requested_frame > largest_frame_processed && packet_status.end_of_file) {

        "requested_frame", requested_frame,
        "seek_count", seek_count,
        "last_frame", last_frame);

    working_cache.Clear();

    video_pts_seconds = NO_PTS_OFFSET;
    audio_pts_seconds = NO_PTS_OFFSET;
    hold_packet = false;

    current_video_frame = 0;
    largest_frame_processed = 0;

    packet_status.reset(false);
    int buffer_amount = std::max(max_concurrent_frames, 8);
    if (requested_frame - buffer_amount < 20) {

        if (seek_count == 1) {
            seeking_pts = ConvertFrameToVideoPTS(1);

        seek_audio_frame_found = 0;
        seek_video_frame_found = 0;

        bool seek_worked = false;
        int64_t seek_target = 0;

        seek_target = ConvertFrameToVideoPTS(requested_frame - buffer_amount);
            fprintf(stderr, "%s: error while seeking video stream\n", pFormatCtx->AV_FILENAME);
        is_video_seek = true;

        seek_target = ConvertFrameToAudioPTS(requested_frame - buffer_amount);
            fprintf(stderr, "%s: error while seeking audio stream\n", pFormatCtx->AV_FILENAME);
        is_video_seek = false;

        avcodec_flush_buffers(aCodecCtx);

        avcodec_flush_buffers(pCodecCtx);

        previous_packet_location.frame = -1;

        if (seek_count == 1) {
            seeking_pts = seek_target;
            seeking_frame = requested_frame;

        seek_audio_frame_found = 0;
        seek_video_frame_found = 0;
int64_t FFmpegReader::GetPacketPTS() {
        int64_t current_pts = packet->pts;
        if (current_pts == AV_NOPTS_VALUE && packet->dts != AV_NOPTS_VALUE)
            current_pts = packet->dts;

    return AV_NOPTS_VALUE;
}
void FFmpegReader::UpdatePTSOffset() {
    if (pts_offset_seconds != NO_PTS_OFFSET) {

    pts_offset_seconds = 0.0;
    double video_pts_offset_seconds = 0.0;
    double audio_pts_offset_seconds = 0.0;

    bool has_video_pts = false;
        has_video_pts = true;
    bool has_audio_pts = false;
        has_audio_pts = true;

    while (!has_video_pts || !has_audio_pts) {
        if (GetNextPacket() < 0)

        int64_t pts = GetPacketPTS();

        if (!has_video_pts && packet->stream_index == videoStream) {
            if (std::abs(video_pts_offset_seconds) <= 10.0) {
                has_video_pts = true;

        } else if (!has_audio_pts && packet->stream_index == audioStream) {
            if (std::abs(audio_pts_offset_seconds) <= 10.0) {
                has_audio_pts = true;

    if (has_video_pts && has_audio_pts) {

    pts_offset_seconds = std::max(video_pts_offset_seconds, audio_pts_offset_seconds);
int64_t FFmpegReader::ConvertVideoPTStoFrame(int64_t pts) {
    int64_t previous_video_frame = current_video_frame;

    if (current_video_frame == 0)
        current_video_frame = frame;

    if (frame == previous_video_frame) {

        current_video_frame++;
int64_t FFmpegReader::ConvertFrameToVideoPTS(int64_t frame_number) {
    double seconds = (double(frame_number - 1) / info.fps.ToDouble()) + pts_offset_seconds;

int64_t FFmpegReader::ConvertFrameToAudioPTS(int64_t frame_number) {
    double seconds = (double(frame_number - 1) / info.fps.ToDouble()) + pts_offset_seconds;
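
// Worked example for the conversions above: at 30 fps with
// pts_offset_seconds == 0, frame 91 gives seconds = (91 - 1) / 30 = 3.0;
// scaling by the stream time base (e.g. a 1/90000 video time base) yields
// pts = 3.0 * 90000 = 270000.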
AudioLocation FFmpegReader::GetAudioPTSLocation(int64_t pts) {
    int64_t whole_frame = int64_t(frame);

    double sample_start_percentage = frame - double(whole_frame);

    int sample_start = round(double(samples_per_frame) * sample_start_percentage);

    if (whole_frame < 1)
    if (sample_start < 0)

    if (previous_packet_location.frame != -1) {
        if (location.is_near(previous_packet_location, samples_per_frame, samples_per_frame)) {
            int64_t orig_frame = location.frame;

            location.frame = previous_packet_location.frame;

            ZmqLogger::Instance()->AppendDebugMethod(
                "FFmpegReader::GetAudioPTSLocation (Audio Gap Detected)",
                "Source Frame", orig_frame,
                "Source Audio Sample", orig_start,
                "Target Frame", location.frame,
                "Target Audio Sample", location.sample_start,
                "pts", pts);

    previous_packet_location = location;
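
    // Example: 44100 Hz audio at 30 fps gives samples_per_frame == 1470.
    // A pts landing 40% of the way into frame 12 maps to
    // sample_start = round(1470 * 0.4) == 588, i.e. AudioLocation{12, 588};
    // if that is near the previous packet's location (per is_near above),
    // the gap fix snaps it back to the previous location instead.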
std::shared_ptr<Frame> FFmpegReader::CreateFrame(int64_t requested_frame) {
    std::shared_ptr<Frame> output = working_cache.GetFrame(requested_frame);

    output = working_cache.GetFrame(requested_frame);
    if (output)
        return output;

    working_cache.Add(output);

    if (requested_frame > largest_frame_processed)
        largest_frame_processed = requested_frame;
bool FFmpegReader::IsPartialFrame(int64_t requested_frame) {
    bool seek_trash = false;
    int64_t max_seeked_frame = seek_audio_frame_found;
    if (seek_video_frame_found > max_seeked_frame) {
        max_seeked_frame = seek_video_frame_found;

    if ((info.has_audio && seek_audio_frame_found && max_seeked_frame >= requested_frame) ||
        (info.has_video && seek_video_frame_found && max_seeked_frame >= requested_frame)) {
void FFmpegReader::CheckWorkingFrames(int64_t requested_frame) {
    const std::lock_guard<std::recursive_mutex> lock(getFrameMutex);

    std::vector<std::shared_ptr<openshot::Frame>> working_frames = working_cache.GetFrames();
    std::vector<std::shared_ptr<openshot::Frame>>::iterator working_itr;

    for (working_itr = working_frames.begin(); working_itr != working_frames.end(); ++working_itr)
    {
        std::shared_ptr<Frame> f = *working_itr;

        if (!f || f->number > requested_frame) {

        double frame_pts_seconds = (double(f->number - 1) / info.fps.ToDouble()) + pts_offset_seconds;
        double recent_pts_seconds = std::max(video_pts_seconds, audio_pts_seconds);

        bool is_video_ready = false;
        bool is_audio_ready = false;
        double recent_pts_diff = recent_pts_seconds - frame_pts_seconds;
        if ((frame_pts_seconds <= video_pts_seconds)
            || (recent_pts_diff > 1.5)

            is_video_ready = true;
                "frame_number", f->number,
                "frame_pts_seconds", frame_pts_seconds,
                "video_pts_seconds", video_pts_seconds,
                "recent_pts_diff", recent_pts_diff);

            for (int64_t previous_frame = requested_frame - 1; previous_frame > 0; previous_frame--) {
                if (previous_frame_instance && previous_frame_instance->has_image_data) {
                    f->AddImage(std::make_shared<QImage>(previous_frame_instance->GetImage()->copy()));

            if (last_video_frame && !f->has_image_data) {
                f->AddImage(std::make_shared<QImage>(last_video_frame->GetImage()->copy()));
            } else if (!f->has_image_data) {
                f->AddColor("#000000");
        double audio_pts_diff = audio_pts_seconds - frame_pts_seconds;
        if ((frame_pts_seconds < audio_pts_seconds && audio_pts_diff > 1.0)
            || (recent_pts_diff > 1.5)

            is_audio_ready = true;
                "frame_number", f->number,
                "frame_pts_seconds", frame_pts_seconds,
                "audio_pts_seconds", audio_pts_seconds,
                "audio_pts_diff", audio_pts_diff,
                "recent_pts_diff", recent_pts_diff);
        bool is_seek_trash = IsPartialFrame(f->number);

            "frame_number", f->number,
            "is_video_ready", is_video_ready,
            "is_audio_ready", is_audio_ready,

        if ((!packet_status.end_of_file && is_video_ready && is_audio_ready) ||
            packet_status.end_of_file || is_seek_trash) {

                "requested_frame", requested_frame,
                "f->number", f->number,
                "is_seek_trash", is_seek_trash,
                "Working Cache Count", working_cache.Count(),

            if (!is_seek_trash) {

                working_cache.Remove(f->number);

                last_frame = f->number;

                working_cache.Remove(f->number);

    working_frames.clear();
    working_frames.shrink_to_fit();
void FFmpegReader::CheckFPS() {
    int frames_per_second[3] = {0, 0, 0};
    int max_fps_index = sizeof(frames_per_second) / sizeof(frames_per_second[0]);

    int all_frames_detected = 0;
    int starting_frames_detected = 0;

        if (GetNextPacket() < 0)

        if (packet->stream_index == videoStream) {
            fps_index = int(video_seconds);

            if (fps_index >= 0 && fps_index < max_fps_index) {
                starting_frames_detected++;
                frames_per_second[fps_index]++;

            all_frames_detected++;

    float avg_fps = 30.0;
    if (starting_frames_detected > 0 && fps_index > 0) {
        avg_fps = float(starting_frames_detected) / std::min(fps_index, max_fps_index);
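
        // Example: 62 video packets counted during the first two whole
        // seconds of the stream (fps_index == 2) give
        // avg_fps = 62 / min(2, 3) = 31.0.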
    if (avg_fps < 8.0) {

    if (all_frames_detected > 0) {
void FFmpegReader::RemoveAVFrame(AVFrame *remove_frame) {
        av_freep(&remove_frame->data[0]);

void FFmpegReader::RemoveAVPacket(AVPacket *remove_packet) {
    delete remove_packet;
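
// Note: deleting the packet here matches the manual "new AVPacket()" in
// GetNextPacket(). If packets came from av_packet_alloc() instead, the
// matching cleanup would be a single av_packet_free(&remove_packet), which
// unrefs and frees in one call (a sketch of the alternative, not this code).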
    root["type"] = "FFmpegReader";
    root["path"] = path;

    catch (const std::exception& e) {
        throw InvalidJSON("JSON is invalid (missing keys or invalid data types)");

    if (!root["path"].isNull())
        path = root["path"].asString();
Size of file (in bytes)