#define MAX_SUPPORTED_WIDTH 1950
#define MAX_SUPPORTED_HEIGHT 1100

#include "libavutil/hwcontext_vaapi.h"

typedef struct VAAPIDecodeContext {
    VAEntrypoint va_entrypoint;
    VAContextID va_context;

#if FF_API_STRUCT_VAAPI_CONTEXT
    struct vaapi_context *old_context;
    AVBufferRef *device_ref;
#endif

    AVHWDeviceContext *device;
    AVVAAPIDeviceContext *hwctx;

    AVHWFramesContext *frames;
    AVVAAPIFramesContext *hwfc;

    enum AVPixelFormat surface_format;
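// Note: this mirrors the layout of FFmpeg's internal VAAPIDecodeContext (from
// libavcodec/vaapi_decode.h) so that its va_config member can be read out of
// pCodecCtx->priv_data when querying VAAPI frame constraints further below.
// It must stay layout-compatible with the FFmpeg version being linked.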
    : last_frame(0), is_seeking(0), seeking_pts(0), seeking_frame(0), seek_count(0), NO_PTS_OFFSET(-99999),
      path(path), is_video_seek(true), check_interlace(false), check_fps(false), enable_seek(true), is_open(false),
      seek_audio_frame_found(0), seek_video_frame_found(0), is_duration_known(false), largest_frame_processed(0),
      video_pts(0), pFormatCtx(NULL), videoStream(-1), audioStream(-1), pCodecCtx(NULL), aCodecCtx(NULL),
      pStream(NULL), aStream(NULL), pFrame(NULL), previous_packet_location{-1, 0},

    pts_offset_seconds = NO_PTS_OFFSET;
    video_pts_seconds = NO_PTS_OFFSET;
    audio_pts_seconds = NO_PTS_OFFSET;
    if (abs(diff) <= amount)
static enum AVPixelFormat get_hw_dec_format(AVCodecContext *ctx, const enum AVPixelFormat *pix_fmts)
{
    const enum AVPixelFormat *p;

    for (p = pix_fmts; *p != AV_PIX_FMT_NONE; p++) {
        switch (*p) {
#if defined(__linux__)
            case AV_PIX_FMT_VAAPI:
            case AV_PIX_FMT_VDPAU:
#endif
#if defined(_WIN32)
            case AV_PIX_FMT_DXVA2_VLD:
            case AV_PIX_FMT_D3D11:
#endif
#if defined(__APPLE__)
            case AV_PIX_FMT_VIDEOTOOLBOX:
#endif
            case AV_PIX_FMT_CUDA:

    return AV_PIX_FMT_NONE;
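// get_hw_dec_format() is installed below as the AVCodecContext::get_format
// callback. FFmpeg calls it with the candidate pixel formats the decoder can
// produce; picking one of the hardware formats above opts into that hwaccel,
// and AV_PIX_FMT_NONE signals that no offered format was accepted.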
int FFmpegReader::IsHardwareDecodeSupported(int codecid)
{
    switch (codecid) {
        case AV_CODEC_ID_H264:
        case AV_CODEC_ID_MPEG2VIDEO:
        case AV_CODEC_ID_VC1:
        case AV_CODEC_ID_WMV1:
        case AV_CODEC_ID_WMV2:
        case AV_CODEC_ID_WMV3:
    const std::lock_guard<std::recursive_mutex> lock(getFrameMutex);

    if (avformat_open_input(&pFormatCtx, path.c_str(), NULL, NULL) != 0)

    if (avformat_find_stream_info(pFormatCtx, NULL) < 0)

    packet_status.reset(true);

    for (unsigned int i = 0; i < pFormatCtx->nb_streams; i++) {
        if (AV_GET_CODEC_TYPE(pFormatCtx->streams[i]) == AVMEDIA_TYPE_VIDEO && videoStream < 0) {
        if (AV_GET_CODEC_TYPE(pFormatCtx->streams[i]) == AVMEDIA_TYPE_AUDIO && audioStream < 0) {

    if (videoStream == -1 && audioStream == -1)

    if (videoStream != -1) {
        pStream = pFormatCtx->streams[videoStream];

        const AVCodec *pCodec = avcodec_find_decoder(codecId);
        AVDictionary *opts = NULL;
        int retry_decode_open = 2;
        if (hw_de_on && (retry_decode_open == 2)) {
            hw_de_supported = IsHardwareDecodeSupported(pCodecCtx->codec_id);

            retry_decode_open = 0;

        if (pCodec == NULL) {
            throw InvalidCodec("A valid video codec could not be found for this file.", path);

        av_dict_set(&opts, "strict", "experimental", 0);

            int i_decoder_hw = 0;
            char *adapter_ptr = NULL;
            fprintf(stderr, "Hardware decoding device number: %d\n", adapter_num);

            pCodecCtx->get_format = get_hw_dec_format;

            if (adapter_num < 3 && adapter_num >= 0) {
#if defined(__linux__)
                snprintf(adapter, sizeof(adapter), "/dev/dri/renderD%d", adapter_num + 128);
                adapter_ptr = adapter;
                switch (i_decoder_hw) {
                        hw_de_av_device_type = AV_HWDEVICE_TYPE_VAAPI;
                        hw_de_av_device_type = AV_HWDEVICE_TYPE_CUDA;
                        hw_de_av_device_type = AV_HWDEVICE_TYPE_VDPAU;
                        hw_de_av_device_type = AV_HWDEVICE_TYPE_QSV;
                        hw_de_av_device_type = AV_HWDEVICE_TYPE_VAAPI;

#elif defined(_WIN32)
                switch (i_decoder_hw) {
                        hw_de_av_device_type = AV_HWDEVICE_TYPE_CUDA;
                        hw_de_av_device_type = AV_HWDEVICE_TYPE_DXVA2;
                        hw_de_av_device_type = AV_HWDEVICE_TYPE_D3D11VA;
                        hw_de_av_device_type = AV_HWDEVICE_TYPE_QSV;
                        hw_de_av_device_type = AV_HWDEVICE_TYPE_DXVA2;

#elif defined(__APPLE__)
                switch (i_decoder_hw) {
                        hw_de_av_device_type = AV_HWDEVICE_TYPE_VIDEOTOOLBOX;
                        hw_de_av_device_type = AV_HWDEVICE_TYPE_QSV;
                        hw_de_av_device_type = AV_HWDEVICE_TYPE_VIDEOTOOLBOX;
#if defined(__linux__)
            if (adapter_ptr != NULL && access(adapter_ptr, W_OK) == 0) {
#elif defined(_WIN32)
            if (adapter_ptr != NULL) {
#elif defined(__APPLE__)
            if (adapter_ptr != NULL) {

                hw_device_ctx = NULL;
                if (av_hwdevice_ctx_create(&hw_device_ctx, hw_de_av_device_type, adapter_ptr, NULL, 0) >= 0) {
                    if (!(pCodecCtx->hw_device_ctx = av_buffer_ref(hw_device_ctx))) {

        pCodecCtx->thread_type &= ~FF_THREAD_FRAME;
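        // Clearing FF_THREAD_FRAME restricts the decoder to slice threading.
        // Frame threading queues several frames inside the decoder, which adds
        // latency and (presumably the motivation here) does not mix well with
        // the single-frame hardware decode path set up above.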
        int avcodec_return = avcodec_open2(pCodecCtx, pCodec, &opts);
        if (avcodec_return < 0) {
            std::stringstream avcodec_error_msg;
            avcodec_error_msg << "A video codec was found, but could not be opened. Error: " << av_err2string(avcodec_return);
                AVHWFramesConstraints *constraints = NULL;
                void *hwconfig = NULL;
                hwconfig = av_hwdevice_hwconfig_alloc(hw_device_ctx);

                ((AVVAAPIHWConfig *)hwconfig)->config_id = ((VAAPIDecodeContext *)(pCodecCtx->priv_data))->va_config;
                constraints = av_hwdevice_get_hwframe_constraints(hw_device_ctx, hwconfig);

                    if (pCodecCtx->coded_width < constraints->min_width ||
                        pCodecCtx->coded_height < constraints->min_height ||
                        pCodecCtx->coded_width > constraints->max_width ||
                        pCodecCtx->coded_height > constraints->max_height) {
                        retry_decode_open = 1;

                        av_buffer_unref(&hw_device_ctx);
                        hw_device_ctx = NULL;
                        ZmqLogger::Instance()->AppendDebugMethod(
                            "\nDecode hardware acceleration is used\n",
                            "Min width:", constraints->min_width,
                            "Min height:", constraints->min_height,
                            "Max width:", constraints->max_width,
                            "Max height:", constraints->max_height,
                            "Frame width:", pCodecCtx->coded_width,
                            "Frame height:", pCodecCtx->coded_height);
                        retry_decode_open = 0;

                av_hwframe_constraints_free(&constraints);
            if (pCodecCtx->coded_width < 0 ||
                pCodecCtx->coded_height < 0 ||
                pCodecCtx->coded_width > max_w ||
                pCodecCtx->coded_height > max_h) {
                ZmqLogger::Instance()->AppendDebugMethod(
                    "DIMENSIONS ARE TOO LARGE for hardware acceleration\n",
                    "Max width:", max_w,
                    "Max height:", max_h,
                    "Frame width:", pCodecCtx->coded_width,
                    "Frame height:", pCodecCtx->coded_height);
                retry_decode_open = 1;

                av_buffer_unref(&hw_device_ctx);
                hw_device_ctx = NULL;
                ZmqLogger::Instance()->AppendDebugMethod(
                    "\nDecode hardware acceleration is used\n",
                    "Max width:", max_w,
                    "Max height:", max_h,
                    "Frame width:", pCodecCtx->coded_width,
                    "Frame height:", pCodecCtx->coded_height);
                retry_decode_open = 0;

            retry_decode_open = 0;
    } while (retry_decode_open);
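    // Inferred states of the retry loop above: retry_decode_open == 2 asks for
    // a first attempt with hardware acceleration, 1 means that attempt failed
    // (e.g. the coded size fell outside the hwframe constraints) and the codec
    // is re-opened in software, and 0 leaves the loop.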
    if (audioStream != -1) {
        aStream = pFormatCtx->streams[audioStream];

        const AVCodec *aCodec = avcodec_find_decoder(codecId);

        if (aCodec == NULL) {
            throw InvalidCodec("A valid audio codec could not be found for this file.", path);

        AVDictionary *opts = NULL;
        av_dict_set(&opts, "strict", "experimental", 0);

        if (avcodec_open2(aCodecCtx, aCodec, &opts) < 0)
            throw InvalidCodec("An audio codec was found, but could not be opened.", path);
    AVDictionaryEntry *tag = NULL;
    while ((tag = av_dict_get(pFormatCtx->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
        QString str_key = tag->key;
        QString str_value = tag->value;
        info.metadata[str_key.toStdString()] = str_value.trimmed().toStdString();
    }

    previous_packet_location.frame = -1;
    const std::lock_guard<std::recursive_mutex> lock(getFrameMutex);

        AVPacket *recent_packet = packet;

        int max_attempts = 128;
            "attempts", attempts);
        RemoveAVPacket(recent_packet);
        if (avcodec_is_open(pCodecCtx)) {
            avcodec_flush_buffers(pCodecCtx);

            av_buffer_unref(&hw_device_ctx);
            hw_device_ctx = NULL;

        if (avcodec_is_open(aCodecCtx)) {
            avcodec_flush_buffers(aCodecCtx);

    working_cache.Clear();

    avformat_close_input(&pFormatCtx);
    av_freep(&pFormatCtx);
    largest_frame_processed = 0;
    seek_audio_frame_found = 0;
    seek_video_frame_found = 0;
    current_video_frame = 0;
    last_video_frame.reset();
bool FFmpegReader::HasAlbumArt() {
    return pFormatCtx && videoStream >= 0 && pFormatCtx->streams[videoStream]
        && (pFormatCtx->streams[videoStream]->disposition & AV_DISPOSITION_ATTACHED_PIC);
}
void FFmpegReader::UpdateAudioInfo() {
    info.file_size = pFormatCtx->pb ? avio_size(pFormatCtx->pb) : -1;

    if (aStream->duration > 0 && aStream->duration > info.duration) {
    } else if (pFormatCtx->duration > 0 && info.duration <= 0.0f) {
        info.duration = float(pFormatCtx->duration) / AV_TIME_BASE;
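    // AVFormatContext::duration is expressed in AV_TIME_BASE units (1,000,000
    // ticks per second), so a stored value of 90,500,000 becomes 90.5 seconds.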
    AVDictionaryEntry *tag = NULL;
    while ((tag = av_dict_get(aStream->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
        QString str_key = tag->key;
        QString str_value = tag->value;
        info.metadata[str_key.toStdString()] = str_value.trimmed().toStdString();
    }
void FFmpegReader::UpdateVideoInfo() {
    info.file_size = pFormatCtx->pb ? avio_size(pFormatCtx->pb) : -1;

    AVRational framerate = av_guess_frame_rate(pFormatCtx, pStream, NULL);

    if (pStream->sample_aspect_ratio.num != 0) {
    if (!check_interlace) {
        check_interlace = true;

        switch (field_order) {
            case AV_FIELD_PROGRESSIVE:
            case AV_FIELD_UNKNOWN:
                check_interlace = false;
    if (info.duration <= 0.0f && pFormatCtx->duration >= 0) {
        info.duration = float(pFormatCtx->duration) / AV_TIME_BASE;

    if (info.duration <= 0.0f && pStream->duration == AV_NOPTS_VALUE && pFormatCtx->duration == AV_NOPTS_VALUE) {

        is_duration_known = false;

        is_duration_known = true;
    AVDictionaryEntry *tag = NULL;
    while ((tag = av_dict_get(pStream->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
        QString str_key = tag->key;
        QString str_value = tag->value;
        info.metadata[str_key.toStdString()] = str_value.trimmed().toStdString();
    }

    return this->is_duration_known;
        throw ReaderClosed("The FFmpegReader is closed. Call Open() before calling this method.", path);

    if (requested_frame < 1)

        throw InvalidFile("Could not detect the duration of the video or audio stream.", path);

    const std::lock_guard<std::recursive_mutex> lock(getFrameMutex);

        int64_t diff = requested_frame - last_frame;
        if (diff >= 1 && diff <= 20) {
            frame = ReadStream(requested_frame);

            Seek(requested_frame);

            frame = ReadStream(requested_frame);
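        // Heuristic: for a short forward hop (1-20 frames past the last
        // delivered frame) it is cheaper to keep reading the stream than to
        // seek, since seeking lands on a keyframe and forces the decoder to be
        // flushed and re-primed; anything else goes through Seek() first.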
std::shared_ptr<Frame> FFmpegReader::ReadStream(int64_t requested_frame) {
    bool check_seek = false;
    int packet_error = -1;

        CheckWorkingFrames(requested_frame);

        if (is_cache_found) {

            if (!hold_packet || !packet) {
                packet_error = GetNextPacket();
                if (packet_error < 0 && !packet) {

            check_seek = CheckSeek(false);
            if ((info.has_video && packet && packet->stream_index == videoStream) ||
                ProcessVideoPacket(requested_frame);

            if ((info.has_audio && packet && packet->stream_index == audioStream) ||
                ProcessAudioPacket(requested_frame);

            if ((!info.has_video && packet && packet->stream_index == videoStream) ||
                (!info.has_audio && packet && packet->stream_index == audioStream)) {
                if (packet->stream_index == videoStream) {
                } else if (packet->stream_index == audioStream) {

                RemoveAVPacket(packet);
            ZmqLogger::Instance()->AppendDebugMethod(
                "FFmpegReader::ReadStream (force EOF)",
                "packets_read", packet_status.packets_read(),
                "packets_decoded", packet_status.packets_decoded(),
                "packets_eof", packet_status.packets_eof,
                "video_eof", packet_status.video_eof,
                "audio_eof", packet_status.audio_eof,
                "end_of_file", packet_status.end_of_file);

        "largest_frame_processed", largest_frame_processed,
        "Working Cache Count", working_cache.Count());
    CheckWorkingFrames(requested_frame);

            std::shared_ptr<Frame> f = CreateFrame(largest_frame_processed);

            if (!frame->has_image_data) {

                frame->AddAudioSilence(samples_in_frame);

            std::shared_ptr<Frame> f = CreateFrame(largest_frame_processed);

            f->AddAudioSilence(samples_in_frame);
int FFmpegReader::GetNextPacket() {
    int found_packet = 0;
    AVPacket *next_packet = new AVPacket();
    found_packet = av_read_frame(pFormatCtx, next_packet);

        RemoveAVPacket(packet);

    if (found_packet >= 0) {
        packet = next_packet;

        if (packet->stream_index == videoStream) {
        } else if (packet->stream_index == audioStream) {

    return found_packet;
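// av_read_frame() returns 0 on success and a negative AVERROR (including
// AVERROR_EOF) once no more packets can be read; the packet it fills must be
// released by the caller, which is what RemoveAVPacket() handles here.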
bool FFmpegReader::GetAVFrame() {
    int frameFinished = 0;

    int send_packet_err = 0;
    int64_t send_packet_pts = 0;
    if ((packet && packet->stream_index == videoStream) || !packet) {
        send_packet_err = avcodec_send_packet(pCodecCtx, packet);

        if (packet && send_packet_err >= 0) {
            send_packet_pts = GetPacketPTS();
            hold_packet = false;
        if (send_packet_err < 0 && send_packet_err != AVERROR_EOF) {
            ZmqLogger::Instance()->AppendDebugMethod(
                "FFmpegReader::GetAVFrame (send packet: Not sent [" + av_err2string(send_packet_err) + "])",
                "send_packet_err", send_packet_err,
                "send_packet_pts", send_packet_pts);

            if (send_packet_err == AVERROR(EAGAIN)) {
                ZmqLogger::Instance()->AppendDebugMethod(
                    "FFmpegReader::GetAVFrame (send packet: AVERROR(EAGAIN): user must read output with avcodec_receive_frame())",
                    "send_packet_pts", send_packet_pts);
            }
            if (send_packet_err == AVERROR(EINVAL)) {
                ZmqLogger::Instance()->AppendDebugMethod(
                    "FFmpegReader::GetAVFrame (send packet: AVERROR(EINVAL): codec not opened, it is an encoder, or requires flush)",
                    "send_packet_pts", send_packet_pts);
            }
            if (send_packet_err == AVERROR(ENOMEM)) {
                ZmqLogger::Instance()->AppendDebugMethod(
                    "FFmpegReader::GetAVFrame (send packet: AVERROR(ENOMEM): failed to add packet to internal queue, or legitimate decoding errors)",
                    "send_packet_pts", send_packet_pts);
            }
    int receive_frame_err = 0;
    AVFrame *next_frame2;

        next_frame2 = next_frame;

    while (receive_frame_err >= 0) {
        receive_frame_err = avcodec_receive_frame(pCodecCtx, next_frame2);
        if (receive_frame_err != 0) {
            ZmqLogger::Instance()->AppendDebugMethod(
                "FFmpegReader::GetAVFrame (receive frame: frame not ready yet from decoder [" + av_err2string(receive_frame_err) + "])",
                "receive_frame_err", receive_frame_err,
                "send_packet_pts", send_packet_pts);

            if (receive_frame_err == AVERROR_EOF) {
                ZmqLogger::Instance()->AppendDebugMethod(
                    "FFmpegReader::GetAVFrame (receive frame: AVERROR_EOF: EOF detected from decoder, flushing buffers)",
                    "send_packet_pts", send_packet_pts);
                avcodec_flush_buffers(pCodecCtx);
            }
            if (receive_frame_err == AVERROR(EINVAL)) {
                ZmqLogger::Instance()->AppendDebugMethod(
                    "FFmpegReader::GetAVFrame (receive frame: AVERROR(EINVAL): invalid frame received, flushing buffers)",
                    "send_packet_pts", send_packet_pts);
                avcodec_flush_buffers(pCodecCtx);
            }
            if (receive_frame_err == AVERROR(EAGAIN)) {
                ZmqLogger::Instance()->AppendDebugMethod(
                    "FFmpegReader::GetAVFrame (receive frame: AVERROR(EAGAIN): output is not available in this state - user must try to send new input)",
                    "send_packet_pts", send_packet_pts);
            }
            if (receive_frame_err == AVERROR_INPUT_CHANGED) {
                ZmqLogger::Instance()->AppendDebugMethod(
                    "FFmpegReader::GetAVFrame (receive frame: AVERROR_INPUT_CHANGED: current decoded frame has changed parameters with respect to first decoded frame)",
                    "send_packet_pts", send_packet_pts);
            }
            if (next_frame2->format == hw_de_av_pix_fmt) {
                next_frame->format = AV_PIX_FMT_YUV420P;

                if ((err = av_hwframe_transfer_data(next_frame, next_frame2, 0)) < 0) {

                if ((err = av_frame_copy_props(next_frame, next_frame2)) < 0) {

            next_frame = next_frame2;
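            // When the decoder returned a frame in the hardware pixel format,
            // av_hwframe_transfer_data() downloads it from GPU memory into the
            // software frame (requested as AV_PIX_FMT_YUV420P above) and
            // av_frame_copy_props() carries over metadata such as pts;
            // otherwise the decoded frame is already in system memory and is
            // used directly.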
            av_image_alloc(pFrame->data, pFrame->linesize, info.width, info.height,
                (AVPixelFormat)(pStream->codecpar->format), 1);
            av_image_copy(pFrame->data, pFrame->linesize, (const uint8_t **)next_frame->data, next_frame->linesize,
            if (next_frame->pts != AV_NOPTS_VALUE) {
                video_pts = next_frame->pts;
            } else if (next_frame->pkt_dts != AV_NOPTS_VALUE) {
                video_pts = next_frame->pkt_dts;
            }

            ZmqLogger::Instance()->AppendDebugMethod(
                "FFmpegReader::GetAVFrame (Successful frame received)",
                "video_pts", video_pts,
                "send_packet_pts", send_packet_pts);
    avcodec_decode_video2(pCodecCtx, next_frame, &frameFinished, packet);

    if (frameFinished) {

        av_picture_copy((AVPicture *) pFrame, (AVPicture *) next_frame, pCodecCtx->pix_fmt, info.width,

    return frameFinished;
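// The avcodec_decode_video2() branch above is the legacy pre-FFmpeg-3.x decode
// path (deprecated upstream); newer builds use the avcodec_send_packet() /
// avcodec_receive_frame() pair earlier in this function instead.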
bool FFmpegReader::CheckSeek(bool is_video) {

    if ((is_video_seek && !seek_video_frame_found) || (!is_video_seek && !seek_audio_frame_found))

    int64_t max_seeked_frame = std::max(seek_audio_frame_found, seek_video_frame_found);

    if (max_seeked_frame >= seeking_frame) {
            "is_video_seek", is_video_seek,
            "max_seeked_frame", max_seeked_frame,
            "seeking_frame", seeking_frame,
            "seeking_pts", seeking_pts,
            "seek_video_frame_found", seek_video_frame_found,
            "seek_audio_frame_found", seek_audio_frame_found);
        Seek(seeking_frame - (10 * seek_count * seek_count));

            "is_video_seek", is_video_seek,
            "packet->pts", GetPacketPTS(),
            "seeking_pts", seeking_pts,
            "seeking_frame", seeking_frame,
            "seek_video_frame_found", seek_video_frame_found,
            "seek_audio_frame_found", seek_audio_frame_found);
void FFmpegReader::ProcessVideoPacket(int64_t requested_frame) {

    int frame_finished = GetAVFrame();

    if (!frame_finished) {

        RemoveAVFrame(pFrame);

    int64_t current_frame = ConvertVideoPTStoFrame(video_pts);

    if (!seek_video_frame_found && is_seeking)
        seek_video_frame_found = current_frame;

        working_cache.Add(CreateFrame(requested_frame));
    AVFrame *pFrameRGB = nullptr;
    uint8_t *buffer = nullptr;

    if (pFrameRGB == nullptr)

            max_width = std::max(float(max_width), max_width * max_scale_x);
            max_height = std::max(float(max_height), max_height * max_scale_y);

            QSize width_size(max_width * max_scale_x,

                max_height * max_scale_y);

            if (width_size.width() >= max_width && width_size.height() >= max_height) {
                max_width = std::max(max_width, width_size.width());
                max_height = std::max(max_height, width_size.height());

                max_width = std::max(max_width, height_size.width());
                max_height = std::max(max_height, height_size.height());
        float preview_ratio = 1.0;

            max_width = info.width * max_scale_x * preview_ratio;
            max_height = info.height * max_scale_y * preview_ratio;
    int original_height = height;
    if (max_width != 0 && max_height != 0 && max_width < width && max_height < height) {
        float ratio = float(width) / float(height);
        int possible_width = round(max_height * ratio);
        int possible_height = round(max_width / ratio);

        if (possible_width <= max_width) {
            width = possible_width;
            height = max_height;

            height = possible_height;
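        // Worked example of this aspect-preserving fit: a 1920x1080 frame with
        // a 640x480 limit has ratio 1920/1080 ~= 1.78, so possible_width =
        // 480 * 1.78 ~= 853 exceeds 640; the else-branch therefore keeps the
        // width at the 640 limit and derives height = 640 / 1.78 ~= 360,
        // giving a 640x360 output.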
    const int bytes_per_pixel = 4;
    int buffer_size = (width * height * bytes_per_pixel) + 128;
    buffer = new unsigned char[buffer_size]();
    int scale_mode = SWS_FAST_BILINEAR;

        scale_mode = SWS_BICUBIC;

    sws_scale(img_convert_ctx, pFrame->data, pFrame->linesize, 0,
        original_height, pFrameRGB->data, pFrameRGB->linesize);
    std::shared_ptr<Frame> f = CreateFrame(current_frame);

        f->AddImage(width, height, bytes_per_pixel, QImage::Format_RGBA8888_Premultiplied, buffer);

        f->AddImage(width, height, bytes_per_pixel, QImage::Format_RGBA8888, buffer);

    working_cache.Add(f);

        last_video_frame = f;

    RemoveAVFrame(pFrame);
    sws_freeContext(img_convert_ctx);
    ZmqLogger::Instance()->AppendDebugMethod(
        "FFmpegReader::ProcessVideoPacket (After)",
        "requested_frame", requested_frame,
        "current_frame", current_frame,
        "f->number", f->number,
        "video_pts_seconds", video_pts_seconds);
void FFmpegReader::ProcessAudioPacket(int64_t requested_frame) {

    if (packet && packet->pts != AV_NOPTS_VALUE) {

        location = GetAudioPTSLocation(packet->pts);

        if (!seek_audio_frame_found && is_seeking)
            seek_audio_frame_found = location.frame;

        working_cache.Add(CreateFrame(requested_frame));

        "requested_frame", requested_frame,
        "target_frame", location.frame,
    int frame_finished = 0;

    int packet_samples = 0;

    int send_packet_err = avcodec_send_packet(aCodecCtx, packet);
    if (send_packet_err < 0 && send_packet_err != AVERROR_EOF) {

    int receive_frame_err = avcodec_receive_frame(aCodecCtx, audio_frame);
    if (receive_frame_err >= 0) {

    if (receive_frame_err == AVERROR_EOF) {

    if (receive_frame_err == AVERROR(EINVAL) || receive_frame_err == AVERROR_EOF) {
        avcodec_flush_buffers(aCodecCtx);

    if (receive_frame_err != 0) {

    int used = avcodec_decode_audio4(aCodecCtx, audio_frame, &frame_finished, packet);
    if (frame_finished) {

        audio_pts = audio_frame->pts;

        location = GetAudioPTSLocation(audio_pts);

        int plane_size = -1;
        data_size = av_samples_get_buffer_size(&plane_size,
            audio_frame->nb_samples,

    int pts_remaining_samples = packet_samples / info.channels;
    if (pts_remaining_samples == 0) {
            "packet_samples", packet_samples,
            "pts_remaining_samples", pts_remaining_samples);

    while (pts_remaining_samples) {

        int samples = samples_per_frame - previous_packet_location.sample_start;
        if (samples > pts_remaining_samples)
            samples = pts_remaining_samples;

        pts_remaining_samples -= samples;

        if (pts_remaining_samples > 0) {

            previous_packet_location.frame++;

        "packet_samples", packet_samples,
        "AV_SAMPLE_FMT_S16", AV_SAMPLE_FMT_S16);
    audio_converted->nb_samples = audio_frame->nb_samples;
    av_samples_alloc(audio_converted->data, audio_converted->linesize, info.channels,
        audio_frame->nb_samples, AV_SAMPLE_FMT_S16, 0);

    av_opt_set_int(avr, "out_sample_fmt", AV_SAMPLE_FMT_S16, 0);

        audio_converted->data,
        audio_converted->linesize[0],
        audio_converted->nb_samples,

        audio_frame->linesize[0],
        audio_frame->nb_samples);

        audio_converted->data[0],
        static_cast<size_t>(audio_converted->nb_samples)
            * av_get_bytes_per_sample(AV_SAMPLE_FMT_S16)

    av_free(audio_converted->data[0]);
    int64_t starting_frame_number = -1;
    bool partial_frame = true;
    for (int channel_filter = 0; channel_filter < info.channels; channel_filter++) {

        starting_frame_number = location.frame;
        int channel_buffer_size = packet_samples / info.channels;
        float *channel_buffer = new float[channel_buffer_size];

        for (int z = 0; z < channel_buffer_size; z++)
            channel_buffer[z] = 0.0f;

        for (int sample = 0; sample < packet_samples; sample++) {

            if (channel_filter == channel) {

                channel_buffer[position] = audio_buf[sample] * (1.0f / (1 << 15));
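                // The converted samples are signed 16-bit integers, so
                // multiplying by 1.0f / (1 << 15) = 1/32768 rescales the raw
                // range [-32768, 32767] to roughly [-1.0, 1.0) for the float
                // buffers that openshot::Frame stores.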
        int remaining_samples = channel_buffer_size;
        float *iterate_channel_buffer = channel_buffer;
        while (remaining_samples > 0) {

            int samples = samples_per_frame - start;
            if (samples > remaining_samples)
                samples = remaining_samples;

            std::shared_ptr<Frame> f = CreateFrame(starting_frame_number);

            if (samples_per_frame == start + samples)
                partial_frame = false;
            else
                partial_frame = true;

            f->AddAudio(true, channel_filter, start, iterate_channel_buffer,

                "frame", starting_frame_number,
                "channel", channel_filter,
                "partial_frame", partial_frame,
                "samples_per_frame", samples_per_frame);

            working_cache.Add(f);

            remaining_samples -= samples;

            if (remaining_samples > 0)
                iterate_channel_buffer += samples;

            starting_frame_number++;

        delete[] channel_buffer;
        channel_buffer = NULL;
        iterate_channel_buffer = NULL;
1806 "requested_frame", requested_frame,
1807 "starting_frame", location.
frame,
1808 "end_frame", starting_frame_number - 1,
1809 "audio_pts_seconds", audio_pts_seconds);
void FFmpegReader::Seek(int64_t requested_frame) {

    if (requested_frame < 1)
        requested_frame = 1;

    if (requested_frame > largest_frame_processed && packet_status.end_of_file) {

        "requested_frame", requested_frame,
        "seek_count", seek_count,
        "last_frame", last_frame);

    working_cache.Clear();

    video_pts_seconds = NO_PTS_OFFSET;

    audio_pts_seconds = NO_PTS_OFFSET;
    hold_packet = false;

    current_video_frame = 0;
    largest_frame_processed = 0;

    packet_status.reset(false);
    int buffer_amount = std::max(max_concurrent_frames, 8);
    if (requested_frame - buffer_amount < 20) {

        if (seek_count == 1) {

            seeking_pts = ConvertFrameToVideoPTS(1);

        seek_audio_frame_found = 0;
        seek_video_frame_found = 0;
        bool seek_worked = false;
        int64_t seek_target = 0;

            seek_target = ConvertFrameToVideoPTS(requested_frame - buffer_amount);

                fprintf(stderr, "%s: error while seeking video stream\n", pFormatCtx->AV_FILENAME);

                is_video_seek = true;

            seek_target = ConvertFrameToAudioPTS(requested_frame - buffer_amount);

                fprintf(stderr, "%s: error while seeking audio stream\n", pFormatCtx->AV_FILENAME);

                is_video_seek = false;
            avcodec_flush_buffers(aCodecCtx);

            avcodec_flush_buffers(pCodecCtx);

        previous_packet_location.frame = -1;

        if (seek_count == 1) {

            seeking_pts = seek_target;
            seeking_frame = requested_frame;

        seek_audio_frame_found = 0;
        seek_video_frame_found = 0;
int64_t FFmpegReader::GetPacketPTS() {
    if (packet) {
        int64_t current_pts = packet->pts;
        if (current_pts == AV_NOPTS_VALUE && packet->dts != AV_NOPTS_VALUE)
            current_pts = packet->dts;

        return current_pts;
    }

    return AV_NOPTS_VALUE;
}
void FFmpegReader::UpdatePTSOffset() {

    if (pts_offset_seconds != NO_PTS_OFFSET) {

    pts_offset_seconds = 0.0;
    double video_pts_offset_seconds = 0.0;
    double audio_pts_offset_seconds = 0.0;

    bool has_video_pts = false;

        has_video_pts = true;

    bool has_audio_pts = false;

        has_audio_pts = true;
    while (!has_video_pts || !has_audio_pts) {

        if (GetNextPacket() < 0)

        int64_t pts = GetPacketPTS();

        if (!has_video_pts && packet->stream_index == videoStream) {

            if (std::abs(video_pts_offset_seconds) <= 10.0) {
                has_video_pts = true;

        } else if (!has_audio_pts && packet->stream_index == audioStream) {

            if (std::abs(audio_pts_offset_seconds) <= 10.0) {
                has_audio_pts = true;

    if (has_video_pts && has_audio_pts) {

        pts_offset_seconds = std::max(video_pts_offset_seconds, audio_pts_offset_seconds);
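        // Offsets larger than +/-10 seconds are treated as bogus timestamps
        // and skipped (the loop keeps scanning packets until each stream
        // yields a plausible value). Taking the max of the two offsets shifts
        // both streams together, so neither one starts before frame 1.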
int64_t FFmpegReader::ConvertVideoPTStoFrame(int64_t pts) {

    int64_t previous_video_frame = current_video_frame;

        if (current_video_frame == 0)
            current_video_frame = frame;

        if (frame == previous_video_frame) {

            current_video_frame++;
int64_t FFmpegReader::ConvertFrameToVideoPTS(int64_t frame_number) {

    double seconds = (double(frame_number - 1) / info.fps.ToDouble()) + pts_offset_seconds;

int64_t FFmpegReader::ConvertFrameToAudioPTS(int64_t frame_number) {

    double seconds = (double(frame_number - 1) / info.fps.ToDouble()) + pts_offset_seconds;
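    // Both conversions first map a 1-based frame number to seconds: frame 91
    // at 30 fps with a zero offset is (91 - 1) / 30 = 3.0 s. The elided
    // remainder of each function presumably scales seconds by the stream's
    // time base to get PTS ticks (e.g. 3.0 s at a 1/90000 time base -> 270000).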
AudioLocation FFmpegReader::GetAudioPTSLocation(int64_t pts) {

    int64_t whole_frame = int64_t(frame);

    double sample_start_percentage = frame - double(whole_frame);

    int sample_start = round(double(samples_per_frame) * sample_start_percentage);

    if (whole_frame < 1)

    if (sample_start < 0)
    if (previous_packet_location.frame != -1) {
        if (location.is_near(previous_packet_location, samples_per_frame, samples_per_frame)) {
            int64_t orig_frame = location.frame;

            location.frame = previous_packet_location.frame;

            ZmqLogger::Instance()->AppendDebugMethod(
                "FFmpegReader::GetAudioPTSLocation (Audio Gap Detected)",
                "Source Frame", orig_frame,
                "Source Audio Sample", orig_start,
                "Target Frame", location.frame,
                "Target Audio Sample", location.sample_start,
                "pts", pts);

    previous_packet_location = location;
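    // If the PTS-derived location lands within one frame's worth of samples of
    // where the previous packet left off, is_near() matches and the location
    // is snapped back to previous_packet_location, so small PTS jitter between
    // consecutive audio packets does not open audible gaps or overlaps.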
std::shared_ptr<Frame> FFmpegReader::CreateFrame(int64_t requested_frame) {

    std::shared_ptr<Frame> output = working_cache.GetFrame(requested_frame);

        output = working_cache.GetFrame(requested_frame);
        if (output)
            return output;

        working_cache.Add(output);

        if (requested_frame > largest_frame_processed)
            largest_frame_processed = requested_frame;
bool FFmpegReader::IsPartialFrame(int64_t requested_frame) {

    bool seek_trash = false;
    int64_t max_seeked_frame = seek_audio_frame_found;
    if (seek_video_frame_found > max_seeked_frame) {
        max_seeked_frame = seek_video_frame_found;
    }

    if ((info.has_audio && seek_audio_frame_found && max_seeked_frame >= requested_frame) ||
        (info.has_video && seek_video_frame_found && max_seeked_frame >= requested_frame)) {
void FFmpegReader::CheckWorkingFrames(int64_t requested_frame) {

    const std::lock_guard<std::recursive_mutex> lock(getFrameMutex);

    std::vector<std::shared_ptr<openshot::Frame>> working_frames = working_cache.GetFrames();
    std::vector<std::shared_ptr<openshot::Frame>>::iterator working_itr;

    for (working_itr = working_frames.begin(); working_itr != working_frames.end(); ++working_itr) {

        std::shared_ptr<Frame> f = *working_itr;

        if (!f || f->number > requested_frame) {

        double frame_pts_seconds = (double(f->number - 1) / info.fps.ToDouble()) + pts_offset_seconds;
        double recent_pts_seconds = std::max(video_pts_seconds, audio_pts_seconds);

        bool is_video_ready = false;
        bool is_audio_ready = false;
        double recent_pts_diff = recent_pts_seconds - frame_pts_seconds;

        if ((frame_pts_seconds <= video_pts_seconds)
            || (recent_pts_diff > 1.5)

            is_video_ready = true;

                "frame_number", f->number,
                "frame_pts_seconds", frame_pts_seconds,
                "video_pts_seconds", video_pts_seconds,
                "recent_pts_diff", recent_pts_diff);
            for (int64_t previous_frame = requested_frame - 1; previous_frame > 0; previous_frame--) {

                if (previous_frame_instance && previous_frame_instance->has_image_data) {

                    f->AddImage(std::make_shared<QImage>(previous_frame_instance->GetImage()->copy()));

            if (last_video_frame && !f->has_image_data) {

                f->AddImage(std::make_shared<QImage>(last_video_frame->GetImage()->copy()));
            } else if (!f->has_image_data) {
                f->AddColor("#000000");
        double audio_pts_diff = audio_pts_seconds - frame_pts_seconds;
        if ((frame_pts_seconds < audio_pts_seconds && audio_pts_diff > 1.0)
            || (recent_pts_diff > 1.5)

            is_audio_ready = true;

                "frame_number", f->number,
                "frame_pts_seconds", frame_pts_seconds,
                "audio_pts_seconds", audio_pts_seconds,
                "audio_pts_diff", audio_pts_diff,
                "recent_pts_diff", recent_pts_diff);
        bool is_seek_trash = IsPartialFrame(f->number);

            "frame_number", f->number,
            "is_video_ready", is_video_ready,
            "is_audio_ready", is_audio_ready,

        if ((!packet_status.end_of_file && is_video_ready && is_audio_ready) ||
            packet_status.end_of_file || is_seek_trash) {

                "requested_frame", requested_frame,
                "f->number", f->number,
                "is_seek_trash", is_seek_trash,
                "Working Cache Count", working_cache.Count(),

            if (!is_seek_trash) {

                working_cache.Remove(f->number);

                last_frame = f->number;

                working_cache.Remove(f->number);

    working_frames.clear();
    working_frames.shrink_to_fit();
void FFmpegReader::CheckFPS() {

    int frames_per_second[3] = {0, 0, 0};
    int max_fps_index = sizeof(frames_per_second) / sizeof(frames_per_second[0]);

    int all_frames_detected = 0;
    int starting_frames_detected = 0;

        if (GetNextPacket() < 0)

        if (packet->stream_index == videoStream) {

            fps_index = int(video_seconds);

            if (fps_index >= 0 && fps_index < max_fps_index) {
                starting_frames_detected++;
                frames_per_second[fps_index]++;

            all_frames_detected++;

    float avg_fps = 30.0;
    if (starting_frames_detected > 0 && fps_index > 0) {
        avg_fps = float(starting_frames_detected) / std::min(fps_index, max_fps_index);
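    // frames_per_second[] buckets decoded packets by whole seconds for the
    // first 3 seconds of video, so avg_fps is simply frames counted divided by
    // whole seconds elapsed: e.g. 72 frames across 3 one-second buckets
    // estimates 24 fps.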
    if (avg_fps < 8.0) {

    if (all_frames_detected > 0) {
void FFmpegReader::RemoveAVFrame(AVFrame *remove_frame) {

        av_freep(&remove_frame->data[0]);

void FFmpegReader::RemoveAVPacket(AVPacket *remove_packet) {

    delete remove_packet;
    root["type"] = "FFmpegReader";
    root["path"] = path;

    catch (const std::exception& e) {

        throw InvalidJSON("JSON is invalid (missing keys or invalid data types)");

    if (!root["path"].isNull())
        path = root["path"].asString();
Size of file (in bytes)