From 4b9a0cfe5ed35604a7eca9f7a3d3956d1a796b6a Mon Sep 17 00:00:00 2001 From: Eugen Rochko Date: Wed, 5 May 2021 19:44:01 +0200 Subject: [PATCH] Fix media processing getting stuck on too much stdin/stderr (#16136) * Fix media processing getting stuck on too much stdin/stderr See thoughtbot/terrapin#5 * Remove dependency on paperclip-av-transcoder gem * Remove dependency on streamio-ffmpeg gem * Disable stdin on ffmpeg process --- Gemfile | 2 - Gemfile.lock | 11 --- app/lib/video_metadata_extractor.rb | 54 +++++++++++++ app/models/media_attachment.rb | 4 +- config/application.rb | 4 +- lib/paperclip/attachment_extensions.rb | 4 + lib/paperclip/gif_transcoder.rb | 3 +- lib/paperclip/image_extractor.rb | 14 ++-- lib/paperclip/transcoder.rb | 102 +++++++++++++++++++++++++ lib/paperclip/transcoder_extensions.rb | 14 ---- lib/paperclip/video_transcoder.rb | 26 ------- lib/terrapin/multi_pipe_extensions.rb | 63 +++++++++++++++ 12 files changed, 234 insertions(+), 67 deletions(-) create mode 100644 app/lib/video_metadata_extractor.rb create mode 100644 lib/paperclip/transcoder.rb delete mode 100644 lib/paperclip/transcoder_extensions.rb delete mode 100644 lib/paperclip/video_transcoder.rb create mode 100644 lib/terrapin/multi_pipe_extensions.rb diff --git a/Gemfile b/Gemfile index 3aee5d7cc..5d5fa144b 100644 --- a/Gemfile +++ b/Gemfile @@ -21,8 +21,6 @@ gem 'aws-sdk-s3', '~> 1.85', require: false gem 'fog-core', '<= 2.1.0' gem 'fog-openstack', '~> 0.3', require: false gem 'paperclip', '~> 6.0' -gem 'paperclip-av-transcoder', '~> 0.6' -gem 'streamio-ffmpeg', '~> 3.0' gem 'blurhash', '~> 0.1' gem 'active_model_serializers', '~> 0.10' diff --git a/Gemfile.lock b/Gemfile.lock index 3df8f4bc5..9f4fe4e91 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -75,8 +75,6 @@ GEM ast (2.4.1) attr_encrypted (3.1.0) encryptor (~> 3.0.0) - av (0.9.0) - cocaine (~> 0.5.3) awrence (1.1.1) aws-eventstream (1.1.0) aws-partitions (1.397.0) @@ -151,8 +149,6 @@ GEM cld3 (3.3.0) ffi (>= 1.1.0, < 1.12.0) climate_control (0.2.0) - cocaine (0.5.8) - climate_control (>= 0.0.3, < 1.0) coderay (1.1.3) color_diff (0.1) concurrent-ruby (1.1.7) @@ -394,9 +390,6 @@ GEM mime-types mimemagic (~> 0.3.0) terrapin (~> 0.6.0) - paperclip-av-transcoder (0.6.4) - av (~> 0.9.0) - paperclip (>= 2.5.2) parallel (1.20.1) parallel_tests (3.4.0) parallel @@ -609,8 +602,6 @@ GEM stackprof (0.2.16) statsd-ruby (1.4.0) stoplight (2.2.1) - streamio-ffmpeg (3.0.2) - multi_json (~> 1.8) strong_migrations (0.7.2) activerecord (>= 5) temple (0.8.2) @@ -755,7 +746,6 @@ DEPENDENCIES omniauth-saml (~> 1.10) ox (~> 2.13) paperclip (~> 6.0) - paperclip-av-transcoder (~> 0.6) parallel (~> 1.20) parallel_tests (~> 3.4) parslet @@ -801,7 +791,6 @@ DEPENDENCIES sprockets-rails (~> 3.2) stackprof stoplight (~> 2.2.1) - streamio-ffmpeg (~> 3.0) strong_migrations (~> 0.7) thor (~> 1.0) tty-prompt (~> 0.22) diff --git a/app/lib/video_metadata_extractor.rb b/app/lib/video_metadata_extractor.rb new file mode 100644 index 000000000..03e40f923 --- /dev/null +++ b/app/lib/video_metadata_extractor.rb @@ -0,0 +1,54 @@ +# frozen_string_literal: true + +class VideoMetadataExtractor + attr_reader :duration, :bitrate, :video_codec, :audio_codec, + :colorspace, :width, :height, :frame_rate + + def initialize(path) + @path = path + @metadata = Oj.load(ffmpeg_command_output, mode: :strict, symbol_keys: true) + + parse_metadata + rescue Terrapin::ExitStatusError, Oj::ParseError + @invalid = true + rescue Terrapin::CommandNotFoundError + raise Paperclip::Errors::CommandNotFoundError, 'Could not run the `ffprobe` command. Please install ffmpeg.' + end + + def valid? + !@invalid + end + + private + + def ffmpeg_command_output + command = Terrapin::CommandLine.new('ffprobe', '-i :path -print_format :format -show_format -show_streams -show_error -loglevel :loglevel') + command.run(path: @path, format: 'json', loglevel: 'fatal') + end + + def parse_metadata + if @metadata.key?(:format) + @duration = @metadata[:format][:duration].to_f + @bitrate = @metadata[:format][:bit_rate].to_i + end + + if @metadata.key?(:streams) + video_streams = @metadata[:streams].select { |stream| stream[:codec_type] == 'video' } + audio_streams = @metadata[:streams].select { |stream| stream[:codec_type] == 'audio' } + + if (video_stream = video_streams.first) + @video_codec = video_stream[:codec_name] + @colorspace = video_stream[:pix_fmt] + @width = video_stream[:width] + @height = video_stream[:height] + @frame_rate = video_stream[:avg_frame_rate] == '0/0' ? nil : Rational(video_stream[:avg_frame_rate]) + end + + if (audio_stream = audio_streams.first) + @audio_codec = audio_stream[:codec_name] + end + end + + @invalid = true if @metadata.key?(:error) + end +end diff --git a/app/models/media_attachment.rb b/app/models/media_attachment.rb index 663bb0896..87e590b2d 100644 --- a/app/models/media_attachment.rb +++ b/app/models/media_attachment.rb @@ -287,7 +287,7 @@ class MediaAttachment < ApplicationRecord if instance.file_content_type == 'image/gif' [:gif_transcoder, :blurhash_transcoder] elsif VIDEO_MIME_TYPES.include?(instance.file_content_type) - [:video_transcoder, :blurhash_transcoder, :type_corrector] + [:transcoder, :blurhash_transcoder, :type_corrector] elsif AUDIO_MIME_TYPES.include?(instance.file_content_type) [:image_extractor, :transcoder, :type_corrector] else @@ -388,7 +388,7 @@ class MediaAttachment < ApplicationRecord # paths but ultimately the same file, so it makes sense to memoize the # result while disregarding the path def ffmpeg_data(path = nil) - @ffmpeg_data ||= FFMPEG::Movie.new(path) + @ffmpeg_data ||= VideoMetadataExtractor.new(path) end def enqueue_processing diff --git a/config/application.rb b/config/application.rb index af7735221..c9475ba04 100644 --- a/config/application.rb +++ b/config/application.rb @@ -11,12 +11,12 @@ require_relative '../lib/redis/namespace_extensions' require_relative '../lib/paperclip/url_generator_extensions' require_relative '../lib/paperclip/attachment_extensions' require_relative '../lib/paperclip/media_type_spoof_detector_extensions' -require_relative '../lib/paperclip/transcoder_extensions' require_relative '../lib/paperclip/lazy_thumbnail' require_relative '../lib/paperclip/gif_transcoder' -require_relative '../lib/paperclip/video_transcoder' +require_relative '../lib/paperclip/transcoder' require_relative '../lib/paperclip/type_corrector' require_relative '../lib/paperclip/response_with_limit_adapter' +require_relative '../lib/terrapin/multi_pipe_extensions' require_relative '../lib/mastodon/snowflake' require_relative '../lib/mastodon/version' require_relative '../lib/devise/two_factor_ldap_authenticatable' diff --git a/lib/paperclip/attachment_extensions.rb b/lib/paperclip/attachment_extensions.rb index 94f7769b6..2f2656616 100644 --- a/lib/paperclip/attachment_extensions.rb +++ b/lib/paperclip/attachment_extensions.rb @@ -2,6 +2,10 @@ module Paperclip module AttachmentExtensions + def meta + instance_read(:meta) + end + # We overwrite this method to support delayed processing in # Sidekiq. Since we process the original file to reduce disk # usage, and we still want to generate thumbnails straight diff --git a/lib/paperclip/gif_transcoder.rb b/lib/paperclip/gif_transcoder.rb index 9f3c8e8be..74aa1a0b2 100644 --- a/lib/paperclip/gif_transcoder.rb +++ b/lib/paperclip/gif_transcoder.rb @@ -100,7 +100,8 @@ end module Paperclip # This transcoder is only to be used for the MediaAttachment model - # to convert animated gifs to webm + # to convert animated GIFs to videos + class GifTranscoder < Paperclip::Processor def make return File.open(@file.path) unless needs_convert? diff --git a/lib/paperclip/image_extractor.rb b/lib/paperclip/image_extractor.rb index aab675a06..17fe4326f 100644 --- a/lib/paperclip/image_extractor.rb +++ b/lib/paperclip/image_extractor.rb @@ -31,21 +31,17 @@ module Paperclip private def extract_image_from_file! - ::Av.logger = Paperclip.logger - - cli = ::Av.cli dst = Tempfile.new([File.basename(@file.path, '.*'), '.png']) dst.binmode - cli.add_source(@file.path) - cli.add_destination(dst.path) - cli.add_output_param loglevel: 'fatal' - begin - cli.run - rescue Cocaine::ExitStatusError, ::Av::CommandError + command = Terrapin::CommandLine.new('ffmpeg', '-i :source -loglevel :loglevel -y :destination', logger: Paperclip.logger) + command.run(source: @file.path, destination: dst.path, loglevel: 'fatal') + rescue Terrapin::ExitStatusError dst.close(true) return nil + rescue Terrapin::CommandNotFoundError + raise Paperclip::Errors::CommandNotFoundError, 'Could not run the `ffmpeg` command. Please install ffmpeg.' end dst diff --git a/lib/paperclip/transcoder.rb b/lib/paperclip/transcoder.rb new file mode 100644 index 000000000..e99704086 --- /dev/null +++ b/lib/paperclip/transcoder.rb @@ -0,0 +1,102 @@ +# frozen_string_literal: true + +module Paperclip + # This transcoder is only to be used for the MediaAttachment model + # to check when uploaded videos are actually gifv's + class Transcoder < Paperclip::Processor + def initialize(file, options = {}, attachment = nil) + super + + @current_format = File.extname(@file.path) + @basename = File.basename(@file.path, @current_format) + @format = options[:format] + @time = options[:time] || 3 + @passthrough_options = options[:passthrough_options] + @convert_options = options[:convert_options].dup + end + + def make + metadata = VideoMetadataExtractor.new(@file.path) + + unless metadata.valid? + log("Unsupported file #{@file.path}") + return File.open(@file.path) + end + + update_attachment_type(metadata) + update_options_from_metadata(metadata) + + destination = Tempfile.new([@basename, @format ? ".#{@format}" : '']) + destination.binmode + + @output_options = @convert_options[:output]&.dup || {} + @input_options = @convert_options[:input]&.dup || {} + + case @format.to_s + when /jpg$/, /jpeg$/, /png$/, /gif$/ + @input_options['ss'] = @time + + @output_options['f'] = 'image2' + @output_options['vframes'] = 1 + when 'mp4' + @output_options['acodec'] = 'aac' + @output_options['strict'] = 'experimental' + end + + command_arguments, interpolations = prepare_command(destination) + + begin + command = Terrapin::CommandLine.new('ffmpeg', command_arguments.join(' '), logger: Paperclip.logger) + command.run(interpolations) + rescue Terrapin::ExitStatusError => e + raise Paperclip::Error, "Error while transcoding #{@basename}: #{e}" + rescue Terrapin::CommandNotFoundError + raise Paperclip::Errors::CommandNotFoundError, 'Could not run the `ffmpeg` command. Please install ffmpeg.' + end + + destination + end + + private + + def prepare_command(destination) + command_arguments = ['-nostdin'] + interpolations = {} + interpolation_keys = 0 + + @input_options.each_pair do |key, value| + interpolation_key = interpolation_keys + command_arguments << "-#{key} :#{interpolation_key}" + interpolations[interpolation_key] = value + interpolation_keys += 1 + end + + command_arguments << '-i :source' + interpolations[:source] = @file.path + + @output_options.each_pair do |key, value| + interpolation_key = interpolation_keys + command_arguments << "-#{key} :#{interpolation_key}" + interpolations[interpolation_key] = value + interpolation_keys += 1 + end + + command_arguments << '-y :destination' + interpolations[:destination] = destination.path + + [command_arguments, interpolations] + end + + def update_options_from_metadata(metadata) + return unless @passthrough_options && @passthrough_options[:video_codecs].include?(metadata.video_codec) && @passthrough_options[:audio_codecs].include?(metadata.audio_codec) && @passthrough_options[:colorspaces].include?(metadata.colorspace) + + @format = @passthrough_options[:options][:format] || @format + @time = @passthrough_options[:options][:time] || @time + @convert_options = @passthrough_options[:options][:convert_options].dup + end + + def update_attachment_type(metadata) + @attachment.instance.type = MediaAttachment.types[:gifv] unless metadata.audio_codec + end + end +end diff --git a/lib/paperclip/transcoder_extensions.rb b/lib/paperclip/transcoder_extensions.rb deleted file mode 100644 index c0b2447f3..000000000 --- a/lib/paperclip/transcoder_extensions.rb +++ /dev/null @@ -1,14 +0,0 @@ -# frozen_string_literal: true - -module Paperclip - module TranscoderExtensions - # Prevent the transcoder from modifying our meta hash - def initialize(file, options = {}, attachment = nil) - meta_value = attachment&.instance_read(:meta) - super - attachment&.instance_write(:meta, meta_value) - end - end -end - -Paperclip::Transcoder.prepend(Paperclip::TranscoderExtensions) diff --git a/lib/paperclip/video_transcoder.rb b/lib/paperclip/video_transcoder.rb deleted file mode 100644 index 4d9544231..000000000 --- a/lib/paperclip/video_transcoder.rb +++ /dev/null @@ -1,26 +0,0 @@ -# frozen_string_literal: true - -module Paperclip - # This transcoder is only to be used for the MediaAttachment model - # to check when uploaded videos are actually gifv's - class VideoTranscoder < Paperclip::Processor - def make - movie = FFMPEG::Movie.new(@file.path) - - attachment.instance.type = MediaAttachment.types[:gifv] unless movie.audio_codec - - Paperclip::Transcoder.make(file, actual_options(movie), attachment) - end - - private - - def actual_options(movie) - opts = options[:passthrough_options] - if opts && opts[:video_codecs].include?(movie.video_codec) && opts[:audio_codecs].include?(movie.audio_codec) && opts[:colorspaces].include?(movie.colorspace) - opts[:options] - else - options - end - end - end -end diff --git a/lib/terrapin/multi_pipe_extensions.rb b/lib/terrapin/multi_pipe_extensions.rb new file mode 100644 index 000000000..51d7de37c --- /dev/null +++ b/lib/terrapin/multi_pipe_extensions.rb @@ -0,0 +1,63 @@ +# frozen_string_literal: false +# Fix adapted from https://github.com/thoughtbot/terrapin/pull/5 + +module Terrapin + module MultiPipeExtensions + def read + read_streams(@stdout_in, @stderr_in) + end + + def close_read + begin + @stdout_in.close + rescue IOError + # Do nothing + end + + begin + @stderr_in.close + rescue IOError + # Do nothing + end + end + + def read_streams(output, error) + @stdout_output = '' + @stderr_output = '' + + read_fds = [output, error] + + until read_fds.empty? + to_read, = IO.select(read_fds) + + if to_read.include?(output) + @stdout_output << read_stream(output) + read_fds.delete(output) if output.closed? + end + + if to_read.include?(error) + @stderr_output << read_stream(error) + read_fds.delete(error) if error.closed? + end + end + end + + def read_stream(io) + result = '' + + begin + while (partial_result = io.read_nonblock(8192)) + result << partial_result + end + rescue EOFError, Errno::EPIPE + io.close + rescue Errno::EINTR, Errno::EWOULDBLOCK, Errno::EAGAIN + # Do nothing + end + + result + end + end +end + +Terrapin::CommandLine::MultiPipe.prepend(Terrapin::MultiPipeExtensions)