2018-08-25 21:25:39 +10:00
# frozen_string_literal: true
2023-05-24 19:55:40 +10:00
require_relative 'base'
2018-08-25 21:25:39 +10:00
2023-05-24 00:08:26 +10:00
module Mastodon::CLI
2023-05-24 19:55:40 +10:00
class Media < Base
2018-11-09 07:06:26 +11:00
# Mixed in for number_to_human_size, which the commands below use when
# reporting byte totals.
include ActionView::Helpers::NumberHelper

# Segment counts (after any "cache" segment has been dropped) that a storage
# key or URL path must have to be treated as a media path by this class.
VALID_PATH_SEGMENTS_SIZE = [7, 10].freeze
2019-09-10 21:48:48 +10:00
# Command-line options and help text for the `remove` command defined next.
option :days, type: :numeric, default: 7, aliases: %i[d]
option :prune_profiles, type: :boolean, default: false
option :remove_headers, type: :boolean, default: false
option :include_follows, type: :boolean, default: false
option :concurrency, type: :numeric, default: 5, aliases: %i[c]
option :dry_run, type: :boolean, default: false
desc 'remove', 'Remove remote media files, headers or avatars'
long_desc <<~DESC
  Removes locally cached copies of media attachments ( and optionally profile
  headers and avatars ) from other servers . By default , only media attachments
  are removed .
  The - - days option specifies how old media attachments have to be before
  they are removed . In case of avatars and headers , it specifies how old
  the last webfinger request and update to the user has to be before they
  are pruned . It defaults to 7 days .
  If - - prune - profiles is specified , only avatars and headers are removed .
  If - - remove - headers is specified , only headers are removed .
  If - - include - follows is specified along with - - prune - profiles or
  - - remove - headers , all non - local profiles will be pruned irrespective of
  follow status . By default , only accounts that are not followed by or
  following anyone locally are pruned .
DESC
# Removes cached copies of remote media attachments, or, when
# --prune-profiles / --remove-headers is given, cached remote profile images.
#
# Calls fail_with_message when --prune-profiles and --remove-headers are both
# set, or when --include-follows is set without either of them.
#
# Only records older than --days are visited. In dry-run mode sizes are still
# added up, but nothing is destroyed or saved.
def remove
  prune_profiles = options[:prune_profiles]
  headers_only   = options[:remove_headers]
  profile_mode   = prune_profiles || headers_only

  fail_with_message '--prune-profiles and --remove-headers should not be specified simultaneously' if prune_profiles && headers_only
  fail_with_message '--include-follows can only be used with --prune-profiles or --remove-headers' if options[:include_follows] && !profile_mode

  cutoff = options[:days].days.ago

  if profile_mode
    stale_accounts = Account.remote.where({ last_webfingered_at: ..cutoff, updated_at: ..cutoff })

    processed, aggregate = parallelize_with_progress(stale_accounts) do |account|
      # Unless --include-follows is set, leave alone any account that takes
      # part in a follow relationship in either direction.
      next if !options[:include_follows] && Follow.where(account: account).or(Follow.where(target_account: account)).exists?
      next if account.avatar.blank? && account.header.blank?
      next if headers_only && account.header.blank?

      # The avatar only counts (and is only destroyed) with --prune-profiles.
      sizes = [account.header_file_size]
      sizes << account.avatar_file_size if prune_profiles
      size = sizes.compact.sum

      unless dry_run?
        account.header.destroy
        account.avatar.destroy if prune_profiles
        account.save!
      end

      size
    end

    say(" Visited #{processed} accounts and removed profile media totaling #{number_to_human_size(aggregate)} #{dry_run_mode_suffix} ", :green, true)
  else
    stale_media = MediaAttachment.cached.remote.where(created_at: ..cutoff)

    processed, aggregate = parallelize_with_progress(stale_media) do |media_attachment|
      next if media_attachment.file.blank?

      size = [media_attachment.file_file_size, media_attachment.thumbnail_file_size].compact.sum

      unless dry_run?
        media_attachment.file.destroy
        media_attachment.thumbnail.destroy
        media_attachment.save
      end

      size
    end

    say(" Removed #{processed} media attachments (approx. #{number_to_human_size(aggregate)} ) #{dry_run_mode_suffix} ", :green, true)
  end
end
2019-09-10 23:29:12 +10:00
2019-12-09 01:37:12 +11:00
# Command-line options and help text for the `remove-orphans` command
# defined next.
option :start_after
option :prefix
option :fix_permissions, type: :boolean, default: false
option :dry_run, type: :boolean, default: false
desc 'remove-orphans', 'Scan storage and check for files that do not belong to existing media attachments'
long_desc <<~LONG_DESC
  Scans file storage for files that do not belong to existing media attachments . Because this operation
  requires iterating over every single file individually , it will be slow .
  Please mind that some storage providers charge for the necessary API requests to list objects .
LONG_DESC
# Scans the configured Paperclip storage for files whose path does not map to
# an attachment variant on an existing record, and deletes them.
#
# The :s3 and :filesystem storage drivers are supported; :fog and :azure call
# fail_with_message. Paths are only considered when, after dropping any
# "cache" segment, they have one of the VALID_PATH_SEGMENTS_SIZE lengths and
# their first segment names a model in PRELOAD_MODEL_WHITELIST.
#
# In dry-run mode nothing is deleted and no ACL is changed, but matches are
# still counted and logged.
def remove_orphans
  progress        = create_progress_bar(nil)
  reclaimed_bytes = 0
  removed         = 0

  prefix = options[:prefix]

  case Paperclip::Attachment.default_options[:storage]
  when :s3
    paperclip_instance = MediaAttachment.new.file
    s3_interface       = paperclip_instance.s3_interface
    s3_permissions     = Paperclip::Attachment.default_options[:s3_permissions]
    bucket             = s3_interface.bucket(Paperclip::Attachment.default_options[:s3_credentials][:bucket])
    last_key           = options[:start_after]

    # Page through the bucket 1000 keys at a time, resuming after the last
    # key seen on the previous page.
    loop do
      objects = begin
        bucket.objects(start_after: last_key, prefix: prefix).limit(1000).to_a
      rescue => e
        progress.log(pastel.red(" Error fetching list of files: #{e} "))
        # The key must follow "=" directly so the hint can be pasted as-is.
        progress.log(" If you want to continue from this point, add --start-after=#{last_key} to your command ") if last_key
        break
      end

      break if objects.empty?

      last_key   = objects.last.key
      record_map = preload_records_from_mixed_objects(objects)

      objects.each do |object|
        object.acl.put(acl: s3_permissions) if options[:fix_permissions] && !dry_run?

        path_segments = object.key.split('/')
        path_segments.delete('cache')

        unless VALID_PATH_SEGMENTS_SIZE.include?(path_segments.size)
          progress.log(pastel.yellow(" Unrecognized file found: #{object.key} "))
          next
        end

        model_name      = path_segments.first.classify
        attachment_name = path_segments[1].singularize
        record_id       = path_segments[2..-2].join.to_i
        file_name       = path_segments.last

        # Records are only preloaded for whitelisted models. Without this
        # guard every key under any other model would look record-less and be
        # deleted; the filesystem branch below skips such paths the same way.
        next unless PRELOAD_MODEL_WHITELIST.include?(model_name)

        record     = record_map.dig(model_name, record_id)
        attachment = record&.public_send(attachment_name)

        progress.increment

        # Keep the file when the record still has this attachment and the
        # file name is one of its variants.
        next unless attachment.blank? || !attachment.variant?(file_name)

        begin
          object.delete unless dry_run?

          reclaimed_bytes += object.size
          removed += 1

          progress.log(" Found and removed orphan: #{object.key} ")
        rescue => e
          progress.log(pastel.red(" Error processing #{object.key} : #{e} "))
        end
      end
    end
  when :fog
    fail_with_message 'The fog storage driver is not supported for this operation at this time'
  when :azure
    fail_with_message 'The azure storage driver is not supported for this operation at this time'
  when :filesystem
    require 'find'

    root_path = ENV.fetch('PAPERCLIP_ROOT_PATH', File.join(':rails_root', 'public', 'system')).gsub(':rails_root', Rails.root.to_s)

    Find.find(File.join(*[root_path, prefix].compact)) do |path|
      next if File.directory?(path)

      # Strip only the leading root; a later repetition of the same text
      # inside the path belongs to the key.
      key = path.delete_prefix("#{root_path}#{File::SEPARATOR}")

      path_segments = key.split(File::SEPARATOR)
      path_segments.delete('cache')

      unless VALID_PATH_SEGMENTS_SIZE.include?(path_segments.size)
        progress.log(pastel.yellow(" Unrecognized file found: #{key} "))
        next
      end

      model_name      = path_segments.first.classify
      record_id       = path_segments[2..-2].join.to_i
      attachment_name = path_segments[1].singularize
      file_name       = path_segments.last

      next unless PRELOAD_MODEL_WHITELIST.include?(model_name)

      record     = model_name.constantize.find_by(id: record_id)
      attachment = record&.public_send(attachment_name)

      progress.increment

      next unless attachment.blank? || !attachment.variant?(file_name)

      begin
        size = File.size(path)

        unless dry_run?
          File.delete(path)

          begin
            # Remove the directories left empty by the deletion; stop quietly
            # at the first one that still has content.
            FileUtils.rmdir(File.dirname(path), parents: true)
          rescue Errno::ENOTEMPTY
            # OK
          end
        end

        reclaimed_bytes += size
        removed += 1

        progress.log(" Found and removed orphan: #{key} ")
      rescue => e
        progress.log(pastel.red(" Error processing #{key} : #{e} "))
      end
    end
  end

  # The bar was created without a total; close it out at the count reached.
  progress.total = progress.progress
  progress.finish

  say(" Removed #{removed} orphans (approx. #{number_to_human_size(reclaimed_bytes)} ) #{dry_run_mode_suffix} ", :green, true)
end
2019-09-10 23:29:12 +10:00
# Command-line options and help text for the `refresh` command defined next.
option :account, type: :string
option :domain, type: :string
option :status, type: :numeric
option :days, type: :numeric
option :concurrency, type: :numeric, default: 5, aliases: %i[c]
option :verbose, type: :boolean, default: false, aliases: %i[v]
option :dry_run, type: :boolean, default: false
option :force, type: :boolean, default: false
desc 'refresh', 'Fetch remote media files'
long_desc <<~DESC
  Re - downloads media attachments from other servers . You must specify the
  source of media attachments with one of the following options :
  Use the - - status option to download attachments from a specific status ,
  using the status local numeric ID .
  Use the - - account option to download attachments from a specific account ,
  using username @domain handle of the account .
  Use the - - domain option to download attachments from a specific domain .
  Use the - - days option to limit attachments created within days .
  By default , attachments that are believed to be already downloaded will
  not be re - downloaded . To force re - download of every URL , use - - force .
DESC
# Re-downloads remote media attachments selected by --status, --account,
# --domain or --days (checked in that order; the first one given wins).
# --days additionally narrows any of the other selections.
#
# Calls fail_with_message when no source option is given or when the
# --account handle does not resolve to a remote account.
#
# Attachments without a remote URL, attachments that already have a file
# name (unless --force), and attachments from domains for which
# DomainBlock.reject_media? is true are skipped. In dry-run mode nothing is
# reset or saved.
def refresh
  if options[:status]
    scope = MediaAttachment.where(status_id: options[:status])
  elsif options[:account]
    username, domain = options[:account].split('@')
    account = Account.find_remote(username, domain)

    fail_with_message 'No such account' if account.nil?

    scope = MediaAttachment.where(account_id: account.id)
  elsif options[:domain]
    scope = MediaAttachment.joins(:account).merge(Account.by_domain_and_subdomains(options[:domain]))
  elsif options[:days].present?
    scope = MediaAttachment.remote
  else
    fail_with_message 'Specify the source of media attachments'
  end

  scope = scope.where('media_attachments.id > ?', Mastodon::Snowflake.id_at(options[:days].days.ago, with_random: false)) if options[:days].present?

  processed, aggregate = parallelize_with_progress(scope) do |media_attachment|
    next if media_attachment.remote_url.blank? || (!options[:force] && media_attachment.file_file_name.present?)
    next if DomainBlock.reject_media?(media_attachment.account.domain)

    unless dry_run?
      media_attachment.reset_file!
      media_attachment.reset_thumbnail!
      media_attachment.save
    end

    # Either size can be nil (e.g. a dry run over an attachment that was
    # never downloaded), so both default to 0 rather than raising on nil + n.
    (media_attachment.file_file_size || 0) + (media_attachment.thumbnail_file_size || 0)
  end

  say(" Downloaded #{processed} media attachments (approx. #{number_to_human_size(aggregate)} ) #{dry_run_mode_suffix} ", :green, true)
end
2019-10-08 05:04:56 +11:00
# Short help line for the `usage` command defined next.
desc 'usage', 'Calculate disk space consumed by Mastodon'
# Prints one line per storage category with the summed file sizes recorded
# for it, formatted by number_to_human_size. Categories that distinguish
# local records also show the local share.
def usage
  all_attachments   = number_to_human_size(media_attachment_storage_size)
  local_attachments = number_to_human_size(local_media_attachment_storage_size)
  say(" Attachments: \t #{all_attachments} ( #{local_attachments} local) ")

  all_emoji   = number_to_human_size(CustomEmoji.sum(:image_file_size))
  local_emoji = number_to_human_size(CustomEmoji.local.sum(:image_file_size))
  say(" Custom emoji: \t #{all_emoji} ( #{local_emoji} local) ")

  preview_cards = number_to_human_size(PreviewCard.sum(:image_file_size))
  say(" Preview cards: \t #{preview_cards} ")

  all_avatars   = number_to_human_size(Account.sum(:avatar_file_size))
  local_avatars = number_to_human_size(Account.local.sum(:avatar_file_size))
  say(" Avatars: \t #{all_avatars} ( #{local_avatars} local) ")

  all_headers   = number_to_human_size(Account.sum(:header_file_size))
  local_headers = number_to_human_size(Account.local.sum(:header_file_size))
  say(" Headers: \t #{all_headers} ( #{local_headers} local) ")

  backups = number_to_human_size(Backup.sum(:dump_file_size))
  say(" Backups: \t #{backups} ")

  imports = number_to_human_size(Import.sum(:data_file_size))
  say(" Imports: \t #{imports} ")

  settings = number_to_human_size(SiteUpload.sum(:file_file_size))
  say(" Settings: \t #{settings} ")
end
2019-11-04 22:55:20 +11:00
2020-03-26 11:56:41 +11:00
# Short help line for the `lookup` command defined next.
desc 'lookup URL', 'Lookup where media is displayed by passing a media URL'
# Prints the public URL of the record a media URL belongs to.
#
# The URL's path, minus its first two "/"-separated pieces and any "cache"
# segment, must have one of the VALID_PATH_SEGMENTS_SIZE lengths and name a
# model in PRELOAD_MODEL_WHITELIST. When the record found responds to
# #status, that status is reported instead of the record itself.
#
# Calls fail_with_message when the URL cannot be parsed, is not shaped like a
# media URL, names an unknown model, matches no record, or the record has no
# public URL.
#
# @param url [String] URL of a media file
def lookup(url)
  path = Addressable::URI.parse(url).path

  # drop(2) yields [] for short paths, where [2..] would yield nil and make
  # the next line raise NoMethodError instead of reporting a bad URL.
  path_segments = path.split('/').drop(2)
  path_segments.delete('cache')

  fail_with_message 'Not a media URL' unless VALID_PATH_SEGMENTS_SIZE.include?(path_segments.size)

  model_name = path_segments.first.classify
  record_id  = path_segments[2..-2].join.to_i

  fail_with_message "Cannot find corresponding model: #{model_name}" unless PRELOAD_MODEL_WHITELIST.include?(model_name)

  record = model_name.constantize.find_by(id: record_id)
  record = record.status if record.respond_to?(:status)

  fail_with_message 'Cannot find corresponding record' unless record

  display_url = ActivityPub::TagManager.instance.url_for(record)

  fail_with_message 'No public URL for this type of record' if display_url.blank?

  say(display_url, :blue)
rescue Addressable::URI::InvalidURIError
  fail_with_message 'Invalid URL'
end
private
2024-03-15 02:22:52 +11:00
# Sum of file_and_thumbnail_size_sql over all media attachments.
def media_attachment_storage_size
  size_expression = file_and_thumbnail_size_sql
  MediaAttachment.sum(size_expression)
end
# Sum of file_and_thumbnail_size_sql over the media attachments whose
# account is in Account.local.
def local_media_attachment_storage_size
  local_media = MediaAttachment.where(account: Account.local)
  local_media.sum(file_and_thumbnail_size_sql)
end
# SQL expression adding a row's file size and thumbnail size, with a missing
# value on either side counted as 0.
def file_and_thumbnail_size_sql
  Arel.sql('COALESCE ( file_file_size , 0 ) + COALESCE ( thumbnail_file_size , 0 )')
end
2020-03-26 11:56:41 +11:00
# Names of the only models that a storage path's first segment may resolve
# to before this class looks up a record for it.
PRELOAD_MODEL_WHITELIST = %w[
  Account
  Backup
  CustomEmoji
  Import
  MediaAttachment
  PreviewCard
  SiteUpload
].freeze
# Loads, with one query per model, the records referenced by a batch of
# storage objects.
#
# Keys are split on "/"; any "cache" segment is dropped. Keys that do not
# have a VALID_PATH_SEGMENTS_SIZE length, or whose first segment does not
# classify to a PRELOAD_MODEL_WHITELIST model, are ignored.
#
# @param objects [Enumerable] items responding to #key
# @return [Hash] model name => { record id => record }
def preload_records_from_mixed_objects(objects)
  ids_by_model = Hash.new { |hash, key| hash[key] = [] }

  objects.each do |object|
    segments = object.key.split('/')
    segments.delete('cache')

    next unless VALID_PATH_SEGMENTS_SIZE.include?(segments.size)

    model_name = segments.first.classify
    next unless PRELOAD_MODEL_WHITELIST.include?(model_name)

    # The id is spread over the segments between the attachment name and the
    # file name; to_i reads the leading digits of their concatenation.
    ids_by_model[model_name] << segments[2..-2].join.to_i
  end

  ids_by_model.to_h do |model_name, record_ids|
    [model_name, model_name.constantize.where(id: record_ids).index_by(&:id)]
  end
end
2018-08-25 21:25:39 +10:00
end
end