From eb305e0a98015ef19bca5ffea44f59e031fa6f11 Mon Sep 17 00:00:00 2001
From: Gerhard Schlager
Date: Tue, 11 Feb 2025 16:33:15 +0100
Subject: [PATCH] DEV: Update default config for `uploads_importer` (#31208)

---
 script/bulk_import/uploads_importer.yml | 29 +++++++++++++++----------
 1 file changed, 18 insertions(+), 11 deletions(-)

diff --git a/script/bulk_import/uploads_importer.yml b/script/bulk_import/uploads_importer.yml
index ac3ca5e7d35..55401826e9e 100644
--- a/script/bulk_import/uploads_importer.yml
+++ b/script/bulk_import/uploads_importer.yml
@@ -1,16 +1,23 @@
-source_db_path: "/path/to/your/db.sqlite3"
-output_db_path: "/path/to/your/uploads.sqlite3"
+# Path configurations for IntermediateDB and UploadsDB that the import script generates.
+# When running inside a Docker container, these files should be located in the
+# /shared/import directory to ensure proper functioning.
+source_db_path: "/shared/import/intermediate.db"
+output_db_path: "/shared/import/uploads.db"
 
+# Defines the directories the import script searches for uploaded files.
+# For most cases, a single path is sufficient. However, if uploads are
+# spread across multiple directories that cannot or should not be merged,
+# list all those directories here as multiple paths.
 root_paths:
-  - "/path/to/your/files"
-  - "/path/to/more/files"
+  - "/shared/import/files"
 
-# Files that are downloaded from URLs are cached in this directory.
-download_cache_path: "/path/to/downloaded/files"
+# Directory where files downloaded from URLs are cached for processing.
+download_cache_path: "/shared/import/downloaded_files"
 
-# The number of threads to use for processing uploads is calculated as:
-# thread_count = [number of cores] * [thread_count_factor]
-# The thread count will be doubled if uploads are stored on S3 because there's a higher latency.
+# The number of threads used for processing uploads is determined as:
+# thread_count = [number of CPU cores] * [thread_count_factor]
+# If uploads are stored on Amazon S3, the thread count is automatically
+# doubled to mitigate higher network latencies typically associated with S3.
 thread_count_factor: 1.5
 
 # Delete uploads from the output database that are not found in the source database.
@@ -20,7 +27,7 @@ delete_surplus_uploads: false
 delete_missing_uploads: false
 
 # Check if files are missing in the upload store and update the database accordingly.
-# Set to false and re-run the script afterwards if you want to create new uploads for missing files.
+# Set to false and re-run the script afterward if you want to create new uploads for missing files.
 fix_missing: false
 
 # Create optimized images for post uploads and avatars.
@@ -46,5 +53,5 @@ site_settings:
 # be applied to the path to try and find the file. The first transformation that results in a file
 # being found will be used.
 path_replacements:
-# - ["/foo/", "/bar"]
+# - ["/foo/", "/bar/"]
 # - ["/foo/", "/bar/baz/"]