Tags
I made a mistake in an app I am working on and ended up creating thumbnails of thumbnails, thumbnails of thumbnails of thumbnails, and so on. I was able to delete them all, but I wanted to make a one-shot script to do it.
One Shot App
I got the idea for the one-shot app from Simon Willison and replicated his setup in a ChatGPT project.
Initial Prompt
a mistake was made in my s3 bucket and I've created a bunch of extra files
write a script that deletes all files that contain _thumb_thumb
⬢ [devtainer] ❯ aws s3 ls s3://dropper
2024-12-29 14:32:32 16158 02271f4c-be18-4bea-b23e-d00f9fe42b9f.webp
2025-01-11 14:20:49 2878 02271f4c-be18-4bea-b23e-d00f9fe42b9f_thumb.webp
2025-01-11 14:21:17 2858 02271f4c-be18-4bea-b23e-d00f9fe42b9f_thumb_thumb.webp
2025-01-11 14:21:44 2856 02271f4c-be18-4bea-b23e-d00f9fe42b9f_thumb_thumb_thumb.webp
2025-01-11 14:21:44 2856 02271f4c-be18-4bea-b23e-d00f9fe42b9f_thumb_thumb_thumb_thumb.webp
2024-12-27 10:25:36 2812 06422c09-d0da-44ec-9339-786864ebccf2.webp
2025-01-11 14:20:49 2710 06422c09-d0da-44ec-9339-786864ebccf2_thumb.webp
2025-01-11 14:21:17 2652 06422c09-d0da-44ec-9339-786864ebccf2_thumb_thumb.webp
2025-01-11 14:21:45 2632 06422c09-d0da-44ec-9339-786864ebccf2_thumb_thumb_thumb.webp
2025-01-11 14:21:45 2632 06422c09-d0da-44ec-9339-786864ebccf2_thumb_thumb_thumb_thumb.webp
2024-12-29 20:44:14 146060 074edddb-8b46-4d94-9b55-d70a7bb74366.webp
2025-01-11 14:20:50 12476 074edddb-8b46-4d94-9b55-d70a7bb74366_thumb.webp
2025-01-11 14:21:17 12400 074edddb-8b46-4d94-9b55-d70a7bb74366_thumb_thumb.webp
create a typer application to do this job
include a --dry-run flag
make the search for _thumb_thumb editable
s3_cleanup
Here is the resulting s3_cleanup script. It took 5 or 6 passes to get everything I wanted with soft-delete enabled. The dry run produces nice preview output. About 90% of this was created by ChatGPT 4o, with some slight hand edits by me along the way.
#!/usr/bin/env -S uv run --quiet --script
# /// script
# requires-python = ">=3.12"
# dependencies = [
# "boto3",
# "typer",
# "rich"
# ]
# ///
import boto3
from datetime import datetime, timezone
from rich.console import Console
from rich.table import Table
import traceback
import typer
# Typer application object; the @app.command() functions in this script register on it.
app = typer.Typer()
# Rich console used for the styled terminal output (tables, colored status lines).
console = Console()
# Default key prefix that soft-deleted objects are copied under.
DEFAULT_SOFT_DELETE_DIR = "soft-delete/"
def human_readable_size(size: int) -> str:
    """Format a byte count as a two-decimal number followed by a unit.

    The value is divided by 1024 until it drops below 1024 or the units
    B, KB, MB, GB and TB are exhausted; anything larger is reported in PB.

    Args:
        size: Number of bytes.

    Returns:
        A string such as ``"15.78 KB"``.
    """
    units = ("B", "KB", "MB", "GB", "TB")
    index = 0
    while index < len(units):
        if size < 1024:
            return f"{size:.2f} {units[index]}"
        # Step up to the next unit.
        size = size / 1024
        index += 1
    # Still >= 1024 after TB: report in petabytes.
    return f"{size:.2f} PB"
def calculate_file_age(last_modified: datetime) -> str:
    """Describe how long ago ``last_modified`` was, using the largest whole unit.

    Args:
        last_modified: Timezone-aware timestamp to measure from. It is
            subtracted from the current UTC time, so a naive datetime
            raises ``TypeError``.

    Returns:
        The age in whole days if at least one day old, otherwise whole
        hours if at least one hour old, otherwise whole minutes, e.g.
        ``"3 days"``, ``"1 hour"`` or ``"0 minutes"``.
    """

    def plural(count: int, unit: str) -> str:
        # "1 day" but "0 days" / "2 days".
        return f"{count} {unit}" if count == 1 else f"{count} {unit}s"

    age = datetime.now(timezone.utc) - last_modified
    # A timestamp slightly in the future (clock skew) gives a negative
    # timedelta whose .days is -1 and whose .seconds wraps to almost a
    # full day; report it as brand new instead of "23 hours".
    if age.total_seconds() < 0:
        return plural(0, "minute")

    if age.days > 0:
        return plural(age.days, "day")

    hours, remainder = divmod(age.seconds, 3600)
    if hours > 0:
        return plural(hours, "hour")
    return plural(remainder // 60, "minute")
@app.command()
def clean(
    bucket_name: str = typer.Argument(..., help="The name of the S3 bucket."),
    substring: str = typer.Argument(
        ..., help="The substring to search for in file names."
    ),
    dry_run: bool = typer.Option(
        False, help="If set, no files will be deleted; only listed."
    ),
    soft_delete: bool = typer.Option(
        False,
        help="If set, files will be moved to a soft delete directory instead of permanently deleted.",
    ),
    soft_delete_dir: str = typer.Option(
        DEFAULT_SOFT_DELETE_DIR, help="The directory to move soft-deleted files to."
    ),
    max_age: int = typer.Option(None, help="Maximum age of files to include, in days."),
    min_age: int = typer.Option(None, help="Minimum age of files to include, in days."),
):
    """Delete or soft-delete files in an S3 bucket that contain a specific substring in their names."""
    # NOTE: the docstring above is rendered by Typer as the command's --help
    # text, so implementation notes live in comments instead.
    #
    # Flow: list every object in the bucket, keep those whose key contains
    # `substring` and whose age in whole days lies within
    # [min_age, max_age] (each bound optional), then either print a preview
    # table (--dry-run), move each match under `soft_delete_dir`
    # (--soft-delete: copy then delete), or delete it outright.
    # Any failure is reported and the command exits with status 1.
    if soft_delete and not soft_delete_dir:
        # With an empty prefix the copy target would equal the source key.
        console.print(
            "[bold red]Error:[/bold red] --soft-delete-dir must not be empty.",
            style="red",
        )
        raise typer.Exit(code=1)

    s3 = boto3.client("s3")
    try:
        # list_objects_v2 returns at most 1,000 keys per call; walk every
        # page so large buckets are not silently truncated.
        paginator = s3.get_paginator("list_objects_v2")
        all_objects = []
        for page in paginator.paginate(Bucket=bucket_name):
            all_objects.extend(page.get("Contents", []))

        if not all_objects:
            console.print("[bold yellow]No files found in the bucket.[/bold yellow]")
            return

        now = datetime.now(timezone.utc)

        def file_within_age_range(obj):
            file_age = (now - obj["LastModified"]).days
            if max_age is not None and file_age > max_age:
                return False
            if min_age is not None and file_age < min_age:
                return False
            return True

        def already_soft_deleted(key):
            # When soft-deleting, objects already under the soft-delete
            # prefix must not be moved again (that would nest them as
            # soft-delete/soft-delete/... on every re-run).
            return soft_delete and key.startswith(soft_delete_dir)

        files_to_process = [
            obj
            for obj in all_objects
            if substring in obj["Key"]
            and not already_soft_deleted(obj["Key"])
            and file_within_age_range(obj)
        ]
        if not files_to_process:
            console.print(
                "[bold yellow]No files matching criteria found in the bucket.[/bold yellow]"
            )
            return

        if dry_run:
            console.print(
                "[bold blue]Dry run mode: The following files would be processed:[/bold blue]"
            )
            table = Table(title="Files to be Processed")
            table.add_column("File Name", style="cyan", no_wrap=True)
            table.add_column("Size", style="magenta", justify="right")
            table.add_column("Age", style="green", justify="right")
            table.add_column("Action", style="yellow", no_wrap=True)
            total_size = 0
            for obj in files_to_process:
                human_size = human_readable_size(obj["Size"])
                file_age = calculate_file_age(obj["LastModified"])
                action = (
                    f"[bold yellow]-> {soft_delete_dir}{obj['Key']}[/bold yellow]"
                    if soft_delete
                    else "[bold red]DELETE[/bold red]"
                )
                table.add_row(obj["Key"], human_size, file_age, action)
                total_size += obj["Size"]
            console.print(table)
            console.print(
                f"\n[bold green]Summary:[/bold green] {len(files_to_process)} files, Total Size: {human_readable_size(total_size)}, Operation: {'Soft Delete' if soft_delete else 'Delete'}"
            )
        else:
            for obj in files_to_process:
                file_key = obj["Key"]
                if soft_delete:
                    # S3 has no rename: copy to the new key, then delete
                    # the original only after the copy succeeded.
                    copy_source = {"Bucket": bucket_name, "Key": file_key}
                    new_key = f"{soft_delete_dir}{file_key}"
                    s3.copy_object(
                        CopySource=copy_source, Bucket=bucket_name, Key=new_key
                    )
                    s3.delete_object(Bucket=bucket_name, Key=file_key)
                    console.print(
                        f"[bold yellow]Moved to soft-delete:[/bold yellow] {file_key} -> {new_key}"
                    )
                else:
                    s3.delete_object(Bucket=bucket_name, Key=file_key)
                    console.print(f"[bold red]Deleted:[/bold red] {file_key}")
            console.print(
                f"[bold green]Operation completed. {len(files_to_process)} files processed. Operation: {'Soft Delete' if soft_delete else 'Delete'}[/bold green]"
            )
    except Exception as e:
        console.print(f"[bold red]Error:[/bold red] {e}", style="red")
        # Signal failure to the shell instead of exiting with status 0.
        raise typer.Exit(code=1) from e
@app.command()
def clear_soft_delete(
    bucket_name: str = typer.Argument(..., help="The name of the S3 bucket."),
    soft_delete_dir: str = typer.Option(
        DEFAULT_SOFT_DELETE_DIR, help="The soft delete directory to clear."
    ),
    dry_run: bool = typer.Option(
        False, help="If set, no files will be deleted; only listed."
    ),
):
    """Remove all files in the soft delete directory."""
    # NOTE: the docstring above is rendered by Typer as the command's --help
    # text, so implementation notes live in comments instead.
    #
    # Flow: list every object whose key starts with `soft_delete_dir`, then
    # either print a preview table (--dry-run) or delete each object.
    # Any failure is reported with a traceback and the command exits with
    # status 1.
    if not soft_delete_dir:
        # An empty prefix matches every key, which would wipe the bucket.
        console.print(
            "[bold red]Error:[/bold red] --soft-delete-dir must not be empty.",
            style="red",
        )
        raise typer.Exit(code=1)

    s3 = boto3.client("s3")
    try:
        # list_objects_v2 returns at most 1,000 keys per call; walk every
        # page so the directory is cleared completely in one run.
        paginator = s3.get_paginator("list_objects_v2")
        files_to_delete = []
        for page in paginator.paginate(Bucket=bucket_name, Prefix=soft_delete_dir):
            files_to_delete.extend(page.get("Contents", []))

        if not files_to_delete:
            console.print(
                f"[bold yellow]No files found in the soft delete directory '{soft_delete_dir}'.[/bold yellow]"
            )
            return

        if dry_run:
            console.print(
                "[bold blue]Dry run mode: The following files would be deleted from the soft delete directory:[/bold blue]"
            )
            table = Table(title="Files to be Deleted")
            table.add_column("File Name", style="cyan", no_wrap=True)
            table.add_column("Size", style="magenta", justify="right")
            total_size = 0
            for obj in files_to_delete:
                human_size = human_readable_size(obj["Size"])
                table.add_row(obj["Key"], human_size)
                total_size += obj["Size"]
            console.print(table)
            console.print(
                f"\n[bold green]Summary:[/bold green] {len(files_to_delete)} files, Total Size: {human_readable_size(total_size)}"
            )
        else:
            for obj in files_to_delete:
                file_key = obj["Key"]
                s3.delete_object(Bucket=bucket_name, Key=file_key)
                console.print(
                    f"[bold red]Deleted from soft-delete:[/bold red] {file_key}"
                )
            console.print(
                f"[bold green]Soft delete directory '{soft_delete_dir}' cleared. {len(files_to_delete)} files removed.[/bold green]"
            )
    except Exception as e:
        console.print(f"[bold red]Error:[/bold red] {e}", style="red")
        console.print("[bold red]Operation failed.[/bold red]")
        console.print(traceback.format_exc())
        # Signal failure to the shell instead of exiting with status 0.
        raise typer.Exit(code=1) from e
@app.command()
def undo_soft_delete(
    bucket_name: str = typer.Argument(..., help="The name of the S3 bucket."),
    soft_delete_dir: str = typer.Option(
        DEFAULT_SOFT_DELETE_DIR, help="The soft delete directory to restore from."
    ),
    dry_run: bool = typer.Option(
        False, help="If set, no files will be restored; only listed."
    ),
):
    """Restore all files from the soft delete directory to their original locations."""
    # NOTE: the docstring above is rendered by Typer as the command's --help
    # text, so implementation notes live in comments instead.
    #
    # Flow: list every object whose key starts with `soft_delete_dir`; the
    # original key is the listed key with that prefix stripped. Then either
    # print a preview table (--dry-run) or copy each object back to its
    # original key and delete the soft-deleted copy.
    # Any failure is reported and the command exits with status 1.
    if not soft_delete_dir:
        # With an empty prefix the "original" key equals the current key,
        # so every object would be copied onto itself.
        console.print(
            "[bold red]Error:[/bold red] --soft-delete-dir must not be empty.",
            style="red",
        )
        raise typer.Exit(code=1)

    s3 = boto3.client("s3")
    try:
        # list_objects_v2 returns at most 1,000 keys per call; walk every
        # page so everything is restored in one run.
        paginator = s3.get_paginator("list_objects_v2")
        listed = []
        for page in paginator.paginate(Bucket=bucket_name, Prefix=soft_delete_dir):
            listed.extend(page.get("Contents", []))

        # An object whose key is exactly the prefix (a "folder" marker) has
        # no original location; restoring it would target an empty key and
        # abort the whole run, so leave it alone.
        files_to_restore = [obj for obj in listed if obj["Key"] != soft_delete_dir]

        if not files_to_restore:
            console.print(
                f"[bold yellow]No files found in the soft delete directory '{soft_delete_dir}'.[/bold yellow]"
            )
            return

        if dry_run:
            console.print(
                "[bold blue]Dry run mode: The following files would be restored:[/bold blue]"
            )
            table = Table(title="Files to be Restored")
            table.add_column("File Name", style="cyan", no_wrap=True)
            table.add_column("Size", style="magenta", justify="right")
            table.add_column("Original Location", style="green", no_wrap=True)
            total_size = 0
            for obj in files_to_restore:
                human_size = human_readable_size(obj["Size"])
                original_key = obj["Key"].removeprefix(soft_delete_dir)
                table.add_row(obj["Key"], human_size, original_key)
                total_size += obj["Size"]
            console.print(table)
            console.print(
                f"\n[bold green]Summary:[/bold green] {len(files_to_restore)} files, Total Size: {human_readable_size(total_size)}"
            )
        else:
            for obj in files_to_restore:
                file_key = obj["Key"]
                # Remove the soft delete prefix to recover the original key.
                original_key = file_key.removeprefix(soft_delete_dir)
                copy_source = {"Bucket": bucket_name, "Key": file_key}
                # Copy back first; delete the soft-deleted copy only after
                # the copy succeeded.
                s3.copy_object(
                    CopySource=copy_source, Bucket=bucket_name, Key=original_key
                )
                s3.delete_object(Bucket=bucket_name, Key=file_key)
                console.print(
                    f"[bold green]Restored:[/bold green] {file_key} -> {original_key}"
                )
            console.print(
                f"[bold green]Restoration completed. {len(files_to_restore)} files restored.[/bold green]"
            )
    except Exception as e:
        console.print(f"[bold red]Error:[/bold red] {e}", style="red")
        # Signal failure to the shell instead of exiting with status 0.
        raise typer.Exit(code=1) from e
# Run the Typer CLI only when this file is executed as a script.
if __name__ == "__main__":
    app()
full help text output
Here is the help text for all of the commands in the script.
dropper on main [!?] is 📦 v1.0.0 via v22.13.0 v3.11.10 on (us-east-1) NO PYTHON VENV SET took 7s
⬢ [devtainer] ❯ ./scripts/s3_cleanup.py --help
Usage: s3_cleanup.py [OPTIONS] COMMAND [ARGS]...
╭─ Options ───────────────────────────────────────────────────────────────────────────────────────────────────────╮
│ --install-completion Install completion for the current shell. │
│ --show-completion Show completion for the current shell, to copy it or customize the installation. │
│ --help Show this message and exit. │
╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
╭─ Commands ──────────────────────────────────────────────────────────────────────────────────────────────────────╮
│ clean Delete or soft-delete files in an S3 bucket that contain a specific substring in their │
│ names. │
│ clear-soft-delete Remove all files in the soft delete directory. │
│ undo-soft-delete Restore all files from the soft delete directory to their original locations. │
╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
⬢ [devtainer] ❯ ./scripts/s3_cleanup.py clean --help
Usage: s3_cleanup.py clean [OPTIONS] BUCKET_NAME SUBSTRING
Delete or soft-delete files in an S3 bucket that contain a specific substring in their names.
╭─ Arguments ─────────────────────────────────────────────────────────────────────────────────────────────────────╮
│ * bucket_name TEXT The name of the S3 bucket. [default: None] [required] │
│ * substring TEXT The substring to search for in file names. [default: None] [required] │
╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
╭─ Options ───────────────────────────────────────────────────────────────────────────────────────────────────────╮
│ --dry-run --no-dry-run If set, no files will be deleted; only listed. │
│ [default: no-dry-run] │
│ --soft-delete --no-soft-delete If set, files will be moved to a soft delete directory │
│ instead of permanently deleted. │
│ [default: no-soft-delete] │
│ --soft-delete-dir TEXT The directory to move soft-deleted files to. │
│ [default: soft-delete/] │
│ --max-age INTEGER Maximum age of files to include, in days. [default: None] │
│ --min-age INTEGER Minimum age of files to include, in days. [default: None] │
│ --help Show this message and exit. │
╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
⬢ [devtainer] ❯ ./scripts/s3_cleanup.py clear-soft-delete --help
Usage: s3_cleanup.py clear-soft-delete [OPTIONS] BUCKET_NAME
Remove all files in the soft delete directory.
╭─ Arguments ─────────────────────────────────────────────────────────────────────────────────────────────────────╮
│ * bucket_name TEXT The name of the S3 bucket. [default: None] [required] │
╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
╭─ Options ───────────────────────────────────────────────────────────────────────────────────────────────────────╮
│ --soft-delete-dir TEXT The soft delete directory to clear. [default: soft-delete/] │
│ --dry-run --no-dry-run If set, no files will be deleted; only listed. [default: no-dry-run] │
│ --help Show this message and exit. │
╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
⬢ [devtainer] ❯ ./scripts/s3_cleanup.py undo-soft-delete --help
Usage: s3_cleanup.py undo-soft-delete [OPTIONS] BUCKET_NAME
Restore all files from the soft delete directory to their original locations.
╭─ Arguments ─────────────────────────────────────────────────────────────────────────────────────────────────────╮
│ * bucket_name TEXT The name of the S3 bucket. [default: None] [required] │
╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
╭─ Options ───────────────────────────────────────────────────────────────────────────────────────────────────────╮
│ --soft-delete-dir TEXT The soft delete directory to restore from. [default: soft-delete/] │
│ --dry-run --no-dry-run If set, no files will be restored; only listed. │
│ [default: no-dry-run] │
│ --help Show this message and exit. │
╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯