mirror of
https://github.com/vegu-ai/talemate.git
synced 2025-12-15 19:27:47 +01:00
186 lines
6.0 KiB
Python
186 lines
6.0 KiB
Python
import os
|
|
import re
|
|
import subprocess
|
|
import argparse
|
|
|
|
|
|
def find_image_references(md_file):
    """Return the versioned image paths referenced by a markdown file.

    Every inline image link (``![alt](target)``) in *md_file* is inspected.
    A target is kept only when, after removing everything up to and
    including the first ``img/`` segment, its first path component looks
    like a version number (digits and dots only, e.g. ``0.29.0``) and at
    least one more component follows.

    Args:
        md_file: Path of the markdown file to read (decoded as UTF-8).

    Returns:
        A list of image paths relative to the image directory, normalized
        with ``os.path.normpath`` so they compare equal to paths produced
        by ``os.path.relpath``. Duplicates are not removed.
    """
    with open(md_file, "r", encoding="utf-8") as f:
        content = f.read()

    pattern = r"!\[.*?\]\((.*?)\)"

    cleaned_paths = []
    for match in re.findall(pattern, content):
        # Drop an optional trailing link title, e.g.
        # ![alt](img/0.29.0/a.png "Title"); otherwise the title would end
        # up inside the returned path and never match a file on disk.
        target = re.sub(r"""\s+(["']).*\1\s*$""", "", match.strip())

        path = target.lstrip("/")
        if "img/" in path:
            path = path[path.index("img/") + 4 :]

        # Normalize separators and redundant segments so the result is
        # directly comparable with os.path.relpath() output.
        path = os.path.normpath(path)

        # Only keep references to versioned images
        parts = path.split(os.sep)
        if len(parts) >= 2 and parts[0].replace(".", "").isdigit():
            cleaned_paths.append(path)

    return cleaned_paths
|
|
|
|
|
|
def scan_markdown_files(docs_dir):
    """Collect the path of every ``.md`` file found anywhere under *docs_dir*.

    The directory tree is walked with ``os.walk``; each returned entry is
    the walked directory joined with the file name.
    """
    return [
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(docs_dir)
        for name in names
        if name.endswith(".md")
    ]
|
|
|
|
|
|
def find_all_images(img_dir):
    """List image files that live inside version-named subdirectories.

    Walks *img_dir* and returns, relative to *img_dir*, every file with a
    ``.png``, ``.jpg``, ``.jpeg``, ``.gif`` or ``.svg`` extension
    (case-insensitive) whose top-level directory name consists only of
    digits and dots. Files directly inside *img_dir* are ignored.
    """
    image_suffixes = (".png", ".jpg", ".jpeg", ".gif", ".svg")
    collected = []

    for current, _subdirs, names in os.walk(img_dir):
        relative = os.path.relpath(current, img_dir)
        top_level = relative.split(os.sep)[0]

        # Skip img_dir itself and any tree whose first component is not a
        # version number such as "0.29.0".
        if relative == "." or not top_level.replace(".", "").isdigit():
            continue

        collected.extend(
            os.path.relpath(os.path.join(current, name), img_dir)
            for name in names
            if name.lower().endswith(image_suffixes)
        )

    return collected
|
|
|
|
|
|
def _ere_escape(text):
    """Backslash-escape regex metacharacters so *text* is matched literally.

    The result is usable both as a POSIX extended regex (``grep -E``) and
    as a Python ``re`` pattern.
    """
    return re.sub(r"([\\.\[{()*+?^$|])", r"\\\1", text)


def _scan_for_pattern(docs_dir, pattern):
    """Return True if any line of any file under *docs_dir* matches *pattern*."""
    compiled = re.compile(pattern)
    for root, _, files in os.walk(docs_dir):
        for name in files:
            try:
                with open(
                    os.path.join(root, name), "r", encoding="utf-8", errors="ignore"
                ) as f:
                    if any(compiled.search(line) for line in f):
                        return True
            except OSError:
                # Unreadable file: nothing to match against, keep scanning.
                continue
    return False


def grep_check_image(docs_dir, image_path):
    """
    Check if versioned image is referenced anywhere using grep.

    The first component of *image_path* (the version, e.g. "0.29.0") and
    its last component (the file name) must both appear, in that order, on
    a single line of some file under *docs_dir*. Both are matched
    literally, so regex metacharacters in a file name (``[``, ``+``, ``.``,
    ...) cannot cause missed or spurious matches.

    If the ``grep`` executable is missing or exits with an error status,
    the same search is performed in pure Python instead of silently
    reporting the image as unreferenced.

    Returns True if any reference is found, False otherwise. Unexpected
    errors are printed and reported as False.
    """
    try:
        # Split the image path to get version and filename
        parts = os.path.normpath(image_path).split(os.sep)
        version = parts[0]  # e.g., "0.29.0"
        filename = parts[-1]  # e.g., "world-state-suggestions-2.png"

        # For versioned images, require both version and filename to match
        version_pattern = f"{_ere_escape(version)}.*{_ere_escape(filename)}"

        try:
            result = subprocess.run(
                # "-e" and "--" keep the pattern and the directory from
                # ever being parsed as options.
                ["grep", "-r", "-l", "-E", "-e", version_pattern, "--", docs_dir],
                capture_output=True,
                text=True,
            )
        except OSError:
            # grep is not installed or cannot be executed.
            found = _scan_for_pattern(docs_dir, version_pattern)
        else:
            if result.stdout.strip():
                found = True
            elif result.returncode == 1:
                # Exit status 1 is grep's "no lines selected".
                found = False
            else:
                # Any other status without output is treated as a failed
                # search; verify in Python instead.
                found = _scan_for_pattern(docs_dir, version_pattern)

        if found:
            print(
                f"Found reference to {image_path} with version pattern: {version_pattern}"
            )
            return True

    except Exception as e:
        print(f"Error during grep check for {image_path}: {e}")

    return False
|
|
|
|
|
|
def main():
    """Command-line entry point: report (and optionally delete) unused images.

    Parses ``--docs-dir``, ``--img-dir``, ``--delete``, ``--verbose`` and
    ``--skip-grep``, gathers the versioned image references from all
    markdown files, compares them with the versioned images on disk,
    optionally double-checks the leftovers with ``grep_check_image`` and
    prints a summary. With ``--delete`` the unused files are removed.
    """
    cli = argparse.ArgumentParser(
        description="Find and optionally delete unused versioned images in MkDocs project"
    )
    cli.add_argument(
        "--docs-dir", type=str, required=True, help="Path to the docs directory"
    )
    cli.add_argument(
        "--img-dir", type=str, required=True, help="Path to the images directory"
    )
    # Boolean switches, registered in the same order as they appear in --help.
    switches = (
        ("--delete", "Delete unused images"),
        ("--verbose", "Show all found references and files"),
        ("--skip-grep", "Skip the additional grep validation"),
    )
    for flag, description in switches:
        cli.add_argument(flag, action="store_true", help=description)
    args = cli.parse_args()

    # Work with absolute paths from here on
    docs_dir = os.path.abspath(args.docs_dir)
    img_dir = os.path.abspath(args.img_dir)

    print(f"Scanning markdown files in: {docs_dir}")
    print(f"Looking for versioned images in: {img_dir}")

    markdown_paths = scan_markdown_files(docs_dir)
    print(f"Found {len(markdown_paths)} markdown files")

    # Union of every versioned reference across all markdown files
    referenced = set().union(
        *(find_image_references(md_path) for md_path in markdown_paths)
    )

    # Images actually present on disk (version directories only)
    on_disk = set(find_all_images(img_dir))

    if args.verbose:
        print("\nAll versioned image references found in markdown:")
        for entry in sorted(referenced):
            print(f"- {entry}")

        print("\nAll versioned images in directory:")
        for entry in sorted(on_disk):
            print(f"- {entry}")

    # Images on disk that no markdown reference points at
    candidates = on_disk - referenced

    # Second opinion via grep unless the user opted out
    if candidates and not args.skip_grep:
        print("\nPerforming additional grep validation...")
        confirmed = {
            entry for entry in candidates if not grep_check_image(docs_dir, entry)
        }

        rescued = len(candidates) - len(confirmed)
        if rescued:
            print(
                f"\nGrep validation found {rescued} additional image references!"
            )
        candidates = confirmed

    # Summary
    print("\nResults:")
    print(f"Total versioned images found: {len(on_disk)}")
    print(f"Versioned images referenced in markdown: {len(referenced)}")
    print(f"Unused versioned images: {len(candidates)}")

    if not candidates:
        print("\nNo unused versioned images found!")
        return

    print("\nUnused versioned images:")
    for entry in sorted(candidates):
        print(f"- {entry}")

    if not args.delete:
        return

    print("\nDeleting unused versioned images...")
    for entry in candidates:
        target = os.path.join(img_dir, entry)
        try:
            os.remove(target)
            print(f"Deleted: {entry}")
        except Exception as e:
            print(f"Error deleting {entry}: {e}")
    print("\nDeletion complete")
|
|
|
|
|
|
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()
|