import os.path
import re
from pathlib import Path

import argparse
import numpy as np


def get_file_export_content(export_file_path: str) -> str:
    """Return the full text of the Ghost JSON export file.

    The file is decoded explicitly as UTF-8 (the encoding JSON documents are
    exchanged in) instead of the platform's locale encoding, so non-ASCII
    post content is read the same way on every system.

    Args:
        export_file_path: Path of the export file to read.

    Returns:
        The whole file content as a single string.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    with open(export_file_path, encoding="utf-8") as file:
        return file.read()


def get_images_from_export_file(file_content: str) -> list[str]:
    """Extract the image paths referenced in a Ghost export.

    Two kinds of references are recognised, both anchored on the
    ``__GHOST_URL__/`` placeholder:

    * original images: ``content/images/YYYY/MM/<file>.<ext>``
    * resized images:  ``content/images/size/w<N>/YYYY/MM/<file>.<ext>``

    Args:
        file_content: Raw text of the export file.

    Returns:
        The matched paths without the ``__GHOST_URL__/`` prefix: every
        original-image match first, then every resized-image match, each
        group in order of appearance. Duplicates are kept.
    """
    # Raw strings keep the regex escapes intact. The dot before the extension
    # is escaped on purpose: an unescaped "." matches any character, which let
    # a match run past the file name (e.g. "img.png)text") and accepted names
    # that have no extension at all.
    dated_file = r"\d{4}/\d{2}/[-\w.]+\.[a-zA-Z]{2,4}"
    normal_pattern = r"__GHOST_URL__/(content/images/" + dated_file + r")"
    resized_pattern = (
        r"__GHOST_URL__/(content/images/size/w[a-zA-Z0-9]+/" + dated_file + r")"
    )

    normal_images = re.findall(normal_pattern, file_content)
    resized_images = re.findall(resized_pattern, file_content)
    return normal_images + resized_images


# Returns the relative location of all the images stored in the Ghost folder ("content/images/...")
def get_sanitized_images_location_from_folder(ghost_install_dir: str) -> list[str]:
    """List every file under ``<ghost_install_dir>/content/images``.

    Paths are returned relative to the Ghost installation directory and with
    forward slashes, e.g. ``/opt/ghost/content/images/2022/01/img.png``
    becomes ``content/images/2022/01/img.png``, which is the same shape as
    the paths extracted from the export file.

    Args:
        ghost_install_dir: Ghost installation directory (relative or
            absolute, with or without trailing slashes).

    Returns:
        Relative paths of all non-directory entries found recursively under
        the images directory, in the order the directory walk yields them.

    Raises:
        FileNotFoundError: If ``ghost_install_dir`` does not exist.
    """
    if not os.path.exists(ghost_install_dir):
        raise FileNotFoundError(
            f"Ghost installation directory not found: {ghost_install_dir}"
        )

    install_root = Path(ghost_install_dir)
    images_root = install_root / "content" / "images"

    # relative_to() strips the installation prefix using pathlib's own
    # normalisation, so inputs such as "./ghost", "/opt/ghost/." or
    # "/opt///ghost" work, and only the leading prefix is removed (a plain
    # str.replace would also drop the same substring further down the path).
    return [
        path.relative_to(install_root).as_posix()
        for path in images_root.rglob("*")
        if not path.is_dir()
    ]


def get_unused_images(images_in_dir: list[str], images_in_export: list[str]) -> list[str]:
    """Return the images present on disk but never referenced in the export.

    Args:
        images_in_dir: Relative paths of the images found in the Ghost folder.
        images_in_export: Relative paths of the images found in the export.

    Returns:
        A sorted list, without duplicates, of the paths that are in
        ``images_in_dir`` and not in ``images_in_export``.
    """
    # A builtin set difference yields the same sorted, de-duplicated result
    # as a NumPy setdiff without converting the string lists to arrays.
    return sorted(set(images_in_dir) - set(images_in_export))


def get_absent_images(images_in_dir: list[str], images_in_export: list[str]) -> list[str]:
    """Return the images referenced in the export but missing from disk.

    Args:
        images_in_dir: Relative paths of the images found in the Ghost folder.
        images_in_export: Relative paths of the images found in the export.

    Returns:
        A sorted list, without duplicates, of the paths that are in
        ``images_in_export`` and not in ``images_in_dir``.
    """
    # A builtin set difference yields the same sorted, de-duplicated result
    # as a NumPy setdiff without converting the string lists to arrays.
    return sorted(set(images_in_export) - set(images_in_dir))


def get_arguments_parser() -> argparse.ArgumentParser:
    """Build the command-line parser of the tool.

    Options:
        --json-export-file       (required) path of the Ghost JSON export.
        --ghost-dir              (required) Ghost installation directory.
        --print-missing-images / --no-print-missing-images
        --statistics / --no-statistics
        --print-unused-images / --no-print-unused-images

    The three boolean options are ``None`` when not given on the command
    line, ``True`` for the plain form and ``False`` for the ``--no-`` form.

    Returns:
        The configured parser.
    """
    parser = argparse.ArgumentParser(
        prog="cleanghostimages",
        description="A tool to find un-used images stored by your Ghost blog installation."
    )
    parser.add_argument("--json-export-file", type=str, required=True, help="The json export generated by Ghost")
    parser.add_argument("--ghost-dir", type=str, required=True, help="Your Ghost installation directory")
    # BooleanOptionalAction takes no value, so no ``type`` is passed: combining
    # the two is deprecated since Python 3.12 and rejected from Python 3.14.
    parser.add_argument(
        "--print-missing-images",
        action=argparse.BooleanOptionalAction,
        help="Prints the images referenced in the export but absent from the Ghost directory.",
    )
    parser.add_argument(
        "--statistics",
        action=argparse.BooleanOptionalAction,
        help="Displays information about the number of images.",
    )
    parser.add_argument(
        "--print-unused-images",
        action=argparse.BooleanOptionalAction,
        help="Prints the images stored in the Ghost directory but not referenced in the export.",
    )
    return parser


def execute():
    """Run the tool: parse the command line, compare both image sets, report.

    The images referenced in the JSON export are compared with the files
    found in the Ghost content directory. Depending on the flags given, the
    counts, the missing images and/or the unused images are printed.

    Returns:
        Always 0.
    """
    args = get_arguments_parser().parse_args()

    export_content = get_file_export_content(args.json_export_file)
    images_in_export = get_images_from_export_file(export_content)
    images_in_dir = get_sanitized_images_location_from_folder(args.ghost_dir)

    unused_files = get_unused_images(images_in_dir, images_in_export)
    absent_files = get_absent_images(images_in_dir, images_in_export)

    if args.statistics:
        summary_lines = (
            f"Found a total of {len(images_in_export)} images in the export json.",
            f"Found a total of {len(images_in_dir)} images in the ghost content directory.",
            f"{len(unused_files)} images are not used, and {len(absent_files)} are missing.",
        )
        for line in summary_lines:
            print(line)

    # Each entry: (flag from the command line, heading, paths to list).
    listings = (
        (args.print_missing_images, "The following images are missing:", absent_files),
        (args.print_unused_images, "The following images are not used:", unused_files),
    )
    for enabled, heading, images in listings:
        if not enabled:
            continue
        # The heading is printed even when there is nothing to list.
        print(heading)
        for image in images:
            print(image)

    return 0


if __name__ == "__main__":
    # execute() returns 0 on completion; pass that value on as the process
    # exit status instead of discarding it.
    raise SystemExit(execute())
