# AUTOGENERATED! DO NOT EDIT! File to edit: nbs/utils.ipynb (unless otherwise specified).

__all__ = ['PathOrStr', 'ListOfPaths', 'ListOfStrings', 'ls', 'get_files', 'get_image_files', 'get_video_files',
           'image_extensions', 'video_extensions', 'flatten', 'mkdir', 'uniqueify', 'clean_filename']

# Cell
from pathlib import Path
from typing import Union,Dict,List,Tuple,Any,Optional,Collection

import shutil
import re
import json
import os
import mimetypes

# Cell
# Type aliases used in the annotations of the helpers defined in this module
PathOrStr     = Union[str,Path]    # a filesystem location given either as `str` or as `Path`
ListOfPaths   = Collection[Path]   # a sized, iterable container of `Path` objects
ListOfStrings = Collection[str]    # a sized, iterable container of strings

# Cell
def ls(path:Path, list_hidden=False):
    "Return the entries of directory `path` as a list; names starting with '.' are dropped unless `list_hidden`"
    entries = list(path.iterdir())
    if list_hidden: return entries
    return [entry for entry in entries if not entry.name.startswith('.')]

# Attach the helpers to `Path` so they can be called as `p.ls()` / `p.listdirs()`.
# `listdirs` goes through `ls` with its default, so hidden directories are left out.
Path.ls       = lambda x,hidden=False: ls(x, list_hidden=hidden)
Path.listdirs = lambda x: sorted(child for child in x.ls() if child.is_dir())

# Cell
def get_files(path:PathOrStr, extensions:Collection[str]=None, recurse:bool=False, exclude:Optional[Collection[str]]=None,
              include:Optional[Collection[str]]=None, presort:bool=False, followlinks:bool=False) -> ListOfPaths:
    """
    Return list of files in `path` that have a suffix in `extensions`; optionally `recurse`.
    Use `include` and `exclude` for including/excluding folder names, `presort` to sort.

    `extensions` may be a single string or a collection of dot-prefixed suffixes
    (e.g. `'.png'`); matching is case-insensitive and `None` accepts every file.
    Files whose name starts with '.' are always skipped.

    `include` / `exclude` are only consulted when `recurse` is true, and only for the
    directories directly inside `path`; `include` takes precedence when both are given.
    Everywhere else directories starting with '.' are not descended into.
    NOTE: when `include` or `exclude` is passed, the top-level directories are filtered
    by that list alone, so a hidden top-level directory is still walked unless the
    list rules it out.

    `followlinks` is forwarded to `os.walk`, so it only matters when `recurse` is true.
    The result is a list of `Path` objects, each built by joining the walked directory
    with the file name.
    """
    if recurse:
        res = []
        for i,(p,d,f) in enumerate(os.walk(path, followlinks=followlinks)):
            # Assigning to d[:] edits the list os.walk holds, which prunes the traversal.
            # i == 0 is `path` itself because os.walk is top-down by default.
            if include is not None and i==0:   d[:] = [o for o in d if o in include]
            elif exclude is not None and i==0: d[:] = [o for o in d if o not in exclude]
            else:                              d[:] = [o for o in d if not o.startswith('.')]
            res += _get_files(path, p, f, extensions)
    else:
        # The context manager closes the scandir iterator even if is_file() raises
        with os.scandir(path) as entries:
            f = [o.name for o in entries if o.is_file()]
        res = _get_files(path, path, f, extensions)
    if presort: res = sorted(res, key=_path_to_same_str)
    return res

def _path_to_same_str(p_fn):
    "path -> str, but same on nt+posix, for alpha-sort only"
    return str(p_fn).replace('\\','.').replace('/','.')

def _get_files(parent, p, f, extensions):
    """
    Join directory `p` with every name in `f` that is not hidden and matches `extensions`.
    `extensions` is a string, a collection of dot-prefixed suffixes, or `None` (no filtering).
    `parent` is accepted for call compatibility and is not used.
    """
    p = Path(p)
    visible = [o for o in f if not o.startswith('.')]
    if extensions is None: return [p/o for o in visible]
    if isinstance(extensions,str): extensions = [extensions]
    low_extensions = {e.lower() for e in extensions}
    # splitext yields '' for a name without a dot, so a file literally called
    # 'png' is no longer mistaken for one with the '.png' suffix
    return [p/o for o in visible if os.path.splitext(o)[1].lower() in low_extensions]

# Cell
# Suffixes that the stdlib `mimetypes` table maps to an image/* or video/* type;
# '.mkv' is always part of the video set.
image_extensions = {ext for ext,mime in mimetypes.types_map.items() if mime.startswith('image/')}
video_extensions = {ext for ext,mime in mimetypes.types_map.items() if mime.startswith('video/')} | {'.mkv'}

def get_image_files(path:PathOrStr, include:Optional[ListOfStrings]=None, exclude:Optional[ListOfStrings]=None, recurse:bool=True):
    "Call `get_files` on `path` restricted to `image_extensions`; recurses by default, `include`/`exclude` are passed through"
    return get_files(path, extensions=image_extensions, recurse=recurse, exclude=exclude, include=include)

def get_video_files(path:PathOrStr, include:Optional[ListOfStrings]=None, exclude:Optional[ListOfStrings]=None, recurse:bool=True):
    "Call `get_files` on `path` restricted to `video_extensions`; recurses by default, `include`/`exclude` are passed through"
    return get_files(path, extensions=video_extensions, recurse=recurse, exclude=exclude, include=include)

# Cell
def flatten(x:list):
    """
    Flatten `x` by one level: items that are a `list` or `tuple` are spliced into the
    result, every other item (strings included) is kept as is. Nested containers
    inside those items are not expanded further. Always returns a new list.
    """
    flattened_list = []
    for item in x:
        if isinstance(item, (tuple,list)):
            flattened_list.extend(item)
        else:
            flattened_list.append(item)
    return flattened_list

# Cell
def mkdir(x):
    "Create directory `x` via its `.mkdir` method; an already existing directory is not an error. Parents are not created"
    return x.mkdir(exist_ok=True)

def uniqueify(x):
    "Return the distinct items of `x` as a sorted list"
    return sorted(set(x))

# Cell
def clean_filename(f, truncate=True, prefix=None, to_lower=True):
    """
    Return a cleaned-up version of the stem of `f`; the directory part and the final
    suffix are dropped and not added back.

    Every character outside `A-Z`, `a-z`, `0-9` and `_` is replaced with `_`. That covers
    whitespace, punctuation, `+`, superscripts, fractions and accented or non-Latin
    letters. Leading underscores are then removed.

    truncate: keep at most the first 200 characters of the cleaned stem (before `prefix` is added).
    prefix:   if truthy, the result becomes `f"{prefix}_{cleaned}"`.
    to_lower: lowercase the final string, prefix included.
    """
    stem = Path(f).stem
    # ASCII allow-list written as a raw string, so the pattern holds no invalid escape
    # sequences. After the substitution only '_' can lead, hence the plain lstrip.
    fname_new = re.sub(r'[^A-Za-z0-9_]', '_', stem).lstrip('_')

    if truncate: fname_new = fname_new[:200]
    if prefix:   fname_new = f"{prefix}_{fname_new}"
    if to_lower: fname_new = fname_new.lower()

    return fname_new