# AUTOGENERATED! DO NOT EDIT! File to edit: ../../notebooks/02_preprocess_data.ipynb.

# %% auto 0
__all__ = ['process_data']

# %% ../../notebooks/02_preprocess_data.ipynb 8
import os
import requests
import subprocess
import logging
import hydra, omegaconf

from .download import download_and_move_datasets
from .utils import *

# %% ../../notebooks/02_preprocess_data.ipynb 35
@hydra.main(config_path="../../configs", config_name="config", version_base="1.2")
def process_data(cfg):
    """Run the data preprocessing pipeline end to end.

    The steps run strictly in this order:

    1. ``download_configs()``
    2. ``download_and_move_datasets()`` followed by ``move_to_processed()``
    3. List every path under ``data/2_processed/`` whose name does not end
       in ``jpg`` or ``png`` (the suffix check is case-sensitive).
    4. Report file-type counts, run ``clean_images(cfg)``, then report the
       file-type counts again followed by the per-class counts.
    5. ``sampling(cfg)``

    Per the original description of this function, the cleaning step removes
    unsupported and corrupted images and the sampling step splits the data
    into train, val and test; both helpers are defined outside this module.

    Parameters
    ----------
    cfg : omegaconf.DictConfig
        Hydra configuration, forwarded unchanged to ``clean_images`` and
        ``sampling``.
    """
    processed_dir = "data/2_processed"

    # Stage 1: fetch configs and raw data, then populate the processed folder.
    download_configs()

    download_and_move_datasets()
    move_to_processed()

    # Stage 2: surface anything that is not a jpg/png before cleaning.
    print("\n\nFiles other than jpg and png.\n")
    all_paths, _ = find_filepaths('data/2_processed/')
    unexpected_paths = [
        path for path in all_paths if not path.endswith(('jpg', 'png'))
    ]
    print('\n'.join(unexpected_paths))

    # Stage 3: clean, reporting file-type counts on either side of the step.
    print("\nFile types before cleaning:")
    get_value_counts(processed_dir)

    clean_images(cfg)

    print("\nFile types after cleaning:")
    get_value_counts(processed_dir)

    print("\nCounts of classes:\n")
    get_value_counts(processed_dir, column="class")

    # Stage 4: hand over to the sampling step.
    sampling(cfg)

