import json
import argparse
import os

def create_notebook_structure():
    """Return a fresh, empty nbformat 4.4 notebook skeleton.

    The result is a plain dict with an empty ``cells`` list and metadata
    describing a Python 3 kernel. A new object is built on every call, so
    callers may mutate it freely.
    """
    kernelspec = dict(
        display_name="Python 3",
        language="python",
        name="python3",
    )
    language_info = dict(
        codemirror_mode=dict(name="ipython", version=3),
        file_extension=".py",
        mimetype="text/x-python",
        name="python",
        nbconvert_exporter="python",
        pygments_lexer="ipython3",
        version="3.8.5",
    )
    metadata = dict(kernelspec=kernelspec, language_info=language_info)

    # Key order is kept identical to the serialized layout the script emits.
    notebook = dict(cells=[], metadata=metadata, nbformat=4, nbformat_minor=4)
    return notebook

def create_code_cell(source):
    """Build an unexecuted notebook code cell from a source string.

    Args:
        source: Cell text; it is split into lines with line endings kept.

    Returns:
        A dict with ``cell_type`` "code", no execution count, empty metadata
        and outputs, and ``source`` as a list of lines.
    """
    cell = {"cell_type": "code"}
    cell["execution_count"] = None
    cell["metadata"] = {}
    cell["outputs"] = []
    cell["source"] = source.splitlines(keepends=True)
    return cell

def create_markdown_cell(source):
    """Build a notebook markdown cell from a source string.

    Args:
        source: Markdown text; it is split into lines with line endings kept.

    Returns:
        A dict with ``cell_type`` "markdown", empty metadata, and ``source``
        as a list of lines.
    """
    cell = {"cell_type": "markdown"}
    cell["metadata"] = {}
    cell["source"] = source.splitlines(keepends=True)
    return cell

# Source text of the notebook cell that downloads a file from Google Drive
# with gdown. It is rendered with ``str.format(outfile=..., file_id=...)``:
#   * ``{outfile!r}`` / ``{file_id!r}`` are substituted as Python string
#     literals, so values containing quotes or backslashes still produce
#     valid code.
#   * Braces that must survive into the generated code (the f-string inside
#     ``download_from_drive_id``) are doubled, otherwise ``str.format`` would
#     replace them and the generated function would ignore its parameter.
GDOWN_TEMPLATE = """import pandas as pd
import missingno as msno
import os, io, shutil, re
import gdown
import csv

OUTFILE = {outfile!r}
FILE_ID = {file_id!r}

def is_html_file(path, n=512):
    if not os.path.exists(path):
        return False
    with open(path, 'r', encoding='utf-8', errors='ignore') as f:
        start = f.read(n)
    return bool(re.search(r'<!DOCTYPE html|<html', start, re.IGNORECASE))

def download_from_drive_id(file_id, out=OUTFILE):
    url = f'https://drive.google.com/uc?export=download&id={{file_id}}'
    print('Downloading from', url)
    gdown.download(url, out, quiet=False)
    return out

# Ensure file present and valid
if os.path.exists(OUTFILE):
    if is_html_file(OUTFILE):
        print(OUTFILE, 'looks like HTML — removing and re-downloading')
        os.remove(OUTFILE)
        download_from_drive_id(FILE_ID)
    else:
        print(OUTFILE, 'exists and looks like CSV')
else:
    download_from_drive_id(FILE_ID)

# Quick peek into the downloaded file (first 1000 chars)
with open(OUTFILE, 'r', encoding='utf-8', errors='ignore') as f:
    preview = f.read(1000)
print(preview[:1000])
"""

def main(argv=None):
    """Parse CLI arguments and write a data-loading Jupyter notebook.

    The notebook optionally starts with a Kaggle setup section (install,
    credentials, competition download) and always ends with a Google Drive
    download cell rendered from ``GDOWN_TEMPLATE``.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default)
            argparse reads the process command line, matching the previous
            zero-argument behavior.
    """
    parser = argparse.ArgumentParser(description="Generate a Jupyter Notebook with data loading scripts.")
    parser.add_argument("filename", help="Output notebook filename (e.g., my_notebook.ipynb)")
    parser.add_argument("--drive-id", default="PLACEHOLDER_FILE_ID", help="Google Drive File ID")
    parser.add_argument("--outfile", default="data.csv", help="Output filename for the downloaded file")

    parser.add_argument("-k", "--kaggle", action="store_true", help="Enable Kaggle setup")
    parser.add_argument("--kaggle-user", help="Kaggle Username")
    parser.add_argument("--kaggle-key", help="Kaggle API Key")
    parser.add_argument("--competition", default="catechol-benchmark-hackathon", help="Kaggle competition name")

    args = parser.parse_args(argv)

    nb = create_notebook_structure()
    cells = nb["cells"]

    # Kaggle Section
    if args.kaggle:
        cells.append(create_markdown_cell("# Kaggle Setup"))
        cells.append(create_code_cell("!pip install kaggle"))

        if args.kaggle_user and args.kaggle_key:
            # NOTE: the API key ends up in plain text inside the generated
            # notebook; avoid sharing or committing that file.
            # ``!r`` emits a proper Python string literal, so credentials
            # containing quotes or backslashes still yield a valid cell.
            creds_script = (
                "import os\n"
                f"os.environ['KAGGLE_USERNAME'] = {args.kaggle_user!r}\n"
                f"os.environ['KAGGLE_KEY'] = {args.kaggle_key!r}\n"
            )
        else:
            # Both values are required; otherwise emit placeholders to fill in.
            creds_script = (
                "import os\n"
                "# TODO: Enter your Kaggle credentials here\n"
                "os.environ['KAGGLE_USERNAME'] = \"YOUR_USERNAME\"\n"
                "os.environ['KAGGLE_KEY'] = \"YOUR_KEY\"\n"
            )
        cells.append(create_code_cell(creds_script))

        cells.append(create_code_cell(f"!kaggle competitions download -c {args.competition}"))

    # Drive Section
    cells.append(create_markdown_cell("# Google Drive Download"))

    # Fill template
    script = GDOWN_TEMPLATE.format(outfile=args.outfile, file_id=args.drive_id)
    cells.append(create_code_cell(script))

    # Write file, making sure it carries the notebook extension
    filename = args.filename
    if not filename.endswith('.ipynb'):
        filename += '.ipynb'

    with open(filename, 'w', encoding='utf-8') as f:
        json.dump(nb, f, indent=2)

    print(f"Notebook created: {filename}")

# Run the generator only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
