#!/usr/bin/env python

import sys, os
from bs4 import BeautifulSoup
import json
import urllib
from optparse import OptionParser


__author__ = "Prayush Kumar <prayush@astro.cornell.edu>"

# Absolute path of this script, derived from the first argv entry.
PROGRAM_NAME = os.path.abspath(sys.argv[0])

### option parsing ###

# Command-line interface: each option takes a single value.
parser = OptionParser(
    usage="%prog [OPTIONS]",
    description="Converts an HTML-converted Jupyter notebook back to a Jupyter notebook.",
)
for _short_flag, _long_flag, _help_text in (
    ("-i", "--html", 'HTML URL'),
    ("-o", "--nb", 'Output NB Name'),
):
    parser.add_option(_short_flag, _long_flag, metavar='file', help=_help_text)

# parse_args() returns the parsed option values together with any leftover
# positional arguments.
(options, argv_frame_files) = parser.parse_args()

##

def read_html(location):
    """Return the raw HTML stored at *location*.

    *location* may be the path of an existing local file or a URL.  Local
    files are read directly; anything else is fetched with ``urlopen``.
    The underlying file or connection is always closed before returning.
    """
    if os.path.isfile(location):
        with open(location, "rb") as handle:
            return handle.read()

    # urlopen lives in urllib.request on Python 3 and in urllib on Python 2.
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib import urlopen

    response = urlopen(location)
    try:
        return response.read()
    finally:
        response.close()


def cell_type_for(css_classes):
    """Map the CSS classes of a ``<div>`` to a notebook cell type.

    Returns ``'code'`` for an ``input_area`` div, ``'markdown'`` for a
    ``text_cell_render`` div and ``None`` for every other div (including a
    div without a class attribute).  A div carrying both classes is treated
    as a code cell so that it yields exactly one cell.
    """
    css_classes = css_classes or ()
    if "input_area" in css_classes:
        return 'code'
    if "text_cell_render" in css_classes:
        return 'markdown'
    return None


def make_cell(cell_type, source):
    """Build one notebook cell dictionary of *cell_type* holding *source*.

    Code cells additionally get an empty ``outputs`` list and an
    ``execution_count`` of ``None``; markdown cells do not carry those keys.
    """
    cell = {'cell_type': cell_type, 'metadata': {}, 'source': [source]}
    if cell_type == 'code':
        cell['outputs'] = []
        cell['execution_count'] = None
    return cell


def notebook_from_soup(soup):
    """Collect the cells found in a parsed HTML document into a notebook dict.

    Every ``<div>`` of *soup* is visited in document order.  Code cells take
    the plain text of the div, markdown cells keep the div's inner markup.
    """
    notebook = {'nbformat': 4, 'nbformat_minor': 1, 'cells': [], 'metadata': {}}
    for div in soup.find_all("div"):
        cell_type = cell_type_for(div.get("class"))
        if cell_type == 'code':
            notebook['cells'].append(make_cell(cell_type, div.get_text()))
        elif cell_type == 'markdown':
            notebook['cells'].append(make_cell(cell_type, div.decode_contents()))
    return notebook


def main():
    """Convert the HTML given by --html into the notebook named by --nb.

    The output name falls back to ``notebook.ipynb`` when --nb is omitted.
    Exits through ``parser.error`` when no input was supplied.
    """
    if not options.html:
        parser.error("an input HTML URL or file is required (-i/--html)")

    soup = BeautifulSoup(read_html(options.html), 'lxml')
    # see some of the html
    print(soup.div)

    notebook = notebook_from_soup(soup)
    output_path = options.nb or 'notebook.ipynb'
    with open(output_path, 'w') as handle:
        handle.write(json.dumps(notebook))


if __name__ == "__main__":
    main()
