#!/usr/bin/env python

from __future__ import print_function

import glob
import sys
import os
import signal
import argparse

import matplotlib
matplotlib.use("AGG")
import matplotlib.pyplot as plt
import numpy as np
# sub-namespaces have been deprecated, but we like to use them anyway
from scipy import ndimage as interpolation

import ocrolib
from ocrolib import morph

# Exit with status 1 on Ctrl-C instead of raising KeyboardInterrupt.
signal.signal(signal.SIGINT,lambda *args:sys.exit(1))

parser = argparse.ArgumentParser(description = """
Generate HTML for debugging a book directory.

Input: a directory in standard OCRopus book format
Output: index.html files and thumbnails showing recognition results
""")
# nargs="?" makes the positional optional so its default is actually used;
# argparse ignores `default` on a required positional argument.
parser.add_argument("book",nargs="?",default="book",help="book directory, default: %(default)s")
parser.add_argument("-N","--npages",type=int,default=100000,help="max number of pages, default: %(default)s")
args = parser.parse_args()


def write_cseg(stream,cseg_file):
    """Write an HTML table of character-segment thumbnails to `stream`.

    Reads the line segmentation stored in `cseg_file`, saves each extracted
    segment image as ``.__<cseg_file>_NNN.png`` (path relative to the current
    working directory) and writes one table cell per segment referencing it.

    Args:
        stream: writable text stream receiving the HTML.
        cseg_file: file name of the character segmentation image.
    """
    # NOTE(review): `linerec` is not imported in the visible part of this
    # file; confirm it is in scope, otherwise the call below raises NameError.
    cseg = ocrolib.read_line_segmentation(cseg_file)
    csegs = linerec.extract_csegs(cseg)
    stream.write("<table><tr>")
    for i,c in enumerate(csegs):
        out = ".__"+cseg_file+"_%03d.png"%i
        # Save the image subtracted from its own maximum (inverted intensities).
        plt.imsave(out,np.amax(c.img)-c.img,cmap=plt.cm.gray)
        # Display at half height, but never smaller than 2 pixels; `//` keeps
        # the value an integer on Python 3 as well.
        stream.write("<td><img src=%s height=%d style='border: 1px #ccccff solid;'></td>"%(out,max(2,c.img.shape[0]//2)))
    stream.write("</tr></table>")
    stream.write("\n")


def genpage(d):
    """Generate ``index.html`` inside page directory `d`.

    Temporarily changes the working directory to `d`, and for every file
    matching ``??????.bin.png`` writes: the contents of its "txt" and raw
    "txt" variants (when those files exist), the image itself at half width,
    navigation links, the character-segment table (when a "cseg" variant
    exists) and a rendering of the "rseg" variant (when it exists). The
    original working directory is restored even if an error occurs.

    Args:
        d: name of the page directory, relative to the current directory.
    """
    print("===", d)
    here = os.getcwd()
    try:
        os.chdir(d)
        with open("index.html","w") as stream:
            stream.write("<h1>%s</h1>\n"%d)
            images = sorted(glob.glob("??????.bin.png"))
            for img in images:
                # presumably fvariant maps the image name to a companion file
                # name of the given kind -- confirm against ocrolib.
                txt = ocrolib.fvariant(img,"txt","")
                if os.path.exists(txt):
                    with open(txt) as tf: text = tf.read()
                    stream.write("<font color='#000066'><b>%s</b></font><br>\n"%text)
                rtxt = ocrolib.fvariant(img,"txt","raw")
                if os.path.exists(rtxt):
                    with open(rtxt) as tf: rtext = tf.read()
                    stream.write("<font color='gray'><b>%s</b></font><br>\n"%rtext)
                stream.write("<p />\n")
                # The image is loaded only to compute its display width.
                image = ocrolib.read_image_gray(img)
                stream.write("<img width='%d' src='%s'>\n"%(max(10,image.shape[1]/2),img))
                stream.write("<br />\n")
                stream.write("<font size=-2>")
                stream.write("<a href=%s>%s</a> / "%("..",args.book))
                stream.write("<a href=%s>%s</a> / "%("../"+d,d))
                stream.write("<a href=%s>%s</a>"%(img,img))
                stream.write("</font>")
                stream.write("<p />\n")
                cseg = ocrolib.fvariant(img,"cseg")
                if os.path.exists(cseg):
                    write_cseg(stream,cseg)
                rseg_file = ocrolib.fvariant(img,"rseg")
                if os.path.exists(rseg_file):
                    rseg = ocrolib.read_line_segmentation(rseg_file)
                    fig = plt.figure(figsize=(20,1),dpi=150)
                    try:
                        # assumes showlabels draws into the current figure
                        morph.showlabels(rseg)
                        figfile = ".__"+rseg_file+"_.png"
                        plt.savefig(figfile)
                    finally:
                        # pyplot keeps every figure alive until it is closed;
                        # without this one figure leaks per line image.
                        plt.close(fig)
                    stream.write("<img height='50' src='%s'><br>\n"%figfile)
                stream.write("<hr>\n")
    finally:
        os.chdir(here)

# Build the top-level index.html of the book: one table per page directory
# with a thumbnail linking to the page's own index and a short text preview.
os.chdir(args.book)
with open("index.html","w") as stream:
    pagedirs = sorted(glob.glob("????"))
    for d in pagedirs[:args.npages]:
        # Per-page HTML is generated first, then the summary entry.
        genpage(d)
        binfile = d+".bin.png"
        if not os.path.exists(binfile):
            # No binarized page image: use a blank 300x300 placeholder.
            image = np.zeros((300,300))
        else:
            image = ocrolib.read_image_gray(binfile)
        out = ".__"+d+".png"
        # Thumbnail at 1/8 scale in both dimensions.
        image = interpolation.zoom(image,(0.125,0.125),order=1)
        plt.imsave(out,image,cmap=plt.cm.gray)
        stream.write(
            "<table border=1><tr>\n"
            "<td>"
            "<a href='%s/index.html'><img src='%s'></a>"%(d,out)
            + "<br>%s<br>"%d
            + "</td>\n"
            + "<td>"
        )
        # Preview: first 100 characters of up to 10 text files that hold at
        # least 20 characters.
        count = 0
        for fname in sorted(glob.glob(d+"/??????.txt")):
            with open(fname) as tf:
                s = tf.read()
            if len(s)>=20:
                stream.write("%s<br>\n"%s[:100])
                count += 1
                if count>=10:
                    break
        stream.write("</td>\n" "</tr></table>\n")
