/*
 * Decompiled with CFR 0.152.
 */
package zingg.block;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.ml.util.SchemaUtils;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructType;
import scala.collection.JavaConversions;
import scala.collection.Seq;
import zingg.block.Canopy;
import zingg.block.Tree;
import zingg.client.FieldDefinition;
import zingg.client.util.ListMap;
import zingg.hash.HashFunction;

/**
 * Builds a tree of {@link Canopy} nodes by repeatedly picking a field/hash-function
 * pair for each node, and applies such a tree to rows to derive an integer
 * {@code z_hash} column.
 *
 * <p>The class is {@link Serializable}; the nested {@link BlockFunction} is a Spark
 * {@link MapFunction} that carries a tree and appends the hash to each row.
 */
public class Block
implements Serializable {
    public static final Log LOG = LogFactory.getLog(Block.class);

    /** Dataset supplied at construction; exposed through {@link #getDupes()}. */
    protected Dataset<Row> dupes;
    /** Candidate hash functions, keyed by the data type of the field they apply to. */
    ListMap<DataType, HashFunction> functionsMap;
    /** A node is only split further while its training size is greater than this value. */
    long maxSize;
    /** Dataset supplied at construction; not read by any method in this class. */
    Dataset<Row> training;
    /** Initialised empty by the constructor; not read by any method in this class. */
    protected ListMap<HashFunction, String> childless;

    /**
     * Creates a block holding the two datasets, with no function map and a
     * {@code maxSize} of zero.
     *
     * @param training the training dataset
     * @param dupes the duplicates dataset
     */
    protected Block(Dataset<Row> training, Dataset<Row> dupes) {
        this.training = training;
        this.dupes = dupes;
        this.childless = new ListMap<HashFunction, String>();
    }

    /**
     * Creates a fully configured block.
     *
     * @param training the training dataset
     * @param dupes the duplicates dataset
     * @param functionsMap candidate hash functions keyed by field data type
     * @param maxSize training-size threshold above which a node is split
     */
    public Block(Dataset<Row> training, Dataset<Row> dupes, ListMap<DataType, HashFunction> functionsMap, long maxSize) {
        this(training, dupes);
        this.functionsMap = functionsMap;
        this.maxSize = maxSize;
    }

    /** @return the duplicates dataset */
    public Dataset<Row> getDupes() {
        return this.dupes;
    }

    /** @param dupes the duplicates dataset to use from now on */
    public void setDupes(Dataset<Row> dupes) {
        this.dupes = dupes;
    }

    /** @return the training-size threshold used by {@link #getBlockingTree} */
    public long getMaxSize() {
        return this.maxSize;
    }

    /** @param maxSize the training-size threshold used by {@link #getBlockingTree} */
    public void setMaxSize(long maxSize) {
        this.maxSize = maxSize;
    }

    /** @return the live (not copied) map of candidate hash functions per data type */
    public Map<DataType, List<HashFunction>> getFunctionsMap() {
        return this.functionsMap;
    }

    /** @param m the map of candidate hash functions per data type */
    protected void setFunctionsMap(ListMap<DataType, HashFunction> m) {
        this.functionsMap = m;
    }

    /**
     * Creates a trial canopy: a new {@link Canopy} populated via {@code node.copyTo},
     * with its function and context replaced by the given ones.
     *
     * @param node the canopy to copy from
     * @param function the hash function to try
     * @param context the field the function is tried on
     * @return the trial canopy (whatever {@code copyTo} returned, with function and context set)
     */
    public Canopy getNodeFromCurrent(Canopy node, HashFunction function, FieldDefinition context) {
        Canopy trial = node.copyTo(new Canopy());
        trial.function = function;
        trial.context = context;
        return trial;
    }

    /**
     * Tries every field/hash-function pair that is not already used by {@code node}
     * or one of its ancestors in {@code tree}, and returns the trial with the lowest
     * elimination count among those whose {@code estimateCanopies()} is greater than one.
     *
     * <p>The search stops as soon as a candidate with an elimination count of zero
     * has been accepted, since nothing can beat it.
     *
     * @param tree the tree built so far; may be {@code null}
     * @param parent not used by this method
     * @param node the canopy to find a split for
     * @param fieldsOfInterest the fields to consider, in priority order for ties
     * @return the best trial canopy with {@code elimCount} set, or {@code null} if no
     *         candidate qualified
     * @throws Exception declared for callers; propagated from the canopy estimation calls
     */
    public Canopy getBestNode(Tree<Canopy> tree, Canopy parent, Canopy node, List<FieldDefinition> fieldsOfInterest) throws Exception {
        long least = Long.MAX_VALUE;
        Canopy best = null;
        for (FieldDefinition field : fieldsOfInterest) {
            if (least == 0L) {
                break;
            }
            List<HashFunction> functions = this.functionsMap.get(field.getDataType());
            if (functions == null) {
                // No hash functions registered for this field's data type.
                continue;
            }
            for (HashFunction function : functions) {
                if (least == 0L) {
                    // The outer loop re-checks the same condition and stops too.
                    break;
                }
                if (this.isFunctionUsed(tree, node, field.fieldName, function)) {
                    continue;
                }
                LOG.debug("Evaluating field " + field.fieldName + " and function " + function + " for " + field.dataType);
                Canopy trial = this.getNodeFromCurrent(node, function, field);
                trial.estimateElimCount();
                long elimCount = trial.getElimCount();
                if (LOG.isDebugEnabled()) {
                    LOG.debug("Elim Count is " + elimCount + " ,least is " + least + ", dupe count " + node.dupeN.size());
                }
                if (least <= elimCount) {
                    // Strict improvement only: on a tie the earlier candidate wins.
                    continue;
                }
                long childrenSize = trial.estimateCanopies();
                if (childrenSize > 1L) {
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Yes, this fn has potential " + function);
                    }
                    least = elimCount;
                    best = trial;
                    best.elimCount = least;
                } else {
                    LOG.debug("No child " + function);
                }
            }
        }
        return best;
    }

    /**
     * Recursively grows the blocking tree below {@code node}.
     *
     * <p>A node is split only when its training size exceeds {@code maxSize} and it
     * has a non-empty {@code dupeN}. In that case the best candidate from
     * {@link #getBestNode} is copied into {@code node}, each of its canopies is added
     * as a leaf under {@code node}, and the method recurses into each one. In every
     * other case {@code node.clearBeforeSaving()} is called and the node is left as is.
     *
     * @param tree the tree built so far; a new tree rooted at {@code node} is created
     *        when both {@code tree} and {@code parent} are {@code null}
     * @param parent the parent of {@code node}, or {@code null} for the root
     * @param node the canopy to expand
     * @param fieldsOfInterest the fields that may be used for splitting
     * @return the tree, which is the input {@code tree} unless one was created here
     * @throws Exception propagated from {@link #getBestNode}
     */
    public Tree<Canopy> getBlockingTree(Tree<Canopy> tree, Canopy parent, Canopy node, List<FieldDefinition> fieldsOfInterest) throws Exception {
        long size = node.getTrainingSize();
        if (LOG.isDebugEnabled()) {
            LOG.debug("Size, maxSize " + size + ", " + this.maxSize);
        }
        boolean hasDupes = node.getDupeN() != null && node.getDupeN().size() > 0;
        if (size > this.maxSize && hasDupes) {
            Canopy best = this.getBestNode(tree, parent, node, fieldsOfInterest);
            if (best != null) {
                if (LOG.isDebugEnabled()) {
                    LOG.debug(" HashFunction is " + best + " and node is " + node);
                }
                best.copyTo(node);
                // NOTE(review): a null tree with a non-null parent is not handled and
                // would fail at addLeaf below - confirm callers never do that.
                if (tree == null && parent == null) {
                    tree = new Tree<Canopy>(node);
                }
                List<Canopy> canopies = node.getCanopies();
                if (LOG.isDebugEnabled()) {
                    LOG.debug(" Children size is " + canopies.size());
                }
                for (Canopy n : canopies) {
                    // Kept inside the loop on purpose: the node is cleared before each
                    // child is attached, and not at all when there are no children.
                    node.clearBeforeSaving();
                    tree.addLeaf(node, n);
                    if (LOG.isDebugEnabled()) {
                        LOG.debug(" Finding for " + n);
                    }
                    this.getBlockingTree(tree, node, n, fieldsOfInterest);
                }
            } else {
                node.clearBeforeSaving();
            }
        } else {
            if (!hasDupes) {
                LOG.warn("Ran out of training at size " + size + " for node " + node);
            } else {
                LOG.debug("Min size reached " + size + " for node " + node);
            }
            node.clearBeforeSaving();
        }
        return tree;
    }

    /**
     * Tells whether {@code node} itself uses the given function on the given field.
     *
     * @param node the canopy to inspect
     * @param name the field name to match against {@code node.context.fieldName}
     * @param function the hash function to match
     * @return {@code true} if the node has a function equal to {@code function} and
     *         its context field name equals {@code name}
     */
    public boolean checkFunctionInNode(Canopy node, String name, HashFunction function) {
        return node.getFunction() != null && node.getFunction().equals(function) && node.context.fieldName.equals(name);
    }

    /**
     * Tells whether the function/field pair is used by {@code node} or by any node
     * reached by repeatedly following parent links in {@code tree}.
     *
     * @param tree the tree to walk; {@code null} yields {@code false}
     * @param node the starting canopy; {@code null} yields {@code false}
     * @param fieldName the field name to look for
     * @param function the hash function to look for
     * @return {@code true} if a match is found on the way up, {@code false} otherwise
     */
    public boolean isFunctionUsed(Tree<Canopy> tree, Canopy node, String fieldName, HashFunction function) {
        if (node == null || tree == null) {
            return false;
        }
        if (this.checkFunctionInNode(node, fieldName, function)) {
            return true;
        }
        Tree<Canopy> nodeTree = tree.getTree(node);
        if (nodeTree == null) {
            return false;
        }
        Tree<Canopy> parent = nodeTree.getParent();
        if (parent == null) {
            return false;
        }
        Canopy head = parent.getHead();
        if (head == null) {
            return false;
        }
        return this.isFunctionUsed(tree, head, fieldName, function);
    }

    /**
     * Returns a copy of the schema with a non-nullable integer column named
     * {@code z_hash} appended.
     *
     * @param s the schema to extend
     * @return the extended schema
     */
    public static StructType appendHashCol(StructType s) {
        StructType retSchema = SchemaUtils.appendColumn(s, "z_hash", DataTypes.IntegerType, false);
        LOG.debug("returning schema after step 1 is " + retSchema);
        return retSchema;
    }

    /**
     * Selects the canopies whose hash matches the given value. A {@code null}
     * {@code hash} matches canopies whose own hash is {@code null}; {@code null}
     * entries in {@code successors} are skipped.
     *
     * @param successors the canopies to filter
     * @param hash the hash value to match; may be {@code null}
     * @return a new list of the matching canopies, in iteration order
     */
    public static List<Canopy> getHashSuccessors(Collection<Canopy> successors, Object hash) {
        List<Canopy> retCanopy = new ArrayList<Canopy>();
        for (Canopy c : successors) {
            if (c == null) {
                continue;
            }
            Object candidate = c.getHash();
            boolean matches = candidate == null ? hash == null : candidate.equals(hash);
            if (matches) {
                retCanopy.add(c);
            }
        }
        return retCanopy;
    }

    /**
     * Walks the tree from {@code root}, appending {@code "|"} plus the hash computed
     * by each visited canopy's function to {@code result}, and descending into the
     * successors whose hash equals the computed one.
     *
     * <p>NOTE(review): unlike {@link #getHashSuccessors}, a {@code null} computed hash
     * does not match successors with a {@code null} hash here, so the walk stops at
     * that node - confirm this asymmetry is intended.
     *
     * @param tuple the row to hash
     * @param tree the blocking tree
     * @param root the canopy to start from; nothing is appended if it has no function
     * @param result the builder to append to (mutated in place)
     * @return {@code result}
     */
    public static StringBuilder applyTree(Row tuple, Tree<Canopy> tree, Canopy root, StringBuilder result) {
        if (root.function == null) {
            return result;
        }
        Object hash = root.function.apply(tuple, root.context.fieldName);
        result.append("|").append(hash);
        for (Canopy c : tree.getSuccessors(root)) {
            if (c == null || c.getHash() == null || !c.getHash().equals(hash)) {
                continue;
            }
            // The recursion appends to the same builder, so its return value is not needed.
            Block.applyTree(tuple, tree, c, result);
        }
        return result;
    }

    /**
     * Logs, at info level, which of {@code dupeN}, {@code dupeRemaining} and
     * {@code training} are still set on {@code root} (with their sizes), then does
     * the same for every successor recursively.
     *
     * @param tree the tree to walk
     * @param root the canopy to start from
     */
    public static void printTree(Tree<Canopy> tree, Canopy root) {
        if (root.dupeN != null) {
            LOG.info(" dupeN not null " + root);
            LOG.info(root.dupeN.size());
        }
        if (root.dupeRemaining != null) {
            LOG.info(" dupeRemaining not null " + root);
            LOG.info(root.dupeRemaining.size());
        }
        if (root.training != null) {
            LOG.info(" training not null " + root);
            LOG.info(root.training.size());
        }
        for (Canopy c : tree.getSuccessors(root)) {
            Block.printTree(tree, c);
        }
    }

    /**
     * Spark map function that appends one integer column to each row: the
     * {@code String.hashCode()} of the path string produced by
     * {@link Block#applyTree} for that row.
     */
    public static class BlockFunction
    implements MapFunction<Row, Row> {
        Tree<Canopy> tree;

        /** @param tree the blocking tree to apply to every row */
        public BlockFunction(Tree<Canopy> tree) {
            this.tree = tree;
        }

        /**
         * Returns a new row holding all columns of {@code r} followed by the hash.
         *
         * @param r the input row
         * @return the input columns plus one trailing {@code Integer} column
         */
        @Override
        public Row call(Row r) {
            StringBuilder bf = Block.applyTree(r, this.tree, this.tree.getHead(), new StringBuilder());
            Seq<Object> s = r.toSeq();
            List<Object> seqList = JavaConversions.seqAsJavaList(s);
            // Columns are arbitrary objects, so the list must be List<Object>.
            List<Object> returnList = new ArrayList<Object>(seqList.size() + 1);
            returnList.addAll(seqList);
            returnList.add(bf.toString().hashCode());
            Row result = RowFactory.create(returnList.toArray());
            if (LOG.isDebugEnabled()) {
                for (Object e : returnList) {
                    LOG.debug("return row col is " + e);
                }
                // Log the row that is actually returned, not a one-column wrapper of the list.
                LOG.debug("returning row " + result);
            }
            return result;
        }
    }
}

