/*
 * Decompiled with CFR 0.152.
 */
package zingg.block;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.spark.sql.Row;
import zingg.client.FieldDefinition;
import zingg.client.util.ListMap;
import zingg.hash.HashFunction;

/**
 * A node holding a set of training rows together with the hash function and
 * field definition used to split those rows into child canopies.
 *
 * <p>A canopy also carries a second row list ({@code dupeN}); calling
 * {@link #estimateElimCount()} counts how many of those rows the configured
 * hash function would separate.
 *
 * <p>NOTE(review): the class is {@link Serializable} and declares no explicit
 * {@code serialVersionUID}, so the default identifier is derived from the
 * non-private members. Fields and method signatures are intentionally left
 * with their original visibility so that identifier does not change.
 */
public class Canopy implements Serializable {
    public static final Log LOG = LogFactory.getLog(Canopy.class);

    /** Hash function applied to rows; may be unset on canopies built without one. */
    HashFunction function;
    /** Field definition whose {@code fieldName} is passed to the hash function. */
    FieldDefinition context;
    /** Rows examined by {@link #estimateElimCount()}. */
    List<Row> dupeN;
    /** Number of {@code dupeN} rows not retained by the last {@link #estimateElimCount()} call. */
    long elimCount;
    /** Hash value assigned to this canopy by its parent in {@link #getCanopies()}. */
    Object hash;
    /** Rows that are partitioned by {@link #getCanopies()}. */
    List<Row> training;
    /** Rows of {@code dupeN} retained by the last {@link #estimateElimCount()} call. */
    List<Row> dupeRemaining;

    /** Creates an empty canopy; all fields keep their default values. */
    public Canopy() {
    }

    /**
     * Creates a canopy over the given row lists without a hash function or context.
     *
     * @param training rows to be partitioned
     * @param dupeN    rows used for elimination estimation
     */
    public Canopy(List<Row> training, List<Row> dupeN) {
        this.training = training;
        this.dupeN = dupeN;
    }

    /**
     * Creates a fully configured canopy.
     *
     * @param training rows to be partitioned
     * @param dupeN    rows used for elimination estimation
     * @param function hash function to apply to rows
     * @param context  field definition supplying the field name to hash
     */
    public Canopy(List<Row> training, List<Row> dupeN, HashFunction function, FieldDefinition context) {
        this(training, dupeN);
        this.function = function;
        this.context = context;
    }

    /** @return the hash function, possibly {@code null} */
    public HashFunction getFunction() {
        return this.function;
    }

    /** @param function the hash function to use */
    public void setFunction(HashFunction function) {
        this.function = function;
    }

    /** @return the field definition, possibly {@code null} */
    public FieldDefinition getContext() {
        return this.context;
    }

    /** @param context the field definition to use */
    public void setContext(FieldDefinition context) {
        this.context = context;
    }

    /** @return the rows used for elimination estimation, possibly {@code null} */
    public List<Row> getDupeN() {
        return this.dupeN;
    }

    /** @param dupeN the rows used for elimination estimation */
    public void setDupeN(List<Row> dupeN) {
        this.dupeN = dupeN;
    }

    /** @return the elimination count last stored on this canopy */
    public long getElimCount() {
        return this.elimCount;
    }

    /** @param elimCount the elimination count to store */
    public void setElimCount(long elimCount) {
        this.elimCount = elimCount;
    }

    /** @return the hash value attached to this canopy, possibly {@code null} */
    public Object getHash() {
        return this.hash;
    }

    /** @param hash the hash value to attach to this canopy */
    public void setHash(Object hash) {
        this.hash = hash;
    }

    /** @return the training rows, possibly {@code null} */
    public List<Row> getTraining() {
        return this.training;
    }

    /** @param training the training rows */
    public void setTraining(List<Row> training) {
        this.training = training;
    }

    /**
     * Groups the training rows by their hash and wraps each group in a new canopy.
     *
     * <p>Every child receives this canopy's {@code dupeRemaining} list as its
     * {@code dupeN}; that list is {@code null} unless
     * {@link #estimateElimCount()} has been called first. The children have no
     * function or context set.
     *
     * @return one canopy per distinct hash value found in the training rows
     */
    public List<Canopy> getCanopies() {
        ListMap<Object, Row> rowsByHash = new ListMap<>();
        for (Row row : this.training) {
            rowsByHash.add(hashOf(row), row);
        }

        List<Canopy> children = new ArrayList<>();
        for (Object key : rowsByHash.keySet()) {
            // Typed cast instead of a raw one; every value was added above as a Row.
            @SuppressWarnings("unchecked")
            List<Row> bucket = (List<Row>) rowsByHash.get(key);
            Canopy child = new Canopy(bucket, this.dupeRemaining);
            child.hash = key;
            children.add(child);
        }
        return children;
    }

    /**
     * Counts the distinct hash values produced by the training rows without
     * building the child canopies.
     *
     * @return the number of distinct hashes over the training rows
     */
    public long estimateCanopies() {
        HashSet<Object> distinct = new HashSet<>();
        for (Row row : this.training) {
            distinct.add(hashOf(row));
        }
        long uniqueHashes = distinct.size();
        LOG.debug("estimateCanopies- unique hash count is " + uniqueHashes);
        return uniqueHashes;
    }

    /**
     * @return the number of training rows, or 0 when the training list is
     *         {@code null} (for example after {@link #clearBeforeSaving()})
     */
    public long getTrainingSize() {
        return this.training == null ? 0L : this.training.size();
    }

    /**
     * Renders the function, the context's field name (or {@code null} when no
     * context is set), the elimination count, the hash and, when training rows
     * are present, their count.
     */
    @Override
    public String toString() {
        Object contextLabel = this.context != null ? this.context.fieldName : null;
        StringBuilder sb = new StringBuilder("Canopy [function=")
                .append(this.function)
                .append(", context=").append(contextLabel)
                .append(", elimCount=").append(this.elimCount)
                .append(", hash=").append(this.hash);
        if (this.training != null) {
            sb.append(", training=").append(this.training.size());
        }
        return sb.append("]").toString();
    }

    /**
     * Recomputes {@code dupeRemaining} and {@code elimCount} from {@code dupeN}.
     *
     * <p>Each row is hashed twice: once on the context's field name and once on
     * that name prefixed with {@code "z_"}. A row is retained when both hashes
     * are {@code null} or when both are non-null and equal; every other row
     * counts as eliminated.
     */
    public void estimateElimCount() {
        LOG.debug("Applying " + this.function.getName());
        // Evaluated once so the per-row messages (which stringify whole rows)
        // are only built when debug output is actually enabled.
        final boolean debug = LOG.isDebugEnabled();
        List<Row> kept = new ArrayList<>();
        this.dupeRemaining = kept;
        for (Row row : this.dupeN) {
            Object hash1 = hashOf(row);
            Object hash2 = this.function.apply(row, "z_" + this.context.fieldName);
            if (debug) {
                LOG.debug("hash1 " + hash1);
                LOG.debug("hash2 " + hash2);
            }
            if (hash1 == null && hash2 == null) {
                kept.add(row);
            } else if (hash1 != null && hash2 != null && hash1.equals(hash2)) {
                kept.add(row);
                if (debug) {
                    LOG.debug("NOT eliminatin ");
                }
            } else if (debug) {
                LOG.debug("eliminatin " + row);
            }
        }
        this.elimCount = this.dupeN.size() - kept.size();
    }

    /**
     * Copies every field of this canopy into the given one. The row lists are
     * shared by reference, not duplicated.
     *
     * @param copyTo the canopy to overwrite
     * @return {@code copyTo}, for chaining
     */
    public Canopy copyTo(Canopy copyTo) {
        copyTo.function = this.function;
        copyTo.context = this.context;
        copyTo.dupeN = this.dupeN;
        copyTo.elimCount = this.elimCount;
        copyTo.hash = this.hash;
        copyTo.training = this.training;
        copyTo.dupeRemaining = this.dupeRemaining;
        return copyTo;
    }

    /** Drops the references to all row lists, leaving the other fields intact. */
    public void clearBeforeSaving() {
        this.training = null;
        this.dupeN = null;
        this.dupeRemaining = null;
    }

    /** Applies the configured hash function to the row on the context's field name. */
    private Object hashOf(Row row) {
        return this.function.apply(row, this.context.fieldName);
    }
}

