/*
 * Decompiled with CFR 0.152.
 */
package zingg;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import zingg.Matcher;
import zingg.client.ZinggClientException;
import zingg.client.ZinggOptions;
import zingg.client.util.Util;
import zingg.util.DSUtil;
import zingg.util.PipeUtil;

/**
 * {@link Matcher} specialisation that runs with {@link ZinggOptions#LINK}.
 *
 * <p>It overrides how blocked records are paired (a source-sensitive self join),
 * keeps every blocked column, and writes the positively predicted pairs out
 * after aligning and post-processing them as linked output.
 */
public class Linker
extends Matcher {
    protected static String name = "zingg.Linker";
    public static final Log LOG = LogFactory.getLog(Linker.class);

    /** Creates a linker and sets its Zingg option to {@link ZinggOptions#LINK}. */
    public Linker() {
        this.setZinggOptions(ZinggOptions.LINK);
    }

    /**
     * Builds the candidate pairs by joining the blocked data with itself on the
     * {@code z_hash} column through
     * {@code DSUtil.joinWithItselfSourceSensitive}, and caches the result.
     *
     * @param blocked the blocked records to pair up
     * @param bAll not used by this override
     * @return the cached, self-joined dataset
     * @throws Exception if the join fails
     */
    @Override
    protected Dataset<Row> getBlocks(Dataset<Row> blocked, Dataset<Row> bAll) throws Exception {
        return DSUtil.joinWithItselfSourceSensitive(blocked, "z_hash", this.args).cache();
    }

    /**
     * Returns the blocked dataset unchanged; no column selection is applied here.
     *
     * @param blocked the blocked records
     * @return the same {@code blocked} instance
     */
    @Override
    protected Dataset<Row> selectColsFromBlocked(Dataset<Row> blocked) {
        return blocked;
    }

    /**
     * Writes the linked pairs to the configured output.
     *
     * <p>Pairs are first narrowed with {@link #getDupesActualForGraph(Dataset)}.
     * When {@code args.getOutput()} is non-null, a {@code z_cluster} column is
     * created from {@code z_zid}, passed through {@code Util.addUniqueCol},
     * aligned with {@code DSUtil.alignLinked}, post-processed against
     * {@code sampleOrginal} and handed to {@code PipeUtil.write}. When no output
     * is configured nothing is written.
     *
     * @param sampleOrginal the original records used by the post-processing step
     * @param dupes the scored pairs
     * @throws ZinggClientException if any step of preparing or writing the output fails
     */
    @Override
    public void writeOutput(Dataset<Row> sampleOrginal, Dataset<Row> dupes) throws ZinggClientException {
        try {
            Dataset<Row> dupesActual = this.getDupesActualForGraph(dupes);
            if (this.args.getOutput() != null) {
                dupesActual = dupesActual.withColumn("z_cluster", dupesActual.col("z_zid"));
                dupesActual = Util.addUniqueCol(dupesActual, "z_cluster");
                Dataset<Row> linked = DSUtil.alignLinked(dupesActual, this.args);
                linked = DSUtil.postprocessLinked(linked, sampleOrginal);
                LOG.debug("uncertain output schema is " + linked.schema());
                PipeUtil.write(linked, this.args, this.ctx, this.args.getOutput());
            }
        }
        catch (Exception e) {
            // A failed write must not look like success to the caller: keep the
            // full stack trace in the log and surface the failure through the
            // exception type this method already declares.
            String message = "Unable to write linked output: " + e.getMessage();
            LOG.error(message, e);
            throw new ZinggClientException(message);
        }
    }

    /**
     * Keeps only the pairs whose {@code z_prediction} column equals {@code 1.0}.
     *
     * @param dupes the scored pairs
     * @return the rows of {@code dupes} with {@code z_prediction == 1.0}
     */
    @Override
    protected Dataset<Row> getDupesActualForGraph(Dataset<Row> dupes) {
        return dupes.filter(dupes.col("z_prediction").equalTo(1.0));
    }
}

