/*
 * Decompiled with CFR 0.152.
 */
package zingg.util;

import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.functions;
import scala.collection.Iterator;
import scala.collection.JavaConverters;
import zingg.client.Arguments;
import zingg.client.FieldDefinition;
import zingg.client.MatchType;
import zingg.client.ZinggClientException;
import zingg.client.pipe.Pipe;
import zingg.util.PipeUtil;

/**
 * Static helpers for building, joining and reshaping the Spark datasets used
 * by Zingg: column prefixing, self-joins into record pairs, re-aligning pair
 * rows into single-record rows, field-definition based projection and
 * de-duplication, and loading of training data.
 */
public class DSUtil {
    public static final Log LOG = LogFactory.getLog(DSUtil.class);

    /** Prefix prepended to column names by {@link #getPrefixedColumns(String[])}. */
    private static final String COL_PREFIX = "z_";
    private static final String ID_COL = "z_zid";
    private static final String SOURCE_COL = "z_source";
    private static final String CLUSTER_COL = "z_cluster";
    private static final String SCORE_COL = "z_score";
    private static final String PREDICTION_COL = "z_prediction";
    private static final String MATCH_FLAG_COL = "z_isMatch";

    /**
     * Returns a new array holding every given column name prefixed with
     * {@code "z_"}. The input array is left unmodified.
     *
     * @param cols column names to prefix; must not be {@code null}
     * @return a fresh array of the same length with prefixed names
     */
    public static final String[] getPrefixedColumns(String[] cols) {
        // Build a copy rather than overwriting the caller's array in place.
        String[] prefixed = new String[cols.length];
        for (int i = 0; i < cols.length; ++i) {
            prefixed[i] = COL_PREFIX + cols[i];
        }
        return prefixed;
    }

    /**
     * Renames every column of {@code lines} by prefixing it with {@code "z_"}.
     *
     * @param lines dataset whose columns are renamed
     * @return the same data with prefixed column names
     */
    public static Dataset<Row> getPrefixedColumnsDS(Dataset<Row> lines) {
        return lines.toDF(DSUtil.getPrefixedColumns(lines.columns()));
    }

    /**
     * Joins {@code lines} with {@code lines1} where {@code lines.joinColumn}
     * equals {@code lines1.z_<joinColumn>}.
     *
     * @param lines      left dataset, carrying the unprefixed join column
     * @param lines1     right dataset, carrying the {@code z_}-prefixed join column
     * @param joinColumn unprefixed name of the join column
     * @param filter     when {@code true}, keep only rows where
     *                   {@code z_zid > z_z_zid}
     * @return the joined (and optionally filtered) pairs
     */
    public static Dataset<Row> join(Dataset<Row> lines, Dataset<Row> lines1, String joinColumn, boolean filter) {
        Column condition = lines.col(joinColumn).equalTo(lines1.col(COL_PREFIX + joinColumn));
        return DSUtil.finishJoin(lines.join(lines1, condition), filter);
    }

    /**
     * Joins {@code lines} with {@code lines1} using the {@code "right"} join
     * type, where {@code lines.z_<joinColumn>} equals {@code lines1.joinColumn}.
     *
     * @param lines      left dataset, carrying the {@code z_}-prefixed join column
     * @param lines1     right dataset, carrying the unprefixed join column
     * @param joinColumn unprefixed name of the join column
     * @param filter     when {@code true}, keep only rows where
     *                   {@code z_zid > z_z_zid}
     * @return the joined (and optionally filtered) pairs
     */
    public static Dataset<Row> joinZColFirst(Dataset<Row> lines, Dataset<Row> lines1, String joinColumn, boolean filter) {
        Column condition = lines.col(COL_PREFIX + joinColumn).equalTo(lines1.col(joinColumn));
        return DSUtil.finishJoin(lines.join(lines1, condition, "right"), filter);
    }

    /**
     * Joins a dataset with a {@code z_}-prefixed copy of itself on
     * {@code joinColumn}.
     *
     * @param lines      dataset to pair with itself
     * @param joinColumn unprefixed name of the join column
     * @param filter     forwarded to {@link #join(Dataset, Dataset, String, boolean)}
     * @return the joined pairs
     * @throws Exception declared for caller compatibility
     */
    public static Dataset<Row> joinWithItself(Dataset<Row> lines, String joinColumn, boolean filter) throws Exception {
        Dataset<Row> prefixed = DSUtil.getPrefixedColumnsDS(lines);
        return DSUtil.join(lines, prefixed, joinColumn, filter);
    }

    /**
     * Joins rows whose {@code z_source} equals the first pipe name with
     * prefixed rows whose {@code z_z_source} differs from it.
     *
     * @param lines      dataset to pair with itself
     * @param joinColumn unprefixed name of the join column
     * @param args       supplies the pipe names; the first one selects the left side
     * @return the joined pairs, without the id-ordering filter
     * @throws Exception declared for caller compatibility
     */
    public static Dataset<Row> joinWithItselfSourceSensitive(Dataset<Row> lines, String joinColumn, Arguments args) throws Exception {
        Dataset<Row> prefixed = DSUtil.getPrefixedColumnsDS(lines).cache();
        String firstSource = args.getPipeNames()[0];
        Dataset<Row> left = lines.filter(lines.col(SOURCE_COL).equalTo(firstSource));
        Dataset<Row> right = prefixed.filter(prefixed.col(COL_PREFIX + SOURCE_COL).notEqual(firstSource));
        return DSUtil.join(left, right, joinColumn, false);
    }

    /**
     * Reshapes linked pair rows into single-record rows: the unprefixed side
     * (de-duplicated on {@code z_cluster} and {@code z_source}) unioned with
     * the {@code z_}-prefixed side renamed to the same column names.
     *
     * @param dupesActual pair rows carrying {@code z_cluster}, {@code z_score}
     *                    and both sides' id, field and source columns
     * @param args        supplies the field definitions to project
     * @return the union of both sides
     */
    public static Dataset<Row> alignLinked(Dataset<Row> dupesActual, Arguments args) {
        Dataset<Row> cached = dupesActual.cache();
        Dataset<Row> first = DSUtil.select(cached, DSUtil.alignedColumns(cached, args, false, false))
                .dropDuplicates(CLUSTER_COL, SOURCE_COL);
        Dataset<Row> second = DSUtil.select(cached, DSUtil.alignedColumns(cached, args, true, false))
                .toDF(first.columns())
                .cache();
        return first.union(second);
    }

    /**
     * Reshapes duplicate pair rows into single-record rows: the unprefixed
     * side unioned with the {@code z_}-prefixed side renamed to the same
     * column names, plus a {@code z_isMatch} column set to {@code -1}.
     *
     * @param dupesActual pair rows carrying {@code z_cluster},
     *                    {@code z_prediction}, {@code z_score} and both sides'
     *                    id, field and source columns
     * @param args        supplies the field definitions to project
     * @return the union of both sides with the added {@code z_isMatch} column
     */
    public static Dataset<Row> alignDupes(Dataset<Row> dupesActual, Arguments args) {
        Dataset<Row> cached = dupesActual.cache();
        Dataset<Row> first = DSUtil.select(cached, DSUtil.alignedColumns(cached, args, false, true));
        Dataset<Row> second = DSUtil.select(cached, DSUtil.alignedColumns(cached, args, true, true))
                .toDF(first.columns())
                .cache();
        return first.union(second).withColumn(MATCH_FLAG_COL, functions.lit(-1));
    }

    /**
     * Keeps only the rows in which every used field equals its
     * {@code z_}-prefixed counterpart. Fields with no match type or with
     * {@link MatchType#DONT_USE} are ignored.
     *
     * @param a    pair rows to filter
     * @param args supplies the field definitions
     * @return the filtered dataset
     */
    public static Dataset<Row> allFieldsEqual(Dataset<Row> a, Arguments args) {
        Dataset<Row> result = a;
        for (FieldDefinition def : args.getFieldDefinition()) {
            if (DSUtil.isUnused(def)) {
                continue;
            }
            String field = def.getFieldName();
            result = result.filter(result.col(field).equalTo(result.col(COL_PREFIX + field)));
        }
        LOG.info("All equals done");
        return result;
    }

    /**
     * Lists the columns of {@code ds} named by the field definitions,
     * optionally preceded by {@code z_zid} and always followed by
     * {@code z_source}.
     *
     * @param ds          dataset the columns are resolved against
     * @param args        supplies the field definitions
     * @param includeZid  whether to put {@code z_zid} first
     * @param showConcise when {@code true}, fields with no match type or with
     *                    {@link MatchType#DONT_USE} are left out
     * @return the column list, in field-definition order
     */
    public static List<Column> getFieldDefColumns(Dataset<Row> ds, Arguments args, boolean includeZid, boolean showConcise) {
        List<Column> cols = new ArrayList<>();
        if (includeZid) {
            cols.add(ds.col(ID_COL));
        }
        for (FieldDefinition def : args.getFieldDefinition()) {
            // Null-safe check, consistent with allFieldsEqual and dropDuplicates;
            // a missing match type no longer raises a NullPointerException here.
            if (showConcise && DSUtil.isUnused(def)) {
                continue;
            }
            cols.add(ds.col(def.fieldName));
        }
        cols.add(ds.col(SOURCE_COL));
        return cols;
    }

    /**
     * Projects {@code ds} onto the columns returned by
     * {@link #getFieldDefColumns(Dataset, Arguments, boolean, boolean)} with
     * {@code showConcise} set to {@code false}.
     *
     * @param ds         dataset to project
     * @param args       supplies the field definitions
     * @param includeZid whether to include {@code z_zid}
     * @return the projected dataset
     */
    public static Dataset<Row> getFieldDefColumnsDS(Dataset<Row> ds, Arguments args, boolean includeZid) {
        return DSUtil.select(ds, DSUtil.getFieldDefColumns(ds, args, includeZid, false));
    }

    /**
     * Selects the given columns from {@code ds}, converting the Java list to
     * the Scala sequence that {@code Dataset.select} takes.
     *
     * @param ds   dataset to project
     * @param cols columns to select, in order
     * @return the projected dataset
     */
    public static Dataset<Row> select(Dataset<Row> ds, List<Column> cols) {
        // The raw Iterator cast is kept exactly as in the original conversion.
        return ds.select(((Iterator)JavaConverters.asScalaIteratorConverter(cols.iterator()).asScala()).toSeq());
    }

    /**
     * Drops rows that are duplicates on all used fields. Fields with no match
     * type or with {@link MatchType#DONT_USE} do not take part in the comparison.
     *
     * @param a    dataset to de-duplicate
     * @param args supplies the field definitions
     * @return the de-duplicated dataset
     */
    public static Dataset<Row> dropDuplicates(Dataset<Row> a, Arguments args) {
        // count() runs a Spark job, so only pay for it when the message is emitted.
        if (LOG.isInfoEnabled()) {
            LOG.info("duplicates before " + a.count());
        }
        List<String> cols = new ArrayList<>();
        for (FieldDefinition def : args.getFieldDefinition()) {
            if (DSUtil.isUnused(def)) {
                continue;
            }
            cols.add(def.getFieldName());
        }
        // NOTE(review): if no field is in use, this passes an empty column array to
        // Spark; confirm the resulting behaviour is what callers expect.
        Dataset<Row> deduped = a.dropDuplicates(cols.toArray(new String[0]));
        if (LOG.isInfoEnabled()) {
            LOG.info("duplicates after " + deduped.count());
        }
        return deduped;
    }

    /**
     * Loads training data from the marked-training-data pipe plus any
     * configured training samples.
     *
     * @param spark active Spark session
     * @param args  job arguments
     * @return the training data, or {@code null} when none was found
     * @throws ZinggClientException if the configured training samples cannot be read
     */
    public static Dataset<Row> getTraining(SparkSession spark, Arguments args) throws ZinggClientException {
        return DSUtil.getTraining(spark, args, PipeUtil.getTrainingDataMarkedPipe(args));
    }

    /**
     * Loads training data from the first output pipe plus any configured
     * training samples.
     *
     * @param spark active Spark session
     * @param args  job arguments
     * @return the training data, or {@code null} when none was found
     * @throws ZinggClientException if the configured training samples cannot be read
     */
    public static Dataset<Row> getTrainingJdbc(SparkSession spark, Arguments args) throws ZinggClientException {
        return DSUtil.getTraining(spark, args, args.getOutput()[0]);
    }

    /**
     * Reads marked training data from {@code p} (best effort, dropping
     * {@code z_prediction} and {@code z_score}) and unions it by name with the
     * configured training samples, if any.
     *
     * @param spark active Spark session
     * @param args  job arguments supplying the optional training samples
     * @param p     pipe holding previously marked training data
     * @return the combined training data, or {@code null} when neither source
     *         yielded anything
     * @throws ZinggClientException if the configured training samples cannot be read
     */
    private static Dataset<Row> getTraining(SparkSession spark, Arguments args, Pipe p) throws ZinggClientException {
        Dataset<Row> training = null;
        try {
            training = PipeUtil.read(spark, false, false, p);
            LOG.warn("Read marked training samples ");
            training = training.drop(PREDICTION_COL).drop(SCORE_COL);
        }
        catch (ZinggClientException e) {
            // Missing marked data is tolerated; keep the cause available for diagnosis.
            LOG.warn("No preexisting marked training samples");
            LOG.debug("Could not read marked training samples", e);
        }
        if (args.getTrainingSamples() != null) {
            Dataset<Row> samples = PipeUtil.read(spark, true, false, args.getTrainingSamples());
            LOG.warn("Read all training samples ");
            training = training == null ? samples : training.unionByName(samples, true);
        } else {
            LOG.warn("No configured training samples");
        }
        if (training == null) {
            LOG.warn("No training data found");
        }
        return training;
    }

    /**
     * Returns the field definitions that have a match type which does not
     * contain {@code type}.
     *
     * @param args supplies the field definitions
     * @param type match type to exclude
     * @return the remaining field definitions, in their original order
     */
    public static List<FieldDefinition> getFieldDefinitionFiltered(Arguments args, MatchType type) {
        return args.getFieldDefinition().stream()
                .filter(f -> f.getMatchType() != null && !f.getMatchType().contains(type))
                .collect(Collectors.toList());
    }

    /**
     * Joins the {@code z_cluster}, {@code z_zid}, {@code z_prediction},
     * {@code z_score} and {@code z_isMatch} columns of {@code actual} onto
     * {@code orig} using {@code z_zid}.
     *
     * @param actual dataset carrying the computed z-columns
     * @param orig   original records, keyed by {@code z_zid}
     * @return the joined dataset
     */
    public static Dataset<Row> postprocess(Dataset<Row> actual, Dataset<Row> orig) {
        List<Column> cols = new ArrayList<>();
        cols.add(actual.col(CLUSTER_COL));
        cols.add(actual.col(ID_COL));
        cols.add(actual.col(PREDICTION_COL));
        cols.add(actual.col(SCORE_COL));
        // Referenced by name rather than through actual.col(...), as in the original.
        cols.add(functions.col(MATCH_FLAG_COL));
        Dataset<Row> zFieldsFromActual = DSUtil.select(actual, cols);
        return zFieldsFromActual.join(orig, ID_COL);
    }

    /**
     * Joins the {@code z_cluster}, {@code z_zid}, {@code z_score} and
     * {@code z_source} columns of {@code actual} onto {@code orig} where both
     * {@code z_zid} and {@code z_source} match, then drops
     * {@code actual}'s {@code z_source} and both {@code z_zid} columns.
     *
     * @param actual dataset carrying the computed z-columns
     * @param orig   original records
     * @return the joined dataset without the dropped columns
     */
    public static Dataset<Row> postprocessLinked(Dataset<Row> actual, Dataset<Row> orig) {
        List<Column> cols = new ArrayList<>();
        cols.add(actual.col(CLUSTER_COL));
        cols.add(actual.col(ID_COL));
        cols.add(actual.col(SCORE_COL));
        cols.add(actual.col(SOURCE_COL));
        Dataset<Row> zFieldsFromActual = DSUtil.select(actual, cols);
        Column sameId = zFieldsFromActual.col(ID_COL).equalTo(orig.col(ID_COL));
        Column sameSource = zFieldsFromActual.col(SOURCE_COL).equalTo(orig.col(SOURCE_COL));
        return zFieldsFromActual.join(orig, sameId.and(sameSource))
                .drop(zFieldsFromActual.col(SOURCE_COL))
                .drop(zFieldsFromActual.col(ID_COL))
                .drop(orig.col(ID_COL));
    }

    /** Logs the pair count at debug level and optionally keeps rows with {@code z_zid > z_z_zid}. */
    private static Dataset<Row> finishJoin(Dataset<Row> pairs, boolean filter) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("pairs length " + pairs.count());
        }
        if (!filter) {
            return pairs;
        }
        return pairs.filter(pairs.col(ID_COL).gt(pairs.col(COL_PREFIX + ID_COL)));
    }

    /** Builds one side's column list for alignLinked/alignDupes; {@code z_cluster}, {@code z_prediction} and {@code z_score} are never prefixed. */
    private static List<Column> alignedColumns(Dataset<Row> ds, Arguments args, boolean prefixed, boolean withPrediction) {
        List<Column> cols = new ArrayList<>();
        cols.add(ds.col(CLUSTER_COL));
        cols.add(ds.col(prefixed ? COL_PREFIX + ID_COL : ID_COL));
        if (withPrediction) {
            cols.add(ds.col(PREDICTION_COL));
        }
        cols.add(ds.col(SCORE_COL));
        for (FieldDefinition def : args.getFieldDefinition()) {
            cols.add(ds.col(prefixed ? COL_PREFIX + def.fieldName : def.fieldName));
        }
        cols.add(ds.col(prefixed ? COL_PREFIX + SOURCE_COL : SOURCE_COL));
        return cols;
    }

    /** Returns whether the field has no match type or is marked {@link MatchType#DONT_USE}. */
    private static boolean isUnused(FieldDefinition def) {
        return def.getMatchType() == null || def.getMatchType().contains(MatchType.DONT_USE);
    }
}

