/*
 * Decompiled with CFR 0.152.
 */
package no.priv.garshol.duke;

import java.io.PrintWriter;
import java.io.Writer;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.CopyOnWriteArrayList;
import no.priv.garshol.duke.Configuration;
import no.priv.garshol.duke.DataSource;
import no.priv.garshol.duke.Database;
import no.priv.garshol.duke.Duke;
import no.priv.garshol.duke.DukeException;
import no.priv.garshol.duke.DummyLogger;
import no.priv.garshol.duke.Logger;
import no.priv.garshol.duke.Property;
import no.priv.garshol.duke.Record;
import no.priv.garshol.duke.RecordIterator;
import no.priv.garshol.duke.matchers.AbstractMatchListener;
import no.priv.garshol.duke.matchers.MatchListener;
import no.priv.garshol.duke.matchers.PrintMatchListener;
import no.priv.garshol.duke.utils.DefaultRecordIterator;
import no.priv.garshol.duke.utils.Utils;

public class Processor {
    // Configuration supplied to the constructor; consulted for properties,
    // thresholds and data sources.
    private Configuration config;
    // Registered match listeners (a CopyOnWriteArrayList created in the constructor).
    private Collection<MatchListener> listeners;
    // Logger handed to data sources and used for debug/info output; a
    // DummyLogger until setLogger is called.
    private Logger logger;
    // Non-id properties, sorted with PropertyComparator (by low probability).
    private List<Property> proporder;
    // accprob[ix] is the Bayes combination (starting from 0.5) of the high
    // probabilities of proporder[ix..end], computed once in the constructor.
    private double[] accprob;
    // Number of matching threads; 1 selects the single-threaded code path.
    private int threads;
    // Database number 1 (always set by the constructors).
    private Database database1;
    // Database number 2; only set by the (Configuration, boolean) constructor,
    // otherwise null.
    private Database database2;
    // Batch size used by the overloads that take no explicit batch size.
    private static final int DEFAULT_BATCH_SIZE = 40000;
    // Number of calls made to compare(Record, Record).
    private long comparisons;
    // The following five counters accumulate wall-clock milliseconds.
    // Time spent reading records from data sources during deduplication.
    private long srcread;
    // Time spent indexing and committing batches during deduplication.
    private long indexing;
    // Time spent in Database.findCandidateMatches.
    private long searching;
    // Time spent comparing a record against its candidates.
    private long comparing;
    // Time spent inside listener callbacks.
    private long callbacks;
    // Non-null only while performance profiling is switched on.
    private Profiler profiler;

    /**
     * Creates a processor for the given configuration by delegating to
     * {@link #Processor(Configuration, boolean)} with {@code overwrite}
     * set to {@code true}.
     *
     * @param config the configuration to run with
     */
    public Processor(Configuration config) {
        this(config, true);
    }

    /**
     * Creates a processor whose databases are both obtained from the
     * configuration: database 1 via {@code config.getDatabase(1, overwrite)}
     * and database 2 via {@code config.getDatabase(2, overwrite)}.
     *
     * @param config    the configuration to run with
     * @param overwrite passed through unchanged to
     *                  {@code Configuration.getDatabase}
     */
    public Processor(Configuration config, boolean overwrite) {
        this(config, config.getDatabase(1, overwrite));
        database2 = config.getDatabase(2, overwrite);
    }

    /**
     * Creates a single-threaded processor that uses the supplied database as
     * database 1. Database 2 is left unset.
     *
     * <p>Also precomputes the ordered list of non-id properties and, for each
     * position in that list, the Bayes combination of the high probabilities
     * of that property and all properties after it.
     *
     * @param config   the configuration to run with
     * @param database the database to use as database 1
     */
    public Processor(Configuration config, Database database) {
        this.config = config;
        this.database1 = database;
        this.listeners = new CopyOnWriteArrayList<MatchListener>();
        this.logger = new DummyLogger();
        this.threads = 1;

        // Collect every property that is not an id property, then sort.
        List<Property> ordered = new ArrayList<Property>();
        for (Property property : config.getProperties()) {
            if (!property.isIdProperty()) {
                ordered.add(property);
            }
        }
        Collections.sort(ordered, new PropertyComparator());
        this.proporder = ordered;

        // Walk backwards so each slot holds the probability accumulated
        // from the end of the list up to and including that slot.
        double[] accumulated = new double[ordered.size()];
        double running = 0.5;
        for (int ix = accumulated.length - 1; ix >= 0; ix--) {
            running = Utils.computeBayes(running, ordered.get(ix).getHighProbability());
            accumulated[ix] = running;
        }
        this.accprob = accumulated;
    }

    /**
     * Replaces the logger used by this processor. The logger is also handed
     * to each data source before its records are read.
     *
     * @param logger the logger to use from now on
     */
    public void setLogger(Logger logger) {
        this.logger = logger;
    }

    /**
     * Sets the number of threads used when matching a batch. A value of 1
     * selects the single-threaded path; any other value makes batch matching
     * go through the threaded path. The value is not validated here.
     *
     * @param threads the number of matching threads
     */
    public void setThreads(int threads) {
        this.threads = threads;
    }

    /**
     * Returns the configured number of matching threads.
     *
     * @return the thread count (1 unless changed via {@code setThreads})
     */
    public int getThreads() {
        return threads;
    }

    /**
     * Registers a listener that will be notified of processing events.
     *
     * @param listener the listener to add
     */
    public void addMatchListener(MatchListener listener) {
        listeners.add(listener);
    }

    /**
     * Unregisters a listener.
     *
     * @param listener the listener to remove; may be {@code null}
     * @return {@code true} if {@code listener} is {@code null}; otherwise
     *         whatever the listener collection's {@code remove} returns
     */
    public boolean removeMatchListener(MatchListener listener) {
        // A null listener is treated as trivially removed.
        return listener == null || listeners.remove(listener);
    }

    /**
     * Returns the collection of registered listeners. This is the
     * processor's own collection, not a copy.
     *
     * @return the registered match listeners
     */
    public Collection<MatchListener> getListeners() {
        return listeners;
    }

    /**
     * Returns database number 1.
     *
     * @return the first database
     */
    public Database getDatabase() {
        return database1;
    }

    /**
     * Returns the database for the given group.
     *
     * @param group 1 or 2
     * @return database 1 or database 2 (the latter may be {@code null} if
     *         this processor was created with a single database)
     * @throws DukeException if {@code group} is neither 1 nor 2
     */
    public Database getDatabase(int group) {
        switch (group) {
            case 1:
                return database1;
            case 2:
                return database2;
            default:
                throw new DukeException("Unknown group " + group);
        }
    }

    /**
     * Switches performance profiling on or off. Turning it on creates a
     * {@link Profiler} and registers it as a match listener; turning it off
     * unregisters and discards the profiler. Requesting the state that is
     * already in effect does nothing.
     *
     * @param profile {@code true} to enable profiling, {@code false} to disable
     */
    public void setPerformanceProfiling(boolean profile) {
        boolean active = profiler != null;
        if (profile == active) {
            return; // already in the requested state
        }

        if (profile) {
            profiler = new Profiler();
            addMatchListener(profiler);
        } else {
            removeMatchListener(profiler);
            profiler = null;
        }
    }

    /**
     * Returns the active profiler.
     *
     * @return the profiler, or {@code null} when profiling is switched off
     */
    public Profiler getProfiler() {
        return profiler;
    }

    /**
     * Deduplicates all data sources from the configuration using the
     * default batch size.
     */
    public void deduplicate() {
        deduplicate(config.getDataSources(), DEFAULT_BATCH_SIZE);
    }

    /**
     * Deduplicates all data sources from the configuration.
     *
     * @param batch_size the number of records per batch
     */
    public void deduplicate(int batch_size) {
        deduplicate(config.getDataSources(), batch_size);
    }

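    /*
     * Decompiler note (CFR): a try/catch construct in the following method
     * was restructured during decompilation, so its exception-handling
     * behaviour may differ from the original source.
     */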
    /**
     * Reads records from the given data sources and deduplicates them in
     * batches. Listeners are notified of the start and end of processing,
     * and each record iterator is told when a batch has been processed and
     * is always closed, even if processing fails.
     *
     * <p>The record counter runs across all sources, while the batch buffer
     * is per source, so a batch is flushed whenever the running total hits a
     * multiple of {@code batch_size} and again at the end of each source.
     *
     * @param sources    the data sources to read from
     * @param batch_size the number of records per batch; zero causes an
     *                   {@link ArithmeticException} on the first record
     */
    public void deduplicate(Collection<DataSource> sources, int batch_size) {
        int count = 0;
        startProcessing();
        for (DataSource source : sources) {
            source.setLogger(logger);
            RecordIterator it2 = source.getRecords();
            try {
                Collection<Record> batch = new ArrayList<Record>();
                long start = System.currentTimeMillis();
                while (it2.hasNext()) {
                    batch.add((Record) it2.next());
                    if (++count % batch_size == 0) {
                        srcread += System.currentTimeMillis() - start;
                        deduplicate(batch);
                        it2.batchProcessed();
                        batch = new ArrayList<Record>();
                        start = System.currentTimeMillis();
                    }
                }
                // Account for the time spent reading the trailing partial
                // batch too; otherwise sources smaller than batch_size would
                // never contribute to the source-reading statistic.
                srcread += System.currentTimeMillis() - start;
                if (!batch.isEmpty()) {
                    deduplicate(batch);
                    it2.batchProcessed();
                }
            } finally {
                it2.close();
            }
        }
        endProcessing();
    }

    /**
     * Deduplicates one batch of records: indexes every record into
     * database 1, commits, and then matches each record against the
     * database, reporting all matches. Listeners receive
     * {@code batchReady} before and {@code batchDone} after the batch.
     *
     * @param records the batch to index and match
     */
    public void deduplicate(Collection<Record> records) {
        final int size = records.size();
        logger.info("Deduplicating batch of " + size + " records");
        batchReady(size);

        // Index the whole batch first so records can match each other.
        final long indexStart = System.currentTimeMillis();
        for (Record each : records) {
            database1.index(each);
        }
        database1.commit();
        indexing += System.currentTimeMillis() - indexStart;

        match(records, true);
        batchDone();
    }

    /**
     * Matches every record in the batch against database 1, either on the
     * calling thread (when exactly one thread is configured) or by handing
     * the batch to {@code threadedmatch}.
     *
     * @param records  the records to match
     * @param matchall {@code true} to report every match, {@code false} to
     *                 report only the best candidate
     */
    private void match(Collection<Record> records, boolean matchall) {
        if (threads != 1) {
            threadedmatch(records, matchall);
            return;
        }
        for (Record current : records) {
            match(1, current, matchall);
        }
    }

    /**
     * Matches a batch using the configured number of worker threads. The
     * records are dealt out round-robin to the workers, all workers are
     * started, and the calling thread then waits for them to finish.
     *
     * <p>If the calling thread is interrupted while waiting, it stops
     * waiting and re-asserts its interrupt status so callers can observe
     * the interruption; workers that are still running are left to finish
     * on their own.
     *
     * @param records  the records to match
     * @param matchall {@code true} to report every match, {@code false} to
     *                 report only the best candidate
     */
    private void threadedmatch(Collection<Record> records, boolean matchall) {
        MatchThread[] workers = new MatchThread[this.threads];
        for (int ix = 0; ix < workers.length; ix++) {
            // Presize each worker's list with its approximate share.
            workers[ix] = new MatchThread(ix, records.size() / workers.length, matchall);
        }

        int next = 0;
        for (Record record : records) {
            workers[next++ % workers.length].addRecord(record);
        }

        for (MatchThread worker : workers) {
            worker.start();
        }

        try {
            for (MatchThread worker : workers) {
                worker.join();
            }
        } catch (InterruptedException e) {
            // Do not swallow the interruption: restore the flag so code
            // further up the stack can react to it.
            Thread.currentThread().interrupt();
        }
    }

    /**
     * Links the group-1 data sources against the group-2 data sources from
     * the configuration, using the default batch size.
     */
    public void link() {
        link(config.getDataSources(1), config.getDataSources(2), DEFAULT_BATCH_SIZE);
    }

    /**
     * Links two sets of data sources, reporting every match
     * ({@code matchall = true}).
     *
     * @param sources1   the first group of data sources
     * @param sources2   the second group of data sources
     * @param batch_size the batch size passed on to the batching iterator
     */
    public void link(Collection<DataSource> sources1, Collection<DataSource> sources2, int batch_size) {
        link(sources1, sources2, true, batch_size);
    }

    /**
     * Links two sets of data sources.
     *
     * <p>Records from {@code sources1} are indexed into database 1 and, if a
     * second database exists, matched against database 2. Records from
     * {@code sources2} are indexed into database 2 when it exists and are
     * always matched against database 1. Listeners are notified at the
     * start and end of the run.
     *
     * @param sources1   the first group of data sources
     * @param sources2   the second group of data sources
     * @param matchall   {@code true} to report every match, {@code false}
     *                   to report only the best candidate
     * @param batch_size the batch size passed on to the batching iterator
     */
    public void link(Collection<DataSource> sources1, Collection<DataSource> sources2, boolean matchall, int batch_size) {
        startProcessing();

        for (Collection<Record> group1 : makeBatches(sources1, batch_size)) {
            index(1, group1);
            if (hasTwoDatabases()) {
                linkBatch(2, group1, matchall);
            }
        }

        for (Collection<Record> group2 : makeBatches(sources2, batch_size)) {
            if (hasTwoDatabases()) {
                index(2, group2);
            }
            linkBatch(1, group2, matchall);
        }

        endProcessing();
    }

    /**
     * Matches records from the given sources against database 1, reporting
     * every match ({@code matchall = true}). The records are not indexed.
     *
     * @param sources the data sources to read records from
     */
    public void linkRecords(Collection<DataSource> sources) {
        linkRecords(sources, true);
    }

    /**
     * Matches records from the given sources against database 1 using the
     * default batch size. The records are not indexed.
     *
     * @param sources  the data sources to read records from
     * @param matchall {@code true} to report every match, {@code false} to
     *                 report only the best candidate
     */
    public void linkRecords(Collection<DataSource> sources, boolean matchall) {
        linkRecords(sources, matchall, DEFAULT_BATCH_SIZE);
    }

    /**
     * Matches records from the given sources against database 1. The
     * records are not indexed.
     *
     * @param sources    the data sources to read records from
     * @param matchall   {@code true} to report every match, {@code false}
     *                   to report only the best candidate
     * @param batch_size the number of records per batch
     */
    public void linkRecords(Collection<DataSource> sources, boolean matchall, int batch_size) {
        linkRecords(1, sources, matchall, batch_size);
    }

    /**
     * Reads records from the given sources and matches them, batch by
     * batch, against the chosen database. The records themselves are not
     * indexed. Each record iterator is closed even when matching a batch
     * fails, so a failing listener or database does not leak the source.
     *
     * <p>Listeners receive {@code endProcessing} when all sources are done;
     * this method does not send {@code startProcessing}.
     *
     * @param dbno       the database to match against (1 or 2)
     * @param sources    the data sources to read records from
     * @param matchall   {@code true} to report every match, {@code false}
     *                   to report only the best candidate
     * @param batch_size the number of records per batch
     */
    public void linkRecords(int dbno, Collection<DataSource> sources, boolean matchall, int batch_size) {
        for (DataSource source : sources) {
            source.setLogger(logger);
            Collection<Record> batch = new ArrayList<Record>(batch_size);
            RecordIterator it = source.getRecords();
            try {
                while (it.hasNext()) {
                    batch.add((Record) it.next());
                    if (batch.size() == batch_size) {
                        linkBatch(dbno, batch, matchall);
                        batch.clear();
                    }
                }
            } finally {
                // Release the source even if a batch blew up half-way.
                it.close();
            }
            if (!batch.isEmpty()) {
                linkBatch(dbno, batch, matchall);
            }
        }
        endProcessing();
    }

    /**
     * Matches every record of one batch against the given database on the
     * calling thread, bracketed by {@code batchReady} and {@code batchDone}
     * listener notifications.
     *
     * @param dbno     the database to match against (1 or 2)
     * @param batch    the records to match
     * @param matchall {@code true} to report every match, {@code false} to
     *                 report only the best candidate
     */
    private void linkBatch(int dbno, Collection<Record> batch, boolean matchall) {
        batchReady(batch.size());
        Iterator<Record> pending = batch.iterator();
        while (pending.hasNext()) {
            match(dbno, pending.next(), matchall);
        }
        batchDone();
    }

    /**
     * Indexes all records from the given sources into database 1 without
     * matching them.
     *
     * @param sources    the data sources to read records from
     * @param batch_size how many records make up one reported batch
     */
    public void index(Collection<DataSource> sources, int batch_size) {
        index(1, sources, batch_size);
    }

    /**
     * Indexes all records from the given sources into the chosen database
     * without matching them, then commits the database.
     *
     * <p>Listeners receive {@code batchReady(batch_size)} each time another
     * {@code batch_size} records have been indexed, and a final
     * {@code batchReady(remainder)} for any records left over after the
     * last full batch. No notification is sent for an empty remainder.
     * Each record iterator is closed even if indexing fails.
     *
     * @param dbno       the database to index into (1 or 2)
     * @param sources    the data sources to read records from
     * @param batch_size how many records make up one reported batch; zero
     *                   causes an {@link ArithmeticException}
     */
    public void index(int dbno, Collection<DataSource> sources, int batch_size) {
        Database thedb = getDB(dbno);
        int count = 0;
        for (DataSource source : sources) {
            source.setLogger(logger);
            RecordIterator it2 = source.getRecords();
            try {
                while (it2.hasNext()) {
                    Record record = (Record) it2.next();
                    if (logger.isDebugEnabled()) {
                        logger.debug("Indexing record " + record);
                    }
                    thedb.index(record);
                    if (++count % batch_size == 0) {
                        batchReady(batch_size);
                    }
                }
            } finally {
                it2.close();
            }
        }

        // Report the trailing partial batch. The previous condition was
        // inverted: it fired only when there was nothing left to report.
        int remainder = count % batch_size;
        if (remainder != 0) {
            batchReady(remainder);
        }
        thedb.commit();
    }

    /**
     * Indexes one batch of records into the chosen database and commits.
     * No listener notifications are sent.
     *
     * @param dbno  the database to index into (1 or 2)
     * @param batch the records to index
     */
    public void index(int dbno, Collection<Record> batch) {
        final Database target = getDB(dbno);
        Iterator<Record> pending = batch.iterator();
        while (pending.hasNext()) {
            Record current = pending.next();
            if (logger.isDebugEnabled()) {
                logger.debug("Indexing record " + current);
            }
            target.index(current);
        }
        target.commit();
    }

    /**
     * Returns how many record-to-record comparisons have been made so far,
     * i.e. the number of calls to {@link #compare(Record, Record)}.
     *
     * @return the comparison count
     */
    public long getComparisonCount() {
        return comparisons;
    }

    /**
     * Matches a single record: looks up candidate matches in the chosen
     * database, then compares the record against them. The time spent
     * searching and comparing is added to the respective counters.
     *
     * @param dbno     the database to search (1 or 2)
     * @param record   the record to match
     * @param matchall {@code true} to report every match, {@code false} to
     *                 report only the best candidate
     */
    private void match(int dbno, Record record, boolean matchall) {
        final long searchStart = System.currentTimeMillis();
        Collection<Record> candidates = getDB(dbno).findCandidateMatches(record);
        searching += System.currentTimeMillis() - searchStart;

        if (logger.isDebugEnabled()) {
            logger.debug("Matching record " + PrintMatchListener.toString(record, config.getProperties()) + " found " + candidates.size() + " candidates");
        }

        final long compareStart = System.currentTimeMillis();
        if (!matchall) {
            compareCandidatesBest(record, candidates);
        } else {
            compareCandidatesSimple(record, candidates);
        }
        comparing += System.currentTimeMillis() - compareStart;
    }

    /**
     * Compares the record against every candidate and reports each one
     * whose probability exceeds a threshold: a definite match above the
     * main threshold, or a possible match above the "maybe" threshold (when
     * that threshold is non-zero). Candidates that share an identity value
     * with the record are skipped. If nothing was reported, listeners are
     * told there was no match for the record.
     *
     * @param record     the record being matched
     * @param candidates the candidate records to compare against
     */
    protected void compareCandidatesSimple(Record record, Collection<Record> candidates) {
        boolean reported = false;
        for (Record candidate : candidates) {
            if (!isSameAs(record, candidate)) {
                double score = compare(record, candidate);
                if (score > config.getThreshold()) {
                    reported = true;
                    registerMatch(record, candidate, score);
                } else if (config.getMaybeThreshold() != 0.0 && score > config.getMaybeThreshold()) {
                    reported = true;
                    registerMatchPerhaps(record, candidate, score);
                }
            }
        }
        if (!reported) {
            registerNoMatchFor(record);
        }
    }

    /**
     * Compares the record against every candidate but reports only the
     * single highest-scoring one: as a definite match above the main
     * threshold, as a possible match above a non-zero "maybe" threshold, or
     * otherwise as no match. Candidates that share an identity value with
     * the record are skipped.
     *
     * @param record     the record being matched
     * @param candidates the candidate records to compare against
     */
    protected void compareCandidatesBest(Record record, Collection<Record> candidates) {
        Record winner = null;
        double top = 0.0;
        for (Record candidate : candidates) {
            if (isSameAs(record, candidate)) {
                continue;
            }
            double score = compare(record, candidate);
            if (score > top) {
                top = score;
                winner = candidate;
            }
        }

        if (logger.isDebugEnabled()) {
            logger.debug("Best candidate at " + top + " is " + winner);
        }

        if (top > config.getThreshold()) {
            registerMatch(record, winner, top);
            return;
        }
        if (config.getMaybeThreshold() != 0.0 && top > config.getMaybeThreshold()) {
            registerMatchPerhaps(record, winner, top);
            return;
        }
        registerNoMatchFor(record);
    }

    /**
     * Computes the probability that two records represent the same thing.
     *
     * <p>Starting from 0.5, the result is updated with
     * {@code Utils.computeBayes} once for every property of {@code r1} that
     * is configured, is neither an id nor an ignored property, and has
     * values in both records. For such a property the best score over all
     * pairs of non-empty values is used.
     *
     * @param r1 the first record; its property names drive the comparison
     * @param r2 the second record
     * @return the combined probability
     * @throws DukeException if a property's comparison of two values fails
     */
    public double compare(Record r1, Record r2) {
        comparisons++;
        double probability = 0.5;
        for (String name : r1.getProperties()) {
            Property property = config.getPropertyByName(name);
            boolean usable = property != null && !property.isIdProperty() && !property.isIgnoreProperty();
            if (!usable) {
                continue;
            }

            Collection<String> left = r1.getValues(name);
            Collection<String> right = r2.getValues(name);
            boolean missing = left == null || left.isEmpty() || right == null || right.isEmpty();
            if (missing) {
                continue; // nothing to compare for this property
            }

            double best = 0.0;
            for (String lv : left) {
                if (lv.isEmpty()) {
                    continue;
                }
                for (String rv : right) {
                    if (rv.isEmpty()) {
                        continue;
                    }
                    double score;
                    try {
                        score = property.compare(lv, rv);
                    } catch (Exception e) {
                        throw new DukeException("Comparison of values '" + lv + "' and '" + rv + "' with " + property.getComparator() + " failed", e);
                    }
                    best = Math.max(best, score);
                }
            }
            probability = Utils.computeBayes(probability, best);
        }
        return probability;
    }

    /**
     * Closes database 1 and, when present, database 2.
     */
    public void close() {
        database1.close();
        if (!hasTwoDatabases()) {
            return;
        }
        database2.close();
    }

    /**
     * Wraps the given data sources in a {@link BatchIterator} so their
     * records can be consumed batch by batch in a for-each loop. The
     * returned iterable is its own iterator, so it can be traversed once.
     *
     * @param sources    the data sources to read records from
     * @param batch_size the batch size handed to the batch iterator
     * @return an iterable over batches of records
     */
    private Iterable<Collection<Record>> makeBatches(Collection<DataSource> sources, int batch_size) {
        return new BatchIterator(sources, batch_size);
    }

    /**
     * Tells whether this processor has a second database.
     *
     * @return {@code true} if database 2 is set
     */
    public boolean hasTwoDatabases() {
        return database2 != null;
    }

    /**
     * Resolves a database number to the corresponding database.
     *
     * @param no 1 or 2
     * @return database 1 or database 2 (which may be {@code null})
     * @throws DukeException if {@code no} is neither 1 nor 2
     */
    private Database getDB(int no) {
        switch (no) {
            case 1:
                return database1;
            case 2:
                return database2;
            default:
                throw new DukeException("Unknown database " + no);
        }
    }

    /**
     * Tells whether two records are the same record, meaning they share at
     * least one value for at least one identity property.
     *
     * <p>A record that has no values for an identity property (a
     * {@code null} value collection) cannot match on that property, so the
     * property is skipped. This applies to both records; previously a
     * missing value collection on {@code r2} caused a
     * {@link NullPointerException}.
     *
     * @param r1 the first record
     * @param r2 the second record
     * @return {@code true} if the records share an identity value
     */
    private boolean isSameAs(Record r1, Record r2) {
        for (Property idp : config.getIdentityProperties()) {
            Collection<String> vs2 = r2.getValues(idp.getName());
            Collection<String> vs1 = r1.getValues(idp.getName());
            if (vs1 == null || vs2 == null) {
                continue;
            }
            for (String v1 : vs1) {
                if (vs2.contains(v1)) {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Tells every listener that processing is starting and adds the time
     * the listeners took to the callback counter. Logs the two databases
     * first when debug logging is enabled.
     */
    private void startProcessing() {
        if (logger.isDebugEnabled()) {
            logger.debug("Start processing with " + database1 + " and " + database2);
        }
        final long began = System.currentTimeMillis();
        for (MatchListener each : listeners) {
            each.startProcessing();
        }
        final long elapsed = System.currentTimeMillis() - began;
        callbacks += elapsed;
    }

    /**
     * Tells every listener that processing has ended and adds the time the
     * listeners took to the callback counter.
     */
    private void endProcessing() {
        final long began = System.currentTimeMillis();
        for (MatchListener each : listeners) {
            each.endProcessing();
        }
        final long elapsed = System.currentTimeMillis() - began;
        callbacks += elapsed;
    }

    /**
     * Tells every listener that a batch of the given size is ready and adds
     * the time the listeners took to the callback counter.
     *
     * @param size the number of records in the batch
     */
    private void batchReady(int size) {
        final long began = System.currentTimeMillis();
        for (MatchListener each : listeners) {
            each.batchReady(size);
        }
        final long elapsed = System.currentTimeMillis() - began;
        callbacks += elapsed;
    }

    /**
     * Tells every listener that the current batch is done and adds the time
     * the listeners took to the callback counter.
     */
    private void batchDone() {
        final long began = System.currentTimeMillis();
        for (MatchListener each : listeners) {
            each.batchDone();
        }
        final long elapsed = System.currentTimeMillis() - began;
        callbacks += elapsed;
    }

    /**
     * Reports a definite match between two records to every listener and
     * adds the time the listeners took to the callback counter.
     *
     * @param r1         the record being matched
     * @param r2         the record it matched
     * @param confidence the computed match probability
     */
    private void registerMatch(Record r1, Record r2, double confidence) {
        final long began = System.currentTimeMillis();
        for (MatchListener each : listeners) {
            each.matches(r1, r2, confidence);
        }
        final long elapsed = System.currentTimeMillis() - began;
        callbacks += elapsed;
    }

    /**
     * Reports a possible match between two records to every listener and
     * adds the time the listeners took to the callback counter.
     *
     * @param r1         the record being matched
     * @param r2         the record it possibly matched
     * @param confidence the computed match probability
     */
    private void registerMatchPerhaps(Record r1, Record r2, double confidence) {
        final long began = System.currentTimeMillis();
        for (MatchListener each : listeners) {
            each.matchesPerhaps(r1, r2, confidence);
        }
        final long elapsed = System.currentTimeMillis() - began;
        callbacks += elapsed;
    }

    /**
     * Reports to every listener that no match was found for the record and
     * adds the time the listeners took to the callback counter.
     *
     * @param current the record that had no match
     */
    private void registerNoMatchFor(Record current) {
        final long began = System.currentTimeMillis();
        for (MatchListener each : listeners) {
            each.noMatchFor(current);
        }
        final long elapsed = System.currentTimeMillis() - began;
        callbacks += elapsed;
    }

    /**
     * Match listener that prints throughput and timing statistics for the
     * enclosing processor. Output goes to {@code System.out} unless another
     * writer is installed with {@link #setOutput(Writer)}; the writer is
     * flushed after every callback so statistics appear promptly.
     */
    public class Profiler
    extends AbstractMatchListener {
        // When the current run started (ms since epoch).
        private long processing_start;
        // When the current batch was announced (ms since epoch).
        private long batch_start;
        // Size of the current batch as announced by batchReady.
        private int batch_size;
        // Total records processed so far (sum of finished batch sizes).
        private int records;
        // Destination for all statistics output.
        private PrintWriter out = new PrintWriter(System.out);

        /**
         * Redirects all statistics output to the given writer.
         *
         * @param outw the writer to receive the statistics
         */
        public void setOutput(Writer outw) {
            this.out = new PrintWriter(outw);
        }

        /** Records the start time and prints version, databases and thread count. */
        @Override
        public void startProcessing() {
            processing_start = System.currentTimeMillis();
            out.println("Duke version " + Duke.getVersionString());
            out.println(Processor.this.getDatabase());
            if (Processor.this.hasTwoDatabases()) {
                out.println(Processor.this.database2);
            }
            out.println("Threads: " + Processor.this.getThreads());
            out.flush();
        }

        /** Remembers when the batch started and how large it is. */
        @Override
        public void batchReady(int size) {
            batch_start = System.currentTimeMillis();
            batch_size = size;
        }

        /** Prints the running record total, batch throughput and comparison count. */
        @Override
        public void batchDone() {
            records += batch_size;
            long elapsed = System.currentTimeMillis() - batch_start;
            int rs = (int) (1000.0 * (double) batch_size / (double) elapsed);
            out.println("" + records + " processed, " + rs + " records/second; comparisons: " + Processor.this.getComparisonCount());
            out.flush();
        }

        /** Prints the overall throughput, a per-phase time breakdown and memory usage. */
        @Override
        public void endProcessing() {
            long end = System.currentTimeMillis();
            long duration = end - processing_start;
            double rs = 1000.0 * (double) records / (double) duration;
            out.println("Run completed, " + (int) rs + " records/second");
            out.println("" + records + " records total in " + duration / 1000L + " seconds");

            long srcread = Processor.this.srcread;
            long indexing = Processor.this.indexing;
            long searching = Processor.this.searching;
            long comparing = Processor.this.comparing;
            long callbacks = Processor.this.callbacks;
            long total = srcread + indexing + searching + comparing + callbacks;
            out.println("Reading from source: " + seconds(srcread) + " (" + percent(srcread, total) + "%)");
            out.println("Indexing: " + seconds(indexing) + " (" + percent(indexing, total) + "%)");
            out.println("Searching: " + seconds(searching) + " (" + percent(searching, total) + "%)");
            out.println("Comparing: " + seconds(comparing) + " (" + percent(comparing, total) + "%)");
            out.println("Callbacks: " + seconds(callbacks) + " (" + percent(callbacks, total) + "%)");
            out.println();

            Runtime r = Runtime.getRuntime();
            out.println("Total memory: " + r.totalMemory() + ", free memory: " + r.freeMemory() + ", used memory: " + (r.totalMemory() - r.freeMemory()));
            out.flush();
        }

        // Formats a millisecond duration as whole seconds.
        private String seconds(long ms) {
            return "" + (int) (ms / 1000L);
        }

        // Formats ms as a whole-number percentage of total.
        private String percent(long ms, long total) {
            return "" + (int) ((double) (ms * 100L) / (double) total);
        }
    }

    /**
     * Worker thread that matches its own share of a batch against
     * database 1 of the enclosing processor.
     */
    class MatchThread
    extends Thread {
        // Records assigned to this worker.
        private final Collection<Record> records;
        // Whether to report every match or only the best candidate.
        private final boolean matchall;

        /**
         * @param threadno    number used in the thread's name
         * @param recordcount initial capacity for this worker's record list
         * @param matchall    {@code true} to report every match,
         *                    {@code false} to report only the best candidate
         */
        public MatchThread(int threadno, int recordcount, boolean matchall) {
            super("MatchThread " + threadno);
            this.matchall = matchall;
            this.records = new ArrayList<Record>(recordcount);
        }

        /** Matches each assigned record in turn. */
        @Override
        public void run() {
            Iterator<Record> pending = records.iterator();
            while (pending.hasNext()) {
                Processor.this.match(1, pending.next(), matchall);
            }
        }

        /**
         * Assigns another record to this worker.
         *
         * @param record the record to match when the thread runs
         */
        public void addRecord(Record record) {
            records.add(record);
        }
    }

    /**
     * Orders properties by ascending low probability.
     */
    static class PropertyComparator
    implements Comparator<Property> {
        PropertyComparator() {
        }

        /**
         * @return -1, 0 or 1 according to the sign of the difference
         *         between the two low probabilities
         */
        @Override
        public int compare(Property p1, Property p2) {
            final double diff = p1.getLowProbability() - p2.getLowProbability();
            return diff < 0.0 ? -1 : (diff > 0.0 ? 1 : 0);
        }
    }

    /**
     * Iterates over the records of several data sources as one continuous
     * sequence. Sources that yield no records are skipped, so an empty
     * source in the middle of the collection does not cut the iteration
     * short.
     */
    static class BasicIterator
    implements Iterator<Record> {
        // Remaining data sources.
        private Iterator<DataSource> srcit;
        // Iterator over the current source; never null after construction.
        private RecordIterator recit;

        /**
         * @param sources the data sources to iterate over, in order
         */
        public BasicIterator(Collection<DataSource> sources) {
            this.srcit = sources.iterator();
            findNextIterator();
        }

        @Override
        public boolean hasNext() {
            return recit.hasNext();
        }

        @Override
        public Record next() {
            Record r = (Record) recit.next();
            if (!recit.hasNext()) {
                // Current source is exhausted; line up the next one so
                // hasNext() reflects the remaining sources.
                findNextIterator();
            }
            return r;
        }

        /**
         * Advances to the next source that actually has records. Empty
         * sources are closed and skipped. When no sources remain, an empty
         * placeholder iterator is installed.
         */
        private void findNextIterator() {
            // NOTE(review): the iterator of a source that did yield records
            // is not closed here, because its last record has only just been
            // handed out; confirm whether callers should close it.
            while (srcit.hasNext()) {
                RecordIterator candidate = srcit.next().getRecords();
                if (candidate.hasNext()) {
                    recit = candidate;
                    return;
                }
                candidate.close();
            }
            recit = new DefaultRecordIterator(Collections.<Record>emptySet().iterator());
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException();
        }
    }

    /**
     * Groups the records of several data sources into batches of at most
     * {@code batch_size} records. The object is both the iterable and its
     * own iterator, so it can be traversed only once.
     */
    static class BatchIterator
    implements Iterable<Collection<Record>>,
    Iterator<Collection<Record>> {
        // Flat view over all records of all sources.
        private BasicIterator it;
        // Maximum records per batch; non-positive means unlimited.
        private int batch_size;

        /**
         * @param sources    the data sources to read records from
         * @param batch_size the maximum number of records per batch; a value
         *                   of zero or less puts all records in one batch
         */
        public BatchIterator(Collection<DataSource> sources, int batch_size) {
            this.it = new BasicIterator(sources);
            this.batch_size = batch_size;
        }

        @Override
        public boolean hasNext() {
            return it.hasNext();
        }

        /**
         * Collects the next batch.
         *
         * @return up to {@code batch_size} records; empty if no records remain
         */
        @Override
        public Collection<Record> next() {
            Collection<Record> batch = new ArrayList<Record>();
            // Stop at batch_size so large inputs are not loaded all at once.
            // A non-positive size keeps the old "everything in one batch"
            // behaviour rather than producing empty batches forever.
            while (it.hasNext() && (batch_size <= 0 || batch.size() < batch_size)) {
                batch.add(it.next());
            }
            return batch;
        }

        @Override
        public Iterator<Collection<Record>> iterator() {
            return this;
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException();
        }
    }
}

