/*
 * Decompiled with CFR 0.152.
 */
package com.intel.analytics.bigdl.dllib.models.rnn;

import com.intel.analytics.bigdl.dllib.feature.dataset.DataSet$;
import com.intel.analytics.bigdl.dllib.feature.dataset.text.Dictionary;
import com.intel.analytics.bigdl.dllib.feature.dataset.text.Dictionary$;
import com.intel.analytics.bigdl.dllib.feature.dataset.text.SentenceBiPadding$;
import com.intel.analytics.bigdl.dllib.feature.dataset.text.SentenceSplitter;
import com.intel.analytics.bigdl.dllib.feature.dataset.text.SentenceSplitter$;
import com.intel.analytics.bigdl.dllib.feature.dataset.text.SentenceTokenizer;
import com.intel.analytics.bigdl.dllib.feature.dataset.text.SentenceTokenizer$;
import com.intel.analytics.bigdl.dllib.models.rnn.SequencePreprocess$;
import com.intel.analytics.bigdl.dllib.utils.Log4Error$;
import java.io.File;
import java.util.Arrays;
import org.apache.spark.SparkContext;
import org.apache.spark.rdd.RDD;
import scala.Array$;
import scala.Function1;
import scala.Option;
import scala.Predef$;
import scala.Serializable;
import scala.StringContext;
import scala.Tuple4;
import scala.collection.Iterator;
import scala.collection.Seq;
import scala.collection.mutable.ArrayBuffer;
import scala.collection.mutable.ArrayOps;
import scala.io.BufferedSource;
import scala.io.Codec$;
import scala.io.Source$;
import scala.reflect.ClassTag$;
import scala.runtime.AbstractFunction1;
import scala.runtime.BoxedUnit;
import scala.runtime.ScalaRunTime$;

public final class SequencePreprocess$ {
    public static final SequencePreprocess$ MODULE$;

    static {
        new SequencePreprocess$();
    }

    public RDD<String[]> apply(String fileName, SparkContext sc, Option<String> sentBin, Option<String> tokenBin) {
        SentenceSplitter sentenceSplitter = SentenceSplitter$.MODULE$.apply(sentBin);
        SentenceTokenizer sentenceTokenizer = SentenceTokenizer$.MODULE$.apply(tokenBin);
        String[] lines = this.load(fileName);
        RDD qual$1 = ((RDD)DataSet$.MODULE$.array(lines, sc, ClassTag$.MODULE$.apply(String.class)).transform(sentenceSplitter, ClassTag$.MODULE$.apply(ScalaRunTime$.MODULE$.arrayClass(String.class))).toDistributed().data(false)).flatMap((Function1)new Serializable(){
            public static final long serialVersionUID = 0L;

            public final ArrayOps<String> apply(String[] x) {
                return Predef$.MODULE$.refArrayOps((Object[])x);
            }
        }, ClassTag$.MODULE$.apply(String.class));
        Serializable x$351 = new Serializable(){
            public static final long serialVersionUID = 0L;

            public final Iterator<String> apply(Iterator<String> x) {
                return SentenceBiPadding$.MODULE$.apply(SentenceBiPadding$.MODULE$.apply$default$1(), SentenceBiPadding$.MODULE$.apply$default$2()).apply(x);
            }
        };
        boolean x$352 = qual$1.mapPartitions$default$2();
        RDD qual$2 = qual$1.mapPartitions((Function1)x$351, x$352, ClassTag$.MODULE$.apply(String.class));
        Serializable x$353 = new Serializable(sentenceTokenizer){
            public static final long serialVersionUID = 0L;
            private final SentenceTokenizer sentenceTokenizer$1;

            public final Iterator<String[]> apply(Iterator<String> x) {
                return this.sentenceTokenizer$1.apply(x);
            }
            {
                this.sentenceTokenizer$1 = sentenceTokenizer$1;
            }
        };
        boolean x$354 = qual$2.mapPartitions$default$2();
        RDD tokens = qual$2.mapPartitions((Function1)x$353, x$354, ClassTag$.MODULE$.apply(ScalaRunTime$.MODULE$.arrayClass(String.class)));
        return tokens;
    }

    public Tuple4<float[], float[], float[], Dictionary> apply(String fileDirect, int vocabSize) {
        String trainPath = new File(fileDirect, "ptb.train.txt").toString();
        String validPath = new File(fileDirect, "ptb.valid.txt").toString();
        String testPath = new File(fileDirect, "ptb.test.txt").toString();
        Dictionary dictionary = Dictionary$.MODULE$.apply((String[])this.readWords(trainPath).toArray(ClassTag$.MODULE$.apply(String.class)), vocabSize - 1);
        Iterator<Object> trainData = this.fileToWordIdx(trainPath, dictionary);
        Iterator<Object> validData = this.fileToWordIdx(validPath, dictionary);
        Iterator<Object> testData = this.fileToWordIdx(testPath, dictionary);
        return new Tuple4(trainData.toArray(ClassTag$.MODULE$.Float()), validData.toArray(ClassTag$.MODULE$.Float()), testData.toArray(ClassTag$.MODULE$.Float()), (Object)dictionary);
    }

    public float[][] reader(float[] rawData, int numSteps) {
        int length = rawData.length - 1 - numSteps;
        ArrayBuffer buffer = new ArrayBuffer();
        for (int offset = 0; offset <= length; offset += numSteps) {
            float[] slice = new float[numSteps + 1];
            Array$.MODULE$.copy((Object)rawData, offset, (Object)slice, 0, numSteps + 1);
            buffer.append((Seq)Predef$.MODULE$.wrapRefArray((Object[])new float[][]{slice}));
        }
        return (float[][])buffer.toArray(ClassTag$.MODULE$.apply(ScalaRunTime$.MODULE$.arrayClass(Float.TYPE)));
    }

    public Iterator<Object> fileToWordIdx(String fileName, Dictionary dictionary) {
        Iterator<String> words = this.readWords(fileName);
        return words.map((Function1)new Serializable(dictionary){
            public static final long serialVersionUID = 0L;
            private final Dictionary dictionary$1;

            public final float apply(String x) {
                return (float)this.dictionary$1.getIndex(x) + 1.0f;
            }
            {
                this.dictionary$1 = dictionary$1;
            }
        });
    }

    public Iterator<String> readWords(String fileName) {
        ArrayBuffer buffer = new ArrayBuffer();
        Source$.MODULE$.fromFile(fileName, Codec$.MODULE$.fallbackSystemCodec()).getLines().foreach((Function1)new Serializable(buffer){
            public static final long serialVersionUID = 0L;
            public final ArrayBuffer buffer$1;

            public final void apply(String x) {
                Predef$.MODULE$.refArrayOps((Object[])x.split(" ")).foreach((Function1)new Serializable(this){
                    public static final long serialVersionUID = 0L;
                    private final /* synthetic */ anonfun.30 $outer;

                    public final void apply(String t2) {
                        this.$outer.buffer$1.append((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{t2}));
                    }
                    {
                        if ($outer == null) {
                            throw null;
                        }
                        this.$outer = $outer;
                    }
                });
                BoxedUnit words = BoxedUnit.UNIT;
                this.buffer$1.append((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"<eos>"}));
            }
            {
                this.buffer$1 = buffer$1;
            }
        });
        BoxedUnit readWords = BoxedUnit.UNIT;
        return buffer.toIterator();
    }

    /*
     * WARNING - void declaration
     */
    public String[] load(String fileName) {
        void var2_2;
        Log4Error$.MODULE$.invalidInputError(new File(fileName).exists(), new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"data file ", " not exists!"})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{fileName})), Log4Error$.MODULE$.invalidInputError$default$3());
        String[] lines = (String[])Source$.MODULE$.fromFile(fileName, Codec$.MODULE$.fallbackSystemCodec()).getLines().toArray(ClassTag$.MODULE$.apply(String.class));
        return var2_2;
    }

    private SequencePreprocess$() {
        MODULE$ = this;
    }
}

