/*
 * Decompiled with CFR 0.152.
 */
package org.tugraz.sysds.runtime.instructions.spark.utils;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.PairFlatMapFunction;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.ml.linalg.Vector;
import org.apache.spark.ml.linalg.VectorUDT;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.SQLContext;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;
import org.tugraz.sysds.common.Types;
import org.tugraz.sysds.runtime.DMLRuntimeException;
import org.tugraz.sysds.runtime.controlprogram.caching.MatrixObject;
import org.tugraz.sysds.runtime.instructions.spark.data.FrameReblockBuffer;
import org.tugraz.sysds.runtime.instructions.spark.data.SerLongWritable;
import org.tugraz.sysds.runtime.instructions.spark.data.SerText;
import org.tugraz.sysds.runtime.instructions.spark.functions.ConvertFrameBlockToIJVLines;
import org.tugraz.sysds.runtime.instructions.spark.utils.FrameRDDAggregateUtils;
import org.tugraz.sysds.runtime.instructions.spark.utils.RDDAggregateUtils;
import org.tugraz.sysds.runtime.instructions.spark.utils.RDDConverterUtils;
import org.tugraz.sysds.runtime.instructions.spark.utils.SparkUtils;
import org.tugraz.sysds.runtime.io.FileFormatPropertiesCSV;
import org.tugraz.sysds.runtime.io.IOUtilFunctions;
import org.tugraz.sysds.runtime.matrix.data.FrameBlock;
import org.tugraz.sysds.runtime.matrix.data.MatrixBlock;
import org.tugraz.sysds.runtime.matrix.data.MatrixIndexes;
import org.tugraz.sysds.runtime.matrix.data.Pair;
import org.tugraz.sysds.runtime.meta.DataCharacteristics;
import org.tugraz.sysds.runtime.meta.MatrixCharacteristics;
import org.tugraz.sysds.runtime.util.DataConverter;
import org.tugraz.sysds.runtime.util.FastStringTokenizer;
import org.tugraz.sysds.runtime.util.UtilFunctions;
import scala.Tuple2;

public class FrameRDDConverterUtils {
    /** Logger for this utility class, named after the class itself. */
    private static final Log LOG = LogFactory.getLog((String)FrameRDDConverterUtils.class.getName());

    /**
     * Converts CSV text lines into frame blocks keyed by a row index.
     *
     * <p>If {@code mc} has unknown dimensions, they are derived from the input and stored in
     * {@code mc}: the row count is the number of lines minus one header line (when
     * {@code hasHeader}) and minus two lines when the first line starts with one of the
     * {@code #Meta} prefixes; the column count is the number of CSV tokens of the first line.
     *
     * @param sc spark context (not read by this method)
     * @param input CSV lines as a pair RDD; only the values are used
     * @param mc data characteristics, updated in place when the dimensions are unknown
     * @param schema frame schema; {@code null} or a single-entry schema is replaced by an
     *        all-STRING schema with one entry per column
     * @param hasHeader whether the input contains a header line
     * @param delim column delimiter
     * @param fill not read by this method
     * @param fillValue not read by this method
     * @return the frame blocks produced per partition by {@code CSVToBinaryBlockFunction}
     */
    public static JavaPairRDD<Long, FrameBlock> csvToBinaryBlock(JavaSparkContext sc, JavaPairRDD<LongWritable, Text> input, DataCharacteristics mc, Types.ValueType[] schema, boolean hasHeader, String delim, boolean fill, double fillValue) {
        // Derive the dimensions from the data when they are not known up front.
        if (!mc.dimsKnown()) {
            JavaRDD<String> lines = input.values().map(new TextToStringFunction());
            String firstLine = lines.first();
            boolean metaHeader = firstLine.startsWith("#Meta\u00b7MV") || firstLine.startsWith("#Meta\u00b7ND");
            if (metaHeader) {
                // Drop the leading meta prefix token so only the column tokens are counted.
                firstLine = firstLine.substring(firstLine.indexOf(delim) + 1);
            }
            long nonDataLines = (hasHeader ? 1L : 0L) + (metaHeader ? 2L : 0L);
            long numRows = lines.count() - nonDataLines;
            long numCols = IOUtilFunctions.splitCSV(firstLine, delim).length;
            mc.set(numRows, numCols, mc.getBlocksize(), -1L);
        }

        // Attach a running index to every line.
        JavaPairRDD<Text, Long> indexedLines = input.values().zipWithIndex();

        // Fall back to an all-STRING schema when none (or only a single entry) was given.
        Types.ValueType[] effectiveSchema = schema;
        if (effectiveSchema == null || effectiveSchema.length == 1) {
            effectiveSchema = UtilFunctions.nCopies((int)mc.getCols(), Types.ValueType.STRING);
        }

        return indexedLines.mapPartitionsToPair(new CSVToBinaryBlockFunction(mc, effectiveSchema, hasHeader, delim));
    }

    /**
     * Variant of the CSV conversion that accepts plain string lines.
     *
     * <p>The lines are first mapped to pairs through {@code StringToSerTextFunction} and then
     * handed to the pair-RDD overload with all other arguments unchanged.
     *
     * @param sc spark context, forwarded as is
     * @param input CSV lines
     * @param mcOut data characteristics, forwarded as is
     * @param schema frame schema, forwarded as is
     * @param hasHeader whether the input contains a header line
     * @param delim column delimiter
     * @param fill forwarded as is
     * @param fillValue forwarded as is
     * @return the result of the pair-RDD overload
     */
    public static JavaPairRDD<Long, FrameBlock> csvToBinaryBlock(JavaSparkContext sc, JavaRDD<String> input, DataCharacteristics mcOut, Types.ValueType[] schema, boolean hasHeader, String delim, boolean fill, double fillValue) {
        PairFunction toPair = (PairFunction)new StringToSerTextFunction();
        JavaPairRDD<LongWritable, Text> keyedLines = (JavaPairRDD<LongWritable, Text>)input.mapToPair(toPair);
        return csvToBinaryBlock(sc, keyedLines, mcOut, schema, hasHeader, delim, fill, fillValue);
    }

    /**
     * Converts frame blocks into CSV lines.
     *
     * @param in frame blocks keyed by row index
     * @param mcIn data characteristics (not read by this method)
     * @param props CSV properties handed to the per-block converter
     * @param strict if {@code true}, the blocks are sorted by key (ascending) unless
     *        {@code isSorted} already reports them as sorted
     * @return the CSV lines produced by {@code BinaryBlockToCSVFunction}
     */
    public static JavaRDD<String> binaryBlockToCsv(JavaPairRDD<Long, FrameBlock> in, DataCharacteristics mcIn, FileFormatPropertiesCSV props, boolean strict) {
        JavaPairRDD<Long, FrameBlock> ordered = in;
        // The sortedness check is only run when strict ordering is requested.
        boolean needsSort = strict && !isSorted(in);
        if (needsSort) {
            ordered = in.sortByKey(true);
        }
        return ordered.flatMap(new BinaryBlockToCSVFunction(props));
    }

    /**
     * Converts text-cell lines keyed by {@code LongWritable} into frame blocks.
     *
     * <p>The keys are converted to {@code Long} and the work is delegated to
     * {@link #textCellToBinaryBlockLongIndex}.
     *
     * @param sc spark context, forwarded as is
     * @param in text-cell lines
     * @param mcOut data characteristics, forwarded as is
     * @param schema frame schema, forwarded as is
     * @return the result of {@code textCellToBinaryBlockLongIndex}
     */
    public static JavaPairRDD<Long, FrameBlock> textCellToBinaryBlock(JavaSparkContext sc, JavaPairRDD<LongWritable, Text> in, DataCharacteristics mcOut, Types.ValueType[] schema) {
        JavaPairRDD<Long, Text> longKeyed = in.mapToPair(new LongWritableTextToLongTextFunction());
        return textCellToBinaryBlockLongIndex(sc, longKeyed, mcOut, schema);
    }

    /**
     * Converts text-cell lines into frame blocks and merges the partial blocks by key.
     *
     * @param sc spark context (not read by this method)
     * @param input text-cell lines; only the values are used
     * @param mc data characteristics providing the column count for schema expansion
     * @param schema frame schema; {@code null} becomes an all-STRING schema, a single entry is
     *        replicated for every column
     * @return the blocks returned by {@code FrameRDDAggregateUtils.mergeByKey}
     */
    public static JavaPairRDD<Long, FrameBlock> textCellToBinaryBlockLongIndex(JavaSparkContext sc, JavaPairRDD<Long, Text> input, DataCharacteristics mc, Types.ValueType[] schema) {
        Types.ValueType[] fullSchema = schema;
        if (schema == null) {
            fullSchema = UtilFunctions.nCopies((int)mc.getCols(), Types.ValueType.STRING);
        } else if (schema.length == 1) {
            fullSchema = UtilFunctions.nCopies((int)mc.getCols(), schema[0]);
        }
        JavaPairRDD<Long, FrameBlock> partialBlocks = input.values().mapPartitionsToPair(new TextToBinaryBlockFunction(mc, fullSchema));
        return FrameRDDAggregateUtils.mergeByKey(partialBlocks);
    }

    /**
     * Converts frame blocks into text lines by applying {@code ConvertFrameBlockToIJVLines}
     * to every block.
     *
     * @param input frame blocks keyed by row index
     * @param mcIn data characteristics (not read by this method)
     * @return the lines emitted by the converter
     */
    public static JavaRDD<String> binaryBlockToTextCell(JavaPairRDD<Long, FrameBlock> input, DataCharacteristics mcIn) {
        FlatMapFunction toLines = (FlatMapFunction)new ConvertFrameBlockToIJVLines();
        return input.flatMap(toLines);
    }

    /**
     * Converts matrix blocks into frame blocks keyed by {@code LongWritable}.
     *
     * <p>Delegates to {@link #matrixBlockToBinaryBlockLongIndex} and then wraps every
     * {@code Long} key into a {@code LongWritable}.
     *
     * @param sc spark context, forwarded as is
     * @param input matrix blocks
     * @param mcIn data characteristics of the input, forwarded as is
     * @return frame blocks with {@code LongWritable} keys
     */
    public static JavaPairRDD<LongWritable, FrameBlock> matrixBlockToBinaryBlock(JavaSparkContext sc, JavaPairRDD<MatrixIndexes, MatrixBlock> input, DataCharacteristics mcIn) {
        JavaPairRDD<Long, FrameBlock> longKeyed = matrixBlockToBinaryBlockLongIndex(sc, input, mcIn);
        return longKeyed.mapToPair(new LongFrameToLongWritableFrameFunction());
    }

    /**
     * Converts matrix blocks into frame blocks keyed by {@code Long}.
     *
     * <p>When the matrix has more columns than the block size, the blocks are first reshaped by
     * {@code MatrixFrameReblockFunction} and merged by key; the working copy of the
     * characteristics then carries the block size returned by
     * {@code MatrixFrameReblockFunction.computeBlockSize}. Otherwise the blocks are converted
     * directly.
     *
     * @param sc spark context (not read by this method)
     * @param input matrix blocks
     * @param dcIn data characteristics of the input (copied, not modified)
     * @return frame blocks produced by {@code MatrixToFrameBlockFunction}
     */
    public static JavaPairRDD<Long, FrameBlock> matrixBlockToBinaryBlockLongIndex(JavaSparkContext sc, JavaPairRDD<MatrixIndexes, MatrixBlock> input, DataCharacteristics dcIn) {
        MatrixCharacteristics workingMc = new MatrixCharacteristics(dcIn);
        boolean multipleColumnBlocks = dcIn.getCols() > (long)dcIn.getBlocksize();
        if (!multipleColumnBlocks) {
            return input.mapToPair(new MatrixToFrameBlockFunction(workingMc));
        }

        JavaPairRDD<MatrixIndexes, MatrixBlock> widened = input.flatMapToPair(new MatrixFrameReblockFunction(dcIn));
        workingMc.setBlocksize(MatrixFrameReblockFunction.computeBlockSize(workingMc));
        // Merge on matrix blocks before converting them to frame blocks.
        JavaPairRDD<MatrixIndexes, MatrixBlock> merged = RDDAggregateUtils.mergeByKey(widened, false);
        return merged.mapToPair(new MatrixToFrameBlockFunction(workingMc));
    }

    /**
     * Converts frame blocks into matrix blocks.
     *
     * <p>Every frame block is cut into matrix blocks by {@code BinaryBlockToMatrixBlockFunction};
     * the pieces are then merged by key.
     *
     * @param input frame blocks keyed by row index
     * @param mcIn data characteristics of the input frame
     * @param mcOut data characteristics of the output matrix
     * @return the blocks returned by {@code RDDAggregateUtils.mergeByKey}
     */
    public static JavaPairRDD<MatrixIndexes, MatrixBlock> binaryBlockToMatrixBlock(JavaPairRDD<Long, FrameBlock> input, DataCharacteristics mcIn, DataCharacteristics mcOut) {
        JavaPairRDD<MatrixIndexes, MatrixBlock> pieces = input.flatMapToPair(new BinaryBlockToMatrixBlockFunction(mcIn, mcOut));
        return RDDAggregateUtils.mergeByKey(pieces, false);
    }

    /**
     * Converts a data frame into frame blocks, discarding the derived column names and schema.
     *
     * @param sc spark context, forwarded as is
     * @param df input data frame
     * @param mc data characteristics, forwarded as is
     * @param containsID whether the data frame carries a row-ID column
     * @return the result of the five-argument overload
     */
    public static JavaPairRDD<Long, FrameBlock> dataFrameToBinaryBlock(JavaSparkContext sc, Dataset<Row> df, DataCharacteristics mc, boolean containsID) {
        // Throw-away holder for the column names / schema the overload reports back.
        Pair<String[], Types.ValueType[]> ignoredMeta = new Pair<String[], Types.ValueType[]>();
        return dataFrameToBinaryBlock(sc, df, mc, containsID, ignoredMeta);
    }

    /**
     * Converts a data frame into frame blocks and reports the derived column names and schema.
     *
     * <p>If {@code mc} has unknown dimensions, the row count is taken from {@code df.count()} and
     * the column count from the number of data-frame columns (minus the ID column, if any); a
     * vector column contributes its size, read from the first row, instead of a single column.
     *
     * @param sc spark context (not read by this method)
     * @param df input data frame
     * @param mc data characteristics, updated in place when the dimensions are unknown
     * @param containsID whether the data frame carries the {@code __INDEX} row-ID column
     * @param out receives the column names and value types derived from the data-frame schema
     * @return the frame blocks produced per partition by {@code DataFrameToBinaryBlockFunction}
     */
    public static JavaPairRDD<Long, FrameBlock> dataFrameToBinaryBlock(JavaSparkContext sc, Dataset<Row> df, DataCharacteristics mc, boolean containsID, Pair<String[], Types.ValueType[]> out) {
        if (!mc.dimsKnown()) {
            int vectCol = getColVectFromDFSchema(df.schema(), containsID);
            int idOffset = containsID ? 1 : 0;
            long numRows = df.count();
            int plainCols = df.columns().length - idOffset;
            int extraVectCols = 0;
            if (vectCol >= 0) {
                // The vector column is already counted once in plainCols.
                extraVectCols = ((Vector)((Row)df.first()).get(idOffset + vectCol)).size() - 1;
            }
            long numCols = plainCols + extraVectCols;
            mc.set(numRows, numCols, mc.getBlocksize(), -1L);
        }

        // Pair every row with an index: reuse the ID column if present, else zip with index.
        JavaPairRDD<Row, Long> indexedRows;
        if (containsID) {
            PairFunction extractId = (PairFunction)new RDDConverterUtils.DataFrameExtractIDFunction(df.schema().fieldIndex("__INDEX"));
            indexedRows = df.javaRDD().mapToPair(extractId);
        } else {
            indexedRows = df.javaRDD().zipWithIndex();
        }

        int numFrameCols = (int)mc.getCols();
        String[] colnames = new String[numFrameCols];
        Types.ValueType[] fschema = new Types.ValueType[numFrameCols];
        int colVect = convertDFSchemaToFrameSchema(df.schema(), colnames, fschema, containsID);
        out.set(colnames, fschema);

        return indexedRows.mapPartitionsToPair(new DataFrameToBinaryBlockFunction(mc, colnames, fschema, containsID, colVect));
    }

    /**
     * Converts frame blocks into a Spark data frame with a leading ID column.
     *
     * @param sparkSession session used to create the data frame
     * @param in frame blocks keyed by row index
     * @param mc data characteristics; the number of columns must be known
     * @param schema frame schema; {@code null} is replaced by an all-STRING schema
     * @return the data frame built from the converted rows
     * @throws RuntimeException if the number of columns in {@code mc} is unknown
     */
    public static Dataset<Row> binaryBlockToDataFrame(SparkSession sparkSession, JavaPairRDD<Long, FrameBlock> in, DataCharacteristics mc, Types.ValueType[] schema) {
        if (!mc.colsKnown()) {
            throw new RuntimeException("Number of columns needed to convert binary block to data frame.");
        }

        JavaRDD<Row> rows = in.flatMap(new BinaryBlockToDataFrameFunction());

        Types.ValueType[] frameSchema = schema;
        if (frameSchema == null) {
            frameSchema = UtilFunctions.nCopies((int)mc.getCols(), Types.ValueType.STRING);
        }
        // The generated rows always start with the row ID, hence containsID = true.
        StructType rowSchema = convertFrameSchemaToDFSchema(frameSchema, true);
        return sparkSession.createDataFrame(rows, rowSchema);
    }

    /**
     * Legacy entry point taking an {@code SQLContext}; forwards to the {@code SparkSession}
     * overload using the session of the given context.
     *
     * @param sqlContext SQL context whose session is used
     * @param in frame blocks keyed by row index
     * @param mc data characteristics, forwarded as is
     * @param schema frame schema, forwarded as is
     * @return the result of the {@code SparkSession} overload
     */
    @Deprecated
    public static Dataset<Row> binaryBlockToDataFrame(SQLContext sqlContext, JavaPairRDD<Long, FrameBlock> in, DataCharacteristics mc, Types.ValueType[] schema) {
        SparkSession session = sqlContext.sparkSession();
        return binaryBlockToDataFrame(session, in, mc, schema);
    }

    /**
     * Builds a data-frame schema from a frame schema.
     *
     * <p>Columns are named {@code C1, C2, ...} and are all nullable. STRING, FP64, INT64 and
     * BOOLEAN map to the corresponding Spark types; any other value type falls back to the
     * string type and a warning is logged.
     *
     * @param fschema frame value types, one per column
     * @param containsID if {@code true}, a leading double-typed {@code __INDEX} column is added
     * @return the resulting struct type
     */
    public static StructType convertFrameSchemaToDFSchema(Types.ValueType[] fschema, boolean containsID) {
        ArrayList<StructField> dfFields = new ArrayList<StructField>();
        if (containsID) {
            dfFields.add(DataTypes.createStructField((String)"__INDEX", (DataType)DataTypes.DoubleType, (boolean)true));
        }
        for (int i = 0; i < fschema.length; i++) {
            Types.ValueType vt = fschema[i];
            DataType sparkType;
            switch (vt) {
                case FP64:
                    sparkType = DataTypes.DoubleType;
                    break;
                case INT64:
                    sparkType = DataTypes.LongType;
                    break;
                case BOOLEAN:
                    sparkType = DataTypes.BooleanType;
                    break;
                case STRING:
                    sparkType = DataTypes.StringType;
                    break;
                default:
                    // Unsupported value type: represent it as a string column.
                    sparkType = DataTypes.StringType;
                    LOG.warn((Object)("Using default type String for " + vt.toString()));
                    break;
            }
            // Column names are 1-based.
            dfFields.add(DataTypes.createStructField((String)("C" + (i + 1)), sparkType, (boolean)true));
        }
        return DataTypes.createStructType(dfFields);
    }

    /**
     * Derives frame column names and value types from a data-frame schema.
     *
     * <p>Double/float fields become FP64, long/integer fields INT64, boolean fields BOOLEAN and
     * everything else STRING. A single vector-typed field is expanded into as many FP64 columns
     * as are needed to fill {@code fschema}, named {@code <field>v0, <field>v1, ...}.
     *
     * @param dfschema data-frame schema to read
     * @param colnames output array receiving the frame column names
     * @param fschema output array receiving the frame value types; its length determines the
     *        expanded width of a vector column
     * @param containsID whether the first data-frame field is an ID column to be skipped
     * @return the frame position of the first expanded vector column, or {@code -1} if the
     *         schema has no vector column
     * @throws RuntimeException if more than one vector column is found
     */
    public static int convertDFSchemaToFrameSchema(StructType dfschema, String[] colnames, Types.ValueType[] fschema, boolean containsID) {
        final int firstField = containsID ? 1 : 0;
        // Width of the vector column = frame columns not accounted for by scalar fields.
        final int vectLength = fschema.length - (dfschema.fields().length - firstField) + 1;
        final int numFields = dfschema.fields().length;
        boolean vectSeen = false;
        int vectPos = -1;
        int target = 0;
        for (int field = firstField; field < numFields; field++) {
            StructField sf = dfschema.apply(field);
            colnames[target] = sf.name();
            DataType type = sf.dataType();
            if (type == DataTypes.DoubleType || type == DataTypes.FloatType) {
                fschema[target++] = Types.ValueType.FP64;
            } else if (type == DataTypes.LongType || type == DataTypes.IntegerType) {
                fschema[target++] = Types.ValueType.INT64;
            } else if (type == DataTypes.BooleanType) {
                fschema[target++] = Types.ValueType.BOOLEAN;
            } else if (type instanceof VectorUDT) {
                if (vectSeen) {
                    throw new RuntimeException("Found invalid second vector column.");
                }
                String baseName = colnames[target];
                vectPos = target;
                for (int k = 0; k < vectLength; k++) {
                    colnames[target] = baseName + "v" + k;
                    fschema[target++] = Types.ValueType.FP64;
                }
                vectSeen = true;
            } else {
                fschema[target++] = Types.ValueType.STRING;
            }
        }
        return vectPos;
    }

    /**
     * Finds the first vector-typed field of a data-frame schema.
     *
     * @param dfschema data-frame schema to scan
     * @param containsID whether the first field is an ID column to be skipped
     * @return the index of the first vector field relative to the first non-ID field, or
     *         {@code -1} if there is none
     */
    private static int getColVectFromDFSchema(StructType dfschema, boolean containsID) {
        final int idOffset = containsID ? 1 : 0;
        int field = idOffset;
        while (field < dfschema.fields().length) {
            if (dfschema.apply(field).dataType() instanceof VectorUDT) {
                return field - idOffset;
            }
            field++;
        }
        return -1;
    }

    /**
     * Reads a text file and converts every line into a {@code Row} via {@code RowGenerator}.
     *
     * @param sc spark context used to read the file
     * @param fnameIn path of the input text file
     * @param delim column delimiter handed to the row generator
     * @param schema value types handed to the row generator
     * @return the converted rows
     */
    public static JavaRDD<Row> csvToRowRDD(JavaSparkContext sc, String fnameIn, String delim, Types.ValueType[] schema) {
        JavaRDD<String> lines = sc.textFile(fnameIn);
        Function rowParser = (Function)new RowGenerator(schema, delim);
        return lines.map(rowParser);
    }

    /**
     * Converts every line of the given RDD into a {@code Row} via {@code RowGenerator}.
     *
     * @param sc spark context (not read by this method)
     * @param dataRdd CSV lines
     * @param delim column delimiter handed to the row generator
     * @param schema value types handed to the row generator
     * @return the converted rows
     */
    public static JavaRDD<Row> csvToRowRDD(JavaSparkContext sc, JavaRDD<String> dataRdd, String delim, Types.ValueType[] schema) {
        Function rowParser = (Function)new RowGenerator(schema, delim);
        return dataRdd.map(rowParser);
    }

    /**
     * Checks whether the keys of the given RDD are in non-decreasing order.
     *
     * <p>The keys of every partition are summarized by {@code SortingAnalysisFunction}; the
     * collected values must form a non-decreasing sequence starting from 0.
     * NOTE(review): presumably the analysis function emits a negative marker for an unsorted
     * partition, which this check would then reject — confirm against its implementation.
     *
     * @param in frame blocks keyed by row index
     * @return {@code true} if no collected value is smaller than its predecessor
     */
    private static boolean isSorted(JavaPairRDD<Long, FrameBlock> in) {
        List<Long> summaries = in.keys().mapPartitions((FlatMapFunction)new SortingAnalysisFunction()).collect();
        long lowerBound = 0L;
        for (int p = 0; p < summaries.size(); p++) {
            long key = summaries.get(p);
            if (key < lowerBound) {
                return false;
            }
            lowerBound = key;
        }
        return true;
    }

    /**
     * Spark function that cuts one frame block into matrix blocks.
     *
     * <p>For every row block and column block touched by the frame block, the matching region is
     * sliced out of the frame, converted to a matrix block and written into a fresh matrix block
     * of the target block dimensions. The emitted blocks may be partial and are merged by the
     * caller.
     */
    private static class BinaryBlockToMatrixBlockFunction
    implements PairFlatMapFunction<Tuple2<Long, FrameBlock>, MatrixIndexes, MatrixBlock> {
        private static final long serialVersionUID = -2654986510471835933L;
        // Characteristics of the input frame (rows/cols are read from here).
        private DataCharacteristics _mcIn;
        // Characteristics of the output matrix (block size is read from here).
        private DataCharacteristics _mcOut;

        public BinaryBlockToMatrixBlockFunction(DataCharacteristics mcIn, DataCharacteristics mcOut) {
            this._mcIn = mcIn;
            this._mcOut = mcOut;
        }

        /**
         * @param arg0 pair of the frame block's row index and the frame block itself
         * @return iterator over the (matrix index, matrix block) pairs cut from the frame block
         */
        public Iterator<Tuple2<MatrixIndexes, MatrixBlock>> call(Tuple2<Long, FrameBlock> arg0) throws Exception {
            long rowIndex = (Long)arg0._1();
            FrameBlock blk = (FrameBlock)arg0._2();
            ArrayList<Tuple2> ret = new ArrayList<Tuple2>();
            long rlen = this._mcIn.getRows();
            long clen = this._mcIn.getCols();
            int blen = this._mcOut.getBlocksize();
            // Range of row blocks covered by this frame block, and the last column block.
            long rstartix = UtilFunctions.computeBlockIndex(rowIndex, blen);
            long rendix = UtilFunctions.computeBlockIndex(rowIndex + (long)blk.getNumRows() - 1L, blen);
            long cendix = UtilFunctions.computeBlockIndex(blk.getNumColumns(), blen);
            for (long rix = rstartix; rix <= rendix; ++rix) {
                long rpos = UtilFunctions.computeCellIndex(rix, blen, 0);
                int lrlen = UtilFunctions.computeBlockSize(rlen, rix, blen);
                // fix..fix2: row range inside the frame block that falls into row block rix.
                int fix = (int)(rpos - rowIndex >= 0L ? rpos - rowIndex : 0L);
                int fix2 = (int)Math.min(rpos + (long)lrlen - rowIndex - 1L, (long)(blk.getNumRows() - 1));
                // mix..mix2: the corresponding row range inside the target matrix block.
                int mix = UtilFunctions.computeCellInBlock(rowIndex + (long)fix, blen);
                int mix2 = mix + (fix2 - fix);
                for (long cix = 1L; cix <= cendix; ++cix) {
                    long cpos = UtilFunctions.computeCellIndex(cix, blen, 0);
                    int lclen = UtilFunctions.computeBlockSize(clen, cix, blen);
                    MatrixBlock matrix = new MatrixBlock(lrlen, lclen, false);
                    FrameBlock frame = blk.slice(fix, fix2, (int)cpos - 1, (int)cpos + lclen - 2, new FrameBlock());
                    MatrixBlock mframe = DataConverter.convertToMatrixBlock(frame);
                    // Place the converted slice at rows mix..mix2 of the full-size target block.
                    ret.add(new Tuple2((Object)new MatrixIndexes(rix, cix), (Object)matrix.leftIndexingOperations(mframe, mix, mix2, 0, lclen - 1, new MatrixBlock(), MatrixObject.UpdateType.INPLACE_PINNED)));
                }
            }
            return ret.iterator();
        }
    }

    private static class MatrixToFrameBlockFunction
    implements PairFunction<Tuple2<MatrixIndexes, MatrixBlock>, Long, FrameBlock> {
        private static final long serialVersionUID = 3716019666116660815L;
        private int _blen = -1;

        public MatrixToFrameBlockFunction(DataCharacteristics mc) {
            this._blen = mc.getBlocksize();
        }

        public Tuple2<Long, FrameBlock> call(Tuple2<MatrixIndexes, MatrixBlock> arg0) throws Exception {
            FrameBlock fb = DataConverter.convertToFrameBlock((MatrixBlock)arg0._2());
            return new Tuple2((Object)((((MatrixIndexes)arg0._1()).getRowIndex() - 1L) * (long)this._blen + 1L), (Object)fb);
        }
    }

    private static class MatrixFrameReblockFunction
    implements PairFlatMapFunction<Tuple2<MatrixIndexes, MatrixBlock>, MatrixIndexes, MatrixBlock> {
        private static final long serialVersionUID = 6205071301074768437L;
        private int _blen = -1;
        private long _clen = -1L;
        private int _maxRowsPerBlock = -1;
        private boolean _sparse = false;

        public MatrixFrameReblockFunction(DataCharacteristics dc) {
            this._blen = dc.getBlocksize();
            this._blen = dc.getBlocksize();
            this._clen = dc.getCols();
            this._maxRowsPerBlock = MatrixFrameReblockFunction.computeBlockSize(dc);
            this._sparse = dc.dimsKnown() && MatrixBlock.evalSparseFormatInMemory(dc.getRows(), dc.getCols(), dc.getNonZeros() / (this._clen / (long)this._blen));
        }

        public Iterator<Tuple2<MatrixIndexes, MatrixBlock>> call(Tuple2<MatrixIndexes, MatrixBlock> arg0) throws Exception {
            ArrayList<Tuple2> ret = new ArrayList<Tuple2>();
            MatrixIndexes ix = (MatrixIndexes)arg0._1();
            MatrixBlock mb = (MatrixBlock)arg0._2();
            MatrixBlock mbreuse = new MatrixBlock();
            boolean sparse = this._sparse || mb.isInSparseFormat();
            long rowix = (ix.getRowIndex() - 1L) * (long)this._blen + 1L;
            long cl = (int)((ix.getColumnIndex() - 1L) * (long)this._blen);
            long cu = Math.min(cl + (long)mb.getNumColumns() - 1L, this._clen);
            for (int i = 0; i < mb.getNumRows(); i += this._maxRowsPerBlock) {
                int ru = Math.min(i + this._maxRowsPerBlock, mb.getNumRows()) - 1;
                long rix = UtilFunctions.computeBlockIndex(rowix + (long)i, this._maxRowsPerBlock);
                MatrixIndexes ixout = new MatrixIndexes(rix, 1L);
                MatrixBlock out = new MatrixBlock(ru - i + 1, (int)this._clen, sparse);
                out.copy(0, out.getNumRows() - 1, (int)cl, (int)cu, mb.slice(i, ru, 0, mb.getNumColumns() - 1, mbreuse), true);
                out.examSparsity();
                ret.add(new Tuple2((Object)ixout, (Object)out));
            }
            return ret.iterator();
        }

        public static int computeBlockSize(DataCharacteristics dc) {
            int blen = dc.getBlocksize();
            int basic = Math.max((int)(1000000L / dc.getCols()), 1);
            int div = (int)Math.ceil((double)blen / (double)basic);
            while (blen % div != 0) {
                ++div;
            }
            return blen / div;
        }
    }

    private static class TextToBinaryBlockFunction
    extends CellToBinaryBlockFunction
    implements PairFlatMapFunction<Iterator<Text>, Long, FrameBlock> {
        private static final long serialVersionUID = -2042208027876880588L;
        Types.ValueType[] _schema = null;

        protected TextToBinaryBlockFunction(DataCharacteristics mc, Types.ValueType[] schema) {
            super(mc);
            this._schema = schema;
        }

        public Iterator<Tuple2<Long, FrameBlock>> call(Iterator<Text> arg0) throws Exception {
            ArrayList<Tuple2<Long, FrameBlock>> ret = new ArrayList<Tuple2<Long, FrameBlock>>();
            FrameReblockBuffer rbuff = new FrameReblockBuffer(this._bufflen, this._rlen, this._clen, this._schema);
            FastStringTokenizer st = new FastStringTokenizer(' ');
            while (arg0.hasNext()) {
                String strVal = arg0.next().toString();
                if (strVal.startsWith("%")) continue;
                st.reset(strVal);
                long row = st.nextLong();
                long col = st.nextLong();
                Object val = UtilFunctions.stringToObject(this._schema[(int)col - 1], st.nextToken());
                if (rbuff.getSize() >= rbuff.getCapacity()) {
                    this.flushBufferToList(rbuff, ret);
                }
                rbuff.appendCell(row, col, val);
            }
            this.flushBufferToList(rbuff, ret);
            return ret.iterator();
        }
    }

    /**
     * Shared base for cell-to-block functions: keeps the frame dimensions, a buffer length of
     * {@code min(rows * cols, 1000000)} and a helper to flush a reblock buffer into a result list.
     */
    private static abstract class CellToBinaryBlockFunction
    implements Serializable {
        private static final long serialVersionUID = -729614449626680946L;
        protected int _bufflen = -1;
        protected long _rlen = -1L;
        protected long _clen = -1L;

        protected CellToBinaryBlockFunction(DataCharacteristics mc) {
            _rlen = mc.getRows();
            _clen = mc.getCols();
            long totalCells = _rlen * _clen;
            _bufflen = (int)Math.min(totalCells, 1000000L);
        }

        /**
         * Flushes the buffer into frame blocks and appends them, converted to tuples, to
         * {@code ret}.
         *
         * @param rbuff buffer to flush
         * @param ret list receiving the flushed blocks
         * @throws DMLRuntimeException declared for the flush of the buffer
         */
        protected void flushBufferToList(FrameReblockBuffer rbuff, ArrayList<Tuple2<Long, FrameBlock>> ret) throws DMLRuntimeException {
            ArrayList<Pair<Long, FrameBlock>> flushed = new ArrayList<Pair<Long, FrameBlock>>();
            rbuff.flushBufferToBinaryBlocks(flushed);
            ret.addAll(SparkUtils.fromIndexedFrameBlock(flushed));
        }
    }

    private static class BinaryBlockToDataFrameFunction
    implements FlatMapFunction<Tuple2<Long, FrameBlock>, Row> {
        private static final long serialVersionUID = 8093340778966667460L;

        private BinaryBlockToDataFrameFunction() {
        }

        public Iterator<Row> call(Tuple2<Long, FrameBlock> arg0) throws Exception {
            long rowIndex = (Long)arg0._1();
            FrameBlock blk = (FrameBlock)arg0._2();
            ArrayList<Row> ret = new ArrayList<Row>();
            int rows = blk.getNumRows();
            int cols = blk.getNumColumns();
            for (int i = 0; i < rows; ++i) {
                Object[] row = new Object[cols + 1];
                row[0] = (double)rowIndex++;
                for (int j = 0; j < cols; ++j) {
                    row[j + 1] = blk.get(i, j);
                }
                ret.add(RowFactory.create((Object[])row));
            }
            return ret.iterator();
        }
    }

    private static class DataFrameToBinaryBlockFunction
    implements PairFlatMapFunction<Iterator<Tuple2<Row, Long>>, Long, FrameBlock> {
        private static final long serialVersionUID = 2269315691094111843L;
        private long _clen = -1L;
        private String[] _colnames = null;
        private Types.ValueType[] _schema = null;
        private boolean _containsID = false;
        private int _colVect = -1;
        private int _maxRowsPerBlock = -1;

        public DataFrameToBinaryBlockFunction(DataCharacteristics mc, String[] colnames, Types.ValueType[] schema, boolean containsID, int colVect) {
            this._clen = mc.getCols();
            this._colnames = colnames;
            this._schema = schema;
            this._containsID = containsID;
            this._colVect = colVect;
            this._maxRowsPerBlock = Math.max((int)(1000000L / this._clen), 1);
        }

        public Iterator<Tuple2<Long, FrameBlock>> call(Iterator<Tuple2<Row, Long>> arg0) throws Exception {
            ArrayList<Tuple2<Long, FrameBlock>> ret = new ArrayList<Tuple2<Long, FrameBlock>>();
            long ix = -1L;
            FrameBlock fb = null;
            Object[] tmprow = new Object[(int)this._clen];
            while (arg0.hasNext()) {
                int off;
                Tuple2<Row, Long> tmp = arg0.next();
                Row row = (Row)tmp._1();
                long rowix = (Long)tmp._2() + 1L;
                if (fb == null || fb.getNumRows() == this._maxRowsPerBlock) {
                    if (fb != null) {
                        DataFrameToBinaryBlockFunction.flushBlocksToList(ix, fb, ret);
                    }
                    ix = rowix;
                    fb = new FrameBlock(this._schema, this._colnames);
                }
                int pos = 0;
                for (int i = off = this._containsID ? 1 : 0; i < row.size(); ++i) {
                    if (i - off == this._colVect) {
                        Vector vect = (Vector)row.get(i);
                        for (int j = 0; j < vect.size(); ++j) {
                            tmprow[pos++] = vect.apply(j);
                        }
                        continue;
                    }
                    tmprow[pos] = UtilFunctions.objectToObject(this._schema[pos], row.get(i));
                    ++pos;
                }
                fb.appendRow(tmprow);
            }
            DataFrameToBinaryBlockFunction.flushBlocksToList(ix, fb, ret);
            return ret.iterator();
        }

        private static void flushBlocksToList(Long ix, FrameBlock fb, ArrayList<Tuple2<Long, FrameBlock>> ret) {
            if (fb != null && fb.getNumRows() >= 0) {
                ret.add((Tuple2<Long, FrameBlock>)new Tuple2((Object)ix, (Object)fb));
            }
        }
    }

    private static class BinaryBlockToCSVFunction
    implements FlatMapFunction<Tuple2<Long, FrameBlock>, String> {
        private static final long serialVersionUID = 8020608184930291069L;
        private FileFormatPropertiesCSV _props = null;

        public BinaryBlockToCSVFunction(FileFormatPropertiesCSV props) {
            this._props = props;
        }

        public Iterator<String> call(Tuple2<Long, FrameBlock> arg0) throws Exception {
            Long ix = (Long)arg0._1();
            FrameBlock blk = (FrameBlock)arg0._2();
            ArrayList<String> ret = new ArrayList<String>();
            StringBuilder sb = new StringBuilder();
            if (ix == 1L) {
                int j;
                if (this._props.hasHeader()) {
                    for (j = 1; j <= blk.getNumColumns(); ++j) {
                        sb.append(blk.getColumnNames()[j] + (j < blk.getNumColumns() - 1 ? this._props.getDelim() : ""));
                    }
                    ret.add(sb.toString());
                    sb.setLength(0);
                }
                if (!blk.isColumnMetadataDefault()) {
                    sb.append("#Meta\u00b7MV" + this._props.getDelim());
                    for (j = 0; j < blk.getNumColumns(); ++j) {
                        sb.append(blk.getColumnMetadata(j).getMvValue() + (j < blk.getNumColumns() - 1 ? this._props.getDelim() : ""));
                    }
                    ret.add(sb.toString());
                    sb.setLength(0);
                    sb.append("#Meta\u00b7ND" + this._props.getDelim());
                    for (j = 0; j < blk.getNumColumns(); ++j) {
                        sb.append(blk.getColumnMetadata(j).getNumDistinct() + (j < blk.getNumColumns() - 1 ? this._props.getDelim() : ""));
                    }
                    ret.add(sb.toString());
                    sb.setLength(0);
                }
            }
            Iterator<String[]> iter = blk.getStringRowIterator();
            while (iter.hasNext()) {
                String[] row = iter.next();
                for (int j = 0; j < row.length; ++j) {
                    if (j != 0) {
                        sb.append(this._props.getDelim());
                    }
                    if (row[j] == null) continue;
                    sb.append(row[j]);
                }
                ret.add(sb.toString());
                sb.setLength(0);
            }
            return ret.iterator();
        }
    }

    private static class CSVToBinaryBlockFunction
    implements PairFlatMapFunction<Iterator<Tuple2<Text, Long>>, Long, FrameBlock> {
        private static final long serialVersionUID = -1976803898174960086L;
        private long _clen = -1L;
        private boolean _hasHeader = false;
        private String _delim = null;
        private int _maxRowsPerBlock = -1;
        private Types.ValueType[] _schema = null;
        private String[] _colnames = null;
        private List<String> _mvMeta = null;
        private List<String> _ndMeta = null;

        public CSVToBinaryBlockFunction(DataCharacteristics mc, Types.ValueType[] schema, boolean hasHeader, String delim) {
            this._clen = mc.getCols();
            this._schema = schema;
            this._hasHeader = hasHeader;
            this._delim = delim;
            this._maxRowsPerBlock = Math.max((int)(1000000L / this._clen), 1);
        }

        public Iterator<Tuple2<Long, FrameBlock>> call(Iterator<Tuple2<Text, Long>> arg0) throws Exception {
            ArrayList<Tuple2<Long, FrameBlock>> ret = new ArrayList<Tuple2<Long, FrameBlock>>();
            long ix = -1L;
            FrameBlock fb = null;
            String[] tmprow = new String[(int)this._clen];
            while (arg0.hasNext()) {
                Tuple2<Text, Long> tmp = arg0.next();
                String row = ((Text)tmp._1()).toString().trim();
                long rowix = (Long)tmp._2();
                if (this._hasHeader && rowix == 0L) {
                    this._colnames = row.split(this._delim);
                    continue;
                }
                if (row.startsWith("#Meta\u00b7MV")) {
                    this._mvMeta = Arrays.asList(Arrays.copyOfRange(IOUtilFunctions.splitCSV(row, this._delim), 1, (int)this._clen + 1));
                    continue;
                }
                if (row.startsWith("#Meta\u00b7ND")) {
                    this._ndMeta = Arrays.asList(Arrays.copyOfRange(IOUtilFunctions.splitCSV(row, this._delim), 1, (int)this._clen + 1));
                    continue;
                }
                rowix += (long)((this._hasHeader ? 0 : 1) - (this._mvMeta == null ? 0 : 2));
                if (fb == null || fb.getNumRows() == this._maxRowsPerBlock) {
                    if (fb != null) {
                        CSVToBinaryBlockFunction.flushBlocksToList(ix, fb, ret);
                    }
                    ix = rowix;
                    fb = this.createFrameBlock();
                }
                fb.appendRow(IOUtilFunctions.splitCSV(row, this._delim, tmprow));
            }
            CSVToBinaryBlockFunction.flushBlocksToList(ix, fb, ret);
            return ret.iterator();
        }

        /**
         * Builds a new, empty {@code FrameBlock} configured from this function's state.
         *
         * <p>The block is constructed with {@code _schema}, has its columns allocated for
         * {@code _maxRowsPerBlock} rows, and is then reset to zero rows so that rows can be
         * appended by the caller. Column names, missing-value metadata and
         * number-of-distinct metadata are applied only when the corresponding field has
         * been populated.
         *
         * @return the freshly initialized frame block
         * @throws NumberFormatException if an entry of {@code _ndMeta} is not a parsable long
         */
        private FrameBlock createFrameBlock() {
            FrameBlock block = new FrameBlock(this._schema);
            // Allocate up front, then reset to an empty (0-row) state before any append.
            block.ensureAllocatedColumns(this._maxRowsPerBlock);
            block.reset(0, false);
            block.setNumRows(0);

            if (this._colnames != null) {
                block.setColumnNames(this._colnames);
            }

            // Per-column missing-value metadata, if a "#Meta·MV" line was seen.
            if (this._mvMeta != null) {
                for (int col = 0; col < this._clen; col++) {
                    block.getColumnMetadata(col).setMvValue(this._mvMeta.get(col));
                }
            }

            // Per-column distinct-count metadata, if a "#Meta·ND" line was seen.
            if (this._ndMeta != null) {
                for (int col = 0; col < this._clen; col++) {
                    long numDistinct = Long.parseLong(this._ndMeta.get(col));
                    block.getColumnMetadata(col).setNumDistinct(numDistinct);
                }
            }
            return block;
        }

        /**
         * Appends the pair {@code (ix, fb)} to {@code ret} when a block is present.
         *
         * <p>NOTE(review): the row-count test is "non-negative" rather than "strictly
         * positive", so a block with zero rows would also be emitted — confirm this is
         * intended.
         *
         * @param ix  key to associate with the block in the output pair
         * @param fb  block to emit; nothing is added when this is {@code null}
         * @param ret output list that receives the new pair
         */
        private static void flushBlocksToList(Long ix, FrameBlock fb, ArrayList<Tuple2<Long, FrameBlock>> ret) {
            if (fb == null || fb.getNumRows() < 0) {
                return;
            }
            ret.add(new Tuple2<>(ix, fb));
        }
    }

    private static class TextToStringFunction
    implements Function<Text, String> {
        private static final long serialVersionUID = -2744814934501782747L;

        private TextToStringFunction() {
        }

        public String call(Text v1) throws Exception {
            return v1.toString();
        }
    }

    public static class LongWritableFrameToLongFrameFunction
    implements PairFunction<Tuple2<LongWritable, FrameBlock>, Long, FrameBlock> {
        private static final long serialVersionUID = -1232439643533739078L;

        public Tuple2<Long, FrameBlock> call(Tuple2<LongWritable, FrameBlock> arg0) throws Exception {
            return new Tuple2((Object)((LongWritable)arg0._1).get(), arg0._2);
        }
    }

    public static class LongFrameToLongWritableFrameFunction
    implements PairFunction<Tuple2<Long, FrameBlock>, LongWritable, FrameBlock> {
        private static final long serialVersionUID = -1467314923206783333L;

        public Tuple2<LongWritable, FrameBlock> call(Tuple2<Long, FrameBlock> arg0) throws Exception {
            return new Tuple2((Object)new LongWritable(((Long)arg0._1).longValue()), arg0._2);
        }
    }

    public static class LongWritableTextToLongTextFunction
    implements PairFunction<Tuple2<LongWritable, Text>, Long, Text> {
        private static final long serialVersionUID = -5408386071466175348L;

        public Tuple2<Long, Text> call(Tuple2<LongWritable, Text> arg0) throws Exception {
            return new Tuple2((Object)new Long(((LongWritable)arg0._1).get()), arg0._2);
        }
    }

    public static class LongWritableToSerFunction
    implements PairFunction<Tuple2<LongWritable, FrameBlock>, LongWritable, FrameBlock> {
        private static final long serialVersionUID = 2286037080400222528L;

        public Tuple2<LongWritable, FrameBlock> call(Tuple2<LongWritable, FrameBlock> arg0) throws Exception {
            return new Tuple2((Object)new SerLongWritable(((LongWritable)arg0._1).get()), arg0._2);
        }
    }

    private static class StringToSerTextFunction
    implements PairFunction<String, LongWritable, Text> {
        private static final long serialVersionUID = 8683232211035837695L;

        private StringToSerTextFunction() {
        }

        public Tuple2<LongWritable, Text> call(String arg0) throws Exception {
            return new Tuple2((Object)new SerLongWritable(1L), (Object)new SerText(arg0));
        }
    }

    private static class SortingAnalysisFunction
    implements FlatMapFunction<Iterator<Long>, Long> {
        private static final long serialVersionUID = -5789003262381127469L;

        private SortingAnalysisFunction() {
        }

        public Iterator<Long> call(Iterator<Long> arg0) throws Exception {
            long max = 0L;
            while (max >= 0L && arg0.hasNext()) {
                long val = arg0.next();
                max = val < max ? -1L : val;
            }
            ArrayList<Long> ret = new ArrayList<Long>();
            ret.add(max);
            return ret.iterator();
        }
    }

    private static class RowGenerator
    implements Function<String, Row> {
        private static final long serialVersionUID = -6736256507697511070L;
        private Types.ValueType[] _schema = null;
        private String _delim = null;

        public RowGenerator(Types.ValueType[] schema, String delim) {
            this._schema = schema;
            this._delim = delim;
        }

        public Row call(String record) throws Exception {
            String[] fields = IOUtilFunctions.splitCSV(record, this._delim);
            Object[] objects = new Object[fields.length];
            for (int i = 0; i < fields.length; ++i) {
                objects[i] = UtilFunctions.stringToObject(this._schema[i], fields[i]);
            }
            return RowFactory.create((Object[])objects);
        }
    }
}

