/*
 * Decompiled with CFR 0.152.
 */
package org.tugraz.sysds.runtime.matrix.data;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.concurrent.Callable;
import org.tugraz.sysds.hops.OptimizerUtils;
import org.tugraz.sysds.runtime.codegen.LibSpoofPrimitives;
import org.tugraz.sysds.runtime.data.SparseBlock;
import org.tugraz.sysds.runtime.matrix.data.DnnParameters;
import org.tugraz.sysds.runtime.matrix.data.LibMatrixDNN;
import org.tugraz.sysds.runtime.matrix.data.LibMatrixDNNHelper;
import org.tugraz.sysds.runtime.matrix.data.MatrixBlock;

public class LibMatrixDNNPooling {
    /**
     * Creates the forward-pooling workers, one per contiguous range of batch rows.
     *
     * <p>Rows {@code 0 .. params.N-1} are cut into ranges of {@code ceil(N / k / 2)} rows,
     * where {@code k} is the result of
     * {@code OptimizerUtils.getConstrainedNumThreads(params.numThreads)}. Each range gets a
     * {@code SparsePooling} worker when {@code params.input1} reports a sparse format and a
     * {@code DensePooling} worker otherwise.
     *
     * @param params   pooling parameters (input, output, dimensions, thread count)
     * @param poolType pooling variant handed to every worker
     * @return the workers in ascending row order; empty when the loop condition never holds
     */
    public static ArrayList<Callable<Long>> getPoolingWorkers(DnnParameters params, LibMatrixDNN.PoolingType poolType) {
        ArrayList<Callable<Long>> tasks = new ArrayList<Callable<Long>>();
        int numThreads = OptimizerUtils.getConstrainedNumThreads(params.numThreads);
        int rowsPerTask = (int)Math.ceil((double)params.N / (double)numThreads / 2.0);
        for (int t = 0; t * rowsPerTask < params.N; t++) {
            Callable<Long> worker;
            if (params.input1.isInSparseFormat()) {
                int lower = t * rowsPerTask;
                int upper = Math.min((t + 1) * rowsPerTask, params.N);
                worker = new SparsePooling(lower, upper, params, poolType);
            } else {
                int lower = t * rowsPerTask;
                int upper = Math.min((t + 1) * rowsPerTask, params.N);
                worker = new DensePooling(lower, upper, params, poolType);
            }
            tasks.add(worker);
        }
        return tasks;
    }

    /**
     * Creates the backward-pooling workers, one per contiguous range of batch rows.
     *
     * <p>The row ranges are sized as {@code ceil(N / k / 2)} with {@code k} taken from
     * {@code OptimizerUtils.getConstrainedNumThreads(params.numThreads)}. For
     * {@code PoolingType.MAX} the worker class is selected by the sparse/dense format of
     * {@code params.input1} and {@code params.input2}; for every other pooling type only
     * the format of {@code params.input2} matters and an averaging worker is created.
     *
     * @param params              pooling parameters (inputs, output, dimensions)
     * @param performReluBackward forwarded to the max-pooling workers; not used by the
     *                            averaging workers
     * @param poolType            pooling variant that decides the worker family
     * @return the workers in ascending row order
     */
    public static ArrayList<Callable<Long>> getPoolingBackwardWorkers(DnnParameters params, boolean performReluBackward, LibMatrixDNN.PoolingType poolType) {
        ArrayList<Callable<Long>> tasks = new ArrayList<Callable<Long>>();
        int numThreads = OptimizerUtils.getConstrainedNumThreads(params.numThreads);
        int rowsPerTask = (int)Math.ceil((double)params.N / (double)numThreads / 2.0);

        if (poolType != LibMatrixDNN.PoolingType.MAX) {
            // Averaging variant: only the format of the incoming gradient is relevant.
            boolean sparseGrad = params.input2.isInSparseFormat();
            for (int t = 0; t * rowsPerTask < params.N; t++) {
                int lower = t * rowsPerTask;
                int upper = Math.min((t + 1) * rowsPerTask, params.N);
                Callable<Long> worker;
                if (sparseGrad) {
                    worker = new AvgPoolingBackwardSparse(lower, upper, params);
                } else {
                    worker = new AvgPoolingBackwardDense(lower, upper, params);
                }
                tasks.add(worker);
            }
            return tasks;
        }

        // Max variant: four specializations keyed on (input format, gradient format).
        boolean sparseInput = params.input1.isInSparseFormat();
        boolean sparseDout = params.input2.isInSparseFormat();
        for (int t = 0; t * rowsPerTask < params.N; t++) {
            int lower = t * rowsPerTask;
            int upper = Math.min((t + 1) * rowsPerTask, params.N);
            Callable<Long> worker;
            if (sparseInput) {
                if (sparseDout) {
                    worker = new PoolingBackwardSparseSparse(lower, upper, params, performReluBackward);
                } else {
                    worker = new PoolingBackwardSparseDense(lower, upper, params, performReluBackward);
                }
            } else {
                if (sparseDout) {
                    worker = new PoolingBackwardDenseSparse(lower, upper, params, performReluBackward);
                } else {
                    worker = new PoolingBackwardDenseDense(lower, upper, params, performReluBackward);
                }
            }
            tasks.add(worker);
        }
        return tasks;
    }

    /**
     * Pools rows {@code rl .. ru-1} of a dense input without using precomputed window
     * index arrays; the window of output cell {@code (p, q)} starts at input cell
     * {@code (p, q)} (i.e. stride 1 and no padding, as the method name states).
     *
     * <p>Two paths exist:
     * <ul>
     *   <li>{@code P == 1 && Q == 1 && W == 1}: each channel produces a single value computed
     *       from its first {@code min(R, H)} entries, starting from {@code minVal}.</li>
     *   <li>otherwise: the output region is first set to {@code minVal} and then updated
     *       window row by window row.</li>
     * </ul>
     *
     * <p>The output region is {@code out[oi .. oi + (ru - rl) * C * P * Q)}; exactly this region
     * is initialized and written, independent of the absolute value of {@code rl}.
     *
     * @param pType  {@code PoolingType.MAX} selects the max reduction; every other value
     *               selects the scaled-sum (average) reduction
     * @param minVal start value of every output cell
     * @param pFact  factor applied to each window-row sum on the average path
     * @param in     dense input values
     * @param out    dense output values, updated in place
     * @param rl     first row to process (inclusive)
     * @param ru     last row to process (exclusive)
     * @param ii     position in {@code in} where row {@code rl} starts
     * @param oi     position in {@code out} where the result of row {@code rl} starts
     * @param C      number of channels
     * @param P      output height
     * @param Q      output width
     * @param R      window height
     * @param S      window width
     * @param H      input height
     * @param W      input width
     */
    public static void poolingDenseStride1Pad0(LibMatrixDNN.PoolingType pType, double minVal, double pFact, double[] in, double[] out, int rl, int ru, int ii, int oi, int C, int P, int Q, int R, int S, int H, int W) {
        boolean max = pType == LibMatrixDNN.PoolingType.MAX;
        int CHW = C * H * W;
        if (P == 1 && Q == 1 && W == 1) {
            // Quick path: one output value per channel, reduced over a single column segment.
            int lenh = Math.min(R, H);
            for (int i = rl; i < ru; ++i, oi += C) {
                int off = ii + (i - rl) * CHW;
                for (int c = 0; c < C; ++c, off += H) {
                    out[oi + c] = max ? LibMatrixDNNPooling.max(minVal, in, off, lenh) : LibMatrixDNNPooling.avg(minVal, in, off, lenh, pFact);
                }
            }
        } else {
            int CPQ = C * P * Q;
            int HW = H * W;
            // Initialize exactly the cells written below. They start at oi, so the fill must be
            // anchored at oi as well; anchoring it at rl * CPQ is only correct when the caller
            // happens to pass oi == rl * CPQ and otherwise leaves the accumulators uninitialized
            // (or runs past the end of a row-local output buffer).
            Arrays.fill(out, oi, oi + (ru - rl) * CPQ, minVal);
            for (int i = rl; i < ru; ++i) {
                int off = ii + (i - rl) * CHW;
                int oix = oi + (i - rl) * CPQ;
                for (int c = 0; c < C; ++c, off += HW) {
                    for (int p = 0; p < P; ++p, oix += Q) {
                        // Window rows of output row p are input rows p .. p+R-1, clipped to H.
                        int hEnd = Math.min(p + R, H);
                        for (int h = p; h < hEnd; ++h) {
                            int off2 = off + h * W;
                            for (int q = 0; q < Q; ++q) {
                                int len = Math.min(S, W - q);
                                out[oix + q] = max ? LibMatrixDNNPooling.max(out[oix + q], in, off2 + q, len) : LibMatrixDNNPooling.avg(out[oix + q], in, off2 + q, len, pFact);
                            }
                        }
                    }
                }
            }
        }
    }

    /**
     * Adds a scaled segment sum to a running value.
     *
     * @param aval              running value the scaled sum is added to
     * @param b                 source array
     * @param bi                start position passed to {@code LibSpoofPrimitives.vectSum}
     * @param len               segment length passed to {@code LibSpoofPrimitives.vectSum}
     * @param poolingMultiplier factor applied to the segment sum
     * @return {@code vectSum(b, bi, len) * poolingMultiplier + aval}
     */
    private static double avg(double aval, double[] b, int bi, int len, double poolingMultiplier) {
        double segmentSum = LibSpoofPrimitives.vectSum(b, bi, len);
        return segmentSum * poolingMultiplier + aval;
    }

    /**
     * Returns the maximum of a start value and a segment of an array.
     *
     * @param aval start value; returned unchanged when {@code len} is not positive
     * @param b    source array
     * @param bi   first position of the segment
     * @param len  number of entries in the segment
     * @return the {@code Math.max} fold of {@code aval} and {@code b[bi .. bi+len-1]}
     */
    private static double max(double aval, double[] b, int bi, int len) {
        final int end = bi + len;
        double best = aval;
        int pos = bi;
        while (pos < end) {
            best = Math.max(best, b[pos]);
            pos++;
        }
        return best;
    }

    /**
     * Finds the position of the largest value inside the window of output cell
     * {@code (p, q)}.
     *
     * <p>The window spans rows {@code start_indexes_h[p] .. end_indexes_h[p]-1} and columns
     * {@code start_indexes_w[q] .. end_indexes_w[q]-1} of {@code params}. When
     * {@code performReluBackward} is set, negative values are treated as {@code 0.0} before
     * the comparison. The comparison is strict, so the first of several equal maxima wins.
     *
     * @param p                   output row whose window is scanned
     * @param q                   output column whose window is scanned
     * @param inputOffset         position in {@code inputArray} of cell (0, 0) of the scanned plane
     * @param inputArray          dense input values
     * @param params              source of the window bounds and of the plane width {@code W}
     * @param performReluBackward whether negative values are clamped to zero before comparing
     * @return the absolute position in {@code inputArray} of the maximum, or {@code -1} when no
     *         scanned value is greater than {@code -Double.MAX_VALUE} (including an empty window)
     */
    private static int getMaxIndex(int p, int q, int inputOffset, double[] inputArray, DnnParameters params, boolean performReluBackward) {
        final int hBegin = params.start_indexes_h[p];
        final int hEnd = params.end_indexes_h[p];
        final int wBegin = params.start_indexes_w[q];
        final int wEnd = params.end_indexes_w[q];
        int bestIndex = -1;
        double bestVal = -Double.MAX_VALUE;
        for (int h = hBegin; h < hEnd; h++) {
            for (int w = wBegin; w < wEnd; w++) {
                int pos = inputOffset + h * params.W + w;
                double candidate = inputArray[pos];
                if (performReluBackward && candidate < 0.0) {
                    candidate = 0.0;
                }
                if (bestVal < candidate) {
                    bestIndex = pos;
                    bestVal = candidate;
                }
            }
        }
        return bestIndex;
    }

    /**
     * Max-pooling backward worker for a sparse input ({@code params.input1}) and a sparse
     * incoming gradient ({@code params.input2}). The forward pass that locates the maxima
     * is inherited; only the gradient scatter step is replaced by a sparse-aware version.
     */
    private static class PoolingBackwardSparseSparse
    extends PoolingBackwardSparseDense {
        /**
         * @param rl     first row to process (inclusive)
         * @param ru     last row to process (exclusive)
         * @param params pooling parameters; {@code input1} and {@code input2} must be sparse
         *               and {@code output} must have a dense block
         * @param relu   forwarded to the parent class
         * @throws RuntimeException if the output has no dense block or an input is not sparse
         */
        public PoolingBackwardSparseSparse(int rl, int ru, DnnParameters params, boolean relu) {
            super(rl, ru, params, relu, params.input2, params.output);
            if (this.output.getDenseBlock() == null) {
                throw new RuntimeException("Incorrect usage: empty outputs");
            }
            boolean bothSparse = params.input1.isInSparseFormat() && params.input2.isInSparseFormat();
            if (!bothSparse) {
                throw new RuntimeException("Incorrect usage: Call optimized versions");
            }
        }

        /**
         * Adds the stored gradient entries of row {@code n}, channel {@code c}, to the output
         * cells recorded in {@code maxIx}.
         *
         * @param maxIx     per output cell of the channel, the plane-relative position of its maximum
         * @param outOffset position in the output array of the channel's first cell
         * @param n         row of the gradient block
         * @param c         channel
         * @param C         number of channels
         * @param Q         output width
         * @param PQ        output cells per channel
         * @param CPQ       output cells per row (not used here)
         */
        @Override
        protected void maxpoolingBackward(int[] maxIx, int outOffset, int n, int c, int C, int Q, int PQ, int CPQ) {
            SparseBlock gradRows = this.doutput.getSparseBlock();
            double[] target = this.output.getDenseBlockValues();
            if (gradRows.isEmpty(n)) {
                return;
            }
            int rowStart = gradRows.pos(n);
            int rowLen = gradRows.size(n);
            int[] cols = gradRows.indexes(n);
            double[] vals = gradRows.values(n);

            // Row-relative range of the entries whose column falls into channel c;
            // a negative lookup result is mapped to the end of the row.
            int first = (c == 0) ? 0 : gradRows.posFIndexGTE(n, c * PQ);
            int last = (c + 1 == C) ? rowLen : gradRows.posFIndexGTE(n, (c + 1) * PQ);
            if (first < 0) {
                first = rowLen;
            }
            if (last < 0) {
                last = rowLen;
            }

            int end = rowStart + last;
            for (int k = rowStart + first; k < end; k++) {
                int p = cols[k] % PQ / Q;
                int q = cols[k] % Q;
                int cell = p * Q + q;
                target[outOffset + maxIx[cell]] += vals[k];
            }
        }
    }

    /**
     * Max-pooling backward worker for a sparse input ({@code params.input1}) and a dense
     * incoming gradient. For every row in {@code [_rl, _ru)} and every channel it first
     * determines, per output cell, the position of the maximum input value
     * ({@link #maxpoolingForward}) and then adds the gradient values to the output at
     * those positions ({@link #maxpoolingBackward}).
     */
    private static class PoolingBackwardSparseDense
    implements Callable<Long> {
        /** First row to process (inclusive). */
        private final int _rl;
        /** Last row to process (exclusive). */
        private final int _ru;
        private final DnnParameters _params;
        /** When set, negative input values are treated as 0.0 while searching the maximum. */
        private final boolean reluBack;
        /** Incoming gradient block. */
        protected final MatrixBlock doutput;
        /** Block the result is accumulated into. */
        protected final MatrixBlock output;

        /**
         * Plain field initialization without any validation; used by the public
         * constructor and by subclasses that perform their own checks.
         */
        protected PoolingBackwardSparseDense(int rl, int ru, DnnParameters params, boolean relu, MatrixBlock dout, MatrixBlock out) {
            this._rl = rl;
            this._ru = ru;
            this._params = params;
            this.reluBack = relu;
            this.doutput = dout;
            this.output = out;
        }

        /**
         * Creates a worker using {@code params.input2} as gradient and {@code params.output}
         * as result block.
         *
         * @throws RuntimeException if the gradient or the output has no dense block, or if
         *         {@code params.input1} is not in sparse format
         */
        public PoolingBackwardSparseDense(int rl, int ru, DnnParameters params, boolean relu) {
            this(rl, ru, params, relu, params.input2, params.output);
            if (this.doutput.getDenseBlock() == null || this.output.getDenseBlock() == null) {
                throw new RuntimeException("Incorrect usage: empty inputs");
            }
            if (!params.input1.isInSparseFormat()) {
                throw new RuntimeException("Incorrect usage: sparse input1 expected");
            }
        }

        /**
         * Processes all rows of the assigned range.
         *
         * @return the value of {@code output.recomputeNonZeros(_rl, _ru - 1)}
         */
        @Override
        public Long call() throws Exception {
            int P = this._params.P;
            int Q = this._params.Q;
            int W = this._params.W;
            int C = this._params.C;
            int R = this._params.R;
            int S = this._params.S;
            int padh = this._params.pad_h;
            int padw = this._params.pad_w;
            int strideh = this._params.stride_h;
            int stridew = this._params.stride_w;
            int PQ = this._params.P * this._params.Q;
            int CPQ = this._params.C * this._params.P * this._params.Q;
            int HW = this._params.H * this._params.W;
            int CHW = this._params.C * this._params.H * this._params.W;
            // Scratch buffers reused for every (row, channel) pair: one slot per output cell.
            double[] maxVal = new double[PQ];
            int[] maxIx = new int[PQ];
            for (int n = this._rl; n < this._ru; ++n) {
                for (int c = 0; c < C; ++c) {
                    int outOffset = n * CHW + c * HW;
                    this.maxpoolingForward(maxVal, maxIx, n, c, padh, padw, strideh, stridew, C, P, Q, R, S, HW, W);
                    this.maxpoolingBackward(maxIx, outOffset, n, c, C, Q, PQ, CPQ);
                }
            }
            return this.output.recomputeNonZeros(this._rl, this._ru - 1);
        }

        /**
         * Fills {@code maxVal} / {@code maxIx} with, per output cell of channel {@code c} in
         * row {@code n}, the maximum value seen and its plane-relative position
         * ({@code h * W + w}).
         *
         * <p>For a non-empty sparse row, the stored entries of the channel are visited in order
         * and the cells between consecutive stored entries are fed in as explicit zeros.
         * For an empty row, all maxima are set to 0.0 and each position is set to the cell
         * {@code (max(-padh + p*strideh, 0), max(-padw + q*stridew, 0))}.
         */
        protected void maxpoolingForward(double[] maxVal, int[] maxIx, int n, int c, int padh, int padw, int strideh, int stridew, int C, int P, int Q, int R, int S, int HW, int W) {
            SparseBlock sblock = this._params.input1.getSparseBlock();
            if (!sblock.isEmpty(n)) {
                Arrays.fill(maxVal, -1.7976931348623157E308);
                int apos = sblock.pos(n);
                int alen = sblock.size(n);
                int[] aix = sblock.indexes(n);
                double[] avals = sblock.values(n);
                // Row-relative range [cpos, cpos2) of the stored entries belonging to channel c;
                // a negative lookup result is mapped to the end of the row.
                int cpos = c == 0 ? 0 : sblock.posFIndexGTE(n, c * HW);
                int cpos2 = c + 1 == C ? alen : sblock.posFIndexGTE(n, (c + 1) * HW);
                cpos = cpos >= 0 ? cpos : alen;
                cpos2 = cpos2 >= 0 ? cpos2 : alen;
                // Column index of the last cell handled so far (one before the channel start).
                int lastix = c * HW - 1;
                for (int j = apos + cpos; j < apos + cpos2; ++j) {
                    // Feed the zeros in the gap between the previous and the current stored entry.
                    PoolingBackwardSparseDense.update0(lastix + 1, aix[j], maxVal, maxIx, padh, padw, strideh, stridew, P, Q, R, S, HW, W);
                    int h = aix[j] % HW / W;
                    int w = aix[j] % W;
                    double val = this.reluBack && avals[j] < 0.0 ? 0.0 : avals[j];
                    PoolingBackwardSparseDense.update(val, maxVal, maxIx, h, w, padh, padw, strideh, stridew, P, Q, R, S, W);
                    lastix = aix[j];
                }
                // Feed the zeros after the last stored entry up to the end of the channel.
                PoolingBackwardSparseDense.update0(lastix + 1, (c + 1) * HW, maxVal, maxIx, padh, padw, strideh, stridew, P, Q, R, S, HW, W);
            } else {
                Arrays.fill(maxVal, 0.0);
                int ix = 0;
                for (int p = 0; p < P; ++p) {
                    int h = Math.max(-padh + p * strideh, 0);
                    int q = 0;
                    while (q < Q) {
                        int w = Math.max(-padw + q * stridew, 0);
                        maxIx[ix] = h * W + w;
                        ++q;
                        ++ix;
                    }
                }
            }
        }

        /**
         * Adds the dense gradient values of row {@code n}, channel {@code c}, to the output
         * cells recorded in {@code maxIx}.
         *
         * @param maxIx     per output cell, the plane-relative position of its maximum
         * @param outOffset position in the output array of the channel's first cell
         */
        protected void maxpoolingBackward(int[] maxIx, int outOffset, int n, int c, int C, int Q, int PQ, int CPQ) {
            double[] dout = this.doutput.getDenseBlockValues();
            double[] out = this.output.getDenseBlockValues();
            int doutOffset = n * CPQ + c * PQ;
            for (int pq = 0; pq < PQ; ++pq) {
                int n2 = outOffset + maxIx[pq];
                out[n2] = out[n2] + dout[doutOffset + pq];
            }
        }

        /**
         * Applies {@link #update} with value 0.0 to every cell whose column index lies in
         * {@code [lix, uix)}; the cell coordinates are derived as
         * {@code (i % HW / W, i % W)}.
         */
        private static void update0(int lix, int uix, double[] maxVal, int[] maxIx, int padh, int padw, int strideh, int stridew, int P, int Q, int R, int S, int HW, int W) {
            for (int i = lix; i < uix; ++i) {
                PoolingBackwardSparseDense.update(0.0, maxVal, maxIx, i % HW / W, i % W, padh, padw, strideh, stridew, P, Q, R, S, W);
            }
        }

        /**
         * Offers the value of input cell {@code (h, w)} to a range of output cells and records
         * it wherever it is strictly greater than the maximum stored so far (ties keep the
         * earlier position).
         */
        private static void update(double val, double[] maxVal, int[] maxIx, int h, int w, int padh, int padw, int strideh, int stridew, int P, int Q, int R, int S, int W) {
            // Output ranges [lp, up) x [lq, uq) that receive the value; presumably exactly the
            // output cells whose pooling window contains (h, w) -- TODO confirm against the
            // window definition used elsewhere.
            int lp = Math.max((h + padh - R + strideh) / strideh, 0);
            int up = Math.min((h + padh + strideh) / strideh, P);
            int lq = Math.max((w + padw - S + stridew) / stridew, 0);
            int uq = Math.min((w + padw + stridew) / stridew, Q);
            int maxIndex = h * W + w;
            for (int p = lp; p < up; ++p) {
                for (int q = lq; q < uq; ++q) {
                    int ix = p * Q + q;
                    if (!(maxVal[ix] < val)) continue;
                    maxVal[ix] = val;
                    maxIx[ix] = maxIndex;
                }
            }
        }
    }

    /**
     * Average-pooling backward worker for a sparse incoming gradient
     * ({@code params.input2}). Every stored gradient entry is multiplied by
     * {@code (R * S)^-1} and added to all output cells of its pooling window.
     */
    private static class AvgPoolingBackwardSparse
    implements Callable<Long> {
        /** First row to process (inclusive). */
        public int _rl;
        /** Last row to process (exclusive). */
        public int _ru;
        private final DnnParameters _params;
        MatrixBlock output;
        MatrixBlock dout;
        int CHW;
        int P;
        int Q;
        int HW;
        final double _poolingMultiplier;

        /**
         * @param rl     first row to process (inclusive)
         * @param ru     last row to process (exclusive)
         * @param params pooling parameters; {@code output} must have a dense block
         * @throws RuntimeException if the output has no dense block
         */
        public AvgPoolingBackwardSparse(int rl, int ru, DnnParameters params) {
            _rl = rl;
            _ru = ru;
            _params = params;
            dout = params.input2;
            output = params.output;
            CHW = params.C * params.H * params.W;
            HW = params.H * params.W;
            P = params.P;
            Q = params.Q;
            _poolingMultiplier = Math.pow(params.R * params.S, -1.0);
            if (output.getDenseBlock() == null) {
                throw new RuntimeException("Incorrect usage: empty inputs");
            }
        }

        /**
         * Scatters the scaled gradient entries of the assigned rows into the output.
         *
         * @return the value of {@code output.recomputeNonZeros(_rl, _ru - 1)}
         */
        @Override
        public Long call() throws Exception {
            LibMatrixDNNHelper.CellIndex3 cell = new LibMatrixDNNHelper.CellIndex3();
            double[] target = output.getDenseBlockValues();
            SparseBlock gradRows = dout.sparseBlock;
            for (int n = _rl; n < _ru; n++) {
                if (!gradRows.isEmpty(n)) {
                    int rowStart = gradRows.pos(n);
                    int rowLen = gradRows.size(n);
                    int[] cols = gradRows.indexes(n);
                    double[] vals = gradRows.values(n);
                    int rowEnd = rowStart + rowLen;
                    for (int k = rowStart; k < rowEnd; k++) {
                        // Split the column index into (channel, output row, output column).
                        cell = LibMatrixDNNHelper.computeTensorIndexes(cols[k], P, Q, cell);
                        int channel = cell.ix1;
                        int outRow = cell.ix2;
                        int outCol = cell.ix3;
                        int planeBase = n * CHW + channel * HW;
                        int hBegin = _params.start_indexes_h[outRow];
                        int hEnd = _params.end_indexes_h[outRow];
                        int wBegin = _params.start_indexes_w[outCol];
                        int wEnd = _params.end_indexes_w[outCol];
                        for (int h = hBegin; h < hEnd; h++) {
                            for (int w = wBegin; w < wEnd; w++) {
                                target[planeBase + h * _params.W + w] += _poolingMultiplier * vals[k];
                            }
                        }
                    }
                }
            }
            return output.recomputeNonZeros(_rl, _ru - 1);
        }
    }

    /**
     * Max-pooling backward worker for a dense input ({@code params.input1}) and a sparse
     * incoming gradient ({@code params.input2}). Each stored gradient entry is added to
     * the output at the position of the maximum of its pooling window.
     */
    private static class PoolingBackwardDenseSparse
    implements Callable<Long> {
        /** First row to process (inclusive). */
        public int _rl;
        /** Last row to process (exclusive). */
        public int _ru;
        private final DnnParameters _params;
        MatrixBlock output;
        boolean performReluBackward;
        double[] inputArray;
        MatrixBlock dout;
        int CHW;
        int P;
        int Q;
        int HW;

        /**
         * @param rl                  first row to process (inclusive)
         * @param ru                  last row to process (exclusive)
         * @param params              pooling parameters
         * @param performReluBackward forwarded to the maximum search
         * @throws RuntimeException if the input has no dense values, the output has no dense
         *         block, or {@code params.input2} is not in sparse format
         */
        public PoolingBackwardDenseSparse(int rl, int ru, DnnParameters params, boolean performReluBackward) {
            _rl = rl;
            _ru = ru;
            _params = params;
            this.performReluBackward = performReluBackward;
            inputArray = params.input1.getDenseBlockValues();
            dout = params.input2;
            output = params.output;
            CHW = params.C * params.H * params.W;
            HW = params.H * params.W;
            P = params.P;
            Q = params.Q;
            boolean missingDense = inputArray == null || output.getDenseBlock() == null;
            if (missingDense) {
                throw new RuntimeException("Incorrect usage: empty inputs");
            }
            if (!params.input2.isInSparseFormat()) {
                throw new RuntimeException("Incorrect usage: Call optimized versions");
            }
        }

        /**
         * Routes the gradient entries of the assigned rows to the window maxima.
         *
         * @return the value of {@code output.recomputeNonZeros(_rl, _ru - 1)}
         */
        @Override
        public Long call() throws Exception {
            LibMatrixDNNHelper.CellIndex3 cell = new LibMatrixDNNHelper.CellIndex3();
            double[] target = output.getDenseBlockValues();
            SparseBlock gradRows = dout.sparseBlock;
            for (int n = _rl; n < _ru; n++) {
                if (!gradRows.isEmpty(n)) {
                    int rowStart = gradRows.pos(n);
                    int rowLen = gradRows.size(n);
                    int[] cols = gradRows.indexes(n);
                    double[] vals = gradRows.values(n);
                    int rowEnd = rowStart + rowLen;
                    for (int k = rowStart; k < rowEnd; k++) {
                        // Split the column index into (channel, output row, output column).
                        cell = LibMatrixDNNHelper.computeTensorIndexes(cols[k], P, Q, cell);
                        int planeBase = n * CHW + cell.ix1 * HW;
                        int winner = LibMatrixDNNPooling.getMaxIndex(cell.ix2, cell.ix3, planeBase, inputArray, _params, performReluBackward);
                        // -1 signals that no maximum was found; such entries are dropped.
                        if (winner != -1) {
                            target[winner] += vals[k];
                        }
                    }
                }
            }
            return output.recomputeNonZeros(_rl, _ru - 1);
        }
    }

    /**
     * Max-pooling backward worker for a dense input ({@code params.input1}) and a dense
     * incoming gradient ({@code params.input2}). For every output cell the gradient value
     * is added to the output at the position of the maximum of the cell's pooling window.
     */
    private static class PoolingBackwardDenseDense
    implements Callable<Long> {
        /** First row to process (inclusive). */
        public int _rl;
        /** Last row to process (exclusive). */
        public int _ru;
        private final DnnParameters _params;
        boolean performReluBackward;
        double[] inputArray;
        double[] doutArray;
        MatrixBlock output;
        int C;
        int CHW;
        int P;
        int Q;
        int HW;
        int CPQ;
        int PQ;

        /**
         * @param rl                  first row to process (inclusive)
         * @param ru                  last row to process (exclusive)
         * @param params              pooling parameters
         * @param performReluBackward forwarded to the maximum search
         * @throws RuntimeException if either input has no dense values or the output has no
         *         dense block
         */
        public PoolingBackwardDenseDense(int rl, int ru, DnnParameters params, boolean performReluBackward) {
            _rl = rl;
            _ru = ru;
            _params = params;
            this.performReluBackward = performReluBackward;
            inputArray = params.input1.getDenseBlockValues();
            doutArray = params.input2.getDenseBlockValues();
            output = params.output;
            C = params.C;
            CHW = params.C * params.H * params.W;
            HW = params.H * params.W;
            P = params.P;
            Q = params.Q;
            CPQ = params.C * params.P * params.Q;
            PQ = params.P * params.Q;
            boolean missingDense = inputArray == null || doutArray == null || output.getDenseBlock() == null;
            if (missingDense) {
                throw new RuntimeException("Incorrect usage: empty inputs");
            }
        }

        /**
         * Routes the gradient of every output cell of the assigned rows to the window maximum.
         *
         * @return the value of {@code output.recomputeNonZeros(_rl, _ru - 1)}
         */
        @Override
        public Long call() throws Exception {
            double[] target = output.getDenseBlockValues();
            for (int n = _rl; n < _ru; n++) {
                for (int c = 0; c < C; c++) {
                    int planeBase = n * CHW + c * HW;
                    int gradBase = n * CPQ + c * PQ;
                    for (int p = 0; p < P; p++) {
                        for (int q = 0; q < Q; q++) {
                            int winner = LibMatrixDNNPooling.getMaxIndex(p, q, planeBase, inputArray, _params, performReluBackward);
                            // -1 signals that no maximum was found; such cells are skipped.
                            if (winner != -1) {
                                target[winner] += doutArray[gradBase + p * Q + q];
                            }
                        }
                    }
                }
            }
            return output.recomputeNonZeros(_rl, _ru - 1);
        }
    }

    /**
     * Average-pooling backward worker for a dense incoming gradient
     * ({@code params.input2}). The gradient of every output cell is multiplied by
     * {@code (R * S)^-1} and added to all output cells of its pooling window.
     */
    private static class AvgPoolingBackwardDense
    implements Callable<Long> {
        /** First row to process (inclusive). */
        public int _rl;
        /** Last row to process (exclusive). */
        public int _ru;
        private final DnnParameters _params;
        double[] doutArray;
        MatrixBlock output;
        final int C;
        final int CHW;
        final int P;
        final int Q;
        final int HW;
        final int CPQ;
        final int PQ;
        final double _poolingMultiplier;

        /**
         * @param rl     first row to process (inclusive)
         * @param ru     last row to process (exclusive)
         * @param params pooling parameters
         * @throws RuntimeException if the gradient has no dense values or the output has no
         *         dense block
         */
        public AvgPoolingBackwardDense(int rl, int ru, DnnParameters params) {
            _rl = rl;
            _ru = ru;
            _params = params;
            doutArray = params.input2.getDenseBlockValues();
            output = params.output;
            C = params.C;
            CHW = params.C * params.H * params.W;
            HW = params.H * params.W;
            P = params.P;
            Q = params.Q;
            CPQ = params.C * params.P * params.Q;
            PQ = params.P * params.Q;
            _poolingMultiplier = Math.pow(params.R * params.S, -1.0);
            boolean missingDense = doutArray == null || output.getDenseBlock() == null;
            if (missingDense) {
                throw new RuntimeException("Incorrect usage: empty inputs");
            }
        }

        /**
         * Spreads the scaled gradient of every output cell of the assigned rows over the
         * cell's pooling window.
         *
         * @return the value of {@code output.recomputeNonZeros(_rl, _ru - 1)}
         */
        @Override
        public Long call() throws Exception {
            double[] target = output.getDenseBlockValues();
            for (int n = _rl; n < _ru; n++) {
                for (int c = 0; c < C; c++) {
                    int planeBase = n * CHW + c * HW;
                    int gradBase = n * CPQ + c * PQ;
                    for (int p = 0; p < P; p++) {
                        for (int q = 0; q < Q; q++) {
                            int hBegin = _params.start_indexes_h[p];
                            int hEnd = _params.end_indexes_h[p];
                            int wBegin = _params.start_indexes_w[q];
                            int wEnd = _params.end_indexes_w[q];
                            int gradPos = gradBase + p * Q + q;
                            for (int h = hBegin; h < hEnd; h++) {
                                for (int w = wBegin; w < wEnd; w++) {
                                    target[planeBase + h * _params.W + w] += _poolingMultiplier * doutArray[gradPos];
                                }
                            }
                        }
                    }
                }
            }
            return output.recomputeNonZeros(_rl, _ru - 1);
        }
    }

    /**
     * Forward pooling worker for a sparse input ({@code params.input1}) writing into a
     * dense output array. Handles rows {@code [_rl, _ru)}; {@code PoolingType.MAX} takes
     * the max path, every other pooling type the scaled-sum (average) path.
     */
    private static class SparsePooling
    implements Callable<Long> {
        /** First row to process (inclusive). */
        private final int _rl;
        /** Last row to process (exclusive). */
        private final int _ru;
        private final DnnParameters _params;
        /** Dense values of {@code params.output}, fetched once in the constructor. */
        private double[] outputArray;
        private final int C;
        private final int P;
        private final int Q;
        private final int W;
        private final int H;
        private final int CPQ;
        private final int PQ;
        private final LibMatrixDNN.PoolingType _poolingType;
        /** {@code (R * S)^-1}; factor applied to every value on the average path. */
        private final double _poolingMultiplier;

        /**
         * @param rl          first row to process (inclusive)
         * @param ru          last row to process (exclusive)
         * @param params      pooling parameters
         * @param poolingType pooling variant
         */
        public SparsePooling(int rl, int ru, DnnParameters params, LibMatrixDNN.PoolingType poolingType) {
            this._rl = rl;
            this._ru = ru;
            this._params = params;
            this.outputArray = params.output.getDenseBlockValues();
            this.C = params.C;
            this.P = params.P;
            this.Q = params.Q;
            this.H = params.H;
            this.W = params.W;
            this.CPQ = this.C * this.P * this.Q;
            this.PQ = this.P * this.Q;
            this._poolingType = poolingType;
            this._poolingMultiplier = Math.pow(params.R * params.S, -1.0);
        }

        /**
         * Pools the assigned rows.
         *
         * <p>Rows without stored entries get their output region set to 0.0. For all other
         * rows every cell of the input (stored or implicit zero) is visited once and offered
         * to each output cell whose window bounds contain it.
         *
         * @return the value of {@code params.output.recomputeNonZeros(_rl, _ru - 1)}
         */
        @Override
        public Long call() throws Exception {
            // Only the max path initializes the output here; the average path accumulates onto
            // the existing output values -- presumably the caller provides a zeroed output,
            // TODO confirm.
            if (this._poolingType == LibMatrixDNN.PoolingType.MAX) {
                Arrays.fill(this.outputArray, this._rl * this.CPQ, this._ru * this.CPQ, this._params.minValForMaxPoolOperations);
            }
            for (int n = this._rl; n < this._ru; ++n) {
                if (!this._params.input1.sparseBlock.isEmpty(n)) {
                    int apos = this._params.input1.sparseBlock.pos(n);
                    int alen = this._params.input1.sparseBlock.size(n);
                    int[] aix = this._params.input1.sparseBlock.indexes(n);
                    double[] avals = this._params.input1.sparseBlock.values(n);
                    // chw: linear index of the current cell within the row;
                    // index: cursor into aix/avals pointing at the next stored entry.
                    int chw = 0;
                    int index = apos;
                    for (int c = 0; c < this.C; ++c) {
                        int outOffset = n * this.CPQ + c * this.PQ;
                        for (int h = 0; h < this.H; ++h) {
                            int w = 0;
                            while (w < this.W) {
                                int q;
                                int outOffsetWithp;
                                int p;
                                // Value of the current cell: 0.0 unless the cursor points at it.
                                double nchwVal = 0.0;
                                if (aix[index] == chw) {
                                    nchwVal = avals[index++];
                                    // Keep the cursor inside the row after the last stored entry
                                    // was consumed, so aix[index] stays readable. This relies on
                                    // the stale entry never matching a later chw -- presumably
                                    // because column indexes are strictly increasing; verify.
                                    if (index >= apos + alen) {
                                        --index;
                                    }
                                }
                                if (this._poolingType == LibMatrixDNN.PoolingType.MAX) {
                                    // Max path: fold the value into every output cell whose
                                    // window bounds contain (h, w).
                                    for (p = 0; p < this.P; ++p) {
                                        if (h < this._params.start_indexes_h[p] || h >= this._params.end_indexes_h[p]) continue;
                                        outOffsetWithp = outOffset + p * this.Q;
                                        for (q = 0; q < this.Q; ++q) {
                                            if (w < this._params.start_indexes_w[q] || w >= this._params.end_indexes_w[q]) continue;
                                            this.outputArray[outOffsetWithp + q] = Math.max(this.outputArray[outOffsetWithp + q], nchwVal);
                                        }
                                    }
                                } else {
                                    // Average path: add the scaled value to every output cell
                                    // whose window bounds contain (h, w).
                                    for (p = 0; p < this.P; ++p) {
                                        if (h < this._params.start_indexes_h[p] || h >= this._params.end_indexes_h[p]) continue;
                                        outOffsetWithp = outOffset + p * this.Q;
                                        for (q = 0; q < this.Q; ++q) {
                                            if (w < this._params.start_indexes_w[q] || w >= this._params.end_indexes_w[q]) continue;
                                            int n2 = outOffsetWithp + q;
                                            this.outputArray[n2] = this.outputArray[n2] + this._poolingMultiplier * nchwVal;
                                        }
                                    }
                                }
                                ++w;
                                ++chw;
                            }
                        }
                    }
                    continue;
                }
                // Row without stored entries: its whole output region becomes 0.0
                // (this overwrites the minimum value written above on the max path).
                Arrays.fill(this.outputArray, n * this.CPQ, (n + 1) * this.CPQ, 0.0);
            }
            return this._params.output.recomputeNonZeros(this._rl, this._ru - 1);
        }
    }

    /**
     * Forward pooling worker for a dense input ({@code params.input1}) and a dense
     * output. Handles rows {@code [_rl, _ru)}; {@code PoolingType.MAX} selects the max
     * reduction, every other pooling type the scaled-sum (average) reduction.
     */
    private static class DensePooling
    implements Callable<Long> {
        /** First row to process (inclusive). */
        private final int _rl;
        /** Last row to process (exclusive). */
        private final int _ru;
        private final DnnParameters _params;
        private final LibMatrixDNN.PoolingType _poolingType;
        /** {@code 1 / (R * S)}; factor applied to every window-row sum on the average path. */
        private final double _poolingMultiplier;

        /**
         * @param rl          first row to process (inclusive)
         * @param ru          last row to process (exclusive)
         * @param params      pooling parameters
         * @param poolingType pooling variant
         */
        public DensePooling(int rl, int ru, DnnParameters params, LibMatrixDNN.PoolingType poolingType) {
            _rl = rl;
            _ru = ru;
            _params = params;
            _poolingType = poolingType;
            _poolingMultiplier = 1.0 / (double)(params.R * params.S);
        }

        /**
         * Pools the assigned rows, delegating to
         * {@code LibMatrixDNNPooling.poolingDenseStride1Pad0} when
         * {@code params.isStride1Pad0()} holds and using the precomputed window bounds of
         * {@code params} otherwise.
         *
         * @return the value of {@code params.output.recomputeNonZeros(_rl, _ru - 1)}
         */
        @Override
        public Long call() throws Exception {
            final int C = _params.C;
            final int P = _params.P;
            final int Q = _params.Q;
            final int R = _params.R;
            final int S = _params.S;
            final int H = _params.H;
            final int W = _params.W;
            final int HW = _params.H * _params.W;
            final int CHW = _params.C * _params.H * _params.W;
            final int CPQ = C * P * Q;
            double[] in = _params.input1.getDenseBlockValues();
            double[] out = _params.output.getDenseBlockValues();

            // Start value of every output cell: 0.0 for averaging, the configured minimum otherwise.
            double startVal;
            if (_poolingType == LibMatrixDNN.PoolingType.AVG) {
                startVal = 0.0;
            } else {
                startVal = _params.minValForMaxPoolOperations;
            }
            final boolean useMax = _poolingType == LibMatrixDNN.PoolingType.MAX;

            if (_params.isStride1Pad0()) {
                LibMatrixDNNPooling.poolingDenseStride1Pad0(_poolingType, startVal, _poolingMultiplier, in, out, _rl, _ru, _rl * CHW, _rl * CPQ, C, P, Q, R, S, H, W);
                return _params.output.recomputeNonZeros(_rl, _ru - 1);
            }

            Arrays.fill(out, _rl * CPQ, _ru * CPQ, startVal);
            int[] hLower = _params.start_indexes_h;
            int[] hUpper = _params.end_indexes_h;
            int[] wLower = _params.start_indexes_w;
            int[] wUpper = _params.end_indexes_w;
            for (int n = _rl; n < _ru; n++) {
                int inBase = n * CHW;
                int outBase = n * CPQ;
                for (int c = 0; c < C; c++, inBase += HW) {
                    for (int p = 0; p < P; p++, outBase += Q) {
                        for (int h = hLower[p]; h < hUpper[p]; h++) {
                            int rowBase = inBase + h * W;
                            for (int q = 0; q < Q; q++) {
                                int cell = outBase + q;
                                if (useMax) {
                                    out[cell] = LibMatrixDNNPooling.max(out[cell], in, rowBase + wLower[q], wUpper[q] - wLower[q]);
                                } else {
                                    out[cell] = LibMatrixDNNPooling.avg(out[cell], in, rowBase + wLower[q], wUpper[q] - wLower[q], _poolingMultiplier);
                                }
                            }
                        }
                    }
                }
            }
            return _params.output.recomputeNonZeros(_rl, _ru - 1);
        }
    }
}

