import pytest

import numpy as np

from suncal.datasets.dataset_model import DataSet, DataSetSummary


def test_dsetsummary():
    ''' Test that DataSet.summarize() reproduces the per-group statistics of the raw data.

        Builds 10 groups (one per value of x) of 8 noisy samples each around the line
        y = 3x + 2, then checks the summary's column names, means, and sample standard
        deviations against values computed directly with numpy.
    '''
    # A private RandomState keeps the draw reproducible without reseeding NumPy's
    # process-wide generator, so other tests are unaffected by this one.
    rng = np.random.RandomState(100)

    # DataSet from array data, all same length
    x = np.arange(10)
    ynom = 3*x + 2
    samples = 8
    # Drawn as (samples, groups) so loc broadcasts across groups, then
    # transposed to (groups, samples).
    y = rng.normal(loc=ynom, scale=.4, size=(samples, len(ynom))).T
    dset = DataSet(y, colnames=x)
    arr = dset.summarize()

    assert np.allclose(arr._pcolnames, x)
    assert np.allclose(arr._means(), y.mean(axis=1))
    assert np.allclose(arr._stds(), y.std(axis=1, ddof=1))  # ddof=1: sample standard deviation


def test_anova():
    ''' Check DataSet ANOVA, per-group statistics, and pooled statistics against
        spreadsheet reference values, for both equal-length and uneven-length groups.
    '''
    # Reference data: Dilip Shaw's example Excel spreadsheet at NCSLI ABQ Section Meeting 6/4/2018.
    # One row per group, ten measurements each.
    groups = np.array([
        [0.999156418, 0.999474238, 0.999339856, 1.000160754, 1.000744245,
         1.000737791, 1.000495481, 0.9995736, 1.000723728, 1.00060071],
        [1.000349612, 0.999883249, 0.999050897, 1.00017474, 0.999602596,
         1.000824172, 0.999356241, 0.999806955, 1.000643518, 1.000444615],
        [0.999757491, 0.999817772, 1.000194482, 0.999679647, 1.000937242,
         1.000470251, 0.999027869, 0.999911651, 0.999590441, 1.000451023],
        [0.999719098, 0.99964781, 1.000822782, 0.999409639, 0.999109266,
         1.000785761, 0.999307039, 1.000469276, 1.000654864, 1.000987112],
    ])

    # --- Equal-length groups ---
    balanced = DataSet(groups)
    anova = balanced.anova()
    bygroup = balanced.group_stats()
    pooled = balanced.pooled_stats()
    assert np.isclose(anova.F, .0850416)
    assert np.isclose(anova.Fcrit, 2.866266)
    assert np.isclose(anova.P, .96777478)
    assert np.isclose(pooled.reproducibility, 57.6E-6, atol=.01E-5)
    assert np.isclose(pooled.repeatability, 624.4E-6, atol=.1E-6)
    assert np.allclose(bygroup.degf, [9, 9, 9, 9])
    assert np.allclose(bygroup.mean, [1.000100682, 1.000013659, 0.999983787, 1.000091265])
    assert np.allclose(bygroup.standarddev, [0.000646229, 0.00057407, 0.000542976, 0.000719539])
    assert pooled.reproducibility_degf == 3
    assert pooled.repeatability_degf == 36

    # --- Uneven-length groups (expected values verified with Excel's anova function) ---
    # Blank out the tail of rows 1-3 with NaN, leaving 8, 9 and 6 valid points respectively.
    for row, nkeep in ((1, 8), (2, 9), (3, 6)):
        groups[row, nkeep:] = np.nan
    uneven = DataSet(groups)
    bygroup = uneven.group_stats()
    pooled = uneven.pooled_stats()
    assert np.allclose(bygroup.degf, [9, 7, 8, 5])
    assert pooled.reproducibility_degf == 3
    assert pooled.repeatability_degf == 29
    anova = uneven.anova()
    assert np.isclose(anova.F, .23239)
    assert np.isclose(anova.Fcrit, 2.93403)
    assert np.isclose(anova.P, .873061)


def test_anovameans():
    ''' Check ANOVA computed from summary statistics only (group means, standard
        deviations, and measurement counts), using GUM H.5 and Glantz reference data.
    '''
    # --- GUM H.5: ten groups of five measurements each ---
    x = np.arange(1, 11)
    y = np.array([10.000172, 10.000116, 10.000013, 10.000144, 10.000106,
                  10.000031, 10.000060, 10.000125, 10.000163, 10.000041])
    ystd = 1E-6 * np.array([60, 77, 111, 101, 67, 93, 80, 73, 88, 86])
    nmeas = np.full(y.shape, 5)
    gum = DataSetSummary(x, y, ystd, nmeas)
    gum.group_stats()  # Result is not checked here; the call is only exercised
    pooled = gum.pooled_stats()
    assert np.isclose(pooled.mean, 10.000097, atol=.0000005)  # From GUM Table H.9
    assert np.isclose(pooled.reproducibility, 57E-6, atol=.5E-6)
    assert np.isclose(pooled.repeatability, 85E-6, atol=.5E-6)
    anova = gum.anova()
    assert np.isclose(anova.F, 2.25, atol=.02)  # Equation H.27
    assert gum.ncolumns() == 10

    # --- Glantz Table 7-4 data, expected results from Table 7-5 ---
    # Group labels are strings here to check that non-numeric x values are accepted too.
    labels = ['Healthy', 'Nonmelancholic', 'Melancholoc']
    means = np.array([9.2, 10.7, 13.5])
    stds = np.array([2.9, 2.8, 4.7])
    counts = np.array([16, 22, 18])
    glantz = DataSetSummary(labels, means, stds, counts)
    anova = glantz.anova()
    assert np.isclose(anova.SSbet, 164.7, atol=.1)
    assert np.isclose(anova.SSwit, 666.02, atol=1)  # Rounding error in table, large atol
    assert np.isclose(anova.MSbet, 82.3, atol=.1)
    assert np.isclose(anova.MSwit, 12.5, atol=.1)
    assert np.isclose(anova.F, 6.612, atol=.1)
    assert np.isclose(anova.P, .003, atol=.001)


def test_autocorrelation():
    ''' Compare DataSet.autocorrelation() with the values published in the NIST handbook example at
        https://www.itl.nist.gov/div898/handbook/eda/section3/eda35c.htm
    '''
    # Input series: "LEW.DAT" from https://www.itl.nist.gov/div898/handbook/eda/section4/eda4251.htm
    lewdata = np.array([
        -213, -564,   -35,   -15,   141,   115,  -420,  -360,   203,  -338,  -431,   194,  -220,  -513,
        154,  -125,  -559,    92,   -21,  -579,   -52,    99,  -543,  -175,   162,  -457,  -346,   204,  -300,  -474,   164,
        -107,  -572,    -8,    83,  -541,  -224,   180,  -420,  -374,   201,  -236,  -531,    83,    27,  -564,  -112,   131,
        -507,  -254,   199,  -311,  -495,   143,   -46,  -579,   -90,   136,  -472,  -338,   202,  -287,  -477,   169,  -124,
        -568,    17,    48,  -568,  -135,   162,  -430,  -422,   172,   -74,  -577,   -13,    92,  -534,  -243,   194,  -355,
        -465,   156,   -81,  -578,   -64,   139,  -449,  -384,   193,  -198,  -538,   110,   -44,  -577,    -6,    66,  -552,
        -164,   161,  -460,  -344,   205,  -281,  -504,   134,   -28,  -576,  -118,   156,  -437,  -381,   200,  -220,  -540,
        83,    11,  -568,  -160,   172,  -414,  -408,   188,  -125,  -572,   -32,   139,  -492,  -321,   205,  -262,  -504,
        142,   -83,  -574,     0,    48,  -571,  -106,   137,  -501,  -266,   190,  -391,  -406,   194,  -186,  -553,    83,
        -13,  -577,   -49,   103,  -515,  -280,   201,   300,  -506,   131,   -45,  -578,   -80,   138,  -462,  -361,   201,
        -211,  -554,    32,    74,  -533,  -235,   187,  -372,  -442,   182,  -147,  -566,    25,    68,  -535,  -244,   194,
        -351,  -463,   174,  -125,  -570,    15,    72,  -550,  -190,   172,  -424,  -385,   198,  -218,  -536,    96])

    # Published NIST autocorrelation values, lag 0 first
    nistacorr = np.array([1.00, -0.31, -0.74, 0.77, 0.21, -0.90, 0.38, 0.63, -0.77, -0.12, 0.82, -0.40, -0.55,
                          0.73, 0.07, -0.76, 0.40, 0.48, -0.70, -0.03, 0.70, -0.41, -0.43, 0.67, 0.00, -0.66,
                          0.42, 0.39, -0.65, 0.03, 0.63, -0.42, -0.36, 0.64, -0.05, -0.60, 0.43, 0.32, -0.64,
                          0.08, 0.58, -0.45, -0.28, 0.62, -0.10, -0.55, 0.45, 0.25, -0.61, 0.14])

    # Suncal result, truncated to the first 50 lag values for the comparison
    nlags = 50
    dset = DataSet(lewdata)
    computed = dset.autocorrelation()[:nlags]

    # The reference values are tabulated to two decimals, hence the absolute tolerance
    assert np.allclose(computed, nistacorr, atol=.006)


def test_autocorrelation2():
    ''' Check DataSet.autocorrelation_uncert() against the data and results shown in
        Figure 1 and Figure 3 of Zhang, Metrologia 43 (2006)
    '''
    # --- Figure 1 series. Points were read off the published plot using PlotDigitizer. ---
    zhang1 = np.array([451.221374,451.2946565,450.3969466,452.5770992,450.4335878,451.4229008,449.6274809,448.7114504,451.7343511,
                       451.0931298,449.6824427,450.8732824,447.9053435,450.2320611,449.5175573,450.8,448.6748092,452.0091603,
                       448.0152672,448.7664122,449.8839695,448.3450382,449.2061069,450.2137405,448.8580153,450.470229,450.6167939,
                       451.2763359,447.9236641,449.0045802,449.7374046,449.7374046,449.9755725,449.0778626,450.8183206,449.4442748,
                       451.8992366,448.7664122,449.6641221,448.5832061,449.5175573,451.5877863,451.880916,450.1954198,448.8763359,
                       449.8656489,448.5832061,450.7633588,449.2061069,449.3709924,449.0045802,451.129771,450.1954198,450.7083969,
                       448.6564885,451.0015267,448.6198473,450.7450382,452.2839695,450.9465649,450.9282443,449.9206107,449.480916,
                       449.2610687,450.4335878,450.9648855,450.4335878,450.5618321,449.7374046,450.4335878,449.4076336,450.7450382,
                       448.9496183,449.6458015,451.2763359,448.8763359,449.7007634,450.0854962,450.7267176,450.4335878,451.3862595,
                       450.7816794,448.5832061,449.8839695,449.8290076,450.8183206,449.3160305,449.5908397,449.3160305,448.8763359,450.2320611,452.0824427,
                       450.5618321,450.1587786,449.2427481,450.6534351,449.0778626,449.6641221,449.5908397,448.6564885,449.6641221,
                       449.9022901,451.1664122,451.4778626,451.4045802,450.1587786,451.221374,450.6717557,450.7083969,449.719084,452.3572519,
                       452.8335878,452.1007634,450.8916031,451.3679389,452.1374046,452.3755725,452.7053435,451.5877863,453.2183206,
                       450.5801527,452.8885496,452.3206107,452.5954198,451.0381679,452.4305344,451.880916,451.5694656,452.8335878,
                       451.4045802,451.0381679,449.9022901,450.8183206,448.8030534,450.5435115,451.3496183,450.2320611,448.729771,
                       450.5251908,450.4152672,450.9465649,450.0122137,448.6381679,450.140458,451.4412214,451.1847328,451.2946565,451.1114504,
                       450.3419847,451.4229008,451.8076336,451.4229008,451.551145,452.6870229,451.4412214,449.3526718,451.5877863,451.2396947,451.3496183,
                       449.7740458,453.3465649,453.0900763,450.7450382,449.1328244,450.7083969,450.6351145,451.6244275,450.1770992,450.0122137,
                       449.7374046,450.7633588,450.3603053,449.9938931,450.3603053,450.5435115,450.9648855,449.5541985,450.6717557,450.6351145,
                       450.8,449.7374046,450.8366412,450.1954198,452.1374046,450.2503817,451.2763359,450.2320611,452.4122137,453.1267176,
                       451.3862595,451.3129771,451.0381679,450.5251908,452.6687023,453.8045802,454.7572519,455.4534351,454.9954198,456.3328244,455.6366412,
                       457.0656489,455.9480916,456.9374046,456.4793893,457.5603053,456.9557252,454.8671756,457.3221374,456.0946565,
                       457.1389313,455.929771,458.1282443,454.7938931,456.259542,456.6259542,455.0687023,453.2366412,453.5847328,
                       453.4015267,452.119084,450.5618321,450.378626,449.719084,450.0854962,449.2610687,449.1328244,449.6091603,449.7923664,
                       449.5175573,449.8290076,450.3603053,450.0122137,452.3389313,449.480916,450.1587786,452.540458,452.540458,
                       450.2320611,450.3419847,452.1740458,451.4412214,453.7129771,451.1480916,450.2687023,450.378626,452.1557252,450.5068702,
                       449.810687,448.6381679,449.5541985,449.480916,450.3236641,450.2503817,449.3709924,449.2427481,449.1145038,449.2244275,
                       449.9389313,450.0854962,450.3053435,449.8290076,451.3496183,450.5801527,450.2137405,448.7114504,449.9572519,450.2503817,
                       452.4305344,452.2656489,450.8916031,450.3236641,452.1740458,451.8259542,452.7053435,454.0244275,452.778626,
                       452.4854962,454.4824427,451.2580153,452.3938931,453.5847328,449.5725191,450.4152672,451.5877863,449.2793893,
                       449.719084,448.6198473,451.4778626,450.1770992,450.1221374,449.5725191,448.4732824,448.8030534,449.4442748,449.1145038,
                       448.2167939,451.8442748,449.9206107,448.0152672,449.151145,452.3022901,452.3938931,453.5847328,450.8916031,
                       453.1450382,452.1007634,450.7450382,451.5877863,450.5801527,449.9755725,451.4961832,451.3679389,451.6793893,
                       450.8,449.3709924,450.8183206,451.3312977,452.0274809,451.4778626,451.3496183,450.8,450.7450382,450.6534351,451.6061069,
                       452.9068702,450.7083969,451.4778626,450.9832061,451.9175573,449.4076336,449.4992366,448.1068702,447.319084,449.151145,
                       449.4259542,448.5465649,448.1801527,447.740458,447.4656489,449.8473282,449.9572519,448.3816794,448.6748092,447.6854962,
                       448.5465649,448.0152672,449.0045802,449.1328244,448.9862595,450.470229,448.2351145,449.2977099,449.059542,448.3267176,447.978626,
                       448.5282443,447.8137405,448.8946565,449.8290076,448.5465649,447.319084,449.3160305,448.5832061,447.5755725,448.070229,447.6305344,
                       449.6274809,448.4916031,448.1435115,446.9709924,448.2167939,447.9236641,448.7114504,447.9419847,446.9343511,446.9526718,
                       449.9938931,447.9603053,449.4625954,448.070229,448.5282443,448.1435115,448.6564885,448.5465649,449.5541985,448.5465649,
                       448.4,448.4916031,450.8732824,450.8,450.7267176,452.1740458,449.3160305,450.7816794,449.6641221,451.1114504,
                       450.0671756,451.5694656,452.2839695,449.4442748,])
    fig1_set = DataSet(zhang1)
    fig1_result = fig1_set.autocorrelation_uncert()
    # Expected values are Zhang's results (Section 4)
    assert fig1_result.nc == 13
    # Relaxed tolerance since data points are estimated from image.
    assert np.isclose(fig1_result.uncert, 0.3776, atol=.001)
    assert np.isclose(fig1_result.r_unc, 3.9, atol=.05)

    # --- Figure 3 series ---
    zhang3 = np.array([-19.518744,-19.498219,-19.478113,-19.492563,-19.527327,-19.501986,-19.492142,-19.469103,-19.497375,-19.467844,-19.521456,-19.526062,
    -19.438942,-19.499044,-19.501556,-19.507002,-19.439775,-19.468466,-19.46281,-19.438934,-19.45841,-19.47244,-19.440607,-19.512857,-19.520187,
    -19.505316,-19.432436,-19.43055,-19.439344,-19.442696,-19.5095,-19.424263,-19.501541,-19.439968,-19.512638,-19.448973,-19.461536,-19.458603,
    -19.453577,-19.44122,-19.492317,-19.392002,-19.419437,-19.440588,-19.451895,-19.443518,-19.46069,-19.454405,-19.447702,-19.454823,-19.496706,
    -19.475134,-19.493563,-19.504034,-19.481205,-19.503403,-19.454397,-19.483925,-19.5034,-19.50633,-19.45921,-19.491669,-19.479523,-19.527689,
    -19.468212,-19.458576,-19.4502,-19.505068,-19.497736,-19.453337,-19.463388,-19.497734,-19.496058,-19.482025,-19.528097,-19.513645,-19.513645,
    -19.520346,-19.484533,-19.505894,-19.500029,-19.450394,-19.511335,-19.46233,-19.504423,-19.451647,-19.52222,-19.459602,-19.513634,-19.462114,
    -19.523474,-19.516771,-19.450802,-19.450592,-19.488077,-19.464203,-19.496452,-19.429646,-19.517393,-19.461685,-19.509644,-19.527023,-19.467337,
    -19.45896,-19.479063,-19.496862,-19.450579,-19.452253,-19.449112,-19.47152,-19.488062,-19.44136,-19.432981,-19.434446,-19.510468,-19.481146,
    -19.469418,-19.491825,-19.469206,-19.464808,-19.47046,-19.43444,-19.527632,-19.453495,-19.430876,-19.417473,-19.448675,-19.502914,-19.535585,
    -19.467312,-19.43087,-19.450346,-19.441969,-19.467726,-19.473589,-19.4294,-19.4294,-19.480078,-19.542067,-19.452852,-19.44552,-19.470442,
    -19.450754,-19.450544,-19.532637,-19.540384,-19.511484,-19.45766,-19.45766,-19.530119,-19.401114,-19.474411,-19.436714,-19.378702,-19.428335,
    -19.53828,-19.408018,-19.433777,-19.426865,-19.427074,-19.414927,-19.423931,-19.450945,-19.413248,-19.440891,-19.43712,-19.44403,-19.444239,
    -19.444447,-19.551252,-19.474812,-19.447376,-19.44612,-19.446745,-19.406326,-19.392294,-19.401089,-19.42224,-19.37826,-19.445902,-19.473127,
    -19.444435,-19.464539,-19.465166,-19.420767,-19.43417,-19.428722,-19.483591,-19.4438,-19.467464,-19.447567,-19.414268,-19.391022,-19.44191,
    -19.426413,-19.405258,-19.441908,-19.410912,-19.412376,-19.431433,-19.457819,-19.47227,-19.462006,-19.458027,-19.452162,-19.462631,-19.500956,
    -19.431425,-19.44755,-19.438963,-19.415297,-19.394564,-19.434563,-19.413828,-19.447964,-19.43016,-19.438955])
    fig3_set = DataSet(zhang3)
    fig3_result = fig3_set.autocorrelation_uncert()
    # Expected values are Zhang's results (Section 4)
    assert fig3_result.nc == 17
    assert np.isclose(fig3_result.uncert, 0.0067, atol=.00005)
    assert np.isclose(fig3_result.r_unc, 2.8, atol=.05)
