# Source code for geophpy.operation.general

# -*- coding: utf-8 -*-
'''
   geophpy.operation.general
   -------------------------

   DataSet Object general operations routines.

   :copyright: Copyright 2014-2019 Lionel Darras, Philippe Marty, Quentin Vitale and contributors, see AUTHORS.
   :license: GNU GPL v3.
'''

from copy import deepcopy

import numpy as np
import scipy
import scipy.stats
from scipy.interpolate import griddata, RectBivariateSpline, interp2d

from geophpy.misc.utils import *

#---------------------------------------------------------------------------#
# User defined parameters                                                   #
#---------------------------------------------------------------------------#

# list of "griddata" interpolation methods available for wumappy interface
gridding_interpolation_list = ['none', 'nearest', 'linear', 'cubic']

# List of allowed rotation angle (for z_image) for wumappy interface
rotation_angle_list = [0, 90, 180, 270]


#---------------------------------------------------------------------------#
# DataSet Basic Interpolations                                              #
#---------------------------------------------------------------------------#
def getgriddinginterpolationlist():
    ''' Return the list of available gridding interpolation methods.

    cf. dataset.py
    '''
    return gridding_interpolation_list


def interpolate(dataset, interpolation="none", x_step=None, y_step=None, x_prec=2, y_prec=2, x_frame_factor=0., y_frame_factor=0.):
    ''' Dataset gridding.

    cf. :meth:`~geophpy.dataset.DataSet.interpolate`

    Parameters
    ----------
    dataset : DataSet
        DataSet Object with ungridded values in ``dataset.data.values``.
    interpolation : str
        One of :func:`getgriddinginterpolationlist`. With ``'none'``, data
        points are simply binned into the grid pixels (averaged per pixel).
    x_step, y_step : float or None
        Grid steps. If None, the median data step is used.
    x_prec, y_prec : int
        Decimal precision used to round the grid bounds.
    x_frame_factor, y_frame_factor : float
        Relative enlargement of the grid extent on each side.

    Returns
    -------
    dataset : DataSet
        The same object, with ``data.z_image`` and ``info`` filled in.
        Returned unchanged if the interpolation produced only NaNs.

    Raises
    ------
    ValueError
        If ``interpolation`` is not a known method.

    '''

    x, y, z = dataset.data.values.T[:3]

    # Grid steps ################################################################
    # Median step between two distinct x values.
    ## Median rather than min: on parallel profiles, the min x-difference can
    ## be smaller than the actual inter-profile step.
    if x_step is None:
        x_step = get_median_xstep(dataset, prec=x_prec)
    else:
        x_prec = getdecimalsnb(x_step)

    # Median step between two successive y values
    if y_step is None:
        y_step = get_median_ystep(dataset, prec=y_prec)
    else:
        y_prec = getdecimalsnb(y_step)

    # Grid extent ###############################################################
    # Min/max x coordinates, optionally enlarged by the frame factor.
    ## Both bounds are computed from the ORIGINAL min/max: the previous code
    ## overwrote xmin first and then reused the already-enlarged xmin to
    ## compute xmax (same for y), skewing the frame for non-zero factors.
    xmin0, xmax0 = x.min(), x.max()
    xmin = round((1. + x_frame_factor)*xmin0 - x_frame_factor*xmax0, x_prec)
    xmax = round((1. + x_frame_factor)*xmax0 - x_frame_factor*xmin0, x_prec)
    nx = int(np.around((xmax - xmin)/x_step) + 1)

    ymin0, ymax0 = y.min(), y.max()
    ymin = round((1. + y_frame_factor)*ymin0 - y_frame_factor*ymax0, y_prec)
    ymax = round((1. + y_frame_factor)*ymax0 - y_frame_factor*ymin0, y_prec)
    ny = int(np.around((ymax - ymin)/y_step) + 1)

    # Regular grid
    xi = np.linspace(xmin, xmax, nx, endpoint=True)
    yi = np.linspace(ymin, ymax, ny, endpoint=True)
    X, Y = np.meshgrid(xi, yi)

    method = str(interpolation).lower()

    # Gridding data #############################################################
    if method == "none":
        # No interpolation: project the data points into the grid pixels.
        ## If several data points fall into the same pixel they are averaged,
        ## so "peakfilt" the raw values beforehand to avoid averaging bad points.
        ## Bin edges are centered on the grid nodes so the resulting statistic
        ## has the same (ny, nx) shape as the grid: binned_statistic_2d needs
        ## nx+1 (resp. ny+1) edges for nx (resp. ny) bins.
        x_edges = np.concatenate(([xi[0] - x_step/2.], xi + x_step/2.))
        y_edges = np.concatenate(([yi[0] - y_step/2.], yi + y_step/2.))
        statistic, _, _, _ = scipy.stats.binned_statistic_2d(
            x, y, values=z, statistic='mean', bins=[x_edges, y_edges])
        Z = statistic.T  # binned_statistic_2d returns (nx, ny); grid is (ny, nx)

    elif method in getgriddinginterpolationlist():
        # SciPy interpolation onto the grid.
        ## The interpolation algorithm deals with overlapping data points;
        ## nevertheless "peakfilt" the raw values beforehand so the result is
        ## not too influenced by bad data points.
        Z = griddata((x, y), z, (X, Y), method=method)

        if np.all(np.isnan(Z)):
            # Interpolation failed: leave the dataset unchanged.
            return dataset

    else:
        # Previously this branch fell through with Z undefined (NameError).
        raise ValueError("Unknown gridding interpolation method: %r" % (interpolation,))

    # Fill the DataSet Object ###################################################
    dataset.data.z_image = Z
    dataset.info.x_min = xmin
    dataset.info.x_max = xmax
    dataset.info.y_min = ymin
    dataset.info.y_max = ymax
    dataset.info.z_min = np.nanmin(Z)
    dataset.info.z_max = np.nanmax(Z)
    dataset.info.x_gridding_delta = x_step
    dataset.info.y_gridding_delta = y_step
    dataset.info.gridding_interpolation = interpolation

    return dataset


def sample(dataset):
    ''' Re-sample data at ungridded sample position from gridded Z_image.

    cf. :meth:`~geophpy.dataset.DataSet.sample`

    The re-sampled values are written in place into the third column of
    ``dataset.data.values``.

    '''

    # Current gridded values (NaN pixels are excluded from the interpolator)
    X, Y = get_xygrid(dataset)
    Z = dataset.data.z_image

    idx_nan = np.isnan(Z)
    xy = np.stack((X[~idx_nan].flatten(), Y[~idx_nan].flatten())).T
    z = Z[~idx_nan].flatten()

    # Ungridded values coordinates at which to resample
    xiyi = dataset.data.values.T[:2].T  # [[x0, y0], [x1, y1], ...]
    zi = dataset.data.values.T[2]  # view on the values column

    # Re-sampling (in-place so dataset.data.values is updated through the view).
    ## Direct assignment instead of the previous `zi *= 0.; zi += z_interp`,
    ## which propagated any pre-existing NaN in zi (NaN*0 is NaN).
    z_interp = griddata(xy, z, xiyi, method='cubic')
    zi[...] = z_interp

    return dataset


def regrid(dataset, x_step=None, y_step=None, method='cubic'):
    ''' Re-grid dataset grid.

    cf. :meth:`~geophpy.dataset.DataSet.regrid`

    '''

    # Keep a copy to restore the original ungridded values afterwards.
    backup = dataset.copy()

    # Re-sample the ungridded values from the current grid, then
    # interpolate them onto the new grid.
    dataset.sample()
    dataset.interpolate(x_step=x_step, y_step=y_step, interpolation=method)

    # Restore the original ungridded values.
    dataset.data.values = backup.data.values

    return dataset


def histo_fit(dataset, valfilt=False):
    ''' Fit a normal (gaussian) distribution to the dataset histogram.

    Parameters
    ----------
    dataset : DataSet
        DataSet Object to fit.
    valfilt : bool
        If True (or if no Z_image is available), fit the ungridded values;
        otherwise fit the gridded values.

    Returns
    -------
    m, s : float
        Mean and standard deviation of the fitted normal distribution.

    '''

    # Fit on ungridded dataset values
    if valfilt or dataset.data.z_image is None:
        data = dataset.get_values()

    # Fit on gridded dataset values
    else:
        data = dataset.get_grid_values()

    # Normal (gaussian) fit.
    ## Bug fix: the previous code called scipy.stats.norm.fit(ser) on the
    ## undefined name `ser` (NameError); `data` is the intended argument.
    m, s = scipy.stats.norm.fit(data)  # mean and standard deviation
    return m, s


#---------------------------------------------------------------------------#
# DataSet Grid manipulation                                                 #
#---------------------------------------------------------------------------#
def get_xvect(dataset):
    ''' Return dataset x-coordinate grid vector, or None if no grid exists. '''

    # A grid requires both grid info and a Z_image.
    if dataset.info is None or dataset.data.z_image is None:
        return None

    ncol = dataset.data.z_image.shape[1]
    return np.array([np.linspace(dataset.info.x_min, dataset.info.x_max, ncol)])


def get_yvect(dataset):
    ''' Return dataset y-coordinate grid vector, or None if no grid exists. '''

    # A grid requires both grid info and a Z_image.
    if dataset.info is None or dataset.data.z_image is None:
        return None

    nrow = dataset.data.z_image.shape[0]
    return np.array([np.linspace(dataset.info.y_min, dataset.info.y_max, nrow)])


def get_xyvect(dataset):
    ''' Return dataset x- and y-coordinate grid vectors. '''

    return get_xvect(dataset), get_yvect(dataset)

##def zimage_xcoord(dataset):
##    '''
##    Return dataset x-coordinate array of a Z_image.
##    '''
##    return np.array([np.linspace(dataset.info.x_min, dataset.info.x_max, dataset.data.z_image.shape[1])])
##
##
##def zimage_ycoord(dataset):
##    '''
##    Return dataset y-coordinate array of a Z_image.
##    '''
##    return np.array([np.linspace(dataset.info.y_min, dataset.info.y_max, dataset.data.z_image.shape[0])])


def get_xygrid(dataset):
    ''' Return dataset x and y-coordinate grids, or (None, None) if no grid exists. '''

    # A grid requires both grid info and a Z_image.
    if dataset.info is None or dataset.data.z_image is None:
        return None, None

    xvec, yvec = get_xyvect(dataset)
    grids = np.meshgrid(xvec, yvec)
    return grids[0], grids[1]


def get_xgrid(dataset):
    ''' Return dataset x-coordinate grid. '''

    X, _ = get_xygrid(dataset)
    return X


def get_ygrid(dataset):
    ''' Return dataset y-coordinate grid. '''

    _, Y = get_xygrid(dataset)
    return Y


def get_gridextent(dataset):
    ''' Return dataset grid extent as (xmin, xmax, ymin, ymax). '''

    # A grid requires both grid info and a Z_image.
    if dataset.info is None or dataset.data.z_image is None:
        return None, None, None, None

    info = dataset.info
    return info.x_min, info.x_max, info.y_min, info.y_max


def get_gridcorners(dataset):
    ''' Return dataset grid corners coordinates (BL, BR, TL, TR), or None. '''

    # A grid requires both grid info and a Z_image.
    if dataset.info is None or dataset.data.z_image is None:
        return None

    info = dataset.info
    corners_x = [info.x_min, info.x_max, info.x_min, info.x_max]
    corners_y = [info.y_min, info.y_min, info.y_max, info.y_max]
    return np.array([corners_x, corners_y])

def get_xvalues(dataset):
    ''' Return the x-coordinates from the dataset values.

    Wrapped in np.asarray for consistency with get_yvalues/get_values.
    '''

    return np.asarray(dataset.data.values.T[0])


def get_yvalues(dataset):
    ''' Return the y-coordinates from the dataset values. '''

    y_column = dataset.data.values.T[1]
    return np.asarray(y_column)


def get_xyvalues(dataset):
    ''' Return the x- and y-coordinates from the dataset values. '''

    return get_xvalues(dataset), get_yvalues(dataset)


def get_values(dataset):
    ''' Return the dataset values. '''

    value_column = dataset.data.values.T[2]
    return np.asarray(value_column)


def get_xyzvalues(dataset):
    ''' Return both the x, y-coordinates and dataset values. '''

    return get_xvalues(dataset), get_yvalues(dataset), get_values(dataset)


def get_boundingbox(dataset):
    ''' Return the coordinates (BL, BR, TL, TR) of the box bounding the data values. '''

    x, y = get_xyvalues(dataset)

    xlo, xhi = x.min(), x.max()
    ylo, yhi = y.min(), y.max()

    return np.array([[xlo, ylo], [xhi, ylo], [xlo, yhi], [xhi, yhi]])


def get_median_xstep(dataset, prec=None):
    ''' Return the median step between two distinct x values rounded to the given precision.

    Profiles are considered parallel to the y-axis. '''

    x = get_xvalues(dataset)

    # Default precision: the largest number of decimals found in the data.
    if prec is None:
        prec = max(getdecimalsnb(x))

    distinct_x = np.unique(x)
    return np.median(np.around(np.diff(distinct_x), prec))


def get_median_ystep(dataset, prec=None):
    ''' Return the median step between two successive y values rounded to the given precision.

    Profiles are considered parallel to the y-axis. '''

    y = get_yvalues(dataset)

    # Default precision: the largest number of decimals found in the data.
    if prec is None:
        prec = max(getdecimalsnb(y))

    # Profiles run along the y-axis, so y varies much more than x.
    ## np.unique() would merge steps across profiles and can underestimate
    ## the median y step; use the absolute successive differences instead.
    successive_steps = np.abs(np.diff(y))
    return np.median(np.around(successive_steps, prec))


def get_median_xystep(dataset, x_prec=None, y_prec=None):
    ''' Return the median steps between two distinct x and y values rounded to the given precisions. '''

    return (get_median_xstep(dataset, prec=x_prec),
            get_median_ystep(dataset, prec=y_prec))


def get_track(dataset, num, attr='values'):
    ''' Return the values corresponding to the track number.

    Parameters
    ----------
    dataset : 1-D array-like
        Dataset object or any object having a track and values attributes.

    num : int or sequence of int
        Track number.

    attr : {'values', 'x', 'y', 'east', 'north', 'long', 'lat'}
        The named attribute of object.

    Return
    ------
    profiles : 1-D array-like

    '''

    track = dataset.data.track
    if track is None:
        return None

    # Boolean mask of the requested track(s)
    is_sequence = hasattr(num, '__iter__') and not isinstance(num, str)
    if is_sequence:
        masks = [track == int(track_num) for track_num in num]
        idx = np.any(np.asarray(masks), axis=0)
    else:
        idx = track == int(num)

    # Special attribute: positional indices instead of values
    if attr == 'index':
        return np.where(idx)[0]

    data = getattr(dataset.data, attr, None)
    if data is None:
        return None

    return data[idx]

    
#def del_track(dataset, num):
#    ''' Delete the values corresponding to the track number.
#
#    Parameters
#    ----------
#    dataset : 1-D array-like
#        Dataset object or any object having a track and values attributes.
#
#    num : int
#        Track number.
#
#    Return
#    ------
#    Dataset cleared of track num related attributes
#
#    '''
#
#    if dataset.data.track is None:
#        return
#
#    idx = np.where(dataset.data.track==int(num))
#
#    if dataset.data.track is None:
#        return
#    return dataset.data.values[np.where(dataset.data.track==int(num))]



##
#####
##def get_min_xstep(dataset, prec=None):
##    ''' Return the min step between two distinct x values rounded to the given precision. '''
##
##    x = get_xvalues(dataset)
##
##    if prec is None:
##        prec = max(getdecimalsnb(x))
##
##    x_list = np.unique(x)
##    x_step = np.min(np.around(np.diff(x_list), prec))
##
##    return x_step
##
##
##def get_min_ystep(dataset, prec=None):
##    ''' Return the min step between two distinct y values rounded to the given precision. '''
##
##    y = get_yvalues(dataset)
##
##    if prec is None:
##        prec = max(getdecimalsnb(y))
##
##    y_list = np.unique(y)
##    y_step = np.min(np.around(np.diff(y_list), prec))
##
##    return y_step
##
##
##def get_min_xystep(dataset, x_prec=None, y_prec=None):
##    ''' Return the min steps between two distinct x and y values rounded to the given precisions. '''
##
##    x_step = get_min_xstep(dataset, prec=x_prec)
##    y_step = get_min_ystep(dataset, prec=y_prec)
##
##    return x_step, y_step
#####

#def apodisation2d(val, apodisation_factor):
##   '''
##   2D apodisation, to reduce side effects
##
##   Parameters :
##
##   :val: 2-Dimension array
##
##   :apodisation_factor: apodisation factor in percent (0-25)
##
##   '''
#   if (apodisation_factor > 0):
#      # apodisation in the x direction
#      for profile in val.T:
#         _apoisation1d(profile, apodisation_factor)

      # apodisation in the y direction
#      for profile in val:
#         _apodisation1d(profile, apodisation_factor)



#def _apodisation1d(array1D, apodisation_factor):
##   '''
##   1D apodisation, to reduce side effects
##
##   Parameters :
##
##   :array1D: 1-Dimension array
##
##   :apodisation_factor: apodisation factor in percent (0-25)
##
##   '''
#   na = len(array1D)                                  # n is the number of array elements
#   napod = int(np.around((na * apodisation_factor)/100))     # napod is the number of array elements to treat
#   if (napod <= 1):                                   # one element at least must be treated
#      napod = 1
#   pi2 = np.pi/2.
#   for n in range(napod):                             # for napod first data
#      array1D[n] = array1D[n]*np.cos((napod-n)*pi2/napod)

#   for n in range(na-napod, na):                      # for napod last data
#      array1D[n] = array1D[n]*np.cos((n+1-na+napod)*pi2/napod)

###
##
# In the Future, MOVE TO operation.spectral
##
###
#---------------------------------------------------------------------------#
# Fourier Transform tools                                                   #
#---------------------------------------------------------------------------#
def fillnanvalues(val, indexout=False):
    '''
    Fill 'nan' values of each profile (row) using simple linear interpolation.

    Parameters
    ----------
    val: array_like
        Array where to replace the NaNs.

    indexout: bool,
        Flag to return the index (boolean indexing) of the NaNs in the original array.

    Returns
    -------
    The completed array (and the index of the NaNs in the original array).

    '''

    val_valid = np.copy(val)

    # Index of NaNs in the full array, computed once.
    ## Bug fix: previously this name was overwritten by the per-profile mask
    ## inside the loop, so with indexout=True the returned index was only the
    ## LAST profile's mask instead of the whole array's.
    nan_idx = np.isnan(val)

    # Data interpolation
    ## If there are NaNs in a profile, the values at the NaN locations
    ## are estimated by linear interpolation along the profile.
    if nan_idx.any():
        for profile in val_valid:
            prof_nan = np.isnan(profile)

            # All-nan profile: no valid samples to interpolate from,
            # fill with a constant placeholder.
            if prof_nan.all():
                profile[prof_nan] = 1  # -999

            # Missing data in the profile
            elif prof_nan.any():
                prof_valid = ~prof_nan
                profile[prof_nan] = np.interp(prof_nan.nonzero()[0],
                                              prof_valid.nonzero()[0],
                                              profile[prof_valid])

    # Return both data and nan index
    if indexout:
        return val_valid, nan_idx

    # Return data alone
    return val_valid

##def wavenumber(nx, ny, dx, dy):
##    '''
##    Computes the grid wavenumber coordinates.
##
##    Parameters
##    ----------
##    nx, ny : int
##        Dimension of grid in x (col) and y (line) directions.
##
##    dx, dy : float
##        Sample intervals in the x and y directions.
##
##    Returns
##    -------
##    kx, ky : array_like
##        The wavenumbers coordinate in the kx and ky directions.
##
##    Examples
##    --------
##    >>> ny, nx = grid.shape
##    >>> dy, dx = 0.1, 1  # grid spatial interval in m
##    >>> fourier = np.fft.fft2(grid)
##    >>> kx, ky = wavenumber(nx, ny, dx, dy)
##    >>> kx
##    array([[ 0.  ,  0.01694915,  0.03389831,  ..., -0.05084746, -0.03389831, -0.01694915]
##    [ 0.  ,  0.01694915,  0.03389831,  ..., -0.05084746, -0.03389831, -0.01694915]
##    ...
##    [ 0.  ,  0.01694915,  0.03389831,  ..., -0.05084746, -0.03389831, -0.01694915]
##    [ 0.  ,  0.01694915,  0.03389831,  ..., -0.05084746, -0.03389831, -0.01694915]])
##    >>> ky
##    array([[   0.    ,    0.    ,    0.     ,    ... ,   0.      ,   0.    ,   0.    ]
##    [ 0.02003606,  0.02003606,  0.02003606,  ..., 0.02003606, 0.02003606, 0.02003606]
##    [ 0.04007213,  0.04007213,  0.04007213,  ..., 0.04007213, 0.04007213, 0.04007213]
##    ...
##    [ -0.04007213,  -0.04007213,  -0.04007213,  ..., -0.04007213, -0.04007213, -0.04007213]
##    [ -0.02003606,  -0.02003606,  -0.02003606,  ..., -0.02003606, -0.02003606, -0.02003606]])
##
##    '''
##
##    # x-directed wavenumber
##    kx = np.fft.fftfreq(nx, d=dx)  # x-directed wavenumber vector
##    kx.shape = [-1,nx]  # ensuring line vector
##    kx = np.matlib.repmat(kx, ny, 1)  # x-directed wavenumber matrix
##
##    # y-directed wavenumber
##    ky = np.fft.fftfreq(ny, d=dy)  # y-directed wavenumber vector
##    ky.shape = [ny,-1]  # ensuring column vector
##    ky = np.matlib.repmat(ky, 1, nx)  # y-directed wavenumber matrix
##
##    return 2*np.pi*kx, 2*np.pi*ky


def wavenumber(nx, ny, dx, dy, indx=None, indy=None):
    '''
    Computes the grid wavenumber coordinates.

    Parameters
    ----------

    nx, ny : int
        Dimension of grid in x (col) and y (line) directions.

    dx, dy : float
        Sample intervals in the x and y directions.

    indx, indy : int, optional
        Index in the kx and ky directions.
        If ix or iy are None, the whole matrix is returned.

    Returns
    -------
    kx, ky : array_like
        The wavenumbers coordinate in the kx and ky directions.

    Notes
    -----
    This function is a direct adaptation from the Subroutine B.20.
    "Subroutine to calculate the wavenumber coordinates of elements
    of grids" in (Blakely, 96)[#]_.

    References
    ----------
     .. [#] Blakely R. J. 1996.
         Potential Theory in Gravity and Magnetic Applications.
         Appendix B, p396.
         Cambridge University Press.

    '''

    # Nyquist indices in the kx and ky directions
    nyq_x = nx/2 + 1
    nyq_y = ny/2 + 1

    # Default: the whole grid
    if indx is None or indy is None:
        indx = range(nx)
        indy = range(ny)

    # Wavenumbers computation
    ## indices up to the Nyquist index map to positive frequencies,
    ## the remainder wrap around to negative frequencies.
    kx = np.zeros((len(indy), len(indx)))
    ky = np.zeros((len(indy), len(indx)))
    for iy in indy:
        for ix in indx:
            freq_x = ix if ix <= nyq_x else ix - nx
            freq_y = iy if iy <= nyq_y else iy - ny
            kx[iy, ix] = float(freq_x) / ((nx-1)*dx)
            ky[iy, ix] = float(freq_y) / ((ny-1)*dy)

    return 2*np.pi*kx, 2*np.pi*ky


def apodisation2d(val, apodisation_factor):
    ''' 2D apodisation, to reduce side effects.

    Parameters :

    :val: 2-Dimension array

    :apodisation_factor: apodisation factor in percent (0-25)

    Returns :

    Enlarged array after apodisation in both directions
    (``val`` unchanged if the factor is 0).

    '''
    array2DTemp = []
    array2D = []

    if apodisation_factor > 0:
        # Apodisation in the x direction (column by column)
        nx = len(val.T[0])  # number of elements per column
        napodx = int(np.around((nx * apodisation_factor)/100))  # nb of elements to treat
        if napodx <= 1:  # one element at least must be treated
            napodx = 1
        for profile in val.T:
            array2DTemp.append(_apodisation1d(profile, napodx))
        array2DTemp = (np.array(array2DTemp)).T

        # Apodisation in the y direction (row by row)
        ny = len(array2DTemp[0])  # number of elements per row
        napody = int(np.around((ny * apodisation_factor)/100))  # nb of elements to treat
        if napody <= 1:  # one element at least must be treated
            napody = 1
        for profile in array2DTemp:
            array2D.append(_apodisation1d(profile, napody))
    else:
        # Apodisation factor = 0: nothing to do
        array2D = val

    # return napodx, napody, np.array(array2D)
    return np.array(array2D)
def _apodisation1d(array1D, napod): ''' 1D apodisation, to reduce side effects Parameters : :array1D: 1-Dimension array :napod: apodisation pixels number Returns : 1-Dimension array of len(array1D) + napod elements ''' pi2 = np.pi/2. na = len(array1D) # n is the number of array elements nresult = na + 2*napod array1Dresult = [] for n in range(napod): array1Dresult.append(array1D[n]*np.cos((napod-n)*pi2/napod)) for n in range(na): array1Dresult.append(array1D[n]) for n in range(na-napod, na): # for napod last data array1Dresult.append(array1D[n]*np.cos((n+1-na+napod)*pi2/napod)) return array1Dresult def apodisation2Dreverse(val, valwithapod, napodx, napody): ''' To do the reverse apodisation ''' na = len(val) nb = len(val[0]) for n in range(na): for m in range(nb): val[n][m] = valwithapod[n+napody][m+napodx] #---------------------------------------------------------------------------# # DataSet Basic Math Operations # #---------------------------------------------------------------------------# def stats(dataset, valfilt=False, valmin=None, valmax=None): ''' cf. 
dataset.py ''' # Statistics on dataset values or Z_images ################################# if valfilt: val = dataset.data.values[:, 2] else: val = dataset.data.z_image # Limiting data range ###################################################### if valmin is None: valmin = np.nanmin(val) if valmax is None: valmax = np.nanmax(val) idx = (val >= valmin) & (val <= valmax) val = val[idx] # Dataset statistics ####################################################### return arraygetstats(val) def multidatasetstats(datasets, valfilt=True, valmin=None, valmax=None): ''' Returns basic statistics for each dataset in the Sequence of DataSet Objects ''' mean, std, median, Q1, Q3, IQR = [], [], [], [], [], [] for dataset in datasets: datasetstats = stats(dataset, valfilt=valfilt, valmin=valmin, valmax=valmax) mean.append(datasetstats[0]) std.append(datasetstats[1]) median.append(datasetstats[2]) Q1.append(datasetstats[3]) Q3.append(datasetstats[4]) IQR.append(datasetstats[5]) return mean, std, median, Q1, Q3, IQR def add_constant(dataset, constant=0, valfilt=True, zimfilt=True): ''' cf. dataset.py ''' # Data values ############################################################## if valfilt: x, y, z = dataset.data.values.T z += constant # Z_image ################################################################## if zimfilt and dataset.data.z_image is not None: dataset.data.z_image += constant dataset.info.z_min += constant dataset.info.z_max += constant return dataset def times_constant(dataset, constant=1, valfilt=True, zimfilt=True): ''' cf. 
dataset.py ''' # Data values ############################################################## if valfilt: x, y, z = dataset.data.values.T z *= constant # Z_image ################################################################## if zimfilt and dataset.data.z_image is not None: dataset.data.z_image *= constant dataset.info.z_min *= constant dataset.info.z_max *= constant return dataset #---------------------------------------------------------------------------# # DataSet Basic Manipulations # #---------------------------------------------------------------------------# def copy(dataset): ''' cf. dataset.py ''' return deepcopy(dataset) def setmedian(dataset, median=None, profilefilt=False, valfilt=False, setmethod='additive'): ''' cf. dataset.py ''' # No value provided for the median ######################################### if median is None: return dataset # Set each profile's median ################################################ if profilefilt: # Setting data values if valfilt: # ...TBD... pass # Setting data Z_image (if any) elif dataset.data.z_image is not None: zimage = dataset.data.z_image Zfilt = np.empty(zimage.shape) colnum = 0 for col in zimage.T: Zfilt[:, colnum] = arraysetmedian(col, val=median, method=setmethod) colnum += 1 dataset.data.z_image = Zfilt # Set global dataset median ################################################ else: # Setting median for data values x, y, z = dataset.data.values.T z = arraysetmedian(z, val=median, method=setmethod) xyz = np.vstack((x, y, z)) dataset.data.values = xyz.T # Setting median for data Z_image if any if dataset.data.z_image is not None: zimage = dataset.data.z_image zimage = arraysetmedian(zimage, val=median, method=setmethod) dataset.data.z_image = zimage return dataset def setmean(dataset, mean=None, profilefilt=False, valfilt=False, setmethod='additive'): ''' cf. 
dataset.py ''' # No value provided for the median ######################################### if mean is None: return dataset # Set each profile's mean ################################################## if profilefilt: # Setting data values if valfilt: # ...TBD... pass # Setting data Z_image (if any) elif dataset.data.z_image is not None: zimage = dataset.data.z_image Zfilt = np.empty(zimage.shape) colnum = 0 for col in zimage.T: Zfilt[:, colnum] = arraysetmean(col, val=mean, method=setmethod) colnum += 1 dataset.data.z_image = Zfilt # Set global dataset mean ################################################## else: # Setting mean for data values x, y, z = dataset.data.values.T z = arraysetmean(z, val=mean, method=setmethod) xyz = np.vstack((x, y, z)) dataset.data.values = xyz.T # Setting mean for data Z_image if any if dataset.data.z_image is not None: zimage = dataset.data.z_image zimage = arraysetmean(zimage, val=mean, method=setmethod) dataset.data.z_image = zimage return dataset #---------------------------------------------------------------------------# # DataSet Compatibility checks # #---------------------------------------------------------------------------# def check_georef_compat(dataset_list): ''' Check the coordinates system compatibility of a list of datasets before merging. Prameters --------- dataset_list: sequence of DataSet Objects. ''' active, refsystem, utm_letter, utm_number = [], [], [], [] for dataset in dataset_list: active.append(dataset.georef.active) refsystem.append(dataset.georef.refsystem) utm_letter.append(dataset.georef.utm_zoneletter) utm_number.append(dataset.georef.utm_zonenumber) compat = [isidentical(active), isidentical(refsystem), isidentical(utm_letter), isidentical(utm_number)] return all(compat) def check_gridstep_compat(dataset_list): ''' Check the grid step compatibility of a list of datasets before merging. Prameters --------- dataset_list: tuple or list Sequence of DataSet Objects. 
''' dx, dy = [], [] for dataset in dataset_list: dx.append(dataset.info.x_gridding_delta) dy.append(dataset.info.y_gridding_delta) compat = [isidentical(dx), isidentical(dy)] return all(compat) def check_zimage_compat(dataset_list): ''' Check the Z_image compatibility (i.e presence of) of a list of datasets before merging. Prameters --------- dataset_list: tuple or list Sequence of DataSet Objects. ''' iszimage = [] for dataset in dataset_list: iszimage.append(dataset.data.z_image is not None) return isidentical(iszimage) #---------------------------------------------------------------------------# # DataSet Merging Tools # #---------------------------------------------------------------------------# def overlapmatching(datasets, tol=0.1, valfilt=True): ''' ''' # Mismatch symetrical matrix ############################################### ## The mismatch matrix is symetrical, only the upper-triangle is computed n = len(datasets) misma = np.zeros((n, n)) # dataset mismatch matrix triu = np.triu_indices(n, k=1) # index matrix upper-triangle with (diagonal offset of 1) idxi, idxj = triu[0], triu[1] for k in range(len(idxi)): misma[idxi[k], idxj[k]] = dataset_mismatch(datasets[idxi[k]], datasets[idxj[k]], tol=tol, valfilt=True) tril = np.tril_indices(n, k=-1) misma[tril] = -misma[triu] print(misma) def dataset_mismatch(dataset1, dataset2, tol=0.1, valfilt=True): ''' Return the mismatch between overlapping element the two dataset. ''' xyz1, xyz2, dist = dataset_overlap(dataset1, dataset2, tol=tol, valfilt=True) mismatch = arraymismatch(xyz1[:, 2], xyz2[:, 2], weighted=True, discardspurious=True) return mismatch def dataset_overlap(dataset1, dataset2, tol=0.1, valfilt=True): ''' Return overlapping element of the two dataset. ''' if valfilt: xyz1 = dataset1.data.values.T xyz2 = dataset2.data.values.T #...TBD... 
else: return [], [], [] arr = array1D_getoverlap(xyz1, xyz2, tol=tol) arr1 = arr[:, 0:3] # x,y,z from array 1 arr2 = arr[:, 4:7] # x,y,z from array 2 dist = arr[:, 8] # actual distance between ovelapping points return arr1, arr2, dist def matchedges(datasets, matchmethod='equalmedian', setval=None, valfilt=True, setmethod='additive', meanrange=None, tol=0.1): ''' Match the different datasets egdes (used before datasets merging). Parameters ---------- :datasets: tuple or list Sequence of DataSet Objects. Each DataSet Object must have the same coordinates system. Reference --------- Eder-Hinterleitner A., Neubauer W. and Melichar P., 1996. Restoring magnetic anomalies. Archaeological Prospection, vol.3, no. 4, p185-197. ''' # Ensuring datasets is a list of dataset ################################### datasets = list(datasets) # Median equalization for all the sub-datasets ############################# if matchmethod.lower() == 'equalmedian': # Using the mean of the sub-datasets medians if setval is None: # Basic statistics for all sub-datasets datasets_stat = multidatasetstats(datasets) medians = datasets_stat[2] setvalue = np.mean(medians) # mean of the sub-datasets medians ### ###...TDB... Should we propose the ## Value that gives minimum variation in every subgrid ? 
### # Setting all sub-datasets to a common median value for dataset in datasets: setmedian(dataset, median=setval, profilefilt=False, valfilt=False, setmethod=setmethod) # Mean equalization for all the sub-datasets ############################### elif matchmethod.lower() == 'equalmean': # Using the mean of the sub-datasets means if setval is None: # 'Selective mean' ## Mean calculated over a specific range ## (see Eder-Hinterleitner et al., 1996) if meanrange is not None: # Concatenation of all sub-datasets values valglobal = np.array([]).reshape(-1,) if valfilt: for dataset in datasets: val = dataset.data.values[:, 2] valglobal = np.hstack([valglobal, val]) else: for dataset in datasets: val = dataset.data.z_image.reshape(-1,) valglobal = np.vstack([valglobal, val]) # Mid XX percent data range valmin, valmax = arraygetmidXpercentinterval(valglobal, percent=meanrange) datasets_stat = multidatasetstats(datasets, valmin=valmin, valmax=valmax) # Classic mean of all sub-datasets else: datasets_stat = multidatasetstats(datasets) # mean of the sub-datasets means means = np.asarray(datasets_stat[0]) setval = np.mean(means) ### ###...TDB... Should we propose the ## Value that gives minimum variation in every subgrid ? ### # Setting all sub-datasets to a common mean for dataset in datasets: #setmean(dataset, mean=setval, profilefilt=False, valfilt=False, setmethod=setmethod) ##...TDB... ## Is the selective mean compared to the global mean on the global medians ? ## In the latter case, setmedian() should be used for the offeset calculation. setmedian(dataset, median=setval, profilefilt=False, valfilt=False, setmethod=setmethod) # Edge mismatch adjustment ################################################# ## Edge mismatch is computed between each of the sub-dataset and minimized ## following (Haigh J.G.B., 1992). else: pass ###...TBD... ## ??? Separate into ## MergeValues / MergeZimage / MergeHeaderso ???? 
### def merge(datasetout, datasets, matchmethod=None, setval=None, setmethod='additive', meanrange=None, commonprofile=False, valfilt=False): ''' Merge datasets together. cf. :meth:`~geophpy.dataset.DataSet.merge` ''' # Filter ungridded values ################################################## ## if valfilt or dataset.data.z_image is None: ## datasets_to_merge = [] ## ## for dataset in datasets: ## datasets_to_merge.append(dataset.copy()) ## ## # Matching dataset edges ## ## # Merging values ## for dataset in datasets_to_merge: ## val = dataset.data.values ## valmerged = np.vstack([valmerged, val]) ## ## #values = dstmp.data.values ## #profiles = genop.arrange_to_profile(values) ## pass # Filter gridded values #################################################### ## elif not (valfilt or dataset.data.z_image is None): ## pass # Checking dataset compatibilty ############################################ compatible = all([check_gridstep_compat(datasets), check_georef_compat(datasets), check_zimage_compat(datasets)]) if not compatible: return iszimage = datasets[0].data.z_image is not None # Z_image presence flag # copying datasets to not alter the original data if matching needed ####### datasets_to_merge = [] for dataset in datasets: datasets_to_merge.append(dataset.copy()) # Matching datasets edges using specific method ############################ if matchmethod is not None: matchedges(datasets_to_merge, matchmethod=matchmethod, setval=setval, setmethod=setmethod, meanrange=meanrange) else: pass # Merging dataset values ################################################### ## So far the values are simply stacked together ## All duplicate point are kept ## ...TDBD... 
average/supress/other duplicate nc = datasets_to_merge[0].data.values.shape[1] valmerged = np.array([]).reshape(0, nc) # empty array with nc=3 columns for dataset in datasets_to_merge: val = dataset.data.values valmerged = np.vstack([valmerged, val]) datasetout.data.values = valmerged # Merged DataSet Object Initialization ##################################### ## The values from the 1st dataset are used for the parameters ## that are common to all datasets # Info parameters common to all datasets dx = datasets_to_merge[0].info.x_gridding_delta dy = datasets_to_merge[0].info.y_gridding_delta grid_interp = datasets_to_merge[0].info.gridding_interpolation plot_type = datasets_to_merge[0].info.plottype cmap_name = datasets_to_merge[0].info.cmapname datasetout.info.x_gridding_delta = dx datasetout.info.y_gridding_delta = dy datasetout.info.gridding_interpolation = grid_interp datasetout.info.plottype = plot_type datasetout.info.cmapname = cmap_name # Data & GeoRefSystem parameters common to all datasets datasetout.data.fields = datasets_to_merge[0].data.fields datasetout.georef.active = datasets_to_merge[0].georef.active datasetout.georef.active = datasets_to_merge[0].georef.active datasetout.georef.refsystem = datasets_to_merge[0].georef.refsystem datasetout.georef.utm_zoneletter = datasets_to_merge[0].georef.utm_zoneletter datasetout.georef.utm_zonenumber = datasets_to_merge[0].georef.utm_zonenumber # Info & GeoRefSystem parameters different for each dataset xmin, xmax, ymin, ymax, zmin, zmax = [], [], [], [], [], [] points_list = [] # Merged DataSet Object spatial limits ##################################### # Retreiving value for data limits for dataset in datasets_to_merge: xmin.append(dataset.info.x_min) xmax.append(dataset.info.x_max) ymin.append(dataset.info.y_min) ymax.append(dataset.info.y_max) zmin.append(dataset.info.z_min) zmax.append(dataset.info.z_max) points_list.append(dataset.georef.points_list) # Z_image is present if iszimage: xmin = min(xmin) xmax = 
max(xmax) ymin = min(ymin) ymax = max(ymax) zmin = min(zmin) zmax = max(zmax) # No Z_image, i.e. xmin = None etc. ## value are kept to None else: pass datasetout.georef.points_list = points_list datasetout.info.x_min = xmin datasetout.info.x_max = xmax datasetout.info.y_min = ymin datasetout.info.y_max = ymax datasetout.info.z_min = zmin datasetout.info.z_max = zmax # No Z_image in the datasets, merge is done ! ############################## if not iszimage: return ## # Merging dataset Info etc. ## # Different ## ## Data: ## easting_image = None # easting array ## northing_image = None # northing array # Merging dataset Z_images ################################################# ## So far, if several data points fall into the same pixel, ## they are averaged. ## ...TBD... possibility to keep, min max or ??? if overlaping point # Regular grid for merged dataset nx = np.around((xmax - xmin)/dx) + 1 ny = np.around((ymax - ymin)/dy) + 1 x = np.linspace(xmin, xmax, nx) y = np.linspace(ymin, ymax, ny) X, Y = np.meshgrid(x, y) # Initialization of the Merged grid Z = X * 0. P = Z.copy() # number of data points in the pixel initialization for dataset in datasets_to_merge: # Current grid Xi, Yi = dataset.get_xygrid() Zi = dataset.data.z_image nl, nc = Zi.shape for i in range(nl): for j in range(nc): # Current point coordinates xi = Xi[i][j] yi = Yi[i][j] zi = Zi[i][j] # Index of the current point in the merged grid indx = np.where(x + dx/2. > xi) indy = np.where(y + dy/2. 
> yi) # Filling merged grid Z[indy[0][0], indx[0][0]] += zi P[indy[0][0], indx[0][0]] += 1 # Averaging data points in the pixel initialization Z = Z/P datasetout.data.z_image = Z ### ## # in the future, MOVE TO operation.spatial ## ### #---------------------------------------------------------------------------# # DataSet Profiles' detection Tools # #---------------------------------------------------------------------------# def arrange_to_profile(values, constant=None): ''' Re-arrange a list of points by profile based on constant coordinates. Only x, y values are used to re-arrange the data into profiles. Any additionnal values will be kept and managed properly. Parameters ---------- values : 1-D array-like Array of points coordinates (and extra informations) >>> values = [[x1, y1, ...], [x2, y2, ...], ..., [xn, yn, ...]] constant : {None, 'x','y'} Profile's constant coordinates. If None, the constant coordinated id determined from the data as the coordinates with the less changes If 'x', the profile is parallel to the y-direction. If 'y', the profile is parallel to the x-direction. Return ------ prof : 1-D array-like Array of x or y-constant profiles. >>> prof = [[ [x1, y1,...], [x2, y2,...] ], # profile 1 [ [x21, y21,...], [x22, y22,...] ], # profile 2 [ [...], ..., [xn, yn, ...] ] # last profile ] ''' if constant is None: constant = estimate_constant_coordinate(values) profile_chooser = {'x' : arrange_to_xprofile, 'y' : arrange_to_yprofile} profiles = profile_chooser[constant](values) return profiles def arrange_to_xprofile(values): ''' Re-arrange a list of values by profile based on constant x-coordinate. Only x values are used to re-arrange the data into profiles. Any additionnal values will be kept and managed properly. Parameters ---------- values : 1-D array-like List of points coordinates (and extra informations) >>> values = [[x1, y1, ...], [x2, y2, ...], ..., [xn, yn, ...]] Return ------ profiles : 1-D array-like Array of x-constant profiles. 
>>> profiles = [[ [x1, y1,...], [x2, y2,...] ], # profile 1 [ [x21, y21,...], [x22, y22,...] ], # profile 2 [ [...], ..., [xn, yn, ...] ] # last profile ] ''' # Input as a list val = [list(point) for point in values] # Adding a fake point as last point marker fake_pts = [-999 for _ in range(len(val[0]))] # [-999, ..., -999] val.append(fake_pts) # Rearranging into a profile list npts, profiles, profile_points = [], [], [] xinit = val[0][0] for point in val: x, y = point[0:2] # current point coordinates # Add point to the current profile point list if x == xinit: profile_points.append(point) # coordinates + extra values # or create new profile elif x != xinit: # End of survey, storing last profile if x == -999: #profiles.append(point_list[:-1]) profiles.append(profile_points) npts.append(len(profile_points)) # New profile else: # Storing previous profile profiles.append(profile_points) npts.append(len(profile_points)) # Creating a new profile profile_points = [] # new empty profile xinit = x # new initial point profile_points.append(point) # adding current point to the new profile return profiles # np.asarray(profiles) def arrange_to_yprofile(values): ''' Re-arrange points by profile based on constant y-coordinate. Only y values are used to re-arrange the data into profiles. Any additional values will be kept and managed properly. Parameters ---------- values : 1-D array-like Array of points coordinates (and extra informations) >>> values = [[x1, y1, ...], [x2, y2, ...], ..., [xn, yn, ...]] Return ------ profiles : 1-D array-like Array of y-constant profiles. >>> profiles = [[ [x1, y1,...], [x2, y2,...] ], # profile 1 [ [x21, y21,...], [x22, y22,...] ], # profile 2 [ [...], ..., [xn, yn, ...] 
] # last profile ] ''' # Input as a list val = [list(point) for point in values] # Adding a fake point as last point marker fake_pts = [-999 for _ in range(len(val[0]))] # [-999, ..., -999] val.append(fake_pts) # Rearranging into a profile list npts, profiles, profile_points = [], [], [] yinit = val[0][1] for point in val: x, y = point[0:2] # current point # Add point to the current profile point list if y == yinit: profile_points.append(point) # coordinates + extra values # or create new profile elif y != yinit: # End of survey, storing last profile if y == -999: #profiles.append(point_list[:-1]) profiles.append(profile_points) npts.append(len(profile_points)) # New profile else: # Storing previous profile profiles.append(profile_points) npts.append(len(profile_points)) # Creating a new profile profile_points = [] # new empty profile yinit = y # new initial point profile_points.append(point) # adding current point to the new profile return profiles # np.asarray(profiles) def arrange_to_profile_from_track(values, track): ''' Re-arrange points by profile based on track number. Parameters ---------- values : 1-D array-like Array of points coordinates (and extra informations) >>> values = [[x1, y1, ...], [x2, y2, ...], ..., [xn, yn, ...]] track : 1-D array-like Array of track number for each data point. >>> values = [1, 1, 1, ..., 2, 2, 2, ... n, n, n] Return ------ profiles : 1-D array-like Array of y-constant profiles. >>> profiles = [[ [x1, y1,...], [x2, y2,...] ], # profile 1 [ [x21, y21,...], [x22, y22,...] ], # profile 2 [ [...], ..., [xn, yn, ...] ] # last profile ] ''' track = np.asarray(track) values = np.asarray(values) profiles = [] for num in np.unique(track): idx = np.where(track==num) profiles.append(values[idx]) return profiles # np.asarray(profiles) def estimate_constant_coordinate(values): ''' Estimate the constant coordinated of a list of points based on the coordinates with the less unique values. 
Parameters ---------- values : 1-D array-like List of points coordinates (and extra informations) >>> values = [[x1, y1, ...], [x2, y2, ...], ..., [xn, yn, ...]] Return ------ constant : {'x','y'} Estimated profile's constant coordinates. ''' constant = ['x', 'y'] xlist = np.unique(values.T[0]) ylist = np.unique(values.T[1]) idx = np.argmin([xlist.size, ylist.size]) return constant[idx] ### ## # in the future, MOVE TO operation.spatial ## ### #---------------------------------------------------------------------------# # DataSet Basic Affine Transformations # #---------------------------------------------------------------------------# def translate(dataset, shiftx=0, shifty=0): ''' Dataset translation. cf. :meth:`~geophpy.dataset.DataSet.translate` The z value here is the actual dataset values and not the elevation. It is kept in the transformation (in place of the elevation) for convenience but unchanged by the transformations. ''' # Data values translation ################################################# xyz = dataset.data.values.T vect = np.stack((shiftx, shifty, 0)) xyz = array1D_translate(xyz, vect) ### in the future us spatial.array_translate dataset.data.values = xyz.T # Updating dataset.info (if any) ########################################## if dataset.data.z_image is not None: dataset.info.x_min += shiftx dataset.info.x_max += shiftx dataset.info.y_min += shifty dataset.info.y_max += shifty return dataset def get_rotation_angle_list(): ''' cf. dataset.py ''' return rotation_angle_list def rotate(dataset, angle=0, center=None): ''' Dataset rotation. cf. :meth:`~geophpy.dataset.DataSet.rotate` The z value here is the actual dataset values and not the elevation. It is kept in the transformation (in place of the elevation) for convenience but unchanged by the transformations. 
''' # Authorized rotation angle ############################################### angle = np.mod(angle, 360) # positive angle (-90->270) if angle not in [0, 90, 180, 270]: return dataset # Data values rotation #################################################### xyz = dataset.data.values.T xyz = array1D_rotate(xyz, angle=angle, center=center) ### in the future us spatial.array_translate dataset.data.values = xyz.T # Data zimage rotation #################################################### if dataset.data.z_image is not None: # zimage rotation angleClockWise = np.mod(angle, 360) k = angleClockWise//90 # number of 90° rotation (return int) dataset.data.z_image = np.rot90(dataset.data.z_image, k) ### ??? in the future use scipy.ndimage.rotate ??? # updating dataset info (xmi, ymin, ...) xy = dataset.get_gridcorners() xy = array1D_rotate(xy, angle=angle, center=center) xmin, ymin = xy.min(axis=1) xmax, ymax = xy.max(axis=1) dataset.info.x_min = xmin dataset.info.x_max = xmax dataset.info.y_min = ymin dataset.info.y_max = ymax x, y = dataset.get_xyvect() # dx = np.median(np.diff(x)) # dy = np.median(np.diff(y)) # x_gridding_delta = dx # y_gridding_delta = dy return dataset