Source code for pyLPM.gslib

from __future__ import print_function
from ipywidgets import interact, interactive, fixed, interact_manual
from ipywidgets import GridspecLayout
import ipywidgets as widgets
from IPython.display import display
from IPython.display import clear_output
from plotly.subplots import make_subplots
import plotly.express as px
import plotly.offline as pyo
import plotly.io as pio
import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pyplot as plt
from matplotlib import colors,ticker,cm 
import matplotlib

import subprocess
import numpy as np
import pandas as pd
from pyLPM import plots
import pkg_resources
import os
import re
import math

#############################################################################################################

#creating a global tem variable
global temp_dir_str
temp_dir_str = os.getcwd() + '\\pyLPM_data\\'
temp_dir_str = temp_dir_str.replace('\\','/')

#creating dir
if not os.path.exists(temp_dir_str): 
		os.mkdir(temp_dir_str)

#defining gslib90 folder inside package folder
global DATA_PATH
DATA_PATH = pkg_resources.resource_filename('pyLPM', 'gslib90/')

def call_program(program, parfile, usewine=False):
		"""Run a GSLib program
		
		Args:
		    program (str): gslib program path
		    parfile (str): parameter file file path
		    usewine (bool, optional): use wine flag. Defaults to False.
		"""
		if usewine == True:
				p = subprocess.Popen(['wine', program, parfile], stdout=subprocess.PIPE)
		else:
				 p = subprocess.Popen([program, parfile], stdout=subprocess.PIPE)

		for line in p.stdout:    
				print(line.decode('utf-8'), end='')

def write_GeoEAS(df,dh,x,y,z,vars=[]):
		"""Write GeoEAS file from a DataFrame
		
		Args:
		    df (DataFrame): data DataFrame
		    dh (str): dh column name
		    x (str): x column name
		    y (str): y column name
		    z (str): z column name
		    vars (list, optional): list of string with variables names. Defaults to [].
		"""

		df.replace(float('nan'),-999,inplace=True)
		columns = []
		if dh != None:
				columns.append(dh)
		columns.append(x)
		columns.append(y)
		if z != None:
				columns.append(z)
		for var in vars:
				columns.append(var)
		data = ""
		data= data +'tmp_pygslib_file\n'
		data= data + str(len(columns))+'\n'
		for col in columns:
				data=data + col+'\n'
		values = df[columns].to_string(header=False, index=False)
		data= data + values+'\n'
		f = open(temp_dir_str+'tmp.dat', 'w')
		f.write(data)
		f.close()

def read_GeoEAS(file):
		"""Read GeoEAS file into a DataFrame
		
		Args:
		    file (str): file path
		
		Returns:
		    DataFrame: DataFrame with data
		"""
		f = open(file, 'r')
		col_names = []
		results = []
		
		for index, line in enumerate(f):
			if index == 0:
				continue
			elif index == 1:
				n_cols = int(line.split()[0])
			elif index <= n_cols+1:
				col_names.append(line[:-1])
			else:
				values = [float(i) for i in line.split()]
				results.append(values)

		f.close()

		df = pd.DataFrame(results, columns = col_names)
		df.replace(-999, np.nan, inplace=True)

		return df
		#return pd.DataFrame(results, columns = col_names)

def col_number(file, col):
		"""Returns the column number from ist name on a GeoEAS file
		
		Args:
		    file (str): file path
		    col (str): column name
		
		Returns:
		    int: column number
		"""
		f = open(file, 'r')
		col_names = []
		
		for index, line in enumerate(f):
			if index == 0:
				continue
			elif index == 1:
				n_cols = int(line)
			elif index <= n_cols+1:
				col_names.append(line[:-1])

		return col_names.index(col) + 1 if col is not None else 0

def write_varg_str(varg):
		"""Summary
		
		Args:
		    varg (TYPE): Description
		
		Returns:
		    TYPE: Description
		"""
		varg_str = '{} {} \n'.format(varg['number_of_structures'], varg['nugget'])
		
		for struct in range(varg['number_of_structures']):
				
				if varg['models'][struct] is 'Spherical':
								it = 1
				elif varg['models'][struct] is 'Exponetial':
								it = 2
				elif varg['models'][struct] is 'Gaussian':
								it = 3
				
				new_line = '{} {} {} {} {} \n {} {} {} \n'.format(it, varg['contribution'][struct], varg['rotation_reference'][0], varg['rotation_reference'][1], varg['rotation_reference'][2], varg['ranges'][struct][0], varg['ranges'][struct][1], varg['ranges'][struct][2])

				varg_str = varg_str + new_line

		return varg_str

#############################################################################################################

[docs]def declus(df, x, y, z, var, tmin=-1.0e21, tmax=1.0e21, x_anis=1, z_anis=1, n_cell=10, min_size=1, max_size=20, keep_min = True, number_offsets=4, usewine=False):
		"""cell declustering algortihm. This function shows the declustering reults summary and writes weights to the DataFrame.
		
		Args:
		    df (DataFrame): points data DataFrame
		    x (str): x coordinates column name
		    y (str): y coordinates column name
		    z (str): z coordinates column name
		    var (str): variable column name
		    tmin (float, optional): minimum triming limit. Defaults to -1.0e21.
		    tmax (float, optional): maximum triming limit. Defaults to 1.0e21.
		    x_anis (float, optional): the anisotropy factors to consider rectangular cells. The cell size in the x direction is multiplied by these factors to get the cell size in the y and z directions, e.g., if a cell size of 10 is being considered and anisy2 and anisz3 then the cell size in the y direction is 20 and the cell size in the z direction is 30.. Defaults to 1.
		    z_anis (float, optional): anisotropy factor. Defaults to 1.
		    n_cell (int, optional): number of cells. Defaults to 10.
		    min_size (float, optional): minimum size. Defaults to 1.
		    max_size (float, optional): maximum size. Defaults to 20.
		    keep_min (bool, optional): an boolean flag that specifies whether a minimum mean value (True) or maximum mean value (False) is being looked for. Defaults to True.
		    number_offsets (int, optional): the number of origin offsets. Each of the ncell cell sizes are considered with noff different original starting points. This avoids erratic results caused by extreme values falling into specific cells. A good number is 4 in 2-D and 8 in 3-D. A short description of the program. Defaults to 4.
		    usewine (bool, optional): use wine flag. Defaults to False.
		"""

		write_GeoEAS(df=df,dh=None,x=x,y=y,z=z,vars=[var])
		
		decluspar = '''
										Parameters for CELLDECLUS
																	Parameters for DECLUS
									*********************

START OF PARAMETERS:
{datafile}         -file with data
{x}   {y}   {z}   {var}               -  columns for X, Y, Z, and variable
{tmin}     {tmax}          -  trimming limits
{sum}                  -file for summary output
{out}                  -file for output with data & weights
{xanis}   {zanis}                   -Y and Z cell anisotropy (Ysize=size*Yanis)
{kmin}                           -0=look for minimum declustered mean (1=max)
{ncell}  {min}  {max}               -number of cell sizes, min size, max size
{noff}                           -number of origin offsets
'''
		map_dict = {
				'datafile':temp_dir_str+'tmp.dat',
				'x':col_number(temp_dir_str+'tmp.dat', x),
				'y':col_number(temp_dir_str+'tmp.dat', y),
				'z':col_number(temp_dir_str+'tmp.dat', z),
				'var':col_number(temp_dir_str+'tmp.dat', var),
				'tmin':str(tmin),
				'tmax':str(tmax),
				'sum':temp_dir_str+'tmpsum.dat',
				'out':temp_dir_str+'tmpfile.dat',
				'xanis':str(x_anis),
				'zanis':str(z_anis),
				'ncell':str(n_cell),
				'min':str(min_size),
				'max':str(max_size),
				'kmin': -1 if keep_min == True else 0,
				'noff':str(number_offsets)
		}

		formatted_str = decluspar.format(**map_dict)
		parfile = temp_dir_str+'partmp.par'
		f = open(parfile, 'w')
		f.write(formatted_str)
		f.close()
		program = DATA_PATH+"declus.exe"

		call_program(program, parfile, usewine)

		df1 = read_GeoEAS(temp_dir_str+'tmpsum.dat')
		plots.cell_declus_sum(df1['Cell Size'],df1['Declustered Mean'])
		df2 = read_GeoEAS(temp_dir_str+'tmpfile.dat')
		df['Declustering Weight'] = df2['Declustering Weight']

[docs]def kt3d(df, dh, x, y, z, var, grid, variogram, min_samples, max_samples, max_oct, search_radius, search_ang = [0,0,0], discretization = [5,5,1], krig_type='OK', sk_mean = 0, tmin=-1.0e21, tmax=1.0e21, option='grid', debug_level=0, usewine=False):
    """Kriging algorithm. This function will show cross validation results if ``option = 'cross'`` or ``option = 'jackknife'`` or it will return estimated values and variace arrays if ``option = 'grid'``. 
    
    Args:
        df (DataFrame): points data DataFrame
        x (str): x coordinates column name
        y (str): y coordinates column name
        z (str): z coordinates column name
        var (str): variable column name
        grid (dict): grid definitions dictionary
        variogram (dict): variogram dictionary
        min_samples (int): minimum samples
        max_samples (int): maximum number of samples
        max_oct (int): maximum number of samples per octant
        search_radius (list): range1, ramge2, range3 values list
        search_ang (list, optional): azimuth, dip, rake values list. Defaults to [0,0,0].
        discretization (list, optional): block discretization. Defaults to [5,5,1].
        krig_type (str, optional): 'SK' or 'OK' flag. Defaults to 'OK'.
        sk_mean (float, optional): simple kriging mean. Defaults to 0.
        tmin (float, optional): minimum trimming limit. Defaults to -1.0e21.
        tmax (float, optional): maximum trimming limit. Defaults to 1.0e21.
        option (str, optional): cross validation 'cross', jackknife 'jackknife' or estimation 'grid' flag . Defaults to 'grid'.
        debug_level (int, optional): debug level. Defaults to 0. If 2 plots the negative weights histogram.
        usewine (bool, optional): use wine flag. Defaults to False.
    """

    write_GeoEAS(df=df,dh=dh,x=x,y=y,z=z,vars=[var])



    kt3dpar = '''
											Parameters for KT3D
									*******************

START OF PARAMETERS:
{datafile}              -file with data
{dh}  {x}  {y}  {z}  {var}  0                 -   columns for DH,X,Y,Z,var,sec var
{tmin}   {tmax}                 -   trimming limits
{option}                                -option: 0=grid, 1=cross, 2=jackknife
{datafile}                          -file with jackknife data
{x}   {y}   {z}    {var}    0              -   columns for X,Y,Z,vr and sec var
{debug}                                -debugging level: 0,1,2,3
{debugout}                         -file for debugging output
{kt3dout}                         -file for kriged output
{nx}   {ox}    {sx}                  -nx,xmn,xsiz
{ny}   {oy}    {sy}                  -ny,ymn,ysiz
{nz}    {oz}    {sz}                  -nz,zmn,zsiz
{dx}    {dy}      {dz}                    -x,y and z block discretization
{min}    {max}                           -min, max data for kriging
{max_oct}                                -max per octant (0-> not used)
{r1}  {r2}  {r3}                 -maximum search radii
 {a1}   {a2}   {a3}                 -angles for search ellipsoid
{krig_type}     {mean}                      -0=SK,1=OK,2=non-st SK,3=exdrift
0 0 0 0 0 0 0 0 0                -drift: x,y,z,xx,yy,zz,xy,xz,zy
0                                -0, variable; 1, estimate trend
extdrift.dat                     -gridded file with drift/mean
4                                -  column number in gridded file
{varg}'''

    map_dict = {
				'datafile':temp_dir_str+'tmp.dat',
				'dh': col_number(temp_dir_str+'tmp.dat', dh),
				'x': col_number(temp_dir_str+'tmp.dat', x),
				'y': col_number(temp_dir_str+'tmp.dat', y),
				'z':col_number(temp_dir_str+'tmp.dat', z),
				'var':col_number(temp_dir_str+'tmp.dat', var),
				'tmin':str(tmin),
				'tmax':str(tmax),
				'option': 0 if option is 'grid' else 1 if option is 'cross' else 2,
				'debug':debug_level,
				'debugout':temp_dir_str+'debug.out',
				'kt3dout':temp_dir_str+'output.out',
				'nx':grid['nx'],
				'ny':grid['ny'],
				'nz':grid['nz'],
				'ox':grid['ox'],
				'oy':grid['oy'],
				'oz':grid['oz'],
				'sx':grid['sx'],
				'sy':grid['sy'],
				'sz':grid['sz'],
				'dx':discretization[0],
				'dy':discretization[1],
				'dz':discretization[2],
				'min':min_samples,
				'max':max_samples,
				'max_oct':max_oct,
				'r1':search_radius[0],
				'r2':search_radius[1],
				'r3':search_radius[2],
				'a1':search_ang[0],
				'a2':search_ang[1],
				'a3':search_ang[2],
				'krig_type':0 if krig_type is 'SK' else 0,
				'mean':sk_mean,
				'varg':write_varg_str(variogram)
		}

    formatted_str = kt3dpar.format(**map_dict)
    parfile = temp_dir_str+'partmp.par'
    f = open(parfile, 'w')
    f.write(formatted_str)
    f.close()
    program = DATA_PATH+"kt3d.exe"

    call_program(program, parfile, usewine)

    if debug_level == 2:
    
        f = open(temp_dir_str+'debug.out', 'r')
        lines=f.readlines()

        begin = []
        end = []
        for idx, line in enumerate(lines):
            if 'BLOCK EST: x,y,z,vr,wt' in line:
                idxi = idx + 1
                begin.append(idxi)
                #print(idxi, lines[idxi])
            if 'estimate' in line:
                idxf = idx -1
                end.append(idxf)
                #print(idxf, lines[idxf])

        wts = []
        for b,e in zip(begin, end):
            slicing = lines[b:e+1]
            for i in slicing:
                wts.append(float(i.split()[4]))

        wts = np.array(wts)
        mask_negative = wts < 0
        neg_wts = wts[mask_negative]
        total = len(wts)

        plots.histogram(data=neg_wts,title='Negative weights of total {}'.format(total), n_bins=8)

    if option is 'grid':

        df1 = read_GeoEAS(temp_dir_str+'output.out')
        return df1['Estimate'], df1['EstimationVariance']

    if option is 'cross' or option is 'jackknife':

        df1 = read_GeoEAS(temp_dir_str+'output.out')
        real, estimate, error = df1['True'], df1['Estimate'], df1['Error: est-true']
        mask = np.isfinite(estimate)
        plots.xval(real[mask], estimate[mask], error[mask], pointsize=8, figsize=(500,900))


##########################################################################################################################################################3


[docs]def gamv(df,  x='x', y='y', z='z', var_h ='V', var_tail = 'V',  nlags:int =10, lagdist=10, lagtol= 5, 
	ndirections=3, azm = [0,0,0], atol = [22,22,22], bandh =[5,5,5],
	dip = [0,0,0], dtol =[22,22,22], bandv = [5,5,5], standardize = 0, 
	variogram_type = 1, tmin =  -999, tmax = 999, usewine = False):
	
	"""Calculate experimental functions using gamv 
	
	Args:
	    df (pandas.DataFrame): DataFrame containing all spatial information
	    x (str): Label for the X coordinates 
	    y (str, optional): Label for the Y coordinates 
	    z (str, optional): Label for the Z coordinates 
	    var_h (str, optional): Label for the variable at the head of the distance vector
	    var_tail (str, optional): Label for the variable at the tail of the distance vector
	    nlags (int, optional): Number of lags for the experimental variogram
	    lagdist (int, optional): Size of the lag for the experimental variogram
	    lagtol (int, optional): Linear tolerance for the experimental variogram 
	    ndirections (int, optional): Number of directions to calculate
	    azm (list, optional): List of azimuth angles in degrees
	    atol (list, optional): List of horizontal angular tolerances in degrees
	    bandh (list, optional): List of horizontal band width 
	    dip (list, optional): List of dip angles in degrees 
	    dtol (list, optional): List of vertical angular tolerances in degrees
	    bandv (list, optional): List of vertical band width 
	    standardize (int, optional): Standardize variables 0- No, 1- Yes 
	    variogram_type (int, optional): Variogram experimental function. 1 = traditional semivariogram, 2 = traditional cross semivariogram, 3 = covariance,  4 = correlogram, 5 = general relative semivariogram,   6 = pairwise relative semivariogram, 7 = semivariogram of logarithms, 8 = semimadogram, 9 = indicator semivariogram - continuous, 10= indicator semivariogram - categorical
	    tmin (TYPE, optional): Minimum trimming limits 
	    tmax (int, optional): Maximum trimming limits 
	    usewine (bool, optional): Option to use wine for Linux users
	"""

	# Write gslib file on directory 

	write_GeoEAS(df=df,dh=None,x=x,y=y,z=z,vars=[var_h, var_tail])

	# Define the variogram directions strings for the file parameter

	directions = ""
	for i in range(ndirections):
		if i == ndirections - 1:
			directions += "{} {} {} {} {} {} -azm,atol,bandh,dip,dtol,bandv".format(str(azm[i]), str(atol[i]), str(bandh[i]), str(dip[i]), str(dtol[i]), str(bandv[i]))
		else:
			directions += "{} {} {} {} {} {} -azm,atol,bandh,dip,dtol,bandv \n".format(str(azm[i]), str(atol[i]), str(bandh[i]), str(dip[i]), str(dtol[i]), str(bandv[i]))

	# Define the file parameter 

	gamvpar = '''
								Parameters for GAMV
									*******************

START OF PARAMETERS:
{datafile}               -file with data
{x}   {y}   {z}                         -   columns for X, Y, Z coordinates
2   {var_h}   {var_tail}                        -   number of variables,col numbers
{tmin}     {tmax}               -   trimming limits
{out}                           -file for variogram output
{nlags}                                -number of lags
{lagdist}                                -lag separation distance
{lagtol}                              -lag tolerance
{ndirections}                                 -number of directions
{directions}  
{standardize}                                 -standardize sills? (0=no, 1=yes)
1                                 -number of variograms
1   2   {variogram_type}                         -tail var., head var., variogram type
'''
	string_c = temp_dir_str.replace("\\", "/")
	map_dict = {'datafile':string_c+'tmp.dat',
				'x': col_number(string_c +'tmp.dat', x),
				'y': col_number(string_c +'tmp.dat', y),
				'z':col_number(string_c +'tmp.dat', z),
				'var_h':col_number(string_c +'tmp.dat', var_h),
				'var_t':col_number(string_c +'tmp.dat', var_tail),
				'out': string_c +'output.out',
				'tmin':str(tmin),
				'tmax':str(tmax),
				'nlags':nlags,
				'lagdist':lagdist,
				'lagtol' : lagtol,
				'ndirections': ndirections,
				'directions': directions,
				'standardize': standardize,
				'var_h':col_number(string_c +'tmp.dat',var_h),
				'var_tail':col_number(string_c +'tmp.dat', var_tail),
				'variogram_type': variogram_type}


	# Write the gslib par file 

	formatted_str = gamvpar.format(**map_dict)
	parfile = temp_dir_str+'gamv.par'
	f = open(parfile, 'w')
	f.write(formatted_str)
	f.close()

	# Run the gamv executable 

	program = DATA_PATH+"gamv.exe"
	call_program(program, parfile, usewine)

	# Open and read experimental variogram output 

	myfile = open(temp_dir_str+'output.out', 'r')
	data=myfile.readlines()
	count = 0
	dfs =[]

	distance = []
	number_of_pairs =[]
	variogram = [] 
	number = []
	count = 0 
	list_of_headers = ['Semivariogram', 'Cross', 'Covariance','Correlogram', 
	'Relative', 'General', 'Pairwise', 'Variogram', 'Semimadogram', 'Indicator' ]
	for i in range(len(data)):
		splited = data[i].split()
		if (splited[0] not in list_of_headers):
			distance.append(float(splited[1]))
			number_of_pairs.append(int(splited[3]))
			variogram.append(float(splited[2]))
			number.append(int(splited[0]))
		if (splited[0] in list_of_headers and i > 0) or (i == (len(data) -1)):
			df = pd.DataFrame(np.array([distance, variogram, number_of_pairs]).T, columns =['Average distance', 'Spatial continuity', 'Number of pairs'])
			dfs.append(df)
			number  =[] 
			distance = []
			number_of_pairs =[]
			variogram = [] 

	print(dfs)
	

	# If cross experimental variograms, only use the cross variograms 

	# Plot the experimental variogram functions 

	plots.plot_experimental_variogram(dfs, azm, dip)

	# Define the returning dictionary containing the experimental values 
	returning = {'Directions': [azm, dip],
				 'Values' : dfs}

	return returning 

##########################################################################################################################################################