#!/usr/bin/gawk -f
###########################################################################
# This file is part of meadTools, version 2.0.
#
# Copyright (c) 2001-2019, Instituto de Tecnologia Quimica e Biologica,
# Universidade Nova de Lisboa, Portugal.
#
# meadTools is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation, either version 2 of the License, or (at your
# option) any later version.
#
# meadTools is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with meadTools.  If not, see <http://www.gnu.org/licenses/>.
#
# For further details and info check the README file.
#
# You can get meadTools at www.itqb.unl.pt/simulation
###########################################################################

############################################################################
# addHtaut: a program to add protons for tautomeric calculations.
#
# The program presently recognizes 4 different chemical groups:
# carboxyl, phenyl, alcohol/thiol, and waters.  New site types
# containing these chemical groups can be easily added by defining the
# appropriate atoms within the function define_sites().  Addition of
# other chemical groups will require writing the corresponding
# process_*() function that adds the tautomeric protons.
############################################################################


# Main program.  Everything runs inside BEGIN because the PQR file is read
# explicitly with getline instead of through awk's main input loop.
#
# Command line arguments:
#   ARGV[1]  PQR file to read; residue number, chain, coordinates, charge
#            and radius are taken from fixed columns.
#   ARGV[2]  Force field name; used by define_sites() to choose the atom
#            names of the C-terminus.
#   ARGV[3]  Comma-separated list of sites, each one written as
#            RESNUMB--RESNAME--CHAIN.  Sites named CTR are stored in
#            cterm[chain][resnumb], all the others in tit[chain][resnumb].
#
# Every input line is echoed to stdout through print_pqr_line().  Whenever
# the residue number or the chain changes (and once more at end of file),
# process_residue() writes the tautomeric protons of the residue just read.
BEGIN{
  cmd = "addHtaut" ;
  usage = "Usage: "cmd" PQR_FILE FF SITES_FILE" ;
  if (ARGC != 4) error("Wrong number of arguments.\n" usage) ;
  filecheck(pqr_file = ARGV[1]) ;
  pi = atan2(0,-1) ;

  forcefield = ARGV[2] ;

  # Parse the list of sites.  Dedicated variables are used here so that the
  # globals resnumb/resname/chain, which describe the residue currently
  # being read from the PQR file, are not left holding values of the last
  # site in the list.
  tit_input = ARGV[3] ;
  split(tit_input,tit_array,",") ;
  for (i in tit_array)
  {
    split(tit_array[i],residue,"--") ;
    site_numb = residue[1] ;
    site_name = residue[2] ;
    site_chain = residue[3] ;
    if (site_name != "CTR") tit[site_chain][site_numb] = site_name ;
    else cterm[site_chain][site_numb] = 1 ;
  }

  define_sites() ;
  resnumb = "" ;
  # getline returns -1 on a read error, which is "true" in a bare while
  # condition; comparing with 0 prevents an endless loop in that case.
  while ((getline < pqr_file) > 0)
  {
    nline++ ;
    # A new residue starts when either the residue number or the chain
    # changes.  The parentheses matter: without them the chain test would
    # also be applied to the very first line, before any residue was read.
    if (nline != 1 && (int(substr($0,23,4)) != resnumb || substr($0,22,1) != chain)) process_residue() ;
    # if (NF != 11) warning("Wrong number of fields ("NF") in pqr line "nline) ;
    resname = $4 ;
    resnumb = int(substr($0,23,4)) ;
    chain = substr($0,22,1) ;
    atm = $3 ;

    # Coordinates of the atom, indexed by atom name and component (1..3):
    r[atm,1] = substr($0,31,8);
    r[atm,2] = substr($0,39,8);
    r[atm,3] = substr($0,47,8);

    # Full input line of the atom; also used to test if an atom exists:
    line[atm] = $0 ;

    print_pqr_line($2, atm, resname, chain, resnumb, r[atm,1], r[atm,2], r[atm,3], substr($0,55,8), substr($0,63,8)) ;

    # the next 2nd condition may not be totally general...
    if (tit[chain][resnumb] != "" && tit[chain][resnumb] != substr(resname,1,3))
    {
      warning("tit[chain][resnumb] = "tit[chain][resnumb]) ;
      error(resname " " resnumb " : name in .pqr and .sites do not match!\n") ;
    }
  }
  close(pqr_file) ;
  # The last residue is not followed by a residue change, so process it here:
  process_residue() ;

}

# Adds the tautomeric protons of the residue that has just been read.
# The site type is looked up in tit[chain][resnumb] and matched against the
# group patterns set by define_sites(); a residue flagged in cterm[][] is
# additionally treated as a C-terminal carboxyl.  The per-residue atom
# tables (r and line) are emptied afterwards, ready for the next residue.
function process_residue(    site)
{
  site = tit[chain][resnumb] ;

  if (site ~ carboxyl)
  {
    process_carboxyl(resname) ;
  }
  if (cterm[chain][resnumb] == 1)
  {
    process_carboxyl("CTR") ;
  }
  if (site ~ phenyl)
  {
    process_phenyl(resname) ;
  }
  if (site ~ alcohol)
  {
    process_alcohol(resname) ;
  }
  if (site ~ water)
  {
    process_water(resname) ;
  }

  delete r ;
  delete line ;
}

# Writes the four tautomeric protons of a carboxyl site of type stype.
# For each of the two oxygens a "front" and a "back" proton are placed in
# the plane of the carboxyl group, at distance distOH from the oxygen and
# making an angle angCOH with the C-O bond.  The output order is
# h1, h3 (both on o1) and then h2, h4 (both on o2).
# Aborts through error() if the site already has h1 or h2, or if any of
# o1, o2 or c is missing from the current residue.
function process_carboxyl(stype)
{
  # Force field dependent parameters
  # (G43a1 values; could be made stype-dependent if ever needed):
  angCOH = 109.5 * pi / 180 ;
  distOH = 1.000 ;
  chargeH = 0.398 ;  # "Aesthetic", does not affect MEAD calculations.
  radiusH = 0.000 ;

  # Sanity checks on the site:
  if (line[h1[stype]] != "" || line[h2[stype]] != "")
  {
    error("Site "stype" "resnumb" has proton(s) already.") ;
  }
  if (!(line[o1[stype]] != "" && line[o2[stype]] != "" && line[c[stype]] != ""))
  {
    error("Site "stype" "resnumb" has missing atoms.") ;
  }

  # Protons on o1 (h1 and its "back" partner h3), then on o2 (h2 and h4):
  carboxyl_proton_pair(o1[stype], o2[stype], c[stype], h1[stype], h3[stype]) ;
  carboxyl_proton_pair(o2[stype], o1[stype], c[stype], h2[stype], h4[stype]) ;
}

# Helper of process_carboxyl(): writes the two protons bound to oxygen oxy.
# other is the second oxygen and carb the carbon of the carboxyl group;
# hfront and hback are the names given to the two protons written.
function carboxyl_proton_pair(oxy, other, carb, hfront, hback,
                              n, bond, bond2, normal, along, inplane, side, pos, len)
{
  # Bond vectors from the carbon to each oxygen:
  for (n = 1 ; n <= 3 ; n++) bond[n] = r[oxy,n] - r[carb,n] ;
  for (n = 1 ; n <= 3 ; n++) bond2[n] = r[other,n] - r[carb,n] ;

  # Normal to the carboxyl plane, then the in-plane direction
  # perpendicular to the C-O bond of this oxygen:
  cross(bond2,bond,normal) ;
  cross(bond,normal,inplane) ;

  # Displacement component along the C-O bond:
  len = norm(bond) ;
  for (n = 1 ; n <= 3 ; n++)
    along[n] = -cos(angCOH) * distOH * bond[n] / len ;

  # Displacement component perpendicular to the C-O bond:
  len = norm(inplane) ;
  for (n = 1 ; n <= 3 ; n++)
    side[n] = sin(angCOH) * distOH * inplane[n] / len ;

  # "Front" proton:
  for (n = 1 ; n <= 3 ; n++)
    pos[n] = r[oxy,n] + along[n] + side[n] ;
  print_pqr_line(0, hfront, resname, chain, resnumb,
		 pos[1], pos[2], pos[3], chargeH, radiusH) ;

  # "Back" proton:
  for (n = 1 ; n <= 3 ; n++)
    pos[n] = r[oxy,n] + along[n] - side[n] ;
  print_pqr_line(0, hback, resname, chain, resnumb,
		 pos[1], pos[2], pos[3], chargeH, radiusH) ;
}


# Writes the second tautomeric proton (h2) of a phenyl-type site.
# h2 is the existing proton h1 with the component of the O-H bond that is
# perpendicular to the O-C bond reversed, i.e. h1 rotated by 180 degrees
# about the O-C axis.  Residue name, charge and radius are copied from h1.
# Aborts through error() if h1, o or c is missing from the current residue.
function process_phenyl(stype,    hline)
{
  # Check if site is OK:
  if(line[h1[stype]] == "" || line[o[stype]] == "" || line[c[stype]] == "")
    error("Site "stype" "resnumb" has missing atoms.") ;
  # Compute bond vectors:
  for (k = 1 ; k <= 3 ; k++) rOH[k] = r[h1[stype],k] - r[o[stype],k] ;
  for (k = 1 ; k <= 3 ; k++) rOC[k] = r[c[stype],k] - r[o[stype],k] ;
  # rb lies in the C-O-H plane and is perpendicular to the O-C bond:
  cross(rOH,rOC,ra) ;
  cross(rOC,ra,rb) ;
  # Compute and write h2 (subtract twice the projection of rOH onto rb):
  for (k = 1 ; k <= 3 ; k++)
    rH[k] = r[h1[stype],k] - 2 * dot(rOH,rb) / dot(rb,rb) * rb[k] ;
  hline = line[h1[stype]] ;
  split(hline, f) ;
  # Charge and radius are read from the same fixed columns used when the
  # line was first parsed.  Whitespace-separated field numbers are not
  # reliable here: they shift by one when the chain column is not blank.
  print_pqr_line(0, h2[stype], f[4], chain, resnumb, rH[1], rH[2], rH[3],
		 substr(hline,55,8), substr(hline,63,8)) ;
}

# Writes the two extra tautomeric protons (h2 and h3) of an alcohol or
# thiol site.  They are the existing proton h1 rotated by +120 and -120
# degrees about the O-C axis.  Residue name, charge and radius are copied
# from h1.
# If h1, o or c is missing from the current residue, only a warning is
# issued and 0 is returned without writing anything.
function process_alcohol(stype,    hline)
{
  # Check if site is OK:
  if(line[h1[stype]] == "" || line[o[stype]] == "" || line[c[stype]] == "")
  {
      warning("Site "stype" "resnumb" has missing atoms.") ;
      return 0 ;
  }
  # Compute bond vectors et al:
  for (k = 1 ; k <= 3 ; k++) rOH[k] = r[h1[stype],k] - r[o[stype],k] ;
  for (k = 1 ; k <= 3 ; k++) rOC[k] = r[c[stype],k] - r[o[stype],k] ;
  # ra is normal to the C-O-H plane; rb lies in that plane and is
  # perpendicular to the O-C bond:
  cross(rOH,rOC,ra) ;
  cross(rOC,ra,rb) ;
  # rc is the component of the O-H bond perpendicular to the O-C bond:
  for (k = 1 ; k <= 3 ; k++) rc[k] = dot(rOH,rb) / dot(rb,rb) * rb[k] ;
  # ru is along ra with length sin(120 deg) * |rc|:
  for (k = 1 ; k <= 3 ; k++)
    ru[k] = 0.5 * sqrt(3) * norm(rc) * ra[k] / norm(ra) ;
  # Charge and radius are read from the same fixed columns used when the
  # line was first parsed.  Whitespace-separated field numbers are not
  # reliable here: they shift by one when the chain column is not blank.
  hline = line[h1[stype]] ;
  split(hline, f) ;
  # Compute and write h2:
  for (k = 1 ; k <= 3 ; k++) rH[k] = r[h1[stype],k] - 1.5 * rc[k] + ru[k] ;
  print_pqr_line(0, h2[stype], f[4], chain, resnumb, rH[1], rH[2], rH[3],
		 substr(hline,55,8), substr(hline,63,8)) ;
  # Compute and write h3:
  for (k = 1 ; k <= 3 ; k++) rH[k] = r[h1[stype],k] - 1.5 * rc[k] - ru[k] ;
  print_pqr_line(0, h3[stype], f[4], chain, resnumb, rH[1], rH[2], rH[3],
		 substr(hline,55,8), substr(hline,63,8)) ;
}

# Writes the four tautomeric protons (h1 to h4) of a water site.
# h1 is placed at distance distOH from the oxygen in a direction built from
# two rand() angles.  h2 is placed at distance distOH, making an angle
# angHOH with O-h1, in a plane chosen with a second pair of rand() angles.
# h3 and h4 are h1 rotated by +120 and -120 degrees about the O-h2 axis.
# Aborts through error() if the water already has any of h1..h4, or if the
# oxygen is missing from the current residue.
# NOTE(review): srand() is never called in this file, so rand() presumably
# yields the same sequence on every run -- confirm that this is intended.
function process_water(stype)
{
  # Some parameters that depend on the force field:
  # (These are for G43a1; if necessary can be made stype-dependent):
  angHOH = 109.5 * pi / 180 ;
  distOH = 1.000 ;
  chargeH = 0.410 ;  # "Aesthetic", does not affect MEAD calculations.
  radiusH = 0.000 ;

  # Check if site is OK:
  if(line[h1[stype]] != "" || line[h2[stype]] != "" ||  \
     line[h3[stype]] != "" || line[h4[stype]] != "")
    error("Site "stype" "resnumb" has proton(s) already.") ;
  if(line[o[stype]] == "")
    error("Site "stype" "resnumb" has missing atoms.") ;
  # Compute random orientation for h1:
  theta = rand() * 2 * pi ;
  phi = rand() * pi ;
  # Compute and write h1:
  rOH1[1] = distOH * sin(phi) * cos(theta) ;
  rOH1[2] = distOH * sin(phi) * sin(theta) ;
  rOH1[3] = distOH * cos(phi) ;
  for (k = 1 ; k <= 3 ; k++) rH1[k] = r[o[stype],k] + rOH1[k] ;
  # The residue name of all protons is copied from the oxygen line:
  split(line[o[stype]], f) ;
  print_pqr_line(0, h1[stype], f[4], chain, resnumb,
		 rH1[1], rH1[2], rH1[3], chargeH, radiusH) ;
  # Compute random orientation for h2:
  theta = rand() * 2 * pi ;
  phi = rand() * pi ;
  # Compute and write h2.  The random vector rv only selects the plane:
  # ru is perpendicular to O-h1, and O-h2 is built from a component along
  # ru (ra) plus a component along O-h1 (rb).
  rv[1] = distOH * sin(phi) * cos(theta) ;
  rv[2] = distOH * sin(phi) * sin(theta) ;
  rv[3] = distOH * cos(phi) ;
  cross(rOH1,rv,ru) ;
  for (k = 1 ; k <= 3 ; k++) ra[k] = distOH * sin(angHOH) * ru[k] / norm(ru) ;
  for (k = 1 ; k <= 3 ; k++) rb[k] = rOH1[k] * cos(angHOH) ;
  for (k = 1 ; k <= 3 ; k++) rH2[k] = r[o[stype],k] + ra[k] + rb[k] ;
  # The chain argument must be passed here as for the other protons;
  # leaving it out shifts every following argument by one position.
  print_pqr_line(0, h2[stype], f[4], chain, resnumb,
		 rH2[1], rH2[2], rH2[3], chargeH, radiusH) ;
  # Compute and write h3:
  for (k = 1 ; k <= 3 ; k++) rOH2[k] = rH2[k] - r[o[stype],k] ;
  cross(rOH1,rOH2,ra) ;
  cross(rOH2,ra,rb) ;
  # rc is the component of O-h1 perpendicular to O-h2:
  for (k = 1 ; k <= 3 ; k++) rc[k] = dot(rOH1,rb) / dot(rb,rb) * rb[k] ;
  for (k = 1 ; k <= 3 ; k++)
    ru[k] = 0.5 * sqrt(3) * norm(rc) * ra[k] / norm(ra) ;
  for (k = 1 ; k <= 3 ; k++) rH3[k] = rH1[k] - 1.5 * rc[k] + ru[k] ;
  print_pqr_line(0, h3[stype], f[4], chain, resnumb,
		 rH3[1], rH3[2], rH3[3], chargeH, radiusH) ;
  # Compute and write h4:
  for (k = 1 ; k <= 3 ; k++) rH4[k] = rH1[k] - 1.5 * rc[k] - ru[k] ;
  print_pqr_line(0, h4[stype], f[4], chain, resnumb,
		 rH4[1], rH4[2], rH4[3], chargeH, radiusH) ;
}

# Prints one fixed-column ATOM record to stdout.
#   atnu             atom number
#   atna, resna      atom name and residue name
#   chain, resnu     chain identifier and residue number
#   x, y, z          coordinates
#   charge, rad      charge and radius
function print_pqr_line(atnu,atna,resna,chain,resnu,x,y,z,charge,rad,    fmt)
{
  fmt = "ATOM  %5d %-4s %-4s%1s%4s    %8.3f%8.3f%8.3f%8.3f%8.3f\n" ;
  printf(fmt, atnu, atna, resna, chain, resnu, x, y, z, charge, rad) ;
}

# Vector stuff:
# Returns the dot product of the 3-component vectors a and b
# (arrays indexed 1..3):
function dot(a,b,    i,acc)
{
  # The index and the accumulator are local, so that loops over k in the
  # callers are not disturbed.
  acc = a[1] * b[1] ;
  for (i = 2 ; i <= 3 ; i++) acc = acc + a[i] * b[i] ;
  return acc ;
}
# Returns the Euclidean length of the 3-component vector a:
function norm(a,    squared)
{
  squared = dot(a,a) ;
  return sqrt(squared) ;
}
# Stores the cross product of the 3-component vectors a and b in axb.
# Nothing is returned; the result is passed back through the array axb.
function cross(a,b,axb,    i,j,m)
{
  # (i,j,m) runs over the cyclic permutations (1,2,3), (2,3,1), (3,1,2):
  for (i = 1 ; i <= 3 ; i++)
  {
    j = i % 3 + 1 ;
    m = j % 3 + 1 ;
    axb[i] = a[j] * b[m] - a[m] * b[j] ;
  }
}

# Defines the known site types.  For each chemical group a regular
# expression with the site names is set (carboxyl, phenyl, alcohol, water),
# and the atom names of every site are stored in the global tables
# o1, o2, o, c and h1..h4, all indexed by site name.
# The atom names of the C-terminus (CTR) are set only when forcefield is
# "GROMOS" or "CHARMM".
function define_sites(    names, n, i)
{
  # Carboxyl groups:
  carboxyl = "(ASP|GLU|CTR|PRA|PRD|ACE)" ;
  #                 site   o1     o2     c      h1      h2      h3      h4
  set_carboxyl_site("ASP", "OD1", "OD2", "CG",  "HD11", "HD21", "HD12", "HD22") ;
  set_carboxyl_site("GLU", "OE1", "OE2", "CD",  "HE11", "HE21", "HE12", "HE22") ;
  # C-terminus, named differently in each force field:
  if (forcefield == "GROMOS")
    set_carboxyl_site("CTR", "O1",  "O2",  "C", "HO11", "HO21", "HO12", "HO22") ;
  if (forcefield == "CHARMM")
    set_carboxyl_site("CTR", "OT1", "OT2", "C", "HT11", "HT21", "HT12", "HT22") ;
  # Propionates A and D, and acetate:
  set_carboxyl_site("PRA", "O1A", "O2A", "CGA", "HO1A", "HO2A", "HO3A", "HO4A") ;
  set_carboxyl_site("PRD", "O1D", "O2D", "CGD", "HO1D", "HO2D", "HO3D", "HO4D") ;
  set_carboxyl_site("ACE", "O1",  "O2",  "C1",  "H11",  "H21",  "H12",  "H22") ;

  # Phenyl groups (Tyr only):
  phenyl = "TYR" ;
  o["TYR"] = "OH" ;
  c["TYR"] = "CZ" ;
  h1["TYR"] = "HH1" ;
  h2["TYR"] = "HH2" ;

  # Alcohol and thiol groups (Ser, Thr, Cys):
  alcohol = "(SER|THR|CYS)" ;
  set_alcohol_site("SER", "OG",  "CB") ;
  set_alcohol_site("THR", "OG1", "CB") ;
  set_alcohol_site("CYS", "SG",  "CB") ;

  # Waters; both residue names use the same atom names:
  water = "(HOH|H2O)" ;
  n = split("HOH H2O", names, " ") ;
  for (i = 1 ; i <= n ; i++)
  {
    o[names[i]] = "OW" ;
    h1[names[i]] = "HW1" ;
    h2[names[i]] = "HW2" ;
    h3[names[i]] = "HW3" ;
    h4[names[i]] = "HW4" ;
  }
}

# Helper of define_sites(): stores the atom names of one carboxyl site.
function set_carboxyl_site(site, oxa, oxb, carb, pa, pb, pc, pd)
{
  o1[site] = oxa ;
  o2[site] = oxb ;
  c[site] = carb ;
  h1[site] = pa ;
  h2[site] = pb ;
  h3[site] = pc ;
  h4[site] = pd ;
}

# Helper of define_sites(): stores the atom names of one alcohol/thiol
# site.  The three protons are named HG1, HG2 and HG3 in all of them.
function set_alcohol_site(site, oxy, carb)
{
  o[site] = oxy ;
  c[site] = carb ;
  h1[site] = "HG1" ;
  h2[site] = "HG2" ;
  h3[site] = "HG3" ;
}

# Aborts through error() unless file is an existing, readable regular file.
# The checks are delegated to the shell command "test".
function filecheck(file,    quoted)
{
  # Wrap the name in single quotes (turning every embedded ' into '\'')
  # so that names with spaces or shell metacharacters are passed to
  # "test" as one literal word instead of being interpreted by the shell.
  quoted = file ;
  gsub(/'/, "'\\''", quoted) ;
  quoted = "'" quoted "'" ;

  if (system("test -f " quoted))
    error("File "file" does not exist.") ;
  if (system("test -r " quoted))
    error("File "file" exists but is not readable.") ;
}

# Writes msg to standard error as a warning, prefixed with the program
# name.  The message goes through a pipe to "cat 1>&2", which is closed
# right away.
function warning(msg,    sink)
{
  sink = "cat 1>&2" ;
  print cmd ": Warning: " msg | sink ;
  close(sink) ;
}

# Writes msg to standard error as an error, prefixed with the program
# name, and terminates the program with exit status 1.
function error(msg,    sink)
{
  sink = "cat 1>&2" ;
  print cmd ": Error: " msg | sink ;
  close(sink) ;
  exit 1 ;
}

