##############################################################################
#    This file is part of mdciao.
#    
#    Copyright 2020 Charité Universitätsmedizin Berlin and the Authors
#
#    Authors: Guillermo Pérez-Hernandez
#    Contributors:
#
#    mdciao is free software: you can redistribute it and/or modify
#    it under the terms of the GNU Lesser General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.
#
#    mdciao is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU Lesser General Public License for more details.
#
#    You should have received a copy of the GNU Lesser General Public License
#    along with mdciao.  If not, see <https://www.gnu.org/licenses/>.
##############################################################################


# Default colors that `_parse_coloring_options` cycles through
# when it is asked to pick fragment colors automatically
my_frag_colors=[
         'magenta',
         'yellow',
         'lime',
         'maroon',
         'navy',
         'olive',
         'orange',
         'purple',
         'teal',
]

from os import path as _path, mkdir as _mkdir
import numpy as _np
import mdtraj as _md
from matplotlib import pyplot as _plt, rcParams as _rcParams, colors as _mplcolors

from pandas import DataFrame as _DF
from pandas import ExcelWriter as _ExcelWriter

from inspect import signature as _signature
from fnmatch import filter as _filter
from textwrap import wrap as _twrap
from itertools import product as _iterpd

import mdciao.contacts as _mdcctcs
import mdciao.fragments as _mdcfrg
import mdciao.nomenclature as _mdcnomenc
import mdciao.pdb as _mdcpdb
import mdciao.sites as _mdcsites
import mdciao.plots as _mdcplots

import mdciao.utils as _mdcu

def _offer_to_create_dir(output_dir):
    r"""
    Interactively offer to create :obj:`output_dir` when it is missing

    Nothing happens if the directory already exists. Otherwise the
    user is prompted: an empty answer or any answer starting with
    "y"/"Y" creates the directory, anything else only prints a
    message asking to check :obj:`output_dir`.

    Parameters
    ----------
    output_dir : str
        Path of the directory to check for (and possibly create)

    Returns
    -------
    None

    """
    if _path.isdir(output_dir):
        return

    prompt = "\nThe directory '%s' does not exist. Create it on the fly [y/n]?\nDefault [y]: " % output_dir
    reply = input(prompt)
    # Hitting enter (empty reply) counts as accepting the default, "y"
    if reply == "" or reply.lower().startswith("y"):
        _mkdir(output_dir)
        return

    print("Stopping. Please check your variable 'output_dir' and try again")

def _parse_consensus_option(option, consensus_type,
                            top, fragments,
                            return_Labeler=False,
                            accept_guess=False,
                            **LabelerConsensus_kwargs):
    r"""
    Turn a user-provided consensus option into a per-residue list
    of consensus labels (and, optionally, the labeler object itself)

    This helper keeps the complexity away from the command-line tools
    while leaving them usable at the API-level.

    When a labeler is available, :obj:`guess_by_nomenclature` is first
    used to guess which of the :obj:`fragments` belong to the
    nomenclature, and the labeler's :obj:`top2labels` method is then
    restricted to the residues of those fragments.

    Parameters
    ----------
    option : None, str, or :obj:`LabelerConsensus`
        The option that was passed as argument. Three usecases:
         * None or any string that lowercases to "none"
           No labeler is created, and a list full of
           Nones (one per residue of :obj:`top`) is returned
         * str
           The identifier needed to instantiate a
           :obj:`LabelerGPCR` or a :obj:`LabelerCGN`,
           depending on :obj:`consensus_type`, e.g. a
           :obj:`uniprot_name` or a :obj:`ref_PDB`, respectively
         * anything else
           Taken to be an already instantiated labeler and used
           as-is (no class check is done here). Usecase are repeated
           calls to the methods in :obj:`command_line_tools`
           without each call instantiating its own labeler
    consensus_type : str
        Either "CGN" or "GPCR"
    top : :obj:`mdtraj.Topology`
    fragments : iterable of iterables of ints
        How the :obj:`top` is fragmented. Helps
        to identify what part of :obj:`top`
        to align to the consensus sequence
    return_Labeler : bool, default is False
        Whether to return the labeler object as well
    accept_guess : bool, default is False
        Passed on to :obj:`guess_by_nomenclature`, to accept
        its guess without asking questions
    LabelerConsensus_kwargs : opt
        Keyword arguments for the labeler's constructor. Only
        used when :obj:`option` is an identifier string

    Returns
    -------
    map_out : list
        One entry per residue of :obj:`top`
    LC_out : labeler object or None
        Only returned if :obj:`return_Labeler` is True

    """
    # Step 1: resolve the option into a labeler object (or None)
    if option is None or isinstance(option, str):
        if str(option).lower() == 'none':
            labeler = None
        else:
            labeler_classes = {"GPCR": _mdcnomenc.LabelerGPCR,
                               "CGN": _mdcnomenc.LabelerCGN}
            labeler = labeler_classes[consensus_type](option, **LabelerConsensus_kwargs)
    else:
        # todo add a class check here instead of failing later on
        labeler = option

    # Step 2: map the labels of the labeler (if any) onto the topology
    if labeler is None:
        residx2conlab = [None for __ in range(top.n_residues)]
    else:
        guessed_frag_idxs = _mdcnomenc.guess_by_nomenclature(labeler, top, fragments, consensus_type,
                                                             return_str=False,
                                                             accept_guess=accept_guess,
                                                             )
        if guessed_frag_idxs is None:
            print("No fragments belonging to the %s nomenclature\n"
                  "could be guessed based on your fragments, this might be a weird case"%consensus_type)
            residx2conlab = [None]*top.n_residues
        else:
            residxs_of_guess = _np.hstack([fragments[ii] for ii in guessed_frag_idxs])
            # min_hit_rate=0 gives up the re-guessing inside top2labels,
            # because the guess was done explicitly above (with the
            # option for interactivity)
            residx2conlab = labeler.top2labels(top,
                                               min_hit_rate=0,
                                               restrict_to_residxs=residxs_of_guess,
                                               autofill_consensus=True,
                                               )
        print()

    if return_Labeler:
        return residx2conlab, labeler
    return residx2conlab

#TODO test
def _parse_consensus_options_and_return_fragment_defs(option_dict, top,
                                                      fragments_as_residue_idxs,
                                                      accept_guess=False,
                                                      save_nomenclature_files=False,
                                                      verbose=True):
    r"""
    Parse several consensus options at once and gather the resulting
    consensus fragments, consensus-label maps and labeler objects

    :obj:`_parse_consensus_option` is called once per item
    of :obj:`option_dict`.

    Parameters
    ----------
    option_dict : dict
        Keyed with the consensus type and valued with the
        option for that type. Keys and values are passed as
        :obj:`consensus_type` and :obj:`option`, respectively,
        to :obj:`_parse_consensus_option`, check there for more info
    top : :obj:`mdtraj.Topology`
    fragments_as_residue_idxs : iterable of iterables of ints
        How the :obj:`top` is fragmented
    accept_guess : bool, default is False
        Passed on to :obj:`_parse_consensus_option`. If False
        (and :obj:`verbose` is True), the user has to hit
        enter after the fragments of each labeler have been shown
    save_nomenclature_files : bool, default is False
        Passed as :obj:`write_to_disk` to the labelers
        that get instantiated
    verbose : bool, default is True
        Print the header for each labeler's fragments and
        pass the verbosity on to the labeler's :obj:`top2frags`.
        It only controls what gets printed/prompted, the
        consensus fragments are always computed

    Returns
    -------
    consensus_frags : dict
        The result of updating one dictionary with the output
        of every labeler's :obj:`top2frags`
    consensus_maps : list
        The maps returned by :obj:`_parse_consensus_option`,
        one per item of :obj:`option_dict`, in iteration order
    consensus_labelers : dict
        The labelers that are not None, keyed like :obj:`option_dict`
    """
    consensus_frags, consensus_maps, consensus_labelers = {}, [], {}
    for key, option in option_dict.items():
        map_CL, CL = _parse_consensus_option(option, key, top, fragments_as_residue_idxs,
                                             return_Labeler=True,
                                             accept_guess=accept_guess,
                                             write_to_disk=save_nomenclature_files)
        consensus_maps.append(map_CL)
        if CL is None:
            continue

        consensus_labelers[key] = CL
        if verbose:
            print("These are the %s fragments mapped onto your topology:"%key)
        # The fragments are part of the returned values, so they have to
        # be computed regardless of verbosity (they used to be skipped,
        # and returned empty, when verbose was False)
        consensus_frags.update(CL.top2frags(top,
                                            input_dataframe=CL.most_recent_alignment,
                                            fragments=fragments_as_residue_idxs,
                                            verbose=verbose))
        # Only pause if something was shown for the user to look at
        if verbose and not accept_guess:
            input("Hit enter to continue!\n")

    _mdcu.lists.assert_no_intersection(list(consensus_frags.values()),"consensus fragment")

    return consensus_frags, consensus_maps, consensus_labelers

def _parse_fragment_naming_options(fragment_names, fragments):
    r"""
    Helper method for the CLTs to understand what/how the user wants
    the fragments to be named

    Parameters
    ----------
    fragment_names : None, str, list or np.ndarray
        If str, we assume it comes directly from the
        command line option --fragment_names,
        see :obj:`parsers._parser_add_fragment_names`.
        Can be different things:

        * "" : fragments will be named frag0,frag1,frag2 ... as needed
        * None, "None", "none" (any capitalization) : every name will be None
        * comma-separated values, with as many values
          as there are fragments in :obj:`fragments`. Blank
          spaces around the values are stripped
        * a string containing "danger" (any capitalization)
          is not supported anymore and raises a NotImplementedError

        If list or array, it's returned as-is after checking its
        length (for compatibility with API use of CLI tools)
    fragments : list
        Existing fragment definitions (iterables of residue indices)
        to apply the :obj:`fragment_names` to.
        Typically, :obj:`fragments` come from a call to :obj:`get_fragments`

    Returns
    -------
    fragment_names : list (or the input list/array itself)
        One name per fragment

    Raises
    ------
    AssertionError
        If the number of names doesn't match the number of
        fragments or if :obj:`fragment_names` is of an invalid type
    NotImplementedError
        For the deprecated "danger" option
    """
    #TODO fragment naming should be handled at the object level?

    def _assert_one_name_per_fragment(names):
        assert len(names) == len(fragments), \
            "Mismatch between nr. fragments and fragment names %s vs %s (%s)" % (
                len(fragments), len(names), names)

    # API-use: names already come as a sequence, only their number is checked
    if isinstance(fragment_names, (list, _np.ndarray)):
        _assert_one_name_per_fragment(fragment_names)
        return fragment_names

    # Automatic naming
    if fragment_names == '':
        return ['frag%u' % idx for idx in range(len(fragments))]

    # No naming
    if str(fragment_names).lower() == "none":
        return [None for __ in fragments]

    # What's left has to be a string of comma-separated-values
    assert isinstance(fragment_names, str), "Argument --names invalid: %s" % fragment_names
    #TODO get rid of this danger nonsens
    if 'danger' in fragment_names.lower():
        # browse older version to see what was here
        raise NotImplementedError

    names = [name.strip(" ") for name in fragment_names.split(",")]
    _assert_one_name_per_fragment(names)
    return names

# TODO mix and match with the color options of flareplots
def _parse_coloring_options(color_option, n,
                            default_color="tab:blue",
                            color_cycle=None
                            ):
    r"""
    Helper function to parse user input and return a color list

    Parameters
    ----------
    color_option : str, list, bool, or None
       * str  : if it contains no commas, return a list of len n
                with this color as each entry. If it is a list of
                comma-separated-values, it gets split and treated
                like a list (see next item). Blank spaces around
                the values are stripped
       * list : has to have at least n entries, the first n
                entries of it are returned
       * bool : True  : create a list of len n that repeats
                        :obj:`color_cycle` as needed
       * bool : False : create a list of len n with :obj:`default_color`
                        as entries (same as if :obj:`color_option`
                        had been equal to :obj:`default_color`)
       * None, "None", "none" : same as False
    n : int
        Wanted number of colors
    default_color : str
        Any color matplotlib understands
    color_cycle : list of matplotlib colors or None, default is None
        The colors to cycle through when :obj:`color_option` is True.
        If None, the module-level :obj:`my_frag_colors` is used

    Returns
    -------
    colors : list of len n

    Raises
    ------
    ValueError
        If less than n colors were provided or if the type
        of :obj:`color_option` is not any of the above
    """
    if color_cycle is None:
        color_cycle = my_frag_colors
    assert isinstance(color_cycle,list)

    if str(color_option).lower()=="none":
        color_option = False

    if isinstance(color_option, bool):
        if not color_option:
            return [default_color for __ in range(n)]
        # Wrap around using the length of the cycle actually in use,
        # else custom cycles either overflow or aren't fully used
        return [color_cycle[ii % len(color_cycle)] for ii in range(n)]

    if isinstance(color_option, str):
        color_option = [cc.strip() for cc in color_option.split(",")]
        if len(color_option)==1:
            return [color_option[0] for __ in range(n)]
        # more than one value: handled as a list of colors right below

    if isinstance(color_option,list):
        if len(color_option)<n:
            raise ValueError("Not enough input values %s for expected output of size n %u"%(color_option,n))
        return color_option[:n]

    raise ValueError("Can't understand the color option %s of type %s" % (str(color_option), type(color_option)))

# TODO Consider putting the figure instantiation also here
def _manage_timedep_ploting_and_saving_options(ctc_grp,
                                               fn,
                                               myfig,
                                               plot_timedep=True,
                                               separate_N_ctcs=False,
                                               title=None,
                                               savefigs=True,
                                               savetrajs=False,
                                               ):
    r"""
    Towards a common function for saving/managing timedep files
    for neighborhoods, sites, and interfaces

    The figures get a title, are saved to disk and closed. The
    time-traces can be saved as well.

    Parameters
    ----------
    ctc_grp : :obj:`mdciao.contacts.ContactGroup`
    fn : :obj:`mdciao.utils.str_and_dict.FilenameGenerator`
        Provides the descriptor, directory, extensions, cutoff,
        dpi and time-unit used for naming and saving
    myfig : list of :obj:`matplotlib.figure.Figure`
        Can have zero, one, or two figures. An empty list
        only produces a printed message
    plot_timedep : bool, default is True
        Only has an effect when :obj:`myfig` has one figure:
        it decides whether it gets the filename of the
        time-traces (True) or the one of the N_ctcs (False)
    separate_N_ctcs : bool, default is False
        If True, and :obj:`savetrajs` is True, the method
        :obj:`ctc_grp.save_trajs` gets called a second
        time, passing the contact-cutoff
    title : str or None, default is None
        Title of the figure(s). If None, the
        output descriptor of :obj:`fn` is used
    savefigs : bool, default is True
    savetrajs : bool, default is False

    Returns
    -------
    None

    """
    # TODO manage interface and sites appropiately
    if ctc_grp.is_neighborhood:
        lastname = ctc_grp.anchor_res_and_fragment_str.replace('*', "")
    else:
        lastname = ""

    if title is None:
        title = fn.output_desc #TODO consider using lastname

    def _build_fname(template):
        # An empty lastname leaves two consecutive dots behind
        filled = template % (fn.output_desc, lastname, fn.ctc_cutoff_Ang, fn.graphic_ext)
        return filled.replace("..", ".")

    fname_timedep = _build_fname('%s.%s.time_trace@%2.1f_Ang.%s')
    fname_N_ctcs = _build_fname('%s.%s.time_trace@%2.1f_Ang.N_ctcs.%s')

    # Differentiate the type of figures we can have
    n_figs = len(myfig)
    if n_figs == 0:
        fnames = []
        print("No figures of time-traces were produced because only 1 frame was provided")
    elif n_figs == 1:
        fnames = [fname_timedep if plot_timedep else fname_N_ctcs]
    elif n_figs == 2:
        fnames = [fname_timedep, fname_N_ctcs]

    if savefigs:
        for iname, ifig in zip(fnames, myfig):
            full_fname = _path.join(fn.output_dir, iname)
            ifig.axes[0].set_title("%s" % title) # TODO consider firstname lastname
            ifig.savefig(full_fname, bbox_inches="tight", dpi=fn.graphic_dpi)
            _plt.close(ifig)
            print(full_fname)

    # Saving the trajs doesn't depend on figures having been produced or saved
    if savetrajs:
        save_args = (fn.output_desc, fn.table_ext, fn.output_dir)
        ctc_grp.save_trajs(*save_args, t_unit=fn.t_unit, verbose=True)
        if separate_N_ctcs:
            ctc_grp.save_trajs(*save_args, t_unit=fn.t_unit, verbose=True,
                               ctc_cutoff_Ang=fn.ctc_cutoff_Ang)
        print()

def _color_schemes(istr):
    r"""
    Choose or generate a color scheme

    Parameters
    ----------
    istr : str
        Checked in this order:

        * csv colorname list, which gets split at the commas
        * colorname, anything :obj:`matplotlib` considers color-like
        * color scheme name (case-insensitive), currently
         * "P" : ["red", "purple", "gold", "darkorange"]
         * "H" : ["m", "darkgreen", "darkorange", "navy"]
         * "auto" : the colors of :obj:`matplotlib`'s prop_cycle

    Returns
    -------
        list of colorlike strings

    """
    if "," in istr:
        return istr.split(",")

    if _mplcolors.is_color_like(istr):
        return [istr]

    known_schemes = {
        "p": ["red", "purple", "gold", "darkorange"],
        "h": ["m", "darkgreen", "darkorange", "navy"],
        "auto": _plt.rcParams['axes.prop_cycle'].by_key()["color"],
    }
    return known_schemes[str(istr).lower()]

def _load_any_geom(geom):
    r"""
    Helper method for command-line-tools to get a geometry
    from either a filename or an object already in memory

    Parameters
    ----------
    geom : str or :obj:`~mdtraj.Trajectory`
        If str, it's loaded via :obj:`mdtraj.load`. Anything
        else is returned untouched (i.e. do nothing)

    Returns
    -------
    outgeom : :obj:`~mdtraj.Trajectory`
    """
    return _md.load(geom) if isinstance(geom, str) else geom

def _trajsNtop2xtcsNrefgeom(trajectories,topology):
    r"""
    Sort out the trajectories and get a reference geometry for them

    Parameters
    ----------
    trajectories : check get_sorted_trajectories
    topology : str, :obj:`mdtraj.Trajectory`, or None
        Loaded via :obj:`_load_any_geom`. If None, the
        reference geometry is the first item of what
        :obj:`_load_any_geom` returns for the first
        of the sorted trajectories

    Returns
    -------
    xtcs, refgeom
    xtcs : whatever get_sorted_trajectories returns
    refgeom : :obj:`mdtraj.Trajectory` object

    """
    xtcs = _mdcu.str_and_dict.get_sorted_trajectories(trajectories)
    if topology is not None:
        return xtcs, _load_any_geom(topology)

    # TODO in case the xtc[0] is a pdb/grofile, it will be read one more time later
    return xtcs, _load_any_geom(xtcs[0])[0]

def _fragment_overview(a,labtype):
    r"""
    provide the CLTs GPCR_overview and CGN_overview

    A labeler object is instantiated first. Then, if the user
    provided a topology, the consensus labels are mapped onto it and
    the residues/labels asked for get printed. Without a topology,
    the fragments of the labeler are printed as side-by-side tables.

    Parameters
    ----------
    a : :obj:`argparse.Namespace` object
        Contains the arguments used by the user
    labtype : str, "GPCR" or "CGN"
        lets the code know which :obj:`LabelerConsensus` to use

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If :obj:`labtype` is neither "GPCR" nor "CGN"
    """
    if labtype == "CGN":
        descriptor = a.PDB_code_or_txtfile
        labeler = _mdcnomenc.LabelerCGN(descriptor, write_to_disk=a.write_to_disk)
    elif labtype == "GPCR":
        descriptor = a.GPCR_uniprot_or_file
        # An existing path gets the plain "%s" as format, anything else
        # gets the default format found in the signature of LabelerGPCR
        if _path.exists(descriptor):
            fmt = "%s"
        else:
            fmt = _signature(_mdcnomenc.LabelerGPCR).parameters["format"].default
        labeler = _mdcnomenc.LabelerGPCR(descriptor,
                                         format=fmt,
                                         write_to_disk=a.write_to_disk)
    else:
        raise ValueError("Don't know the consensus type %s, only 'GPCR' and 'CGN'"%labtype)

    # Without topology, all we can do is show the fragments of the labeler
    if a.topology is None:
        for key, frag in labeler.fragments.items():
            print("fragment %s with %u AAs:"%(key, len(frag)))

            idf = _DF.from_dict({"residue"  : frag,
                                 "consensus": labeler.fragments_as_conlabs[key]})

            # One block of fixed-width text lines per group of row-indices
            idx_groups = _mdcu.lists.re_warp(_np.arange(len(idf)),10)
            textblocks = [['%-25s'%iline for iline in idf.loc[idxs].to_string().splitlines()]
                          for idxs in idx_groups]
            # Print the blocks next to each other, blocks
            # with fewer lines than the first one get skipped as needed
            for ii in range(len(textblocks[0])):
                print(''.join(' | %s'%tb[ii] for tb in textblocks if ii < len(tb)))
        return

    top = _md.load(a.topology).top
    map_conlab = labeler.top2labels(top,
                                    autofill_consensus=a.fill_gaps)
    labeler.top2frags(top,input_dataframe=labeler.most_recent_alignment)
    _mdcu.residue_and_atom.parse_and_list_AAs_input(a.AAs, top, map_conlab)

    if str(a.labels).lower() != "none":
        wanted_labels = [aa.strip(" ") for aa in a.labels.split(",")]
        conlab2residx = labeler.conlab2residx(top, map=map_conlab)
        for lab in wanted_labels:
            # Labels are matched using Unix filename pattern matching
            for match in _filter(list(conlab2residx.keys()),lab):
                idx = conlab2residx[match]
                print(idx,top.residue(idx), map_conlab[idx])

    if a.print_conlab:
        for ii, ilab in enumerate(map_conlab):
            print(ii, top.residue(ii), ilab)
            
def residue_neighborhoods(residues,
                          trajectories,
                          topology=None,
                          res_idxs=False,
                          ctc_cutoff_Ang=3.5,
                          stride=1,
                          ctc_control=5,
                          n_nearest=4,
                          chunksize_in_frames=10000,
                          nlist_cutoff_Ang=15,
                          n_smooth_hw=0,
                          sort=True,
                          pbc=True,
                          ylim_Ang=15,
                          fragments=["lig_resSeq+"],
                          fragment_names="",
                          fragment_colors=None,
                          graphic_ext=".pdf",
                          table_ext=".dat",
                          GPCR_uniprot=None,
                          CGN_PDB=None,
                          output_dir='.',
                          output_desc='neighborhood',
                          t_unit='ns',
                          curve_color="auto",
                          gray_background=False,
                          graphic_dpi=150,
                          short_AA_names=False,
                          allow_same_fragment_ctcs=True,
                          save_nomenclature_files=False,
                          plot_timedep=True,
                          n_cols=4,
                          distro=False,
                          n_jobs=1,
                          separate_N_ctcs=False,
                          accept_guess=False,
                          switch_off_Ang=None,
                          plot_atomtypes=False,
                          no_disk=False,
                          savefigs=True,
                          savetabs=True,
                          savetrajs=False,
                          figures=True,
                          pre_computed_distance_matrix=None
                          ):
    r"""Per-residue neighborhoods based on contact frequencies between pairs
    of residues.

    A neighborhood is a :obj:`mdciao.contacts.ContactGroup`-object containing a set of
    :obj:`mdciao.contacts.ContactPair`-objects with a shared residue,
    called the `anchor_residue`.

    The contact frequencies will be printed, plotted and saved. The residue-residue
    distance time-traces used for their computation will be also returned

    Note
    ----
    The time-independent figures (e.g. "neighborhood.overall@3.5_Ang.pdf") are always shown
    whereas the time-dependent figures (e.g. "neighborhood.GDP395.time_trace@3.5_Ang.pdf")
    are never shown, because the number of time-traces becomes very high very quickly.
    It's easier to look at them with an outside viewer.


    The user may be prompted when necessary,
    although this behaviour can be turned off with :obj:`accept_guess`

    Input can be from disk and/or from memory (see below).

    Can be parallelized up to the number of used trajectories.

    Many other optional parameters are exposed to allow fine-tuning of the
    computing, plotting, printing, and saving. Additional information can be regarding nomenclature,
    fragmentation heuristics and/or naming and or/coloring, residue labeling, time-trace
    averaging, data-streaming,

    Parameters
    ----------
    residues : int, iterable of ints or str
        The residue(s) for which the neighborhood will be computed.
        This input is pretty flexible wrt to strings and numbers,
        which are interpreted as sequence indices unless
        :obj:`res_idxs` is True
        Valid inputs are:
         * residues = [1,10,11,12]
         * residues = '1,10,11,12'
         * residues = '1,10-12'
         * residues = [1]
         * residues = 1
         * residues = '1'
         * residues = '1,10-12,GLU*,GDP*,E30'
         Please refer to :obj:`mdciao.utils.residue_and_atom.rangeexpand_residues2residxs`
         for more info
    trajectories : str, :obj:`mdtraj.Trajectory`, or None
        The MD-trajectories to calculate the frequencies from.
        This input is pretty flexible. For more info check
        :obj:`mdciao.utils.str_and_dict.get_sorted_trajectories`.
        Accepted values are:
         * pattern, e.g. "*.ext"
         * one string containing a filename
         * list of filenames
         * one :obj:`mdtraj.Trajectory` object
         * list of :obj:`mdtraj.Trajectory` objects
    topology : str or :obj:`~mdtraj.Trajectory`, default is None
        The topology associated with the :obj:`trajectories`
        If None, the topology of the first :obj:`trajectory` will
        be used, i.e. when no :obj:`topology` is passed, the first
        :obj:`trajectory` has to be either a .gro or .pdb file, or
        an :obj:`~mdtraj.Trajectory` object
    Other Parameters
    ----------------
    res_idxs : bool, default is False
        Whether the indices of :obj:`residues` should be understood as
         * zero-indexed, residue serial indices or
         * residue sequence, eg. 30 in GLU30, this is called 'resSeq'
         in an :obj:`mdtraj.core.Residue`-object
    ctc_cutoff_Ang : float, default is 3.5
        Any residue-residue distance is considered a contact if d<=ctc_cutoff_Ang
    stride : int, default is 1
        Stride the input data by this number of frames
    ctc_control : int or float, default is 5
        Control the number of reported contacts. Can be an
        integer (keep the first n contacts) or a float
        representing a fraction [0,1] of the total number of
        contacts.Default is 5.
    n_nearest : int, default is 4
        Exclude these many bonded neighbors for each residue, i.e
    chunksize_in_frames : int, default is 10000
        Stream through the trajectory data in chunks of this many frames
        Can lead to memory errors if :obj:`n_jobs` makes it so that
        e.g. 4 trajectories of 10000 frames each are loaded to memory
        and their residue-residue distances computed
    nlist_cutoff_Ang : int, default is 15
        Before computing the residue-residue distance for all frames,
        neighbor-list is created, for each residue, that includes
        the residues up to :obj:`nlist_cutoff_Ang` from the residue.
        Increase this parameters (e.g. to 30) if you expect large conformational
        changes and/or the geometry in :obj:`topology`
    n_smooth_hw: int, default is 0
        Plots of the time-traces will be smoothed using a window
        of 2*n_smooth_hw
    sort : bool, default is True
        Sort the input :obj:`residues` according to their indices
    pbc : bool, default is True
        Use periodic boundary conditions
    ylim_Ang : float, default is 15
        Limit in Angstrom of the y-axis of the time-traces.
        Default is 15. Switch to any other float or 'auto'
        for automatic scaling
    fragments : list, default is ["lig_resSeq+"]
        Fragment control. For compatibility reasons, it has
        to be a list, even if it only has one element.
        There exist several input modes:

        * ["consensus"] : use things like "TM*" or "G.H*", i.e.
         Ballesteros-Weinstein or CGN-sub-subunit labels.
        * List of len 1 with some fragmentation heuristic, e.g.
         ["lig_resSeq+"]. will use the default of
         :obj:`mdciao.fragments.get_fragments`. See there for
         info on defaults and other heuristics.
        * List of len N that can mix different possibilities:
          * iterable of integers (lists or np.arrays, e.g. np.arange(20,30)
          * ranges expressed as integer strings, "20-30"
          * ranges expressed as residue descriptors ["GLU30-LEU40"]
        Numeric expressions are interepreted as zero-indexed and unique
        residue serial indices, i.e. 30-40 does not necessarily equate
        "GLU30-LEU40" unless serial and sequence index coincide.
        If there's more than one "GLU30", the user gets asked to
        disambiguate. The resulting fragments need not cover all of the topology,
        they only need to not overlap.
    fragment_names : string or list of strings, default is ""
        If string, it has to be a list of comma-separated values.
        If you want unnamed fragments, use None, "None", or "".
        Has to contain names for all fragments that result from
        :obj:`fragments` or more.
        mdciao will try to use :obj:`replace4latex`
        to generate LaTeX expressions from stuff like "Galpha"
        You can use fragment_names="None" or "" to avoid using fragment names
    fragment_colors : None, boolean or list, default is None
        Assign colors to fragments. These colors will be used
        to color-code the frequency bars. If True, colors
        will be automatically selected, otherwise picked
        from the list. Use with cautions, plots
        get shrill quickly
    graphic_ext : str, default is ".pdf"
        The extension (=format) of the saved figures
    table_ext : str, default is ".dat"
        The extension (=format) of the saved tables
    GPCR_uniprot : str or :obj:`mdciao.nomenclature.LabelerGPCR`, default is None
        Try to find Ballesteros-Weinstein definitions. If str, e.g. "adrb2_human",
        try to locate a local filename or do a web lookup in the GPCRdb.
        If :obj:`mdciao.nomenclature.LabelerGPCR`, use this object directly
        (allows for object re-use when in API mode)
        See :obj:`mdciao.nomenclature` for more info and references.
    CGN_PDB : str or :obj:`mdciao.nomenclature.LabelerCGN`, default is None
        Try to find Common G-alpha Numbering definitions. If str, e.g. "3SN6",
        try to locate local filenames ("3SN6.pdb", "CGN_3SN6.txt") or do web lookups
        in https://www.mrc-lmb.cam.ac.uk/CGN/ and http://www.rcsb.org/.
        If :obj:`mdciao.nomenclature.LabelerCGN`, use this object directly
        (allows for object re-use when in API mode)
        See :obj:`mdciao.nomenclature` for more info and references.
    output_dir : str, default is '.'
        directory to which the results are written.
    output_desc : str, default is 'neighborhood'
        Descriptor for output files.
    t_unit : str, default is 'ns'
        Unit used for the temporal axis.
    curve_color : str, default is 'auto'
        Type of color used for the curves. Alternatives are "P" or "H"
    gray_background : bool, default is False
        Use gray background when using smoothing windows
    graphic_dpi : int, default is 150
        Dots per Inch (DPI) of the graphic output. Only has
        an effect for bitmap outputs.
    short_AA_names : bool, default is False
        Use one-letter aminoacid names when possible, e.g.
        K145 instead of Lys145.
    allow_same_fragment_ctcs : bool, default is True
        Allow contacts within the same fragment.
    save_nomenclature_files : bool, default is False
        Save available nomenclature definitions to disk so
        that they can be accessed locally in later uses.
    plot_timedep : bool, default is True
        Plot and save time-traces of the contacts
    n_cols : int, default is 4
        number of columns of the overall plot.
    distro : bool, default is False
        Plot distance distributions instead of contact bar
        plots
    n_jobs : int, default is 1
        Number of processors to use. The parallelization is
        done over trajectories and not over contacts, beyond
        n_jobs>n_trajs parallelization will not have any
        effect.
    separate_N_ctcs : bool, default is False
        Separate the plot with the total number contacts
        from the time-trace plot.
    accept_guess : bool, default is False
        Accept mdciao's guesses regarding fragment
        identification using nomenclature labels
    switch_off_Ang : NoneType, default is None
        Use a linear switchoff instead of a crisp one.
    plot_atomtypes : bool, default is False
        Add the atom-types to the frequency bars by
        'hatching' them. '--' is sidechain-sidechain '|' is
        backbone-backbone '\' is backbone-sidechain '/' is
        sidechain-backbone. See Fig XX for an example
    savefigs : bool, default is True
        Save the figures
    savetabs : bool, default is True
        Save the frequency tables
    savetrajs : bool, default is False
        Save the timetraces
    no_disk : bool, default is False
        If True, don't save any files at all:
        figs, tables, trajs, nomenclature
    figures : bool, default is True
        Draw figures
    pre_computed_distance_matrix : (m,m) np.ndarray, default is None
        The distance matrix here will speed up the
        pre-computing of likely neighbors. A typical use case
        is several API-calls following each other

    Returns
    -------
    out_dict : dict
        * neighborhoods : dictionary keyed by unique, zero-indexed residue indices.
         The values are :obj:`mdciao.contacts.ContactGroup` objects
        * ctc_idxs : 2D np.ndarray with the residue indices of the contact pairs within :obj:`nlist_cutoff_Ang` in at least one frame
        * ctcs_trajs : list of per-traj 2D np.ndarrays with the mindist between the residues of "ctc_idxs"
        * time_array : list of per-traj time-arrays

        Usually, only *neighborhoods* is useful; the other entries are there for debugging

    """

    # Input control residues
    if residues is None:
        print("You have to provide some residue input via the --residues option")
        return None

    _offer_to_create_dir(output_dir)
    xtcs, refgeom = _trajsNtop2xtcsNrefgeom(trajectories, topology)
    fn = _mdcu.str_and_dict.FilenameGenerator(output_desc, ctc_cutoff_Ang, output_dir,
                                              graphic_ext, table_ext, graphic_dpi, t_unit)
    if no_disk:
        savetrajs = False
        savefigs  = False
        savetabs = False
        save_nomenclature_files = False

    # More input control
    ylim_Ang=_np.float(ylim_Ang)
    print("Will compute contact frequencies for (%u items):\n%s"
          "\n with a stride of %u frames" % (len(xtcs),_mdcu.str_and_dict.inform_about_trajectories(xtcs, only_show_first_and_last=15), stride))

    fragments_as_residue_idxs, user_wants_consensus = _mdcfrg.fragments._fragments_strings_to_fragments(fragments, refgeom.top, verbose=True)
    fragment_names = _parse_fragment_naming_options(fragment_names, fragments_as_residue_idxs)
    fragment_colors = _parse_coloring_options(fragment_colors,len(fragment_names))


    mid_string = "\nWill compute neighborhoods for the residues\n" \
                 "%s\nexcluding %u nearest neighbors" \
                 "\n" % (residues,n_nearest)
    res_idxs_list, consensus_maps, consensus_frags = _res_resolver(residues, refgeom.top, fragments_as_residue_idxs,
                                                                   midstring=mid_string, GPCR_uniprot=GPCR_uniprot,
                                                                   CGN_PDB=CGN_PDB,
                                                                   save_nomenclature_files=save_nomenclature_files,
                                                                   accept_guess=accept_guess,
                                                                   interpret_as_res_idxs=res_idxs, sort=sort)

    top2confrag = _np.full(refgeom.top.n_residues, None)
    for key, val in consensus_frags.items():
        top2confrag[val] = key
    # Create a neighborlist
    naive_bonds=False #WIP, perhaps expose
    try:
        nl = _mdcu.bonds.bonded_neighborlist_from_top(refgeom.top, n=n_nearest)
    except ValueError as e:
        print(e)
        #print("You can use naive bond-listing by using serial indices with the\n"
        #      "option --naive_bonds. Use this option at your own risk")
        if naive_bonds:
            print("Creating naive bond list with residue serial index.")
            nl =[_np.arange(_np.max((0,ii-n_nearest)),_np.min((ii+n_nearest+1,refgeom.top.n_residues))).tolist() for ii in range(refgeom.top.n_residues)]
        else:
            raise(e)


    # Use it to prune the contact indices
    ctc_idxs = _np.vstack(
        [[_np.sort([val, ii]) for ii in range(refgeom.top.n_residues) if ii not in nl[val] and ii != val] for val in
         res_idxs_list])

    # Can we have same-fragment contacts
    if not allow_same_fragment_ctcs:
        fragment_idxs = [[_mdcu.lists.in_what_fragment(idx, fragments_as_residue_idxs) for idx in pair] for pair in ctc_idxs]
        ctc_idxs = [ctc_idxs[ii] for (ii,pair) in enumerate(fragment_idxs) if pair[0]!=pair[1]]

    print(
        "\nPre-computing likely neighborhoods by reducing the neighbor-list\n"
        "to those within %u Angstrom"%nlist_cutoff_Ang,
        end=" ",flush=True)

    if pre_computed_distance_matrix is not None:
        if not pre_computed_distance_matrix.shape[0] == pre_computed_distance_matrix.shape[1] == refgeom.top.n_residues:
            raise ValueError("Matrix doesn't have expected size (%u,%u), but shape (%u,%u)" % (refgeom.top.n_residues,
                                                                                               refgeom.top.n_residues,
                                                                                               pre_computed_distance_matrix.shape[0],
                                                                                               pre_computed_distance_matrix.shape[1]))
        ctcs = [_np.array([pre_computed_distance_matrix[ii][jj] for (ii, jj) in ctc_idxs], ndmin=2)]
        print("using the pre_computed_contact_matrix...", end="",flush=True)
        ctc_idxs=_np.array(ctc_idxs)
    else:
        print("in the first frame of reference geom\n'%s':..." % [topology or refgeom][0],
              end="",
              flush=True)
        ctcs, ctc_idxs = _md.compute_contacts(refgeom[0], _np.vstack(ctc_idxs), periodic=pbc)
    print("done!")

    ctc_idxs_small = _np.flatnonzero(ctcs[0] < nlist_cutoff_Ang / 10)
    _, ctc_idxs_small = _md.compute_contacts(refgeom, ctc_idxs[ctc_idxs_small])
    ctc_idxs_small = _mdcu.lists.unique_list_of_iterables_by_tuple_hashing(ctc_idxs_small)

    print("From %u potential distances, the neighborhoods have been "
          "reduced to only %u potential contacts.\n"
          "If this number is still too high (i.e. the computation is too slow)"
          ", consider using a smaller nlist_cutoff_Ang " % (
              len(ctc_idxs), len(ctc_idxs_small)))

    ctcs_trajs, time_arrays, at_pair_trajs = _mdcctcs.trajs2ctcs(xtcs, refgeom.top, ctc_idxs_small, stride=stride,
                                                       chunksize=chunksize_in_frames, return_times_and_atoms=True,
                                                       consolidate=False,
                                                       n_jobs=n_jobs,
                                                       )
    print() # to make sure we don't overwrite output
    actcs = _np.vstack(ctcs_trajs)
    if switch_off_Ang is None:
        ctcs_mean = _np.mean(actcs < ctc_cutoff_Ang / 10, 0)
    else:
        ctcs_mean = _np.mean(_mdcctcs._linear_switchoff(actcs, ctc_cutoff_Ang / 10, switch_off_Ang / 10),0)

    final_look = _mdcctcs.select_and_report_residue_neighborhood_idxs(ctcs_mean, res_idxs_list,
                                                                      fragments_as_residue_idxs, ctc_idxs_small,
                                                                      refgeom.top,
                                                                      interactive=False,
                                                                      ctcs_kept=ctc_control)

    # Create the neighborhoods as groups of contact_pair objects
    neighborhoods = {}
    empty_CGs = []
    for res_idx, val in final_look.items():
        CPs = []
        for idx in val:
            pair = ctc_idxs_small[idx]
            consensus_labels = [_mdcnomenc.choose_between_consensus_dicts(idx, consensus_maps.values()) for idx in pair]
            fragment_idxs = [_mdcu.lists.in_what_fragment(idx, fragments_as_residue_idxs) for idx in pair]
            CPs.append(_mdcctcs.ContactPair(pair,
                                   [itraj[:, idx] for itraj in ctcs_trajs],
                                   time_arrays,
                                   top=refgeom.top,
                                   anchor_residue_idx=res_idx,
                                   consensus_labels=consensus_labels,
                                   trajs=xtcs,
                                   fragment_idxs=fragment_idxs,
                                   consensus_fragnames=[top2confrag[idx] for idx in pair],
                                   fragment_names=[fragment_names[idx] for idx in fragment_idxs],
                                   fragment_colors=[fragment_colors[idx] for idx in fragment_idxs],
                                   atom_pair_trajs=[itraj[:, [idx * 2, idx * 2 + 1]] for itraj in at_pair_trajs]
                                   ))
        try:
            neighborhoods[res_idx] = _mdcctcs.ContactGroup(CPs, neighbors_excluded=n_nearest)
        except NotImplementedError as e:
            print(e)
            empty_CGs.append(res_idx)
            neighborhoods[res_idx] = None
    if len(empty_CGs) == len(final_look):
        print("No residues have any neighbors at %2.1f Ang. No output produced." % ctc_cutoff_Ang)
        return
    elif len(empty_CGs)>0:
        print("The following residues have no neighbors at %2.1f Ang, their frequency histograms will be empty"%ctc_cutoff_Ang)
        print("\n".join([str(refgeom.top.residue(ii)) for ii in empty_CGs]))


    if figures:
        overall_fig = _mdcplots.CG_panels(n_cols, neighborhoods, ctc_cutoff_Ang,
                                  distro=distro,
                                  short_AA_names=short_AA_names,
                                  plot_atomtypes=plot_atomtypes,
                                  switch_off_Ang=switch_off_Ang)
        if savefigs:
            overall_fig.savefig(fn.fullpath_overall_fig, dpi=graphic_dpi)
            print("The following files have been created:")
            print(fn.fullpath_overall_fig)

    neighborhoods = {key:val for key, val in neighborhoods.items() if val is not None}
    # TODO undecided about this
    # TODO this code is repeated in sites...can we abstract this oafa?
    if savetabs:
        for CG in neighborhoods.values():
            fname = fn.fname_per_residue_table(CG.anchor_res_and_fragment_str)
            CG.frequency_table(ctc_cutoff_Ang,
                               fname,
                               switch_off_Ang=switch_off_Ang,
                               write_interface=False,
                               atom_types=True,
                               # AA_format="long",
                               )
            print(fname)

    if figures and (plot_timedep or separate_N_ctcs):
        # TODO make a method out of this to use in all CLTs
        # TODO perhaps use https://github.com/python-attrs/attrs
        # to avoid boilerplate
        # This is very ugly
        for CG in neighborhoods.values():
            # TODO this plot_N_ctcs and skip_timedep is very bad, but ATM my only chance without major refactor
            # TODO perhaps it would be better to bury dt in the plotting directly?
            panelheight = 3
            myfig = CG.plot_timedep_ctcs(panelheight,
                                            color_scheme=_color_schemes(curve_color),
                                            ctc_cutoff_Ang=ctc_cutoff_Ang,
                                            switch_off_Ang=switch_off_Ang,
                                            dt=_mdcu.str_and_dict.tunit2tunit["ps"][t_unit],
                                            gray_background=gray_background,
                                            n_smooth_hw=n_smooth_hw,
                                            plot_N_ctcs=True,
                                            pop_N_ctcs=separate_N_ctcs,
                                            shorten_AAs=short_AA_names,
                                            skip_timedep=not plot_timedep,
                                            t_unit=t_unit,
                                            ylim_Ang=ylim_Ang,
                                            )

            # One title for all axes on top
            title = CG.anchor_res_and_fragment_str
            if short_AA_names:
                title = CG.anchor_res_and_fragment_str_short
            title = _mdcu.str_and_dict.latex_superscript_fragments(title)
            if n_nearest >0:
                title += "\n%u nearest bonded neighbors excluded" % (n_nearest)
            _manage_timedep_ploting_and_saving_options(CG, fn, myfig,
                                                       plot_timedep=plot_timedep,
                                                       separate_N_ctcs=separate_N_ctcs,
                                                       title=title,
                                                       savefigs=savefigs,
                                                       savetrajs=savetrajs
                                                       )

    return {"ctc_idxs": ctc_idxs_small,
            'ctcs_trajs': ctcs_trajs,
            'time_array': time_arrays,
            "neighborhoods": neighborhoods}

def interface(
        trajectories,
        topology=None,
        frag_idxs_group_1=None,
        frag_idxs_group_2=None,
        GPCR_uniprot="None",
        CGN_PDB="None",
        chunksize_in_frames=10000,
        ctc_cutoff_Ang=3.5,
        curve_color="auto",
        fragments=['lig_resSeq+'],
        fragment_names="",
        graphic_dpi=150,
        graphic_ext=".pdf",
        gray_background=False,
        interface_cutoff_Ang=35,
        ctc_control=20,
        n_smooth_hw=0,
        output_desc="interface",
        output_dir=".",
        short_AA_names=False,
        stride=1,
        t_unit="ns",
        plot_timedep=True,
        accept_guess=False,
        n_jobs=1,
        n_nearest=0,
        sort_by_av_ctcs=True,
        scheme="closest-heavy",
        separate_N_ctcs=False,
        table_ext="dat",
        title=None,
        min_freq=.10,
        contact_matrix=True,
        cmap='binary',
        flareplot=True,
        save_nomenclature_files=False,
        no_disk=False,
        savefigs=True,
        savetabs=True,
        savetrajs=False,
        figures=True,
        self_interface=False,
):
    r"""Contact-frequencies between two groups of residues

    The groups of residues can be defined directly
    by using residue indices or by defining molecular fragments
    and using these definitions as a shorthand to address
    large sub-domains of the molecular topology. See in particular
    the documentation for :obj:`fragments`, :obj:`frag_idxs_group_1`
    and :obj:`frag_idxs_group_2`.

    Typically, the two groups of residues conforming both
    sides of the interface, also called interface members,
    do not share common residues, because the members
    belong to different molecular units. For example,
    in a receptor--G-protein complex, one partner is
    the receptor and the other partner is the G-protein.

    By default, mdciao.interface doesn't allow interface
    members to share residues. However, sometimes it's
    useful to allow it because the contacts of one fragment
    with itself (the self-contacts) are also important.
    E.g. the C-terminus of a receptor interfacing with
    the entire receptor, **including the C-terminus**.
    To allow for this behaviour, use :obj:`self_interface` = True,
    and possibly increase :obj:`n_nearest`, since otherwise
    neighboring residues of the shared set (e.g. C-terminus)
    will always appear as formed.

    Parameters
    ----------
    trajectories :
        The MD-trajectories to calculate the frequencies
        from. This input is pretty flexible. For more info check
        :obj:`mdciao.utils.str_and_dict.get_sorted_trajectories`.
        Accepted values are:
         * pattern, e.g. "*.ext"
         * one string containing a filename
         * list of filenames
         * one :obj:`~mdtraj.Trajectory` object
         * list of :obj:`~mdtraj.Trajectory` objects
    topology : str or :obj:`~mdtraj.Trajectory`, default is None
        The topology associated with the :obj:`trajectories`
        If None, the topology of the first :obj:`trajectory` will
        be used, i.e. when no :obj:`topology` is passed, the first
        :obj:`trajectory` has to be either a .gro or .pdb file, or
        an :obj:`~mdtraj.Trajectory` object
    frag_idxs_group_1 : NoneType, default is None
        Indices of the fragments that belong to the group_1.
        Strings can be CSVs and include ranges, e.g. '1,3-4',
        or be consensus labels "TM*,-TM6".
        Defaults to None, which will prompt the user for
        information, except when only two fragments are
        present. Then it defaults to [0]
    frag_idxs_group_2 : NoneType, default is None
        Indices of the fragments that belong to the group_2.
        Strings can be CSVs and include ranges, e.g. '1,3-4',
        or be consensus labels "TM*,-TM6".
        Defaults to None, which will prompt the user for
        information, except when only two fragments are
        present. Then it defaults to [1]
    GPCR_uniprot : str, default is 'None'
        Try to find Ballesteros-Weinstein definitions. If
        str, e.g. "adrb2_human", try to locate a local
        filename or do a web lookup in the GPCRdb.
        If :obj:`mdciao.nomenclature.LabelerGPCR`, use this object
        directly (allows for object re-use when in API mode)
        See :obj:`mdciao.nomenclature` for more info and
        references.
    CGN_PDB : str, default is 'None'
        Try to find Common G-alpha Numbering definitions. If
        str, e.g. "3SN6", try to locate local filenames
        ("3SN6.pdb", "CGN_3SN6.txt") or do web lookups in
        https://www.mrc-lmb.cam.ac.uk/CGN/ and
        http://www.rcsb.org/. If
        :obj:`mdciao.nomenclature.LabelerCGN`, use this
        object directly (allows for object re-use when in API
        mode) See :obj:`mdciao.nomenclature` for more info
        and references.
    chunksize_in_frames : int, default is 10000
        Stream through the trajectory data in chunks of this
        many frames Can lead to memory errors if
        :obj:`n_jobs` makes it so that e.g. 4 trajectories
        of 10000 frames each are loaded to memory and their
        residue-residue distances computed
    ctc_cutoff_Ang : float, default is 3.5
        Any residue-residue distance is considered a contact
        if d<=ctc_cutoff_Ang
    curve_color : str, default is 'auto'
        Type of color used for the curves. Alternatives are
        "P" or "H"
    fragments : list, default is ['lig_resSeq+']
        Fragment control. For compatibility reasons, it has
        to be a list, even if it only has one element.
        There exist several input modes:

        * ["consensus"] : use things like "TM*" or "G.H*", i.e.
         Ballesteros-Weinstein or CGN-sub-subunit labels.
        * List of len 1 with some fragmentation heuristic, e.g.
         ["lig_resSeq+"] : will use the default of
         :obj:`mdciao.fragments.get_fragments`. See there for
         info on defaults and other heuristics.
        * List of len N that can mix different possibilities:

          * iterable of integers (lists or np.arrays, e.g. np.arange(20,30)
          * ranges expressed as integer strings, "20-30"
          * ranges expressed as residue descriptors ["GLU30-LEU40"]
        Numeric expressions are interpreted as zero-indexed and unique
        residue serial indices, i.e. 30-40 does not necessarily equate
        "GLU30-LEU40" unless serial and sequence index coincide.
        If there's more than one "GLU30", the user gets asked to
        disambiguate. The resulting fragments need not cover all of the topology,
        they only need to not overlap.
    fragment_names : str or list, default is ''
        If string, it has to be a list of comma-separated
        values. If you want unnamed fragments, use None,
        "None", or "". Has to contain names for all
        fragments that result from :obj:`fragments` or more.
        mdciao will try to use :obj:`replace4latex` to
        generate LaTeX expressions from stuff like "Galpha"
        You can use fragment_names="None" or "" to avoid
        using fragment names
    graphic_dpi : int, default is 150
        Dots per Inch (DPI) of the graphic output. Only has
        an effect for bitmap outputs.
    graphic_ext : str, default is '.pdf'
        The extension (=format) of the saved figures
    gray_background : bool, default is False
        Use gray background when using smoothing windows
    interface_cutoff_Ang : float, default is 35
        The interface between both groups is defined as the
        set of group_1-group_2-distances that are within
        this cutoff in the reference topology. Otherwise, a
        large number of non-necessary distances (e.g.
        between N-terminus and G-protein) are computed.
        Default is 35. Setting this cutoff to None is
        equivalent to using no cutoff,
        i.e. all possible contacts are regarded
    ctc_control : int, default is 20
        Control the number of reported contacts. Can be an
        integer (keep the first n contacts) or a float
        representing a fraction [0,1] of the total number of
        contacts. Default is 20.
    n_smooth_hw : int, default is 0
        Plots of the time-traces will be smoothed using a
        window of 2*n_smooth_hw
    output_desc : str, default is 'interface'
        Descriptor for output files.
    output_dir : str, default is '.'
        Directory to which the results are written.
    short_AA_names : bool, default is False
        Use one-letter aminoacid names when possible, e.g.
        K145 instead of Lys145.
    stride : int, default is 1
        Stride the input data by this number of frames
    t_unit : str, default is 'ns'
        Unit used for the temporal axis.
    plot_timedep : bool, default is True
        Plot and save time-traces of the contacts
    accept_guess : bool, default is False
        Accept mdciao's guesses regarding fragment
        identification using nomenclature labels
    n_jobs : int, default is 1
        Number of processors to use. The parallelization is
        done over trajectories and not over contacts, beyond
        n_jobs>n_trajs parallelization will not have any
        effect.
    n_nearest : int, default is 0
        Exclude these many bonded neighbors for each
        residue. Usually, the chosen molecular
        fragments belong to different chains and
        don't share any bonds, so this parameter
        has no effect. However, if you choose
        to compare molecular fragments that
        are bonded (e.g. the C-terminus with
        the rest of the molecule), there's
        one pair that'll be bonded across the
        fragment-boundary, yielding one contact
        that's always formed. Setting :obj:`n_nearest`
        to 1 will delete this contact.
    sort_by_av_ctcs : bool, default is True
        When presenting the results summarized by residue,
        sort by sum of frequencies (~average number of
        contacts). Default is True.
    scheme : str, default is 'closest-heavy'
        Type of scheme for computing distance between
        residues. Choices are {'ca', 'closest', 'closest-
        heavy', 'sidechain', 'sidechain-heavy'}. See
        :obj:`mdtraj.compute_contacts` documentation for more info
    separate_N_ctcs : bool, default is False
        Separate the plot with the total number of contacts
        from the time-trace plot.
    table_ext : str, default is "dat"
        The extension (=format) of the saved tables
    title : NoneType, default is None
        Name of the system. Used for figure titles (not
        filenames) Defaults to :obj:`output_desc` if None is given
    min_freq : float, default is 0.1
        Do not show frequencies smaller than this. If you
        notice the output being truncated at values too far
        away from this, you need to increase the :obj:`ctc_control`
        parameter
    contact_matrix : bool, default is True
        Produce a plot of the interface contact matrix
    cmap : str, default is 'binary'
        The colormap for the contact matrix. Default is
        'binary' which is black and white, but you can
        choose anything from here:
        https://matplotlib.org/3.1.0/tutorials/colors/colormaps.html
    flareplot : bool, default is True
        Produce a flare plot of the interface contact
        matrix. The format will be .pdf no matter the value of
        :obj:`graphic_ext`
    save_nomenclature_files : bool, default is False
        Save available nomenclature definitions to disk so
        that they can be accessed locally in later uses.
    no_disk : bool, default is False
        If True, don't save any files at all:
        figs, tables, trajs, nomenclature
    savefigs : bool, default is True
        Save the figures
    savetabs : bool, default is True
        Save the frequency tables
    savetrajs : bool, default is False
        Save the timetraces
    figures : bool, default is True
        Draw figures
    self_interface : bool, default is False
        Allow the interface members to share
        residues

    Returns
    -------
    CG_interface : :obj:`mdciao.contacts.ContactGroup`
        The object containing the :obj:`mdciao.contacts.ContactPair`
        objects that conform the interface.

    """
    if str(title).lower()=="none":
        title = output_desc

    _offer_to_create_dir(output_dir)
    xtcs, refgeom = _trajsNtop2xtcsNrefgeom(trajectories,topology)
    fn = _mdcu.str_and_dict.FilenameGenerator(output_desc,ctc_cutoff_Ang,output_dir,
                                              graphic_ext, table_ext, graphic_dpi,t_unit)
    if no_disk:
        savetrajs = False
        savefigs  = False
        savetabs = False
        save_nomenclature_files = False

    print("Will compute contact frequencies for trajectories:\n%s"
          "\n with a stride of %u frames" % (_mdcu.str_and_dict.inform_about_trajectories(xtcs, only_show_first_and_last=15), stride))

    fragments_as_residue_idxs, user_wants_consensus = _mdcfrg.fragments._fragments_strings_to_fragments(fragments, refgeom.top, verbose=True)
    if user_wants_consensus and all([str(cons).lower() == 'none' for cons in [GPCR_uniprot, CGN_PDB]]):
        raise ValueError(
            "User wants to define interface fragments using consensus labels, but no consensus labels were provided via the 'CGN_PDB' or the 'GPCR_uniprot' arguments.")
    fragment_names = _parse_fragment_naming_options(fragment_names, fragments_as_residue_idxs)
    consensus_frags, consensus_maps, consensus_labelers = \
        _parse_consensus_options_and_return_fragment_defs({"GPCR": GPCR_uniprot,
                                                           "CGN": CGN_PDB},
                                                          refgeom.top,
                                                          fragments_as_residue_idxs,
                                                          accept_guess=accept_guess,
                                                          save_nomenclature_files=save_nomenclature_files)
    top2confrag = _np.full(refgeom.top.n_residues, None)
    for key, val in consensus_frags.items():
        top2confrag[val] = key
    if user_wants_consensus:
        intf_frags_as_residxs, \
        intf_frags_as_str_or_keys  = _mdcfrg.frag_dict_2_frag_groups(consensus_frags, ng=2, answers=[frag_idxs_group_1, frag_idxs_group_2])

    else:
        intf_frags_as_residxs, \
        intf_frags_as_str_or_keys   = _mdcfrg.frag_list_2_frag_groups(fragments_as_residue_idxs,
                                                               frag_idxs_group_1, frag_idxs_group_2,
                                                               )
    intersect = list(set(intf_frags_as_residxs[0]).intersection(intf_frags_as_residxs[1]))
    if len(intersect) > 0:
        if self_interface:
            ctc_idxs = _mdcu.lists.unique_product_w_intersection(intf_frags_as_residxs[0], intf_frags_as_residxs[1])
        else:
            raise AssertionError("Some residues appear in both members of the interface, but this"
                                 " behavior is blocked by default.\nIf you are sure this"
                                 " is correct, unblock this option with 'self_interface=True'.\n"
                                 "The residues are %s" % intersect)
    else:
        ctc_idxs = _np.vstack(list(_iterpd(intf_frags_as_residxs[0], intf_frags_as_residxs[1])))
         # Remove self-contacts
        ctc_idxs = _np.vstack([pair for pair in ctc_idxs if pair[0]!=pair[1]])

    # Create a neighborlist
    if n_nearest>0:
        print("Excluding contacts between %u nearest neighbors"%n_nearest)
        nl = _mdcu.bonds.bonded_neighborlist_from_top(refgeom.top, n=n_nearest)
        ctc_idxs = _np.vstack([(ii,jj) for ii,jj in ctc_idxs if jj not in nl[ii]])

    print("\nComputing distances in the interface between fragments\n%s\nand\n%s"%
          ('\n'.join(_twrap(', '.join(['%s' % gg for gg in intf_frags_as_str_or_keys[0]]))),
           '\n'.join(_twrap(', '.join(['%s' % gg for gg in intf_frags_as_str_or_keys[1]])))))
    if interface_cutoff_Ang is None:
        ctc_idxs_intf = ctc_idxs
    else:
        print("The interface is restricted to the residues within %3.1f "
              "Angstrom of each other in the reference topology.\n"
              "Computing interface..."%interface_cutoff_Ang, end="")
        ctcs, ctc_idxs = _md.compute_contacts(refgeom[0], _np.vstack(ctc_idxs))
        print("done!")
        ctc_idxs_intf = ctc_idxs[_np.argwhere(ctcs[0] < interface_cutoff_Ang / 10).squeeze()]
        print()
        print(
            "From %u potential group_1-group_2 distances, the interface was reduced to only %u potential contacts.\nIf this "
            "number is still too high (i.e. the computation is too slow) consider using a smaller interface cutoff" % (
            len(ctc_idxs), len(ctc_idxs_intf)))
    print()
    ctcs, times, at_pair_trajs = _mdcctcs.trajs2ctcs(xtcs, refgeom.top, ctc_idxs_intf,
                                 stride=stride, return_times_and_atoms=True,
                                 consolidate=False,
                                 chunksize=chunksize_in_frames,
                                 n_jobs=n_jobs,
                                 progressbar=True,
                                 scheme=scheme
                                 )

    # Stack all data
    actcs = _np.vstack(ctcs)

    # Get frequencies so that we don't create unnecessary ctc objects
    ctcs_bin = (actcs <= ctc_cutoff_Ang / 10).astype("int").sum(0)
    ctc_frequency = ctcs_bin / actcs.shape[0]
    tot_freq = ctc_frequency.sum()
    order = _np.argsort(ctc_frequency)[::-1]
    ctc_objs = []
    n_ctcs =  _mdcu.lists._get_n_ctcs_from_freqs(ctc_control,ctc_frequency[order])[0]
    #TODO still unsure about where it's best to put this
    _mdcctcs.contacts._contact_fraction_informer(n_ctcs, ctc_frequency[order], or_frac=.9)
    for ii, idx in enumerate(order[:n_ctcs]):
        ifreq = ctc_frequency[idx]
        if ifreq > min_freq:
            pair = ctc_idxs_intf[idx]
            consensus_labels = [_mdcnomenc.choose_between_consensus_dicts(idx, consensus_maps,
                                                                no_key=None) for idx in pair]
            fragment_idxs = [_mdcu.lists.in_what_fragment(idx, fragments_as_residue_idxs) for idx in pair]
            ctc_objs.append(_mdcctcs.ContactPair(pair,
                                                 [itraj[:, idx] for itraj in ctcs],
                                                 times,
                                                 top=refgeom.top,
                                                 consensus_labels=consensus_labels,
                                                 trajs=xtcs,
                                                 fragment_idxs=fragment_idxs,
                                                 fragment_names=[fragment_names[idx] for idx in fragment_idxs],
                                                 consensus_fragnames=[top2confrag[idx] for idx in pair],
                                                 atom_pair_trajs=[itraj[:, [idx * 2, idx * 2 + 1]] for itraj in
                                                                  at_pair_trajs]
                                                 ))
            cum_freq = ctc_frequency[order[:ii+1]].sum()
            #print(ii, ifreq.round(2), cum_freq.round(2), (cum_freq.sum()/tot_freq*100).round(2))

    ctc_grp_intf = _mdcctcs.ContactGroup(ctc_objs,
                                         interface_fragments=intf_frags_as_residxs,  # interface_residx_short,
                                         name=title)
    print()
    print(ctc_grp_intf.frequency_dataframe(ctc_cutoff_Ang).round({"freq":2, "sum":2}))
    print()
    dfs = ctc_grp_intf.frequency_sum_per_residue_names(ctc_cutoff_Ang,
                                                       list_by_interface=True,
                                                       return_as_dataframe=True,
                                                       sort=sort_by_av_ctcs)
    print(dfs[0].round({"freq":2}))
    print()
    print(dfs[1].round({"freq":2}))

    if savetabs:
        print("The following files have been created:")
        ctc_grp_intf.frequency_table(ctc_cutoff_Ang, fn.fullpath_overall_excel, sort=sort_by_av_ctcs)
        print(fn.fullpath_overall_excel)
        ctc_grp_intf.frequency_table(ctc_cutoff_Ang, fn.fullpath_overall_dat, atom_types=True)
        print(fn.fullpath_overall_dat)
        ctc_grp_intf.frequency_to_bfactor(ctc_cutoff_Ang, fn.fullpath_pdb, refgeom[0],
                                          # interface_sign=True
                                          )
        print(fn.fullpath_pdb)

    if figures:
        panelheight = 3
        n_cols = 1
        n_rows = 2
        panelsize = 4
        panelsize2font = 3.5
        fudge = 7
        histofig, histoax = _plt.subplots(n_rows, n_cols, sharex=True, sharey=False,
                                          figsize=(n_cols * panelsize * _np.ceil(ctc_grp_intf.n_ctcs/fudge),
                                                  n_rows * panelsize),
                                          )

        # One loop for the histograms
        _rcParams["font.size"] = panelsize * panelsize2font
        ctc_grp_intf.plot_freqs_as_bars(ctc_cutoff_Ang,
                                        title,
                                        ax=histoax[0],
                                        xlim=_np.min((n_ctcs, ctc_grp_intf.n_ctcs)),
                                        label_fontsize_factor=panelsize2font / panelsize,
                                        shorten_AAs=short_AA_names,
                                        truncate_at=min_freq,
                                        total_freq=tot_freq
                                        )

        ctc_grp_intf.plot_frequency_sums_as_bars(ctc_cutoff_Ang,
                                                 title,
                                                 jax=histoax[1],
                                                 list_by_interface=True,
                                                 label_fontsize_factor=panelsize2font / panelsize,
                                                 truncate_at=.05,
                                                 shorten_AAs=short_AA_names,
                                                 sort=sort_by_av_ctcs,
                                                 )
        histofig.tight_layout(h_pad=2, w_pad=0, pad=0)

        # TODO bury this in plots?
        if contact_matrix:
            cmat_fig, iax = ctc_grp_intf.plot_interface_frequency_matrix(ctc_cutoff_Ang,
                                                                     colorbar=True,
                                                                     grid=True,
                                                                     cmap=cmap)

            iax.set_title("'%s'  as contact matrix" % _mdcu.str_and_dict.replace4latex(title),
                          fontsize=iax.get_xticklabels()[0].get_fontsize() * 2)
            cmat_fig.tight_layout()


        if savefigs:
            histofig.savefig(fn.fullpath_overall_fig, dpi=graphic_dpi, bbox_inches="tight")
            print(fn.fullpath_overall_fig)
            cmat_fig.savefig(fn.fullpath_matrix)
            print(fn.fullpath_matrix)

        if flareplot:
            ifig, iax = ctc_grp_intf.plot_freqs_as_flareplot(ctc_cutoff_Ang,
                                                             consensus_maps=consensus_labelers.values(),
                                                             SS=refgeom,
                                                             fragment_names=fragment_names,
                                                             fragments=fragments_as_residue_idxs,
                                                             )
            ifig.tight_layout()
            if savefigs:
                ifig.savefig(fn.fullpath_flare_pdf, bbox_inches="tight")
                print(fn.fullpath_flare_pdf)

        if plot_timedep or separate_N_ctcs:
            myfig = ctc_grp_intf.plot_timedep_ctcs(panelheight,
                                                   color_scheme=_color_schemes(curve_color),
                                                   ctc_cutoff_Ang=ctc_cutoff_Ang,
                                                   dt=_mdcu.str_and_dict.tunit2tunit["ps"][t_unit],
                                                   gray_background=gray_background,
                                                   n_smooth_hw=n_smooth_hw,
                                                   plot_N_ctcs=True,
                                                   pop_N_ctcs=separate_N_ctcs,
                                                   shorten_AAs=short_AA_names,
                                                   skip_timedep=not plot_timedep,
                                                   t_unit=t_unit)
            _manage_timedep_ploting_and_saving_options(ctc_grp_intf, fn, myfig,
                                                       plot_timedep=plot_timedep,
                                                       separate_N_ctcs=separate_N_ctcs,
                                                       savefigs=savefigs,
                                                       savetrajs=savetrajs
                                                       )

    return ctc_grp_intf


def sites(site_inputs,
          trajectories,
          topology=None,
          ctc_cutoff_Ang=3.5,
          stride=1,
          scheme="closest-heavy",
          chunksize_in_frames=10000,
          n_smooth_hw=0,
          pbc=True,
          GPCR_uniprot="None",
          CGN_PDB="None",
          fragments=['lig_resSeq+'],
          default_fragment_index=None,
          fragment_names="",
          output_dir='.',
          graphic_ext=".pdf",
          t_unit='ns',
          curve_color="auto",
          gray_background=False,
          graphic_dpi=150,
          short_AA_names=False,
          save_nomenclature_files=False,
          ylim_Ang=10,
          n_jobs=1,
          accept_guess=False,
          table_ext="dat",
          output_desc="sites",
          plot_atomtypes=False,
          distro=False,
          no_disk=False,
          savefigs=True,
          savetabs=True,
          savetrajs=False,
          figures=True,
          plot_timedep=True,
          ):
    r"""

    Compute distances between groups of contact-pairs that are
    already pre-defined as sites

    Parameters
    ----------
    site_inputs : list
        List of sites to compute. Sites can be either
        paths to site file(s) in json formats or
        directly a site dictionary. A site dictionary
        is something like {"name":"site",
                           "pairs":{"AAresSeq":["GLU30-ARG40",
                                                "LYS31-W70"]}}
        See :obj:`mdciao.sites` for more info
    trajectories :
        The MD-trajectories to calculate the frequencies
        from. This input is pretty flexible. For more info check
        :obj:`mdciao.utils.str_and_dict.get_sorted_trajectories`.
        Accepted values are:
         * pattern, e.g. "*.ext"
         * one string containing a filename
         * list of filenames
         * one :obj:`~mdtraj.Trajectory` object
         * list of :obj:`~mdtraj.Trajectory` objects
    topology : str or :obj:`~mdtraj.Trajectory`, default is None
        The topology associated with the :obj:`trajectories`
        If None, the topology of the first :obj:`trajectory` will
        be used, i.e. when no :obj:`topology` is passed, the first
        :obj:`trajectory` has to be either a .gro or .pdb file, or
        an :obj:`~mdtraj.Trajectory` object
    ctc_cutoff_Ang : float, default is 3.5
        Any residue-residue distance is considered a contact
        if d<=ctc_cutoff_Ang
    stride : int, default is 1
        Stride the input data by this number of frames
    scheme : str, default is 'closest-heavy'
        Type of scheme for computing distance between
        residues. Choices are {'ca', 'closest', 'closest-
        heavy', 'sidechain', 'sidechain-heavy'}. See mdtraj
        documentation for more info
    chunksize_in_frames : int, default is 10000
        Stream through the trajectory data in chunks of this
        many frames. Can lead to memory errors if
        :obj:`n_jobs` makes it so that e.g. 4 trajectories
        of 10000 frames each are loaded to memory and their
        residue-residue distances computed
    n_smooth_hw : int, default is 0
        Plots of the time-traces will be smoothed using a
        window of 2*n_smooth_hw
    pbc : bool, default is True
        Use periodic boundary conditions
    GPCR_uniprot : str, default is 'None'
        Try to find Ballesteros-Weinstein definitions. If
        str, e.g. "adrb2_human", try to locate a local
        filename or do a web lookup in the GPCRdb.
        If :obj:`mdciao.nomenclature.LabelerGPCR`, use this object
        directly (allows for object re-use when in API mode)
        See :obj:`mdciao.nomenclature` for more info and
        references.
    CGN_PDB : str, default is 'None'
        Try to find Common G-alpha Numbering definitions. If
        str, e.g. "3SN6", try to locate local filenames
        ("3SN6.pdb", "CGN_3SN6.txt") or do web lookups in
        https://www.mrc-lmb.cam.ac.uk/CGN/ and
        http://www.rcsb.org/. If
        :obj:`mdciao.nomenclature.LabelerCGN`, use this
        object directly (allows for object re-use when in
        API mode) See :obj:`mdciao.nomenclature` for more
        info and references.
    fragments : list, default is ['lig_resSeq+']
        Fragment control. For compatibility reasons, it has
        to be a list, even if it only has one element.
        There exist several input modes:

        * ["consensus"] : use things like "TM*" or "G.H*", i.e.
         Ballesteros-Weinstein or CGN-sub-subunit labels.
        * List of len 1 with some fragmentation heuristic, e.g.
         ["lig_resSeq+"]. will use the default of
         :obj:`mdciao.fragments.get_fragments`. See there for
         info on defaults and other heuristics.
        * List of len N that can mix different possibilities:
          * iterable of integers (lists or np.arrays, e.g. np.arange(20,30)
          * ranges expressed as integer strings, "20-30"
          * ranges expressed as residue descriptors ["GLU30-LEU40"]
        Numeric expressions are interpreted as zero-indexed and unique
        residue serial indices, i.e. 30-40 does not necessarily equate
        "GLU30-LEU40" unless serial and sequence index coincide.
        If there's more than one "GLU30", the user gets asked to
        disambiguate. The resulting fragments need not cover all of the topology,
        they only need to not overlap.
    default_fragment_index : NoneType, default is None
        In case a residue identified as, e.g, "GLU30", appears
        more than one time in the topology, e.g. in case of
        a dimer, pass which fragment/monomer should be chosen
        by default. The default behaviour (None)
        will prompt the user when necessary
    fragment_names : str or list, default is ''
        If string, it has to be a list of comma-separated
        values. If you want unnamed fragments, use None,
        "None", or "". Has to contain names for all
        fragments that result from :obj:`fragments` or more.
        mdciao will try to use :obj:`replace4latex` to
        generate LaTeX expressions from stuff like "Galpha"
        You can use fragment_names="None" or "" to avoid
        using fragment names
    output_dir : str, default is '.'
        directory to which the results are written
    graphic_ext : str, default is '.pdf'
        Extension of the output graphics, default is .pdf
    t_unit : str, default is 'ns'
        Unit used for the temporal axis.
    curve_color : str, default is 'auto'
        Type of color used for the curves. Alternatives are
        "P" or "H"
    gray_background : bool, default is False
        Use gray background when using smoothing windows
    graphic_dpi : int, default is 150
        Dots per Inch (DPI) of the graphic output. Only has
        an effect for bitmap outputs.
    short_AA_names : bool, default is False
        Use one-letter aminoacid names when possible, e.g.
        K145 instead of Lys145.
    save_nomenclature_files : bool, default is False
        Save available nomenclature definitions to disk so
        that they can be accessed locally in later uses.
    ylim_Ang : float or str, default is 10
        Limit in Angstrom of the y-axis of the time-traces.
        Switch to any other float or 'auto' for automatic scaling.
        Anything other than the string 'auto' is cast to float.
    n_jobs : int, default is 1
        Number of processors to use. The parallelization is
        done over trajectories and not over contacts, beyond
        n_jobs>n_trajs parallelization will not have any
        effect
    accept_guess : bool, default is False
        Accept mdciao's guesses regarding fragment
        identification using nomenclature labels
    table_ext : str, default is 'dat'
        Extension for tabled files (.dat, .txt, .xlsx).
    output_desc : str, default is 'sites'
        Descriptor for output files.
    plot_atomtypes : bool, default is False
        Add the atom-types to the frequency bars by
        'hatching' them. '--' is sidechain-sidechain '|' is
        backbone-backbone '\' is backbone-sidechain '/' is
        sidechain-backbone. See Fig XX for an example
    distro : bool, default is False
        Plot distance distributions instead of contact bar plots
    savefigs : bool, default is True
        Save the figures
    savetabs : bool, default is True
        Save the frequency tables
    savetrajs : bool, default is False
        Save the timetraces
    no_disk : bool, default is False
        If True, don't save any files at all:
        figs, tables, trajs, nomenclature
    figures : bool, default is True
        Draw the per-site time-trace figures (together with
        :obj:`plot_timedep`). The overall panel figure
        with all sites is created (and saved if
        :obj:`savefigs` is True) regardless of this flag.
    plot_timedep : bool, default is True
        Plot time-traces of the contacts

    Returns
    -------
    CG_site : dictionary
        Keyed with the site name, its values are the
        :obj:`mdciao.contacts.ContactGroup`-objects,
        that conform each site

    """

    # 'auto' is a documented value (automatic y-scaling) and must not be cast.
    # The builtin float is used because the numpy.float alias was removed in NumPy 1.24.
    if not (isinstance(ylim_Ang, str) and ylim_Ang == "auto"):
        ylim_Ang = float(ylim_Ang)
    _offer_to_create_dir(output_dir)
    xtcs, refgeom = _trajsNtop2xtcsNrefgeom(trajectories, topology)
    fn = _mdcu.str_and_dict.FilenameGenerator(output_desc, ctc_cutoff_Ang, output_dir,
                                              graphic_ext, table_ext, graphic_dpi, t_unit)
    # no_disk overrides every individual saving option
    if no_disk:
        savetrajs = False
        savefigs  = False
        savetabs = False
        save_nomenclature_files = False

    print("Will compute the sites\n %s\nin the trajectories:\n%s\n with a stride of %u frames.\n" % (
        "\n ".join([_mdcsites.site2str(ss) for ss in site_inputs]),
        _mdcu.str_and_dict.inform_about_trajectories(xtcs, only_show_first_and_last=15),stride))

    # TODO decide if/to expose _fragments_strings_to_fragments or refactor it elsewhere
    fragments_as_residue_idxs, user_wants_consensus = _mdcfrg.fragments._fragments_strings_to_fragments(fragments, refgeom.top, verbose=True)
    fragment_names = _parse_fragment_naming_options(fragment_names, fragments_as_residue_idxs)
    fragment_defs, consensus_maps, __ = \
        _parse_consensus_options_and_return_fragment_defs({"GPCR": GPCR_uniprot,
                                                           "CGN": CGN_PDB},
                                                          refgeom.top,
                                                          fragments_as_residue_idxs,
                                                          accept_guess=accept_guess,
                                                          save_nomenclature_files=save_nomenclature_files)
    sites = [_mdcsites.x2site(ff) for ff in site_inputs]
    # ctc_idxs_small holds the residue pairs needed by all sites,
    # site_maps[i] holds the indices (rows of ctc_idxs_small) used by the i-th site
    ctc_idxs_small, site_maps = _mdcsites.sites_to_res_pairs(sites, refgeom.top,
                                                                    fragments=fragments_as_residue_idxs,
                                                                    default_fragment_idx=default_fragment_index,
                                                                    fragment_names=fragment_names)

    # Inform about every residue that takes part in any of the sites
    print('%10s  %10s  %10s  %10s %10s %10s' % tuple(("residue  residx fragment  resSeq GPCR  CGN".split())))
    for idx in _np.unique(ctc_idxs_small):
        residue = refgeom.top.residue(idx)
        print('%10s  %10u  %10u %10u %10s %10s' % (residue, idx,
                                                   _mdcu.lists.in_what_fragment(idx, fragments_as_residue_idxs),
                                                   # the sequence index, not (again) the serial index
                                                   residue.resSeq,
                                                   consensus_maps[0][idx], consensus_maps[1][idx]))

    ctcs, time_array, at_pair_trajs = _mdcctcs.trajs2ctcs(xtcs, refgeom.top, ctc_idxs_small, stride=stride,
                                       chunksize=chunksize_in_frames,
                                       return_times_and_atoms=True, consolidate=False, periodic=pbc,
                                       scheme=scheme,
                                       n_jobs=n_jobs)

    # Abstract each site to a group of contacts and fragments
    site_as_gc = {}
    for isite, imap in zip(sites, site_maps):
        key = isite["name"]
        site_pairs = []
        for pair_idx in imap:
            pair = ctc_idxs_small[pair_idx]
            consensus_labels = [_mdcnomenc.choose_between_consensus_dicts(ridx, consensus_maps) for ridx in pair]
            fragment_idxs = [_mdcu.lists.in_what_fragment(ridx, fragments_as_residue_idxs) for ridx in pair]
            site_pairs.append(_mdcctcs.ContactPair(pair,
                                                   [itraj[:, pair_idx] for itraj in ctcs],
                                                   time_array,
                                                   top=refgeom.top,
                                                   consensus_labels=consensus_labels,
                                                   trajs=xtcs,
                                                   fragment_idxs=fragment_idxs,
                                                   fragment_names=[fragment_names[fidx] for fidx in fragment_idxs],
                                                   # two atom-columns per residue pair
                                                   atom_pair_trajs=[itraj[:, [pair_idx * 2, pair_idx * 2 + 1]]
                                                                    for itraj in at_pair_trajs]
                                                   ))
        site_as_gc[key] = _mdcctcs.ContactGroup(site_pairs, name="site '%s'" % key)
    overall_fig = _mdcplots.CG_panels(4, site_as_gc, ctc_cutoff_Ang,
                               distro=distro,
                               short_AA_names=short_AA_names,
                               plot_atomtypes=plot_atomtypes,
                               verbose=True)

    overall_fig.tight_layout(h_pad=2, w_pad=0, pad=0)
    if any([savetabs,savefigs,savetrajs]):
        print("The following files have been created:")

    if savefigs:
        overall_fig.savefig(fn.fullpath_overall_fig, dpi=graphic_dpi)
        print(fn.fullpath_overall_fig)
    _plt.close(overall_fig)

    if savetabs:
        for site_name, isite_nh in site_as_gc.items():
            isite_nh.frequency_table(ctc_cutoff_Ang,
                                     fn.fname_per_site_table(site_name),
                                     write_interface=False,
                                     atom_types=True,
                                     # AA_format="long",
                                     )
            print(fn.fname_per_site_table(site_name))

    if figures and plot_timedep:
        panelheight = 4
        for site_name, isite_nh in site_as_gc.items():
            myfig = isite_nh.plot_timedep_ctcs(panelheight,
                                               color_scheme=_color_schemes(curve_color),
                                               ctc_cutoff_Ang=ctc_cutoff_Ang,
                                               n_smooth_hw=n_smooth_hw,
                                               dt=_mdcu.str_and_dict.tunit2tunit["ps"][t_unit],
                                               t_unit=t_unit,
                                               gray_background=gray_background,
                                               shorten_AAs=short_AA_names,
                                               plot_N_ctcs=True,
                                               ylim_Ang=ylim_Ang,
                                               )

            _manage_timedep_ploting_and_saving_options(isite_nh, fn, myfig,
                                                       plot_timedep=True,
                                                       separate_N_ctcs=False,
                                                       title="site: %s" % site_name,
                                                       savefigs=savefigs,
                                                       savetrajs=savetrajs
                                                       )


    return site_as_gc

def compare(datasets, graphic_ext=".pdf", output_desc="freq_comparison", pop=False, **kwargs):
    r"""

    Compare contact frequencies across different sets of data

    Besides returning the comparison, a figure file and an .xlsx
    file (both named after :obj:`output_desc`) are written to disk.

    Parameters
    ----------
    datasets : iterable (list or dict)
        The datasets to compare with each other.
        If dict, then the keys will be used as names
        for the contact groups, e.g. "WT", "MUT" etc.
        If list, then  the keys will be auto-generated.
        The entries of the list/dictionary can be:
          * :obj:`~mdciao.contacts.ContactGroup` objects.
            For these, a :obj:`ctc_cutoff_Ang` value
            needs to be passed along, otherwise frequencies
            cannot be computed on-the-fly.
          * dictionaries where the keys are residue-pairs,
            one letter-codes, no fragment info,
            as in :obj:`mdciao.contacts.ContactGroup.ctc_labels_short`
            and the values are contact frequencies
          * files generated by (or in the same format as)
            :obj:`~mdciao.contacts.ContactGroup.frequency_table`

            * ascii-files with the contact labels in the second and frequencies in
              the third column, see :obj:`~mdciao.contacts.ContactGroup.frequency_str_ASCII_file`
            * .xlsx files with the header in the second row,
              containing at least the column-names "label" and "freqs", see
              :obj:`~mdciao.contacts.ContactGroup.frequency_spreadsheet`
    graphic_ext : str, default is ".pdf"
        The extension for figures
    output_desc : str, default is 'freq_comparison'
        Descriptor for output files. Leading/trailing dots
        are stripped and blank spaces are replaced with underscores.
    pop : bool, default is False
        Use :obj:`~matplotlib.pyplot.show` to
        force the figure to be drawn.
    kwargs : dict
        Optional arguments for
        :obj:`~mdciao.plots.compare_groups_of_contacts`

    Returns
    -------
    myfig : :obj:`~matplotlib.figure.Figure`
        Figure with the comparison plot
    freqs : dictionary
        Unified frequency dictionaries,
        including mutations and anchor
    plotted_freqs : dictionary
        Like :obj:`freqs` but sorted and purged
        according to the user-defined input options,
        s.t. it represents the plotted values
    """
    myfig, freqs, plotted_freqs = _mdcplots.compare_groups_of_contacts(datasets, **kwargs)
    myfig.tight_layout()

    output_desc = output_desc.strip(".").replace(" ", "_")
    fname = "%s.%s" % (output_desc, graphic_ext.strip("."))
    print("Created files")
    myfig.savefig(fname)
    print(fname)

    fname_excel = "%s.xlsx" % output_desc
    # The context manager writes and closes the file on exit. ExcelWriter.save()
    # is not used because it was removed in pandas 2.0.
    with _ExcelWriter(fname_excel, engine='xlsxwriter') as writer:
        workbook = writer.book

        # First sheet: only the frequencies that made it into the plot
        sheet1_name = "plotted frequencies"
        writer.sheets[sheet1_name] = workbook.add_worksheet(sheet1_name)
        offset = 0
        header = 'pairs by contact frequency'
        if "anchor" in kwargs:
            header += "(anchor was %s)" % kwargs["anchor"]
        writer.sheets[sheet1_name].write_string(0, offset, header)
        # The table goes one row below the title
        offset += 1
        _DF.from_dict(plotted_freqs).round({"freq": 2, "sum": 2}).to_excel(writer,
                                                                           sheet_name=sheet1_name,
                                                                           startrow=offset,
                                                                           startcol=0,
                                                                           )

        # Second sheet: all frequencies
        # NOTE(review): offset is still 1 here, so this title is written on the same
        # row where the table below starts, whereas on the first sheet the title sits
        # one row above the table -- confirm whether this layout is intended
        sheet2_name = "all frequencies"
        writer.sheets[sheet2_name] = workbook.add_worksheet(sheet2_name)
        writer.sheets[sheet2_name].write_string(offset, 0, 'pairs by contact frequency')
        _DF.from_dict(freqs).round({"freq": 2, "sum": 2}).to_excel(writer,
                                                                   sheet_name=sheet2_name,
                                                                   startrow=offset,
                                                                   startcol=0,
                                                                   )
    print(fname_excel)
    if pop:
        myfig.tight_layout()
        _plt.show()

    return myfig, freqs, plotted_freqs

def pdb(code,
        filename=None,
        verbose=True,
        url="https://files.rcsb.org/download/",
        ):
    r""" Return a :obj:`~mdtraj.Trajectory` from a four-letter PDB code via RCSB PDB lookup

    Thinly wraps around :obj:`mdciao.pdb.pdb2traj`, which wraps around :obj:`mdtraj.load_pdb`
    and prints the corresponding citation.

    Will return None if lookup fails

    Parameters
    ----------
    code : str
        four-letter code, e.g. 3SN6
    filename : str, default is None
        if str, save to this file,
        eventually overwriting
    verbose : bool, default is True
        Be verbose
    url : str, default is 'https://files.rcsb.org/download/'
        base URL for lookups

    Returns
    -------
    traj : :obj:`~mdtraj.Trajectory` or None
    """

    # All options are forwarded untouched, this function adds no logic of its own
    lookup_options = dict(filename=filename, verbose=verbose, url=url)
    return _mdcpdb.pdb2traj(code, **lookup_options)

def _res_resolver(res_range, top, fragments, midstring=None, GPCR_uniprot=None, CGN_PDB=None,
                  save_nomenclature_files=False, accept_guess=False, **rangeexpand_residues2residxs_kwargs):
    r"""
    Turn a residue-range expression into residue indices and print them as a table

    The consensus nomenclature options are parsed first, so that
    the resulting consensus maps can be used as additional
    naming dictionaries when expanding :obj:`res_range`.

    Parameters
    ----------
    res_range :
        The residue expression, passed as is to
        :obj:`mdciao.utils.residue_and_atom.rangeexpand_residues2residxs`
    top :
        The topology where the residues are looked up
    fragments :
        The fragment definitions
    midstring : str, default is None
        If not None, it gets printed right before the table
    GPCR_uniprot, CGN_PDB :
        Nomenclature options, passed to
        :obj:`_parse_consensus_options_and_return_fragment_defs`
        under the keys "GPCR" and "CGN", respectively
    save_nomenclature_files : bool, default is False
    accept_guess : bool, default is False
    rangeexpand_residues2residxs_kwargs :
        Optional keyword arguments for
        :obj:`mdciao.utils.residue_and_atom.rangeexpand_residues2residxs`

    Returns
    -------
    res_idxs_list, consensus_maps, consensus_frags
        The residue indices, the consensus maps as a dictionary
        keyed with "GPCR" and "CGN", and the consensus fragments
    """
    nomenclature_options = {"GPCR": GPCR_uniprot,
                            "CGN": CGN_PDB}
    consensus_frags, maps_as_list, consensus_labelers = _parse_consensus_options_and_return_fragment_defs(
        nomenclature_options,
        top,
        fragments,
        verbose=True,
        save_nomenclature_files=save_nomenclature_files,
        accept_guess=accept_guess)

    # From here on, the maps are keyed by the type of nomenclature
    gpcr_map, cgn_map = maps_as_list[0], maps_as_list[1]
    consensus_maps = {"GPCR": gpcr_map,
                      "CGN": cgn_map}

    res_idxs_list = _mdcu.residue_and_atom.rangeexpand_residues2residxs(
        res_range, fragments, top,
        pick_this_fragment_by_default=None,
        additional_resnaming_dicts=consensus_maps,
        **rangeexpand_residues2residxs_kwargs,
    )

    if midstring is not None:
        print(midstring)

    # Table header followed by one line per resolved residue
    column_names = ("residue", "residx", "fragment", "resSeq", "GPCR", "CGN")
    print('%10s  %10s  %10s  %10s %10s %10s' % column_names)
    for residx in res_idxs_list:
        parent_fragment = _mdcu.lists.in_what_fragment(residx, fragments)
        table_line = _mdcu.residue_and_atom.residue_line("", top.residue(residx),
                                                         parent_fragment,
                                                         consensus_maps=consensus_maps,
                                                         table=True)
        print(table_line)

    return res_idxs_list, consensus_maps, consensus_frags

def residue_selection(expression,
                      top, GPCR_uniprot=None,
                      CGN_PDB=None,
                      save_nomenclature_files=False,
                      accept_guess=False,
                      fragments=None):
    r"""
    Find residues in an input topology using Unix filename pattern matching
    like in an 'ls' Unix operation.

    The matching residues are also printed to the terminal as a table.

    Parameters
    ----------
    expression : str
        Unix-like expressions and ranges are allowed, e.g.
        'GLU,PH*,380-394,3.50,GH.5*.', as are consensus
        descriptors if consensus labels are provided
    top : str, :obj:`~mdtraj.Trajectory`, or :obj:`~mdtraj.Topology`
        The topology to use.
    GPCR_uniprot : str or :obj:`mdciao.nomenclature.LabelerGPCR`, default is None
        Try to find Ballesteros-Weinstein definitions. If str, e.g. "adrb2_human",
        try to locate a local filename or do a web lookup in the GPCRdb.
        If :obj:`~mdciao.nomenclature.LabelerGPCR`, use this object directly.
        See :obj:`mdciao.nomenclature` for more info and references.
    CGN_PDB : str or :obj:`mdciao.nomenclature.LabelerCGN`, default is None
        Try to find Common G-alpha Numbering definitions. If str, e.g. "3SN6",
        try to locate local filenames ("3SN6.pdb", "CGN_3SN6.txt") or do web lookups
        in https://www.mrc-lmb.cam.ac.uk/CGN/ and http://www.rcsb.org/.
        If :obj:`mdciao.nomenclature.LabelerCGN`, use this object directly.
    save_nomenclature_files : bool, default is False
        Save available nomenclature definitions to disk so
        that they can be accessed locally in later uses.
    accept_guess : bool, default is False
        Accept mdciao's guesses regarding fragment
        identification using nomenclature labels
    fragments : list, default is None
        Fragment control.

        * None: use the default method of
          :obj:`~mdciao.fragments.get_fragments`,
          currently 'lig_resSeq+'
        * ["consensus"] : use things like "TM*" or "G.H*", i.e.
          Ballesteros-Weinstein or CGN-sub-subunit labels.
        * List of len 1 with some fragmentation heuristic, e.g.
          ["lig_resSeq+"]. will use the default of
          :obj:`mdciao.fragments.get_fragments`. See there for
          info on defaults and other heuristics.
        * List of len N that can mix different possibilities:

          * iterable of integers (lists or np.arrays, e.g. np.arange(20,30)
          * ranges expressed as integer strings, "20-30"
          * ranges expressed as residue descriptors ["GLU30-LEU40"]

        Numeric expressions are interpreted as zero-indexed and unique
        residue serial indices, i.e. 30-40 does not necessarily equate
        "GLU30-LEU40" unless serial and sequence index coincide.
        If there's more than one "GLU30", the user gets asked to
        disambiguate. The resulting fragments need not cover all of the topology,
        they only need to not overlap.

    Returns
    -------
    res_idxs_list : np.ndarray
        The residue indices of the residues
        that match the :obj:`expression`
    frags : list of integers
        Whatever fragments the user chose
    consensus_maps : dict
        Keys are currently just 'GPCR' and 'CGN'
        Values are lists of len :obj:`topology.n_residues`
        with the consensus labels. All labels
        will be None if no consensus info
        was provided

    """
    # Anything that isn't already a topology gets loaded as a geometry first
    _top = top if isinstance(top, _md.Topology) else _load_any_geom(top).top

    if fragments is None:
        # Fall back to whatever get_fragments declares as its default method
        default_method = _signature(_mdcfrg.get_fragments).parameters["method"].default
        fragments = [default_method]

    _frags, __ = _mdcfrg.fragments._fragments_strings_to_fragments(
        _mdcu.lists.force_iterable(fragments),
        _top,
        verbose=True)

    # just_inform is forwarded, via _res_resolver's **kwargs,
    # to the expansion of the residue expression
    res_idxs_list, consensus_maps, __ = _res_resolver(
        expression, _top, _frags,
        midstring="Your selection '%s' yields:" % expression,
        GPCR_uniprot=GPCR_uniprot,
        CGN_PDB=CGN_PDB,
        save_nomenclature_files=save_nomenclature_files,
        accept_guess=accept_guess,
        just_inform=True)

    return res_idxs_list, _frags, consensus_maps


def fragment_overview(topology,
             methods=['all'],
             AAs=None,
             ):

    """
    Print the fragments obtained by :obj:`get_fragments` for the available methods.

    This is a thin wrapper around :obj:`mdciao.fragments.overview`,
    to which all arguments are passed unchanged.

    Optionally, you can pass along a list of residue
    descriptors to be printed after the fragments have
    been shown.

    Parameters
    ----------
    topology : :obj:`mdtraj.Topology`
        The molecular topology to fragment
    methods : str or list of strings, default is ['all']
        Method(s) to be used for obtaining fragments
    AAs : list, default is None
        Anything that :obj:`find_AA` can understand

    Returns
    -------
    fragments_out : dict
        The result of the fragmentation schemes keyed
        by their method name

    """
    # NOTE(review): the default of `methods` is a mutable list; it is not
    # modified here, only handed over to mdciao.fragments.overview
    fragments_out = _mdcfrg.overview(topology,
                                     methods=methods,
                                     AAs=AAs)
    return fragments_out
