# This example demonstrates using the library to make an mmCIF file of a
# typical single-template single-chain homology or comparative model, similar
# to those generated by MODELLER (https://salilab.org/modeller/) and deposited
# in the ModBase database (https://modbase.compbio.ucsf.edu/)

# For a more complete (but less documented) script to convert a complete
# ModBase PDB file into a corresponding mmCIF or BinaryCIF file, see
# https://github.com/salilab/modbase_utils/blob/main/modbase_pdb_to_cif.py

# Import used classes.
import modelcif
import modelcif.protocol
import modelcif.model
import modelcif.dumper
import modelcif.reference
import modelcif.qa_metric
import modelcif.alignment
# Different methods measure "sequence identity" in different ways, so import
# the class that matches the way Modeller understands it (number of identical
# aligned residues, divided by the length of the shorter sequence)
from modelcif.alignment import ShorterSequenceIdentity as SequenceIdentity
import ihm.citations
import modelcif.reader

# First, we create a system, which contains everything we know about the
# modeling. A single mmCIF file can contain multiple Systems, but in most
# cases we use just one:
system = modelcif.System(
    title='S54091 hypothetical protein YPR070w')

# Record who authored this file; for this entry that is the ModBase authors
modbase_authors = ('Pieper U', 'Webb B', 'Narayanan E', 'Sali A')
system.authors.extend(modbase_authors)

# Describe the software that was used in the modeling
modpipe_software = modelcif.Software(
    name='ModPipe',
    version='SVN.r1703',
    type='program',
    classification='comparative modeling',
    description='Comparative modeling pipeline',
    location='https://salilab.org/modpipe/')
# Anything we create has to end up reachable from the System, which keeps
# a plain list per object type. system.software (just like system.authors
# above) is the place for a Software object that nothing else refers to.
# Both Software objects here are referenced again later in this script,
# so we don't need to add them to that list explicitly.

modeller_software = modelcif.Software(
    name='MODELLER',
    version='SVN',
    type='program',
    classification='comparative modeling',
    description='Comparative modeling by satisfaction of spatial restraints',
    citation=ihm.citations.modeller,
    location='https://salilab.org/modeller/')

# Next, we define "entities", unique sequences in the system, as Entity
# objects. First, the template sequence:
template_e = modelcif.Entity(
    'DMACDTFIKCC', description='Template subunit')

# Then the target (model) sequence. It carries a reference that ties it
# to the corresponding UniProt entry:
uniprot_ref = modelcif.reference.UniProt(
    accession='Q12321', code='MED1_YEAST')
model_e = modelcif.Entity(
    'DSYVETLDCC', references=[uniprot_ref], description='Model subunit')

# Next, we define asymmetric units for everything we modeled.
# These roughly correspond to chains in a traditional PDB file. Multiple
# asymmetric units may map to the same entity (for example if there are
# several copies of a given protein).
asymA = modelcif.AsymUnit(model_e, id='A', details='Model subunit A')

# Asymmetric units are then collected into assemblies; ours has one member.
modeled_units = (asymA,)
modeled_assembly = modelcif.Assembly(modeled_units, name='Modeled assembly')

# In a similar fashion, we declare a Template for each chain that we used
# as a template structure, with a link to the reference structure database
# (PDB).
pdb_ref = modelcif.reference.PDB('3nc1')
# The template is used as-is, i.e. with an identity transformation
untransformed = modelcif.Transformation.identity()
template = modelcif.Template(
    name="Template Structure", entity=template_e,
    asym_id='A', model_num=1,
    references=[pdb_ref], transformation=untransformed)


# Now, we describe the alignment between target and template.
# The modelcif library provides various subclasses to use here. All ModBase
# structures use a simple pairwise global alignment between target and
# template, so declare a suitable class:
class Alignment(modelcif.alignment.Global, modelcif.alignment.Pairwise):
    # Nothing is added or overridden here: the class only combines the
    # Global and Pairwise base classes from modelcif.alignment.
    # NOTE(review): a "#" comment is used rather than a docstring because
    # the library may read class docstrings at runtime - confirm before
    # adding one.
    pass


# An alignment consists of a list of aligned target-template segments.
# Here we provide the residue ranges and the actual alignment, including gaps,
# between the two, together with the sequence identity and any score available
# for the alignment (here we have the BLAST e-value):
# Template residues 1-9 and target residues 1-8, written with the gap
template_segment = template.segment("DMACDTFIK", 1, 9)
target_segment = asymA.segment("DSYV-ETLD", 1, 8)
aligned_pair = modelcif.alignment.Pair(
    template=template_segment,
    target=target_segment,
    identity=SequenceIdentity(45.0),
    score=modelcif.alignment.BLASTEValue(1e-15))
aln = Alignment(
    name="Modeling alignment", pairs=[aligned_pair],
    software=modpipe_software)
# No other object owns an alignment, so register it with the System
# directly:
system.alignments.append(aln)

# For the actual model coordinates, we must subclass a suitable class and
# override the get_atoms() method to return a list of Atom objects. This design
# avoids having a separate copy of every atom in memory.
# Modeller models are comparative or homology models, so we subclass
# HomologyModel. For the purposes of this example, we just return a simple
# static list of atoms:

# One CA carbon per residue of chain A; each row is
# (chain, seq_id, type_symbol, atom_id, x, y, z), with residues numbered
# from 1 in the order their coordinates are listed.
atoms = [
    ('A', seq_id, 'C', 'CA', x, y, z)
    for seq_id, (x, y, z) in enumerate(
        ((1., 2., 3.), (4., 5., 6.), (7., 8., 9.), (10., 11., 12.)),
        start=1)
]


class MyModel(modelcif.model.HomologyModel):
    # Chain name used in the atom table -> the AsymUnit object it denotes
    asym_unit_map = dict(A=asymA)

    def get_atoms(self):
        # Produce Atom objects one at a time, one per row of the
        # module-level ``atoms`` table, instead of building a list.
        for row in atoms:
            chain, resnum, element, name, x, y, z = row
            unit = self.asym_unit_map[chain]
            yield modelcif.model.Atom(
                asym_unit=unit, seq_id=resnum, atom_id=name,
                type_symbol=element, x=x, y=y, z=z)


# Link the model to the Assembly that describes all subunits
model = MyModel(name='Best scoring model', assembly=modeled_assembly)

# Now describe how the model was made, as an ordered list of steps:
# template search, then modeling, then model selection.
search_step = modelcif.protocol.TemplateSearchStep(
    name='ModPipe Seq-Prf (0001)',
    software=modpipe_software,
    input_data=model_e,
    output_data=aln)
modeling_step = modelcif.protocol.ModelingStep(
    software=modeller_software,
    input_data=aln,
    output_data=model)
selection_step = modelcif.protocol.ModelSelectionStep(
    software=modpipe_software,
    input_data=model,
    output_data=model)

protocol = modelcif.protocol.Protocol()
for step in (search_step, modeling_step, selection_step):
    protocol.steps.append(step)
# No other object owns a protocol, so register it with the System
# directly:
system.protocols.append(protocol)


# We can also attach quality scores to our model(s). To do this we must
# first define the scores by creating subclasses using a MetricMode
# (e.g. global, per-residue) and a MetricType (e.g. distance, z-score).
# Here we define the quality scores used by the ModPipe pipeline that is used
# by ModBase. Note that one score (MPQS) uses a custom metric type, while
# another (zDOPE) is a simple global z-score:
class MPQSMetricType(modelcif.qa_metric.MetricType):
    """composite score, values >1.1 are considered reliable"""
    # Custom metric type for the MPQS score defined below; it adds nothing
    # beyond its docstring.
    # NOTE(review): the docstring above is presumably emitted by modelcif
    # as the description of this metric type, so treat it as output data
    # rather than as a comment - confirm before rewording it.


# Global (whole-model) metric that uses the custom MPQSMetricType.
# NOTE(review): modelcif presumably takes the metric's name from the class
# name and its description from the docstring, so treat both as output
# data - confirm before renaming or rewording.
class MPQS(modelcif.qa_metric.Global, MPQSMetricType):
    """ModPipe Quality Score"""
    # Presumably the software that calculates this score (ModPipe)
    software = modpipe_software


# Global (whole-model) z-score metric.
# NOTE(review): modelcif presumably takes the metric's name from the class
# name and its description from the docstring, so treat both as output
# data - confirm before renaming or rewording.
class zDOPE(modelcif.qa_metric.Global, modelcif.qa_metric.ZScore):
    """Normalized DOPE"""
    # Presumably the software that calculates this score (MODELLER)
    software = modeller_software


# Global (whole-model) distance metric.
# NOTE(review): modelcif presumably takes the metric's name from the class
# name and its description from the docstring, so treat both as output
# data - confirm before renaming or rewording.
class TSVModRMSD(modelcif.qa_metric.Global, modelcif.qa_metric.Distance):
    """TSVMod predicted RMSD (MSALL)"""
    # No Software object is attached to this metric
    software = None


# Global (whole-model) metric of the NormalizedScore type.
# NOTE(review): modelcif presumably takes the metric's name from the class
# name and its description from the docstring, so treat both as output
# data - confirm before renaming or rewording.
class TSVModNO35(modelcif.qa_metric.Global,
                 modelcif.qa_metric.NormalizedScore):
    """TSVMod predicted native overlap (MSALL)"""
    # No Software object is attached to this metric
    software = None


# Add qa metrics to the model
global_scores = (
    MPQS(0.853452),
    zDOPE(0.31),
    TSVModRMSD(12.996),
    TSVModNO35(0.143),
)
model.qa_metrics.extend(global_scores)

# All ModBase QA metrics are global, but the library also supports per-residue
# or pairwise (between two residues) scores. Here's a fictional example for a
# z-score on the 4th residue of the first chain in the model, and a distance
# score between the 1st and 3rd residues:


# Fictional per-residue (Local) z-score metric, used only to demonstrate
# non-global scores.
# NOTE(review): modelcif presumably takes the metric's name from the class
# name and its description from the docstring, so treat both as output
# data - confirm before renaming or rewording.
class SomeLocalScore(modelcif.qa_metric.Local, modelcif.qa_metric.ZScore):
    """A per-residue z-score"""
    # No Software object is attached to this metric
    software = None


# Fictional pairwise (LocalPairwise) distance metric between two residues,
# used only to demonstrate non-global scores.
# NOTE(review): modelcif presumably takes the metric's name from the class
# name and its description from the docstring, so treat both as output
# data - confirm before renaming or rewording.
class SomePairScore(modelcif.qa_metric.LocalPairwise,
                    modelcif.qa_metric.Distance):
    """A distance score between two residues"""
    # No Software object is attached to this metric
    software = None


# Per-residue score: attached to residue 4 of chain A
fourth_residue = asymA.residue(4)
model.qa_metrics.append(SomeLocalScore(fourth_residue, -0.1))

# Pairwise score: attached to residues 1 and 3 of chain A
first_residue, third_residue = asymA.residue(1), asymA.residue(3)
model.qa_metrics.append(SomePairScore(first_residue, third_residue, 1.0))

# Models are collected into a ModelGroup, and the group is registered with
# the top-level System. This group holds just our single model:
model_group = modelcif.model.ModelGroup([model], name='All models')
system.model_groups.append(model_group)

# Once the system is complete, we can write it out to an mmCIF file:
with open('output.cif', mode='w') as cif_out:
    # The dumper takes a list of Systems; we have just the one
    systems_to_write = [system]
    modelcif.dumper.write(cif_out, systems_to_write)

# We can also *read* an mmCIF file and create a set of Python objects from it.
# Here we read in the file we just created:
with open('output.cif') as cif_in:
    # Unpack the single System we expect back; any other count raises
    [loaded] = modelcif.reader.read(cif_in)

# Print each template's name, then each entity's description, followed by
# the IDs of its sequence components joined with dashes.
for tmpl in loaded.templates:
    component_ids = [comp.id for comp in tmpl.entity.sequence]
    print(tmpl.name, "-".join(component_ids))
for entity in loaded.entities:
    component_ids = [comp.id for comp in entity.sequence]
    print(entity.description, "-".join(component_ids))
