| Home | Trees | Indices | Help |
|
|---|
|
|
1 """
2 APIs for working with protein structure fragments and libraries.
3
4 This package contains the nuts and bolts of HHfrag. Everything here revolves
5 around the L{Target} class, which describes a protein structure prediction
6 target. One typically assigns fragments (L{Assignment}s) to the target and then
7 builds a fragment library with L{RosettaFragsetFactory}.
8
9 @note: Internal or legacy objects are intentionally left undocumented.
10 This typically indicates experimental code.
11 """
12
13 import os
14 import numpy
15
16 import csb.io
17 import csb.core
18 import csb.bio.utils
19 import csb.bio.structure
20 import csb.bio.sequence
21
22 from csb.bio.structure import SecondaryStructure
26
# Two-letter codes identifying the method that produced a fragment.
# NOTE(review): the enclosing definition (likely an enum-style container
# class) was stripped by the HTML extraction — verify against the full module.
27 ISites = 'IS'
28 HMMFragments = 'HH'
29 HHThread = 'TH'
30 HHfrag = HHThread  # alias: HHfrag fragments come from HH threading
31 Rosetta = 'NN'
32
38
# Baseline CA RMSD values keyed by fragment length (5..54 residues),
# monotonically increasing with length. Used elsewhere in this module to
# normalize a raw RMSD: "rmsd / RANDOM_RMSD[len(common)]". The name suggests
# these are expected RMSDs between unrelated (random) fragments of the given
# length — presumably precomputed empirically; TODO confirm provenance.
39 RANDOM_RMSD = { 5: 1.8749005857255376, 6: 2.4314283686276261, 7: 2.9021135267789608, 8: 3.2477716200172715, 9: 3.5469606556031708, 10: 3.8295465524456329,
40 11: 4.1343107114131783, 12: 4.3761697929053014, 13: 4.6707299668248394, 14: 4.9379016881069733, 15: 5.1809028645084911, 16: 5.4146957142595662,
41 17: 5.7135948448156988, 18: 5.9597935432566782, 19: 6.1337340535741962, 20: 6.3962825155503271, 21: 6.6107937773415166, 22: 6.8099096274123401,
42 23: 7.0435583846849639, 24: 7.2160956482560970, 25: 7.4547896324594962, 26: 7.6431870072434211, 27: 7.8727812194173836, 28: 8.0727393298443637,
43 29: 8.2551450998965326, 30: 8.4413583511786587, 31: 8.5958719774122052, 32: 8.7730435506242408, 33: 8.9970648837941649, 34: 9.1566521405105163,
44 35: 9.2828620878454728, 36: 9.4525824357923405, 37: 9.6322126445253300, 38: 9.7851684750961176, 39: 9.9891454649821476, 40: 10.124373939352028,
45 41: 10.284348528344765, 42: 10.390457305096271, 43: 10.565792044674239, 44: 10.676532740033737, 45: 10.789537132283652, 46: 11.004475543757550,
46 47: 11.064541647783571, 48: 11.231219875286985, 49: 11.319222637391441, 50: 11.485478165340824, 51: 11.607522494435521, 52: 11.700268836069840,
47 53: 11.831245255954073, 54: 11.918975893263905 }
50 """
51 Base class, representing a match between a fragment and its target.
52 """
53
55
56 self._id = id
57 self._qstart = qstart
58 self._qend = qend
59 self._probability = probability
60 self._rmsd = rmsd
61 self._tm_score = tm_score
62 self._qlength = qlength
63
64 @property
67
68 @property
71
72 @property
75
76 @property
79
80 @property
83
84 @property
87
88 @property
91
92 @property
95
96 @property
99
100 @property
103
104 @property
107
117
124
126 """
127 Fragment-based phi/psi angles predictor.
128
129 @param target: target protein, containing fragment assignments
130 @type target: L{Target}
131 @param threshold: RMSD distance threshold for L{FragmentCluster}-based filtering
132 @type threshold: float
133 @param extend: pick alternative, longer cluster reps, if possible
134 @type extend: bool
135 @param init: populate all L{FragmentCluster}s on instantiation. If False, this step
136 will be performed on demand (the first time C{predictor.compute()} is invoked)
137
138 @note: if C{init} is False, the first call to C{predictor.compute()} might take a long
139 time. Subsequent calls will be very fast.
140 """
141
143
144 if not isinstance(target, Target):
145 raise TypeError(target)
146 if target.matches.length == 0:
147 raise ValueError('This target has no fragment assignments')
148
149 self._target = target
150 self._threshold = float(threshold)
151 self._extend = bool(extend)
152
153 self._initialized = False
154 self._reps = {}
155 self._clusters = {}
156
157 if init:
158 self.init()
159
160 @property
163
164 @property
167
168 @property
171
173 """
174 Compute and cache all L{FragmentCluster}s.
175 """
176
177 self._reps = {}
178 self._clusters = {}
179
180 for residue in self.target.residues:
181 cluster = self._filter(residue)
182
183 if cluster is not None:
184 rep = cluster.centroid()
185 if rep.has_alternative:
186 rep.exchange()
187
188 self._reps[residue.native.rank] = rep
189 self._clusters[residue.native.rank] = cluster.items
190
191 self._initialized = True
192
194
195 try:
196 nodes = []
197 for ai in residue.assignments:
198 node = ClusterNode.create(ai.fragment)
199 nodes.append(node)
200
201 cluster = FragmentCluster(nodes, threshold=self.threshold)
202 cluster.shrink(minitems=0)
203
204 return cluster
205
206 except (ClusterExhaustedError, ClusterDivergingError):
207 return None
208
210
211 for r in self._target.residues:
212 if r.native.rank == rank:
213 return r
214
215 raise ValueError('Rank {0} is out of range'.format(rank))
216
218 """
219 Extract torsion angles from the L{ClusterRep} at residue C{#rank}.
220
221 @param rank: target residue rank
222 @type rank: int
223
224 @rtype: L{TorsionPredictionInfo}
225 """
226
227 residue = self._residue(rank)
228 rep = residue.filter(threshold=self.threshold, extend=self.extend)
229
230 if rep is None:
231 return None
232
233 else:
234 fragment = rep.centroid
235 torsion = fragment.torsion_at(rank, rank)[0]
236 ss = fragment.sec_structure_at(rank, rank)[0]
237
238 return TorsionPredictionInfo(rank, rep.confidence, torsion, ss, primary=True)
239
241 """
242 Extract torsion angles from all L{ClusterRep}s, covering residue C{#rank}.
243
244 @param rank: target residue rank
245 @type rank: int
246
247 @return: L{TorsionPredictionInfo} instances, sorted by confidence
248 @rtype: tuple of L{TorsionPredictionInfo}
249 """
250
251 if not self._initialized:
252 self.init()
253
254 prediction = []
255
256 for rep in self._reps.values():
257
258 if rep.centroid.qstart <= rank <= rep.centroid.qend:
259
260 fragment = rep.centroid
261 torsion = fragment.torsion_at(rank, rank)[0]
262 ss = fragment.sec_structure_at(rank, rank)[0]
263 info = TorsionPredictionInfo(rank, rep.confidence, torsion, ss)
264
265 if rep is self._reps.get(rank, None):
266 info.primary = True
267
268 prediction.append(info)
269
270 prediction.sort(reverse=True)
271 return tuple(prediction)
272
274 """
275 Filter the current fragment map and create a new, completely flat,
276 non-overlapping map built from centroids, assigned iteratively by
277 decreasing confidence. Centroids with lower confidence which overlap
278 with previously assigned centroids will be trimmed to fill existing
279 gaps only.
280
281 @return: L{TorsionPredictionInfo} instances, one for each target residue
282 @rtype: tuple of L{TorsionPredictionInfo}
283 """
284
285 if not self._initialized:
286 self.init()
287
288 prediction = []
289 slots = set(range(1, self.target.length + 1))
290
291 reps = list(self._reps.values())
292 reps.sort(key=lambda i: i.confidence, reverse=True)
293
294 for rep in reps:
295
296 for rank in range(rep.centroid.qstart, rep.centroid.qend + 1):
297 if rank in slots:
298 torsion = rep.centroid.torsion_at(rank, rank)[0]
299 ss = rep.centroid.sec_structure_at(rank, rank)[0]
300 info = TorsionPredictionInfo(rank, rep.confidence, torsion, ss, primary=True)
301
302 prediction.append(info)
303 slots.remove(rank)
304
305 for rank in slots:
306 prediction.append(TorsionPredictionInfo(rank, 0, None))
307
308 prediction.sort(key=lambda i: i.rank)
309 return tuple(prediction)
310
312 """
313 Extract all torsion angles coming from all fragments, which had survived
314 the filtering and cover residue C{#rank}.
315
316 @param rank: target residue rank
317 @type rank: int
318
319 @return: all L{TorsionAngles} for a cluster at the specified residue
320 @rtype: tuple of L{TorsionAngles}
321 """
322
323 if not self._initialized:
324 self.init()
325 if rank not in self._clusters:
326 return tuple()
327
328 angles = []
329
330 for node in self._clusters[rank]:
331 fragment = node.fragment
332 torsion = fragment.torsion_at(rank, rank)[0]
333 angles.append(torsion)
334
335 return tuple(angles)
336
339 """
340 Struct container for a single torsion angle prediction.
341
342 @param rank: target residue rank
343 @type rank: int
344 @param confidence: confidence of prediction
345 @type confidence: float
346 @param torsion: assigned phi/psi/omega angles
347 @type torsion: L{TorsionAngles}
348 @param dssp: assigned secondary structure
349 @type dssp: L{SecondaryStructureElement}
350 @param primary: if True, designates that the assigned angles are extracted
351 from the L{ClusterRep} at residue C{#rank}; otherwise: the
352 angles are coming from another, overlapping L{ClusterRep}
353
354 """
355
357
358 self.rank = rank
359 self.confidence = confidence
360 self.torsion = torsion
361 self.primary = primary
362 self.dssp = dssp
363
365 """
366 @return: convert this prediction to a tuple: (confidence, phi, psi, omega)
367 @rtype: tuple
368 """
369 return tuple([self.confidence, self.torsion.phi, self.torsion.psi, self.torsion.omega])
370
372 return '<TorsionPredictionInfo: {0.confidence:6.3f} at #{0.rank}>'.format(self)
373
376
388
399
401 """
402 Represents a protein structure prediction target.
403
404 @param id: target sequence ID, in PDB accnC format
405 @type id: str
406 @param length: total target sequence length
407 @type length: int
408 @param residues: a list, containing target's residues. See also
409 L{Target.from_sequence}
410 @type residues: iterable of L{csb.bio.structure.ProteinResidue}s
411 """
412
def __init__(self, id, length, residues, overlap=None, segments=None, factory=None):
    """
    Initialize a protein structure prediction target.

    @param id: target sequence ID, in PDB accnC format (accession + chain
        letter, e.g. '1abcA')
    @type id: str
    @param length: total target sequence length
    @type length: int
    @param residues: the target's residues (see also L{Target.from_sequence})
    @type residues: iterable of L{csb.bio.structure.ProteinResidue}
    @param overlap: optional overlap value, stored as-is — semantics not
        visible here, TODO confirm
    @param segments: optional segment definitions, indexed by their start rank
    @type segments: iterable of L{TargetSegment} or None
    @param factory: factory used to build residue wrappers and, via C{clone()},
        new targets; a fresh L{AssignmentFactory} when None
    """
    # NOTE(review): the original signature read "factory=AssignmentFactory()",
    # which evaluates once at definition time and shares a single factory
    # instance across every Target created without an explicit factory.
    # A None sentinel is the idiomatic, backward-compatible fix.
    if factory is None:
        factory = AssignmentFactory()

    self._id = id
    self._accession = id[:-1]   # accnC format: everything but the chain letter
    self._chain_id = id[-1]     # ... and the chain letter itself
    self._length = length
    self._overlap = overlap
    self._factory = factory

    self._assignments = csb.core.ReadOnlyCollectionContainer(type=Assignment)
    self._errors = csb.core.CollectionContainer()

    # wrap each native residue in the factory's residue decorator; ranks are 1-based
    resi = [factory.residue(native) for native in residues]
    self._residues = csb.core.ReadOnlyCollectionContainer(items=resi,
                                                          type=TargetResidue, start_index=1)

    # index segments by start rank; the container receives None when no
    # segments were supplied (matching the original behavior)
    if segments is not None:
        segments = dict((s.start, s) for s in segments)
    self._segments = csb.core.ReadOnlyDictionaryContainer(items=segments)
432
433 @staticmethod
435 """
436 Factory, which builds L{Target} objects from a bare sequence.
437
438 @param sequence: target's sequence
439 @type sequence: L{csb.bio.sequence.AbstractSequence}, str or iterable
440
441 @rtype: L{Target}
442 """
443
444 if isinstance(sequence, csb.bio.sequence.Sequence):
445 sequence = sequence.sequence
446
447 residues = []
448
449 for rn, aa in enumerate(sequence, start=1):
450 residue = csb.bio.structure.ProteinResidue(rank=rn, type=aa)
451 residues.append(residue)
452
453 return Target(id, len(residues), residues)
454
455 @staticmethod
457 """
458 Factory, which builds L{Target} objects from an HMM profile.
459
460 @param hmm: target's HMM
461 @type hmm: L{csb.bio.hmm.ProfileHMM}
462
463 @rtype: L{Target}
464 """
465
466 residues = [ r.clone() for r in hmm.residues ]
467 return Target(hmm.id, hmm.layers.length, residues)
468
469 @staticmethod
474
475 @property
478
479 @property
482
483 @property
486
487 @property
490
491 @property
494
495 @property
498
499 @property
502
503 @property
506
507 @property
510
511 @property
514
515 @property
518
520 """
521 Add a new fragment match.
522 @param fragment: fragment to assign
523 @type fragment: L{Assignment}
524 """
525
526 if not 1 <= fragment.qstart <= fragment.qend <= len(self._residues):
527 raise ValueError("Fragment out of range")
528
529 self._assignments._append_item(fragment)
530
531 for rank in range(fragment.qstart, fragment.qend + 1):
532 ai = ResidueAssignmentInfo(fragment, rank)
533 self._residues[rank].assign(ai)
534
535 if fragment.segment is not None:
536 try:
537 self._segments[fragment.segment].assign(fragment)
538 except KeyError:
539 raise ValueError("Undefined segment starting at {0}".format(fragment.segment))
540
542 """
543 Assign a bunch of fragments at once.
544 @type fragments: iterable of L{Assignment}s
545 """
546 for frag in fragments:
547 self.assign(frag)
548
550 """
551 Filter the current fragment map using a L{FragmentCluster}.
552
553 @param threshold: cluster RMSD threshold (see L{FragmentCluster})
554 @type threshold: float
555 @param extend: pick extended alternatives where possible (default=False)
556 @type extend: bool
557
558 @return: a new target, containing only cluster centroids/reps
559 @rtype: L{Target}
560 """
561
562 target = self.clone()
563
564 for residue in self.residues:
565 rep = residue.filter(threshold=threshold, extend=extend)
566
567 if rep is not None:
568 target.assign(rep.centroid)
569
570 return target
571
573 """
574 @return: a deep copy of the target
575 @rtype: L{Target}
576 """
577
578 segments = [self.segments[start] for start in self.segments]
579 segments = [TargetSegment(s.start, s.end, s.count) for s in segments]
580
581 target = self._factory.target(self.id, self.length, [r.native for r in self.residues],
582 overlap=self._overlap, segments=segments)
583
584 return target
585
587
589
590 super(ChemShiftTarget, self).__init__(id, length, residues, overlap=overlap,
591 segments=None, factory=ChemShiftAssignmentFactory())
592
594
595 if not 1 <= fragment.qstart <= fragment.qend <= len(self._residues):
596 raise ValueError("Fragment out of range")
597
598 self._assignments._append_item(fragment)
599
600 rank = fragment.qstart
601 ai = ResidueAssignmentInfo(fragment, rank)
602 self._residues[rank].assign(ai)
603
607
609 """
610 Wrapper around L{Target}'s native residues. Decorates them with additional,
611 fragment-related methods.
612
613 @type native_residue: L{csb.bio.structure.ProteinResidue}
614 """
615
617
618 self._type = native_residue.type
619 self._native = native_residue.clone()
620 self._assignments = csb.core.ReadOnlyCollectionContainer(type=ResidueAssignmentInfo)
621
622 @property
625
626 @property
629
630 @property
633
636
638 """
639 @return: the fragment with the lowest RMSD at this position in the L{Target}
640 @rtype: L{Assignment}
641 """
642
643 best = None
644
645 for ai in self.assignments:
646 a = ai.fragment
647 if a.length < FragmentCluster.MIN_LENGTH:
648 continue
649 if best is None or a.rmsd < best.rmsd:
650 best = a
651 elif a.rmsd == best.rmsd and a.length > best.length:
652 best = a
653
654 return best
655
657 """
658 Filter all fragments, covering this position in the L{Target} using a
659 L{FragmentCluster}.
660
661 @param method: one of the L{Metrics} members (default=L{Metrics.RMSD})
662 @type method: str
663 @param threshold: cluster RMSD threshold (see L{FragmentCluster})
664 @type threshold: float
665 @param extend: pick extended alternative where possible (default=False)
666 @type extend: bool
667
668 @return: cluster's representative (if converged) or None
669 @rtype: L{ClusterRep} or None
670 """
671
672 try:
673 nodes = []
674 for ai in self.assignments:
675 node = ClusterNode.create(ai.fragment, method, extend)
676 nodes.append(node)
677
678 cluster = FragmentCluster(nodes, threshold=threshold)
679
680 center = cluster.shrink(minitems=0)
681 if center.has_alternative:
682 center.exchange()
683
684 return center
685
686 except (ClusterExhaustedError, ClusterDivergingError):
687 return None
688
690 """
691 @return: the longest fragment, covering the current position
692 @rtype: L{Assignment}
693 """
694 best = None
695
696 for q in self.assignments:
697 if best is None or (q.fragment.length > best.length):
698 best = q.fragment
699
700 return best
701
703 """
704 @return: the residue-wise precision of the fragment library at the
705 current position (percentage).
706
707 @param threshold: true-positive RMSD cutoff (default=1.5)
708 @type threshold: float
709 @rtype: float
710 """
711
712 if self.assignments.length < 1:
713 return None
714 else:
715 positive = [a for a in self.assignments if a.fragment.rmsd <= threshold]
716 pos = len(positive) * 100.0 / self.assignments.length
717
718 return pos
719
721
723
724 best = None
725
726 for ai in self.assignments:
727 a = ai.fragment
728
729 if a.score < ChemShiftAssignment.BIT_SCORE_THRESHOLD * a.window:
730 continue
731
732 if best is None or a.score > best.score:
733 best = a
734 elif a.score == best.score and a.length > best.length:
735 best = a
736
737 return best
738
740
742
743 self._start = start
744 self._end = end
745 self._count = count
746
747 self._assignments = csb.core.ReadOnlyCollectionContainer(type=Assignment)
748
749 @property
752
753 @property
756
757 @property
760
761 @property
764
765 @property
768
770 if fragment.segment != self.start:
771 raise ValueError('Segment origin mismatch: {0} vs {1}'.format(fragment.segment, self.start))
772 else:
773 self._assignments._append_item(fragment)
774
776
777 best = None
778
779 for a in self.assignments:
780 if a.length < FragmentCluster.MIN_LENGTH:
781 continue
782 if best is None or a.rmsd < best.rmsd:
783 best = a
784 elif a.rmsd == best.rmsd and a.length > best.length:
785 best = a
786
787 return best
788
790
791 try:
792 cluster = FragmentCluster(self.assignments, threshold=1.5,
793 connectedness=0.5, method=method)
794 centroid = cluster.shrink(minitems=1)
795 return centroid
796
797 except ClusterExhaustedError:
798 return None
799 finally:
800 del cluster
801
803
804 best = None
805
806 for q in self.assignments:
807 if best is None or (q.length > best.length):
808 best = q
809
810 return best
811
813
814 rmsds = []
815
816 for q in self.assignments:
817 for s in self.assignments:
818 if q is not s:
819 r = q.rmsd_to(s, min_overlap)
820 if r is not None:
821 rmsds.append(r)
822 else:
823 assert q.rmsd_to(s, 1) < 0.01
824
825 return rmsds
826
828
829 mdas = []
830
831 for q in self.assignments:
832 for s in self.assignments:
833 if q is not s:
834 m = q.mda_to(s, min_overlap)
835 if m is not None:
836 mdas.append(m)
837 return mdas
838
840
841 from csb.bio.hmm import RELATIVE_SA
842 from csb.bio.io.hhpred import ScoreUnits, HHProfileParser
843
844 def convert_sa(sa):
845 return numpy.array([ RELATIVE_SA[i] for i in sa ])
846
847 sources = {}
848 scores = []
849
850 for q in self.assignments:
851 for s in self.assignments:
852
853 if s.source_id not in sources:
854 hmm = HHProfileParser(os.path.join(profiles, s.source_id + '.hhm')).parse()
855 sources[s.source_id] = hmm.dssp_solvent
856
857 if q is not s:
858
859 common = q.overlap(s)
860 if len(common) >= min_overlap:
861
862 qsa = q.solvent_at(sources[q.source_id], min(common), max(common))
863 ssa = s.solvent_at(sources[s.source_id], min(common), max(common))
864
865 if '-' in qsa + ssa:
866 continue
867
868 qsa = convert_sa(qsa)
869 ssa = convert_sa(ssa)
870 assert len(qsa) == len(ssa)
871 sa_rmsd = numpy.sqrt(numpy.sum((qsa - ssa) ** 2) / float(len(qsa)))
872
873 scores.append(sa_rmsd)
874 return scores
875
877
878 from csb.bio.hmm import BACKGROUND
879 back = numpy.sqrt(numpy.array(BACKGROUND))
880
881 sources = {}
882 scores = []
883
884 for q in self.assignments:
885 for s in self.assignments:
886
887 if s.source_id not in sources:
888 # hmm = HHProfileParser(os.path.join(hmm_path, s.source_id + '.hhm')).parse(ScoreUnits.Probability)
889 sources[s.source_id] = csb.io.Pickle.load(open(os.path.join(profiles, s.source_id + '.pkl'), 'rb'))
890
891 if q is not s:
892
893 common = q.overlap(s)
894 if len(common) >= min_overlap:
895
896 qprof = q.profile_at(sources[q.source_id], min(common), max(common))
897 sprof = s.profile_at(sources[s.source_id], min(common), max(common))
898
899 #score = qhmm.emission_similarity(shmm)
900 assert len(qprof) == len(sprof)
901 dots = [ numpy.dot(qprof[i] / back, sprof[i] / back) for i in range(len(qprof)) ]
902 score = numpy.log(numpy.prod(dots))
903 if score is not None:
904 scores.append(score)
905 return scores
906
908
909 binsize = float(binsize)
910 bins = numpy.ceil(numpy.array(data) / binsize)
911
912 hist = dict.fromkeys(bins, 0)
913 for bin in bins:
914 hist[bin] += (1.0 / len(bins))
915
916 freq = numpy.array(hist.values())
917 return - numpy.sum(freq * numpy.log(freq))
918
923
928
930
931 rmsds = self.pairwise_rmsd()
932
933 if len(rmsds) < 1:
934 return None
935
936 return sum([1 for i in rmsds if i <= threshold]) / float(len(rmsds))
937
939
940 sa_rmsds = self.pairwise_sa_rmsd(profiles=profiles)
941
942 if len(sa_rmsds) < 1:
943 return None
944
945 return sum([1 for i in sa_rmsds if i <= threshold]) / float(len(sa_rmsds))
946
948
949 if self.assignments.length < 1:
950 return None
951
952 return sum([1 for i in self.assignments if i.rmsd <= threshold]) / float(self.assignments.length)
953
955
956 cons = self.rmsd_consistency()
957
958 if cons is None:
959 return 0
960 else:
961 return numpy.log10(self.count) * cons
962
964
966
967 if not assignment.qstart <= rank <= assignment.qend:
968 raise ValueError('Rank {0} is not matched by this assignment')
969
970 self._assignment = assignment
971 self._rank = rank
972 self._relrank = rank - assignment.qstart
973
974 @property
976 return self._assignment.backbone[self._relrank]
977
978 @property
981
983 """
984 Represents a match between a fragment and its target.
985
986 @param source: source structure (must have torsion angles precomputed)
987 @type source: L{csb.bio.structure.Chain}
988 @param start: start position in C{source} (rank)
989 @type start: int
990 @param end: end position in C{source} (rank)
991 @type end: int
992 @param id: fragment ID
993 @type id: str
994 @param qstart: start position in target (rank)
995 @type qstart: int
996 @param qend: end position in target (rank)
997 @type qend: int
998 @param probability: probability of assignment
999 @type probability: float
1000 @param rmsd: RMSD of the fragment, compared to target's native structure
1001 @type rmsd: float
1002 """
1003
def __init__(self, source, start, end, qstart, qend, id=None, probability=None, rmsd=None,
             tm_score=None, score=None, neff=None, segment=None, internal_id=None):
    """
    A match between a source-structure fragment and the prediction target.

    @param source: source chain; torsion angles must be precomputed
    @type source: L{csb.bio.structure.Chain}
    @param start: first matched rank in C{source}
    @type start: int
    @param end: last matched rank in C{source}
    @type end: int
    @param qstart: first matched rank in the target
    @type qstart: int
    @param qend: last matched rank in the target
    @type qend: int
    @param id: fragment ID; auto-generated from source/start/end when None
    @type id: str
    @param probability: probability of the assignment
    @type probability: float
    @param rmsd: RMSD of the fragment against the target's native structure
    @type rmsd: float

    @raise Broken3DStructureError: if a residue in the matched region
        has no CA atom
    """
    # the caller is responsible for computing torsion angles beforehand
    assert source.has_torsion

    region = source.subregion(start, end, clone=True)
    try:
        ca_coords = [res.atoms['CA'].vector.copy() for res in region.residues]
    except csb.core.ItemNotFoundError:
        # a missing CA atom makes the fragment's backbone unusable
        raise csb.bio.structure.Broken3DStructureError()
    torsion_angles = [res.torsion.copy() for res in region.residues]

    self._calpha = csb.core.ReadOnlyCollectionContainer(items=ca_coords, type=numpy.ndarray)
    self._torsion = torsion_angles
    self._sequence = region.sequence

    # e.g. 4-char PDB accession + chain ID
    self._source_id = source.accession[:4] + source.id
    self._start = start
    self._end = end

    self._score = score
    self._neff = neff
    self._ss = None

    self._segment_start = segment
    self.internal_id = internal_id

    if id is None:
        id = "{0}:{1}-{2}".format(self.source_id, self.start, self.end)

    super(Assignment, self).__init__(id, qstart, qend, probability, rmsd, tm_score, None)

    # all-unassigned secondary structure by default; self.length is only
    # available after the base constructor has stored qstart/qend
    self._ss = SecondaryStructure('-' * self.length)
1036
1037 @staticmethod
1039 """
1040 Create a new L{Assignment} given a source rosetta fragment.
1041
1042 @param fragment: rosetta fragment
1043 @type fragment: L{RosettaFragment}
1044 @param provider: PDB database provider
1045 @type provider: L{StructureProvider}
1046
1047 @rtype: L{Assignment}
1048 """
1049 try:
1050 structure = provider.get(fragment.accession)
1051 except KeyError:
1052 structure = provider.get(fragment.source_id)
1053 source = structure.chains[fragment.chain]
1054 source.compute_torsion()
1055
1056 id = "{0}:{1}-{2}".format(fragment.source_id, fragment.start, fragment.end)
1057
1058 return Assignment(source, fragment.start, fragment.end,
1059 fragment.qstart, fragment.qend, id, 0, 0)
1060
1061 @property
1064
1065 @property
1068
1069 @property
1072
1073 @property
1076
1077 @property
1080
1081 @property
1084
1085 @property
1088
1089 @property
1092
1093 @property
1096
1097 @property
1100 @secondary_structure.setter
1102
1103 if isinstance(value, csb.core.string):
1104 value = csb.bio.structure.SecondaryStructure(value)
1105 if len(str(value)) != self.length:#(value.end - value.start + 1) != self.length:
1106 raise ValueError("Invalid secondary structure length", len(str(value)), self.length )
1107
1108 self._ss = value
1109
1111 """
1112 Apply rotation/translation to fragment's coordinates in place.
1113 """
1114
1115 for ca in self.backbone:
1116 newca = numpy.dot(ca, numpy.transpose(rotation)) + translation
1117 for i in range(3):
1118 ca[i] = newca[i]
1119
1121
1122 if not (self.qstart <= qstart <= qend <= self.qend):
1123 raise ValueError('Region {0}..{1} is out of range {2.qstart}..{2.qend}'.format(qstart, qend, self))
1124
1126 """
1127 @return: True if the fragment is centered around position=C{rank}.
1128 @rtype: bool
1129 """
1130
1131 if self.qstart < rank < self.qend:
1132 if (rank - self.qstart + 1) > 0.4 * (self.qend - self.qstart + 1):
1133 return True
1134
1135 return False
1136
1138 """
1139 @return: the CA coordinates of the fragment at the specified subregion.
1140 @rtype: list
1141 """
1142
1143 self._check_range(qstart, qend)
1144
1145 relstart = qstart - self.qstart
1146 relend = qend - self.qstart + 1
1147
1148 return self.backbone[relstart : relend]
1149
1151 """
1152 @return: the torsion angles of the fragment at the specified subregion.
1153 @rtype: list
1154 """
1155
1156 self._check_range(qstart, qend)
1157
1158 relstart = qstart - self.qstart
1159 relend = qend - self.qstart + 1
1160
1161 return self.torsion[relstart : relend]
1162
1164
1165 self._check_range(qstart, qend)
1166
1167 relstart = qstart - self.qstart
1168 relend = qend - self.qstart + 1
1169
1170 return sa_string[relstart : relend]
1171
1173
1174 self._check_range(qstart, qend)
1175 start = qstart - self.qstart + 1
1176 end = qend - self.qstart + 1
1177
1178 return self.secondary_structure.scan(start, end, loose=True, cut=True)
1179
1181
1182 self._check_range(qstart, qend)
1183
1184 start = qstart - self.qstart + self.start
1185 end = qend - self.qstart + self.start
1186
1187 if hasattr(source, 'subregion'):
1188 return source.subregion(start, end)
1189 else:
1190 return source[start - 1 : end]
1191
1193
1194 self._check_range(qstart, qend)
1195
1196 start = qstart - self.qstart + self.start
1197 end = qend - self.qstart + self.start
1198
1199 return source.subregion(start, end)
1200
1202 """
1203 @type other: L{Assignment}
1204 @return: target positions, covered by both C{self} and C{other}
1205 @rtype: set of int
1206 """
1207
1208 qranks = set(range(self.qstart, self.qend + 1))
1209 sranks = set(range(other.qstart, other.qend + 1))
1210
1211 return qranks.intersection(sranks)
1212
1214 """
1215 @return: the CA RMSD between C{self} and C{other}.
1216
1217 @param other: another fragment
1218 @type other: L{Assignment}
1219 @param min_overlap: require at least that number of overlapping residues
1220 (return None if not satisfied)
1221 @type min_overlap: int
1222
1223 @rtype: float
1224 """
1225
1226 common = self.overlap(other)
1227
1228 if len(common) >= min_overlap:
1229
1230 qstart, qend = min(common), max(common)
1231
1232 q = self.backbone_at(qstart, qend)
1233 s = other.backbone_at(qstart, qend)
1234
1235 if len(q) > 0 and len(s) > 0:
1236 return csb.bio.utils.rmsd(numpy.array(q), numpy.array(s))
1237
1238 return None
1239
1241
1242 common = self.overlap(other)
1243
1244 if len(common) >= min_overlap:
1245
1246 qstart, qend = min(common), max(common)
1247
1248 q = self.backbone_at(qstart, qend)
1249 s = other.backbone_at(qstart, qend)
1250
1251 if len(q) > 0 and len(s) > 0:
1252 return csb.bio.utils.rmsd(q, s) / RANDOM_RMSD[ len(common) ]
1253
1254 return None
1255
1257
1258 common = self.overlap(other)
1259
1260 if len(common) >= min_overlap:
1261
1262 qstart, qend = min(common), max(common)
1263
1264 q = self.torsion_at(qstart, qend)
1265 s = other.torsion_at(qstart, qend)
1266
1267 if len(q) > 0 and len(s) > 0:
1268
1269 maxphi = max(numpy.abs(i.phi - j.phi) for i, j in zip(q, s)[1:]) # phi: 2 .. L
1270 maxpsi = max(numpy.abs(i.psi - j.psi) for i, j in zip(q, s)[:-1]) # psi: 1 .. L-1
1271
1272 return max(maxphi, maxpsi)
1273
1274 return None
1275
1277 """
1278 @deprecated: this method will be deleted soon. Use
1279 L{csb.bio.fragments.rosetta.OutputBuilder} instead.
1280 """
1281 stream = csb.io.MemoryStream()
1282
1283 if weight is None:
1284 weight = self.probability
1285 if not qstart:
1286 qstart = self.qstart
1287 if not qend:
1288 qend = self.qend
1289
1290 source.compute_torsion()
1291 chain = self.chain_at(source, qstart, qend)
1292
1293 for i, r in enumerate(chain.residues):
1294
1295 acc = self.source_id[:4]
1296 ch = self.source_id[4].upper()
1297
1298 start = qstart - self.qstart + self.start + i
1299 aa = r.type
1300 ss = 'L'
1301 phi, psi, omega = 0, 0, 0
1302 if r.torsion.phi:
1303 phi = r.torsion.phi
1304 if r.torsion.psi:
1305 psi = r.torsion.psi
1306 if r.torsion.omega:
1307 omega = r.torsion.omega
1308
1309 stream.write(' {0:4} {1:1} {2:>5} {3!s:1} {4!s:1} {5:>8.3f} {6:>8.3f} {7:>8.3f} {8:>8.3f}\n'.format(acc, ch, start, aa, ss, phi, psi, omega, weight))
1310
1311 return stream.getvalue()
1312
1314
1315 BIT_SCORE_THRESHOLD = 1.1
1316
1318
1319 self._window = window
1320
1321 super(ChemShiftAssignment, self).__init__(
1322 source, start, end, qstart, qend, id=None, probability=1.0,
1323 rmsd=rmsd, tm_score=None, score=score, neff=None, segment=None, internal_id=None)
1324
1325 @property
1328
1331
1334
1337
1339 """
1340 Provides clustering/filtering of the fragments, covering a common residue
1341 in the target. Clustering is done via iterative shrinking of the cluster.
1342 At each iteration, node rejection (deletion) is attempted for each node. The
1343 node rejection, causing the most significant drop in the average pairwise
1344 distance (RMSD) in the cluster, is retained. This procedure is repeated
1345 until: 1) the average pairwise RMSD drops below the C{threshold} (converged),
1346 2) the cluster gets exhausted or 3) node rejection no longer
1347 causes a drop in the average distance (not converging).
1348
1349 @param items: cluster members
1350 @type items: iterable of L{ClusterNode}s
1351 @param threshold: RMSD threshold; continue shrinking until the mean distance
1352 drops below this value (default=1.5)
1353 @type threshold: float
1354 @param connectedness: use only nodes which are connected to at least c% of all
1355 initial nodes (default=0.5, that means 50%)
1356 @type connectedness: float
1357 """
1358
1359 MIN_LENGTH = 6
1360
1362
1363 items = set(i for i in items if i.fragment.length >= FragmentCluster.MIN_LENGTH)
1364
1365 self._matrix = {}
1366 self._threshold = float(threshold)
1367 self._connectedness = float(connectedness)
1368 self._weight = 0
1369 self._edges = 0
1370
1371 for i in items:
1372
1373 self._matrix[i] = {}
1374 #conn = 0.0
1375
1376 for j in items:
1377 distance = i.distance(j)
1378 if distance is not None:
1379 #conn += 1
1380 self._matrix[i][j] = distance
1381 self._edges += 1
1382 self._weight += distance
1383 i.weight += distance
1384
1385 #if conn / len(items) < self.connectedness:
1386 # # reject i as a first class node
1387 # del self._matrix[i]
1388
1389 self._items = set(self._matrix.keys())
1390
1391 if len(self._items) < 1:
1392 raise ClusterEmptyError()
1393
1394 self._initcount = self.count
1395
1396 @property
1399
1400 @property
1403
1404 @property
1406 return tuple(i.fragment for i in self._items)
1407
1408 @property
1411 @threshold.setter
1413 self._threshold = float(value)
1414
1415 @property
1418
1420
1421 d = []
1422
1423 for i in self._matrix:
1424 if skip is i:
1425 continue
1426
1427 for j in self._matrix[i]:
1428 if skip is not j:
1429 d.append(self._matrix[i][j])
1430
1431 return d
1432
1439
1441 """
1442 @return: the current mean distance in the cluster
1443 @rtype: float
1444 """
1445 if self._edges == 0:
1446 raise ClusterExhaustedError()
1447
1448 if not skip:
1449 return float(self._weight) / self._edges
1450
1451 else:
1452 weight = self._weight - 2 * skip.weight
1453 edges = self._edges - 2 * len(self._matrix[skip])
1454
1455 if edges < 1:
1456 return 0
1457 else:
1458 return float(weight) / edges
1459
1461 """
1462 @return: the current representative fragment
1463 @rtype: L{ClusterRep}
1464
1465 @note: the cluster rep is the node with the lowest average distance
1466 to all other nodes. If a fixed fragment exists, structurally similar
1467 to the rep, but longer, this fragment may be suggested as an alternative
1468 (see also L{ClusterRep}).
1469 """
1470
1471 alt = None
1472 cen = None
1473 avg = None
1474
1475 for i in self._matrix:
1476
1477 curravg = float(i.weight) / len(self._matrix[i])
1478 conn = len(self._matrix[i]) / float(self.count)
1479
1480 if avg is None or (curravg < avg and conn >= self.connectedness):
1481 avg = curravg
1482 cen = i
1483 elif curravg == avg:
1484 if i.fragment.length > cen.fragment.length:
1485 cen = i
1486
1487 d = self._distances()
1488 mean = numpy.mean(d)
1489 cons = sum(1.0 for i in d if i <= self.threshold) / len(d)
1490
1491 for i in self._matrix:
1492 if i is not cen and i.fixed and i.fragment.length > cen.fragment.length:
1493 distance = self._distance(i, cen)
1494 if distance is not None and distance < 0.5 * self.threshold:
1495 if alt is None or alt.fragment.length < i.fragment.length:
1496 alt = i
1497
1498 return ClusterRep(cen, mean, cons, len(self._matrix[cen]), alternative=alt,
1499 rejections=(self._initcount - self.count))
1500
1502 """
1503 Remove C{item} from the cluster.
1504
1505 @type item: L{ClusterNode}
1506 @raise ClusterExhaustedError: if this is the last remaining item
1507 """
1508 if self.count == 1:
1509 raise ClusterExhaustedError()
1510
1511 assert not item.fixed
1512
1513 for i in self._matrix:
1514 if item in self._matrix[i]:
1515 distance = self._matrix[i][item]
1516 self._weight -= 2 * distance
1517 i.weight -= distance
1518
1519 del self._matrix[i][item]
1520 self._edges -= 1
1521
1522 self._edges -= len(self._matrix[item])
1523 del self._matrix[item]
1524 self._items.remove(item)
1525
1527 """
1528 Shrink the cluster by a single node.
1529
1530 @return: True on successful shrink, False otherwise (e.g. if
1531 already converged)
1532 @rtype: bool
1533 @raise ClusterExhaustedError: if exhausted
1534 @raise ClusterDivergingError: if not converging
1535 """
1536
1537 mean = self.mean()
1538 if mean <= self.threshold or self.count == 1:
1539 return False # already shrunk enough
1540
1541 m = {}
1542
1543 for i in self._matrix:
1544 if not i.fixed:
1545 newmean = self.mean(skip=i)
1546 m[newmean] = i
1547
1548 if len(m) == 0: # only fixed items remaining
1549 raise ClusterExhaustedError()
1550
1551 newmean = min(m)
1552
1553 if newmean > mean:
1554 raise ClusterDivergingError() # can't converge, usually when fixed items are too far away from the average
1555 elif newmean < mean:
1556 junk = m[newmean]
1557 self.reject(junk)
1558 return True # successful shrink
1559 else:
1560 return False # converged
1561
1563 """
1564 Start automatic shrinking.
1565
1566 @param minitems: absolute minimum of the number of nodes in the cluster
1567 @type minitems: int
1568
1569 @return: cluster's representative: the node with the lowest average
1570 distance to all other nodes in the cluster
1571 @rtype: L{ClusterRep}
1572
1573 @raise ClusterExhaustedError: if C{self.count} < C{minitems} and
1574 still not converged
1575 """
1576
1577 if self.count > minitems:
1578
1579 while self.shrinkone():
1580 if self.count <= minitems:
1581 raise ClusterExhaustedError()
1582 else:
1583 raise ClusterExhaustedError()
1584
1585 return self.centroid()
1586
1588 """
1589 Cluster node.
1590
1591 @param fragment: fragment
1592 @type fragment: L{Assignment}
1593 @param distance: distance metric (a L{Metrics} member, default is RMSD)
1594 @type distance: str
1595 @param fixed: mark this node as fixed (cannot be rejected)
1596 @type fixed: bool
1597 """
1598
1599 FIXED = 0.7
1600
1601 @staticmethod
1603 """
1604 Create a new L{ClusterNode} given a specified C{Assignment}. If this
1605 assignment is a high probability match, define it as a fixed fragment.
1606
1607 @rtype: L{ClusterNode}
1608 """
1609 if fragment.probability > ClusterNode.FIXED and fragment.length >= FragmentCluster.MIN_LENGTH:
1610 return ClusterNode(fragment, distance=method, fixed=extend)
1611 else:
1612 return ClusterNode(fragment, distance=method, fixed=False)
1613
1615
1616 if fixed and fragment.length < FragmentCluster.MIN_LENGTH:
1617 raise ValueError("Can't fix a short fragment")
1618
1619 self.fragment = fragment
1620 self.fixed = bool(fixed)
1621 self.weight = 0
1622
1623 self._distance = getattr(self.fragment, distance)
1624
1626 """
1627 @return: the distance between self and another node
1628 @type other: L{ClusterNode}
1629 @rtype: float
1630 """
1631 return self._distance(other.fragment)
1632
1634 """
1635 Cluster's representative (centroid) node. This object carries the
1636 result of shrinking itself.
1637
1638 @param centroid: rep node
1639 @type centroid: L{ClusterNode}
1640 @param mean: current mean distance in the cluster
1641 @type mean: float
1642 @param consistency: percentage of pairwise distances below the RMSD C{threshold}
1643 @type consistency: float
1644 @param count: current number of nodes in the cluster
1645 @type count: int
1646 @param rejections: total number of rejections
1647 @type rejections: int
1648 @param alternative: suggested cluster rep alternative (e.g. structurally
1649 similar to the centroid, but longer)
1650  @type alternative: L{ClusterNode}
1651 """
1652
1654
1655 if isinstance(centroid, ClusterNode):
1656 centroid = centroid.fragment
1657 if isinstance(alternative, ClusterNode):
1658 alternative = alternative.fragment
1659
1660 self._centroid = centroid
1661 self._alternative = alternative
1662 self._mean = mean
1663 self._consistency = consistency
1664 self._count = count
1665 self._rejections = rejections
1666
1667 @property
1669 """
1670 Confidence of assignment: log10(count) * consistency
1671 """
1672 if self.count <= 0 or self.count is None or self.consistency is None:
1673 return 0
1674 else:
1675 return numpy.log10(self.count) * self.consistency
1676
1677 @property
1680
1681 @property
1684
1685 @property
1688
1689 @property
1692
1693 @property
1696
1697 @property
1700
1701 @property
1704
1706 """
1707 If an alternative is available, swap the centroid and the alternative.
1708 """
1709
1710 if self._alternative is not None:
1711
1712 centroid = self._centroid
1713 self._centroid = self._alternative
1714 self._alternative = centroid
1715
1717 """
1718 @deprecated: this method is obsolete and will be deleted soon
1719 """
1720 return self.centroid.to_rosetta(source, weight=self.confidence)
1721
1723
1724 @staticmethod
1726
1727 if center.centroid.qstart <= (start - overhang):
1728 start -= overhang
1729 elif center.centroid.qstart < start:
1730 start = center.centroid.qstart
1731
1732 if center.centroid.qend >= (end + overhang):
1733 end += overhang
1734 elif center.centroid.qend > end:
1735 end = center.centroid.end
1736
1737 return AdaptedAssignment(center, start, end)
1738
1740
1741 if qstart < center.centroid.qstart:
1742 raise ValueError(qstart)
1743 if qend > center.centroid.qend:
1744 raise ValueError(qend)
1745
1746 self._qstart = qstart
1747 self._qend = qend
1748 self._center = center
1749
1750 @property
1752 return self._center.centroid
1753
1754 @property
1757
1758 @property
1760 return self._center.confidence
1761
1762 @property
1765
1766 @property
1769
1770 @property
1773
1776
1779
1781
1783
1784 if not length > 0:
1785 raise ValueError(length)
1786
1787 self._length = int(length)
1788 self._slots = set(range(1, self._length + 1))
1789 self._map = {}
1790
1791 centers = list(centroids)
1792 centers.sort(key=lambda i: i.confidence, reverse=True)
1793
1794 for c in centers:
1795 self.assign(c)
1796
1797 @property
1800
1802
1803 for r in range(center.centroid.qstart, center.centroid.qend + 1):
1804 if r in self._slots:
1805 self._map[r] = center
1806 self._slots.remove(r)
1807
1809
1810 center = None
1811 start = None
1812 end = None
1813
1814 for r in range(1, self._length + 1):
1815
1816 if center is None:
1817 if r in self._map:
1818 center = self._map[r]
1819 start = end = r
1820 else:
1821 center = None
1822 start = end = None
1823 else:
1824 if r in self._map:
1825 if self._map[r] is center:
1826 end = r
1827 else:
1828 yield AdaptedAssignment(center, start, end)
1829 center = self._map[r]
1830 start = end = r
1831 else:
1832 yield AdaptedAssignment(center, start, end)
1833 center = None
1834 start = end = None
1835
1838
1840
1841 self.residue = residue
1842 self.confidence = confidence
1843 self.confident = confident
1844 self.gap = gap
1845 self.count = count
1846 self.rep = rep
1847
1848 @property
1851
1852 @property
1855
1856 @property
1858 if self.rep:
1859 return self.rep.torsion_at(self.rank, self.rank)[0]
1860 else:
1861 return None
1862
1865 """
1866 Simplifies the construction of fragment libraries.
1867 """
1868
1872
1874 """
1875 Build a fragment library given a L{Target} and its L{Assignment}s.
1876
1877 @param target: target protein
1878 @type target: L{Target}
1879
1880 @rtype: L{RosettaFragmentMap}
1881 """
1882
1883 frag_factory = self.rosetta.RosettaFragment
1884 fragments = list(map(frag_factory.from_object, target.matches))
1885 #fragments = [ frag_factory.from_object(f) for f in target.matches if f.length >= 6 ]
1886 fragments.sort()
1887
1888 return self.rosetta.RosettaFragmentMap(fragments, target.length)
1889
1891 """
1892 Build a fixed-length fragment library from a list of
1893 variable-length L{Assignment}s.
1894
1895 @param fragments: source fragments
1896 @type fragments: iterable of L{RosettaFragment}s
1897 @param window: fixed-length fragment size (for classic Rosetta: choose 9)
1898 @type window: int
1899
1900 @return: fixed-length fragment library
1901 @rtype: L{RosettaFragmentMap}
1902 """
1903
1904 frags = []
1905
1906 for f in fragments:
1907 for qs in range(f.qstart, f.qend - window + 1):
1908 frags.append(f.subregion(qs, qs + window - 1))
1909
1910 return self.rosetta.RosettaFragmentMap(frags)
1911
1913 """
1914 Complement C{target}'s assignments with C{filling} (e.g. rosetta fragments).
1915 The regions to be complemented are determined by calculating the confidence
1916 at each residue (by filtering).
1917
1918
1919 @param target: target protein
1920 @type target: L{Target}
1921 @param filling: additional fragments to place in the low-conf regions
1922 @type filling: L{RosettaFragmentMap} or iterable of L{RosettaFragment}
1923 @param threshold: confidence threshold
1924 @type threshold: float
1925
1926 @return: complemented fragment library
1927 @rtype: L{RosettaFragmentMap}
1928 """
1929
1930 fragmap = self.make_fragset(target)
1931 covered = set()
1932
1933 for r in target.residues:
1934
1935 if r.assignments.length == 0:
1936 if callback:
1937 callback(ResidueEventInfo(r.native, gap=True))
1938 continue
1939
1940 cluster = r.filter()
1941 if cluster is None:
1942 if callback:
1943 callback(ResidueEventInfo(r.native, 0, 0, confident=False))
1944 continue
1945
1946 if cluster.confidence >= threshold:
1947 covered.add(r.native.rank)
1948 confident = True
1949 else:
1950 confident = False
1951
1952 if callback:
1953 callback(ResidueEventInfo(r.native, cluster.confidence, cluster.count, confident))
1954
1955 for r in target.residues:
1956 if r.native.rank not in covered: # true for gaps and low-conf residues
1957 fragmap.mark_unconfident(r.native.rank)
1958
1959 for frag in filling:
1960 fragmap.complement(frag)
1961
1962 return fragmap
1963
1965 """
1966 Builed a filtered fragment library (by clustering), containing only
1967 representative fragments (cluster centroids).
1968
1969 @param target: target protein
1970 @type target: L{Target}
1971 @param extend: if True, pick alternative reps if available
1972 @type extend: bool
1973
1974 @return: filtered fragment library
1975 @rtype: L{RosettaFragmentMap}
1976 """
1977
1978 fragments = []
1979
1980 for r in target.residues:
1981 if r.assignments.length == 0:
1982 if callback:
1983 callback(ResidueEventInfo(r.native, gap=True))
1984 continue
1985
1986 cluster = r.filter(extend=extend)
1987 if cluster is None:
1988 if callback:
1989 callback(ResidueEventInfo(r.native, 0, 0, confident=False))
1990
1991 if extend and cluster.has_alternative:
1992 best = cluster.alternative
1993 else:
1994 best = cluster.centroid
1995
1996 fragment = self.rosetta.RosettaFragment.from_object(best)
1997 fragments.append(fragment)
1998 if callback:
1999 callback(ResidueEventInfo(r.native, cluster.confidence, cluster.count, rep=cluster.centroid))
2000
2001 fragments.sort()
2002 return self.rosetta.RosettaFragmentMap(fragments, target.length)
2003
2005 """
2006 Mix fragments from multiple libraries.
2007
2008 @type fragsets: L{RosettaFragmentMap}
2009 @return: mixed fragment library
2010 @rtype: L{RosettaFragmentMap}
2011 """
2012
2013 fragments = []
2014 length = 0
2015
2016 for fragset in fragsets:
2017 if fragset._length > length:
2018 length = fragset._length
2019
2020 for fragment in fragset:
2021 fragments.append(fragment)
2022
2023 return self.rosetta.RosettaFragmentMap(fragments, length)
2024
2027
2029
2030 FACTORY = None
2031 DSN = None
2032
2034
2035 self.factory = factory or self.__class__.FACTORY
2036 self.cs = dsn or self.__class__.DSN
2037 self.connection = None
2038 self.cursor = None
2039
2041
2042 self.connection = self.factory(self.cs)
2043 try:
2044 self.cursor = self.connection.cursor()
2045 except:
2046 self.connection.close()
2047 raise
2048 return self
2049
2057
2059
2060 self._pdb = pdb_paths
2061 self._connection = None
2062
2063 from csb.bio.io.wwpdb import find, StructureParser
2064 self._parser = StructureParser
2065 self._find = find
2066 self._factory = factory
2067
2068 try:
2069 import psycopg2.extras
2070 except ImportError:
2071 raise RuntimeError('Please install the psycopg2 module first')
2072
2073 if connection_string is None:
2074 connection_string = self.connection_string()
2075
2076 BenchmarkAdapter.Connection.FACTORY = psycopg2.extras.DictConnection
2077 BenchmarkAdapter.Connection.DSN = connection_string
2078
2079 @staticmethod
2081
2082 fields = ['dbname={0}'.format(database)]
2083
2084 if host:
2085 fields.append('host={0}'.format(host))
2086 if username:
2087 fields.append('user={0}'.format(username))
2088 fields.append('password={0}'.format(password))
2089
2090 return ' '.join(fields)
2091
2093
2094 with BenchmarkAdapter.Connection() as db:
2095
2096 db.cursor.callproc('reporting."GetTargets"', (benchmark_id,))
2097 return db.cursor.fetchall()
2098
2100
2101 with BenchmarkAdapter.Connection() as db:
2102
2103 db.cursor.callproc('reporting."GetTargetDetails"', (target_id,))
2104 return db.cursor.fetchall()
2105
2107
2108 with BenchmarkAdapter.Connection() as db:
2109
2110 db.cursor.callproc('reporting."GetAssignments"', (target_id, type))
2111 return db.cursor.fetchall()
2112
2114
2115 with BenchmarkAdapter.Connection() as db:
2116
2117 db.cursor.callproc('reporting."GetTargetSecStructureAssignments2"', (target_id, type))
2118 return db.cursor.fetchall()
2119
2121
2122 with BenchmarkAdapter.Connection() as db:
2123
2124 db.cursor.callproc('reporting."GetScores"', (benchmark_id, type))
2125 return db.cursor.fetchall()
2126
2128
2129 with BenchmarkAdapter.Connection() as db:
2130
2131 db.cursor.callproc('reporting."GetCentroids"', (benchmark_id,))
2132 return db.cursor.fetchall()
2133
2135
2136 with BenchmarkAdapter.Connection() as db:
2137
2138 db.cursor.callproc('reporting."GetTargetSegments"', (target_id,))
2139 data = db.cursor.fetchall()
2140
2141 return [ TargetSegment(row['Start'], row['End'], row['Count']) for row in data ]
2142
2144
2145 pdbfile = self._find(accession, self._pdb)
2146
2147 if not pdbfile and chain:
2148 pdbfile = self._find(accession + chain, self._pdb)
2149
2150 if not pdbfile:
2151 raise IOError('{0} not found here: {1}'.format(accession, self._pdb))
2152
2153 return self._parser(pdbfile).parse_structure()
2154
2156
2157 info = self.target_details(target_id)
2158 if not info:
2159 raise ValueError('No such Target ID in the database: {0}'.format(target_id))
2160 row = info[0]
2161
2162 id = row["Accession"]
2163 length = float(row["Length"])
2164 overlap = float(row["MaxOverlap"]) / (length or 1.)
2165
2166 native = self.structure(id[:4], id[4]).chains[id[4]]
2167 segments = self.target_segments(target_id)
2168 target = self._factory.target(id, length, native.residues, overlap, segments)
2169
2170 source = None
2171
2172 for row in self.assignments(target_id, type):
2173
2174 src_accession = row['Source'][:4]
2175 src_chain = row['Source'][4]
2176
2177 if source is None or source.accession != src_accession:
2178 try:
2179 source = self.structure(src_accession, src_chain)
2180 except (IOError, ValueError) as ex:
2181 target.errors.append(ex)
2182 continue
2183
2184 if src_chain == '_':
2185 frag_chain = source.first_chain
2186 else:
2187 frag_chain = source.chains[src_chain]
2188 if not frag_chain.has_torsion:
2189 frag_chain.compute_torsion()
2190
2191 fragment = self._factory.assignment(
2192 source=frag_chain,
2193 start=row['SourceStart'],
2194 end=row['SourceEnd'],
2195 id=row['FragmentName'],
2196 qstart=row['Start'],
2197 qend=row['End'],
2198 probability=row['Probability'],
2199 score=row['Score'],
2200 neff=row['Neff'],
2201 rmsd=row['RMSD'],
2202 tm_score=row['TMScore'],
2203 segment=row['SegmentStart'],
2204 internal_id=row['InternalID'])
2205
2206 target.assign(fragment)
2207
2208 if ss:
2209 self._attach_sec_structure(target, target_id, type)
2210
2211 return target
2212
2214
2215 ss = {}
2216
2217 for row in self.assignments_sec_structure(target_id, type):
2218 frag_id, state = row["AssignmentID"], row["DSSP"]
2219 if row[frag_id] not in ss:
2220 ss[frag_id] = []
2221
2222 ss[frag_id].append(state)
2223
2224 for a in target.matches:
2225 if a.internal_id in ss:
2226 dssp = ''.join(ss[a.internal_id])
2227 a.secondary_structure = dssp
2228
| Home | Trees | Indices | Help |
|
|---|
| Generated by Epydoc 3.0.1 on Thu Jun 20 10:55:51 2013 | http://epydoc.sourceforge.net |