source: protein.py@ 3fbbfbb

Last change on this file since 3fbbfbb was 3fbbfbb, checked in by Jan Meinke <j.meinke@…>, 14 years ago

Move to doxygen comments and smmp_p.

Doxygen comments in Fortran are !> ... !! ... !<. I'm planning move the API documentation from the
lyx file into the code. This should make it easier to get documentation for all the common block
variables as well.

Use import smmp_p to indicate the parallel version of the Python bindings.

  • Property mode set to 100644
File size: 10.6 KB
Line 
1#!/usr/bin/env python
2# protein.py
3#
4# Copyright 2007 Frank Eisenmenger, U.H.E. Hansmann,
5# Jan H. Meinke, Sandipan Mohanty
6#
7import sys
8import smmp_p as smmp
9from math import *
10
11crd = 57.2957795130823
12
13def nursvr(ivr):
14 res = 0
15 for i in range(smmp.mol_i.ntlml - 1, -1, -1):
16 ifirs = smmp.mol_i.irsml1[i] - 1
17 if ivr + 1 > smmp.res_i.ivrrs1[ifirs] and smmp.mol_i.nvrml[i] > 0:
18 for j in range(smmp.mol_i.irsml2[i] - 1, ifirs - 1, -1):
19 if ivr + 1 > smmp.res_i.ivrrs1[j] and smmp.res_i.nvrrs[j] > 0:
20 res = j + 1
21 return res
22 return res
23
24## Provides read-only access to SMMP's atoms. Usually, you won't initialize
25# an Atom directly since you need to know the internal array index of the
26# Atom in SMMP. You can get a list of all the Atoms in a Protein by using
27# protein.Protein.getAtoms().
28# Various properties are accessible. See the functions' documentations for
29# details.
30class Atom:
31 """Atoms are the basic chemical units."""
32 #TODO: insert namespace
33
34 ## Initialize the atom with its ID. The ID is the array index of the atom
35 # in SMMP.
36 # @param id index of atom in SMMP
37 def __init__(self, id):
38 self.__id = id
39
40 def position(self):
41 """Cartesian coordinates of this atom."""
42 res = (smmp.atm_r.xat[self.__id], smmp.atm_r.yat[self.__id], smmp.atm_r.zat[self.__id])
43 return res
44
45 def charge(self):
46 """Partial charge of this atom."""
47 return smmp.atm_r.cgat[self.__id]
48
49 def radius(self):
50 """Van-der-Waals Radius of this atom."""
51 return smmp.solvent.rvdw[self.__id]
52
53 def name(self):
54 return ''.join([smmp.atm_c.nmat[i][self.__id] for i in range(0,4)]).strip()
55
56 def __str__(self):
57 return "%s at (%s, %s, %s)" % (self.name(), self.position()[0], self.position()[1], self.position()[2])
58
59## Amino acids bound within a protein are often called residues. Strictly
60# speaking the residue is the part that's connected to the Calpha and not part
61# of the backbone, but we'll include the backbone atoms as well. Just like Atoms
62# Residues reference the corresponding data in SMMP and are not independent.
63class Residue:
64 """An amino acid within a Protein."""
65
66 ## Initialize the residue with the corresponding index in SMMP.
67 # @param id index of residue in SMMP
68 def __init__(self, id):
69 self.__id = id
70
71 def atoms(self):
72 """Returns the list of atoms belonging to this residue."""
73 first = smmp.res_i.iatrs1[self.__id]
74 last = smmp.res_i.iatrs2[self.__id]
75 res = [Atom(i) for i in range(first - 1, last - 1)]
76 return res
77
78
79## Proteins are constructed from a sequence file and a configuration file or
80# read from a PDB file.
81class Protein:
82 """A protein is a polypeptide made of the 20 naturally occuring amino
83 acids.
84 """
85
86 def __init__(self, seq, var=' '):
87 """The structure of the protein is read in from a file.
88 Pass the name of a textfile with a sequence string and--optionally--the name of a
89 variable file that describes the conformation. Or give the name of pdb
90 file---it must end with 'pdb'---that contains both information."""
91
92 grpn = 'nh2'
93 grpc = 'cooh'
94 prevID = smmp.mol_i.ntlml - 1
95 if seq[-3:].lower() == 'pdb':
96 if smmp.mol_i.ntlml:
97 print "Sorry, but I can only read the first protein from a pdb file."
98 raise ValueError
99 smmp.init_molecule(0, grpn, grpc, seq)
100 else:
101 smmp.init_molecule(1, grpn, grpc, seq, var)
102 self.__id = smmp.mol_i.ntlml - 1
103 if self.__id - prevID > 1:
104 print "Sorry, I don't know how to deal with multiple proteins in one sequence file, yet."
105 raise NotImplementedError
106
107 def setOrigin(self, x, a):
108 """Sets the position of the N terminal of the protein in the global
109 coordinate system and the orientation of the protein. Expects two
110 tuples. The first contains the coordinates of the first atom (x, y,
111 and z) and the second contains three angles.
112 """
113 #TODO: Implement type checking
114 smmp.mol_r.gbpr[0][self.__id] = x[0]
115 smmp.mol_r.gbpr[1][self.__id] = x[1]
116 smmp.mol_r.gbpr[2][self.__id] = x[2]
117 smmp.mol_r.gbpr[3][self.__id] = a[0]
118 smmp.mol_r.gbpr[4][self.__id] = a[1]
119 smmp.mol_r.gbpr[5][self.__id] = a[2]
120
121 def origin(self):
122 r = [smmp.mol_r.gbpr[i][self.__id] for i in range(0, 3)]
123 a = [smmp.mol_r.gbpr[i][self.__id] for i in range(3, 6)]
124 return r, a
125
126 def getDihedral(self, angle, residue):
127 try:
128 res = int(residue)
129 idx = res
130 except ValueError:
131 res = residue.split()
132 res[0] = res[0].lower()
133 res[1] = int(res[1])
134 idx = self.findResidue(res[0], res[1])
135 varIdx = ''
136 for i in range(smmp.res_i.ivrrs1[idx], smmp.res_i.ivrrs1[idx] + smmp.res_i.nvrrs[idx]):
137 varName = ''.join(list([str(smmp.var_c.nmvr[j][i]) for j in range(0, 3)]))
138 if varName == angle:
139 varIdx = i
140 break
141 if varIdx:
142 return smmp.var_r.vlvr[varIdx] * crd
143 raise KeyError, '%s not found for %s.' % (angle, residue)
144
145 ## Residues are given by their 3-letter abbreviation and their index.
146 # For example, findResidue('gly', 5) returns the index of the fifth occurence
147 # of glycine in the protein. If the residue is not found an exception is raised.
148 #
149 # @param name 3-letter abbreviation of the residue
150 # @param idx index of occurence of the amino acid given in name.
151
152 def findResidue(self, name, idx = 1):
153 """Find the idxth residue of type name and return the index."""
154 res = ''.join(self.residues()).lower()
155 name = name.lower()
156 j = 0
157 try:
158 for i in range(0, idx):
159 k = res.index(name, j)
160 j = j + k
161 except:
162 raise KeyError, "%s%s not found." % (name, idx)
163 if j >=len(res):
164 raise KeyError, "%s%s not found." % (name, idx)
165 return j / 3
166
167 def atoms(self):
168 """Returns the list of atoms belonging to this protein."""
169 first = smmp.res_i.iatrs1[smmp.mol_i.irsml1[self.__id] - 1]
170 last = smmp.res_i.iatrs2[smmp.mol_i.irsml2[self.__id] - 1]
171 res = [Atom(i) for i in range(first - 1, last - 1)]
172 return res
173
174 def residues(self):
175 """Returns a list of the amino acids in this protein."""
176 seq = [str(c[0]) for c in smmp.res_c.seq]
177 first = smmp.mol_i.irsml1[self.__id]- 1
178 last = smmp.mol_i.irsml2[self.__id] - 1
179 return ''.join(seq).split()[first:last]
180
181 def id(self):
182 return self.__id
183
184 def strand(self):
185 """Returns the number of residues in a beta-strand-like configuration."""
186 maxDev = 30.0
187 phiSheet = -150.0
188 psiSheet = 150.0
189 strandCount = 0
190 ifivr = smmp.mol_i.ivrml1[self.__id]
191 i = 0
192 iv = smmp.var_i.idvr[i]
193 while iv < ifivr:
194 i += 1
195 iv = smmp.var_i.idvr[i]
196 for j in range(i, ifivr + smmp.mol_i.nvrml[self.__id]):
197 iv = smmp.var_i.idvr[j] - 1
198 name =''. join([str(smmp.var_c.nmvr[iv * 3 + k][0]) for k in range(0,3)])
199 if name == 'phi':
200 phi = smmp.var_r.vlvr[iv] * crd
201 psi = smmp.var_r.vlvr[smmp.var_i.idvr[j+1] - 1] * crd
202 if abs(phi - phiSheet) < maxDev and abs(psi - psiSheet) < maxDev:
203 strandCount += 1
204 return strandCount
205
206 def directionVector(self):
207 """Calculates the normalized end-to-end vector for this molecule.
208 The first and last c_alpha carbon is used as reference."""
209 atoms = self.atoms()
210 beginning = []
211 for a in atoms:
212 if a.name().strip() == 'ca':
213 if beginning:
214 end = a.position()
215 else:
216 beginning = a.position()
217 res = [end[i] - beginning[i] for i in range(0,3)]
218 sum = sqrt(reduce(lambda x, y: x + y ** 2, res))
219 res = map(lambda x: x / sum, res)
220 return res
221
222 def contacts(self):
223 """Returns a triplet containing the total number of contacts, the number
224 of native contacts, and the hemming distance. The latter two quantities
225 are only meaningful if a reference structure has been defined.
226 """
227 return smmp.contacts()
228
229 def hbond(self):
230 return smmp.hbond(self.__id + 1, 0)
231
232 def energy(self):
233 return smmp.energy()
234
235 def rgyr(self):
236 return smmp.rgyr(self.__id + 1)
237
238 def rmsdfun(self):
239 return smmp.rmsdfun()
240
241 def savePDB(self, file=""):
242 """Write the current configuration to file using PDB formatting."""
243 smmp.outpdb(self.__id, file)
244
245 def saveVariables(self, file=""):
246 """Output the current conformation in the variable format. The output
247 can be read in again and used to restart/continue a simulation.
248
249 @param file if empty write to standard out
250 """
251 # Same functionality as smmp.outvar(self.__id + 1, 0)
252 if not file:
253 out = sys.stdout
254 else:
255 out = open(file, 'w')
256 for i in range(smmp.mol_i.ivrml1[self.__id] - 1 , smmp.mol_i.ivrml1[self.__id] + smmp.mol_i.nvrml[self.__id] - 1):
257 if not smmp.var_l.fxvr[i]:
258 name = ''.join([str(smmp.var_c.nmvr[i * 3 + j][0]) for j in range(0,3)])
259 print >>out, nursvr(i), ':', name, ':', smmp.var_r.vlvr[i] * crd
260 else:
261 it=smmp.var_i.ityvr[i]
262 if it == 3:
263 print >>out, nursvr(i), ':', name, ':', smmp.var_r.vlvr[i] * crd, ' &'
264
265 def zimmer(self):
266 first = smmp.mol_i.irsml1[self.__id] - 1
267 last = smmp.mol_i.irsml2[self.__id]
268 nrs = smmp.mol_i.irsml2[smmp.mol_i.ntlml - 1] - 1
269 smmp.zimmer(last)
270 return smmp.zimme.zimm[first:last]
271
272 def __len__(self):
273 return smmp.mol_i.irsml2[self.__id] - smmp.mol_i.irsml1[self.__id] + 1
274
275
276if __name__ == "__main__":
277 libdir = 'SMMP/'
278
279 smmp.epar_l.flex = 0
280 smmp.epar_l.sh2 = 0
281 smmp.epar_l.epsd = 0
282 smmp.epar_l.tesgrd = 0
283 smmp.isolty.itysol = 0
284 smmp.init_energy(libdir)
285
286 p1 = Protein('EXAMPLES/1bdd.pdb')
287 for i in p1.atoms():
288 print i
289 print p1.getDihedral('phi', 11)
Note: See TracBrowser for help on using the repository browser.