source: protein.py@ 5fef0d7

Last change on this file since 5fef0d7 was 5fef0d7, checked in by Jan Meinke <j.meinke@…>, 14 years ago

Changed module name from smmp to smmp_p.

Fixed conversion to residue and atom names.

  • Property mode set to 100644
File size: 10.6 KB
Line 
1#!/usr/bin/env python
2# protein.py
3#
4# Copyright 2007 Frank Eisenmenger, U.H.E. Hansmann,
5# Jan H. Meinke, Sandipan Mohanty
6#
7import sys
8import numpy as np
9import smmp_p as smmp
10from math import *
11
12crd = 57.2957795130823
13
14def nursvr(ivr):
15 res = 0
16 for i in range(smmp.mol_i.ntlml - 1, -1, -1):
17 ifirs = smmp.mol_i.irsml1[i] - 1
18 if ivr + 1 > smmp.res_i.ivrrs1[ifirs] and smmp.mol_i.nvrml[i] > 0:
19 for j in range(smmp.mol_i.irsml2[i] - 1, ifirs - 1, -1):
20 if ivr + 1 > smmp.res_i.ivrrs1[j] and smmp.res_i.nvrrs[j] > 0:
21 res = j + 1
22 return res
23 return res
24
25## Provides read-only access to SMMP's atoms. Usually, you won't initialize
26# an Atom directly since you need to know the internal array index of the
27# Atom in SMMP. You can get a list of all the Atoms in a Protein by using
28# protein.Protein.getAtoms().
29# Various properties are accessible. See the functions' documentations for
30# details.
31class Atom:
32 """Atoms are the basic chemical units."""
33 #TODO: insert namespace
34
35 ## Initialize the atom with its ID. The ID is the array index of the atom
36 # in SMMP.
37 # @param id index of atom in SMMP
38 def __init__(self, id):
39 self.__id = id
40
41 def position(self):
42 """Cartesian coordinates of this atom."""
43 res = (smmp.atm_r.xat[self.__id], smmp.atm_r.yat[self.__id], smmp.atm_r.zat[self.__id])
44 return res
45
46 def charge(self):
47 """Partial charge of this atom."""
48 return smmp.atm_r.cgat[self.__id]
49
50 def radius(self):
51 """Van-der-Waals Radius of this atom."""
52 return smmp.solvent.rvdw[self.__id]
53
54 def name(self):
55 return ''.join(smmp.atm_c.nmat[:,0].reshape([-1,4])[self.__id]).strip()
56
57 def __str__(self):
58 return "%s at (%s, %s, %s)" % (self.name(), self.position()[0], self.position()[1], self.position()[2])
59
60## Amino acids bound within a protein are often called residues. Strictly
61# speaking the residue is the part that's connected to the Calpha and not part
62# of the backbone, but we'll include the backbone atoms as well. Just like Atoms
63# Residues reference the corresponding data in SMMP and are not independent.
64class Residue:
65 """An amino acid within a Protein."""
66
67 ## Initialize the residue with the corresponding index in SMMP.
68 # @param id index of residue in SMMP
69 def __init__(self, id):
70 self.__id = id
71
72 def atoms(self):
73 """Returns the list of atoms belonging to this residue."""
74 first = smmp.res_i.iatrs1[self.__id]
75 last = smmp.res_i.iatrs2[self.__id]
76 res = [Atom(i) for i in range(first - 1, last - 1)]
77 return res
78
79
80## Proteins are constructed from a sequence file and a configuration file or
81# read from a PDB file.
82class Protein:
83 """A protein is a polypeptide made of the 20 naturally occuring amino
84 acids.
85 """
86
87 def __init__(self, seq, var=' '):
88 """The structure of the protein is read in from a file.
89 Pass the name of a textfile with a sequence string and--optionally--the name of a
90 variable file that describes the conformation. Or give the name of pdb
91 file---it must end with 'pdb'---that contains both information."""
92
93 grpn = 'nh2'
94 grpc = 'cooh'
95 prevID = smmp.mol_i.ntlml - 1
96 if seq[-3:].lower() == 'pdb':
97 if smmp.mol_i.ntlml:
98 print "Sorry, but I can only read the first protein from a pdb file."
99 raise ValueError
100 smmp.init_molecule(0, grpn, grpc, seq)
101 else:
102 smmp.init_molecule(1, grpn, grpc, seq, var)
103 self.__id = smmp.mol_i.ntlml - 1
104 if self.__id - prevID > 1:
105 print "Sorry, I don't know how to deal with multiple proteins in one sequence file, yet."
106 raise NotImplementedError
107
108 def setOrigin(self, x, a):
109 """Sets the position of the N terminal of the protein in the global
110 coordinate system and the orientation of the protein. Expects two
111 tuples. The first contains the coordinates of the first atom (x, y,
112 and z) and the second contains three angles.
113 """
114 #TODO: Implement type checking
115 smmp.mol_r.gbpr[0][self.__id] = x[0]
116 smmp.mol_r.gbpr[1][self.__id] = x[1]
117 smmp.mol_r.gbpr[2][self.__id] = x[2]
118 smmp.mol_r.gbpr[3][self.__id] = a[0]
119 smmp.mol_r.gbpr[4][self.__id] = a[1]
120 smmp.mol_r.gbpr[5][self.__id] = a[2]
121
122 def origin(self):
123 r = [smmp.mol_r.gbpr[i][self.__id] for i in range(0, 3)]
124 a = [smmp.mol_r.gbpr[i][self.__id] for i in range(3, 6)]
125 return r, a
126
127 def getDihedral(self, angle, residue):
128 try:
129 res = int(residue)
130 idx = res
131 except ValueError:
132 res = residue.split()
133 res[0] = res[0].lower()
134 res[1] = int(res[1])
135 idx = self.findResidue(res[0], res[1])
136 varIdx = ''
137 for i in range(smmp.res_i.ivrrs1[idx], smmp.res_i.ivrrs1[idx] + smmp.res_i.nvrrs[idx]):
138 varName = ''.join(list([str(smmp.var_c.nmvr[j][i]) for j in range(0, 3)]))
139 if varName == angle:
140 varIdx = i
141 break
142 if varIdx:
143 return smmp.var_r.vlvr[varIdx] * crd
144 raise KeyError, '%s not found for %s.' % (angle, residue)
145
146 ## Residues are given by their 3-letter abbreviation and their index.
147 # For example, findResidue('gly', 5) returns the index of the fifth occurence
148 # of glycine in the protein. If the residue is not found an exception is raised.
149 #
150 # @param name 3-letter abbreviation of the residue
151 # @param idx index of occurence of the amino acid given in name.
152
153 def findResidue(self, name, idx = 1):
154 """Find the idxth residue of type name and return the index."""
155 res = ''.join(self.residues()).lower()
156 name = name.lower()
157 j = 0
158 try:
159 for i in range(0, idx):
160 k = res.index(name, j)
161 j = j + k
162 except:
163 raise KeyError, "%s%s not found." % (name, idx)
164 if j >=len(res):
165 raise KeyError, "%s%s not found." % (name, idx)
166 return j / 3
167
168 def atoms(self):
169 """Returns the list of atoms belonging to this protein."""
170 first = smmp.res_i.iatrs1[smmp.mol_i.irsml1[self.__id] - 1]
171 last = smmp.res_i.iatrs2[smmp.mol_i.irsml2[self.__id] - 1]
172 res = [Atom(i) for i in range(first - 1, last - 1)]
173 return res
174
175 def residues(self):
176 """Returns a list of the amino acids in this protein."""
177 seq = [''.join(l) for l in np.array([str(c[0]) for c in smmp.res_c.seq]).reshape([-1, 4])]
178 first = smmp.mol_i.irsml1[self.__id]- 1
179 last = smmp.mol_i.irsml2[self.__id] - 1
180 return seq[first:last]
181
182 def id(self):
183 return self.__id
184
185 def strand(self):
186 """Returns the number of residues in a beta-strand-like configuration."""
187 maxDev = 30.0
188 phiSheet = -150.0
189 psiSheet = 150.0
190 strandCount = 0
191 ifivr = smmp.mol_i.ivrml1[self.__id]
192 i = 0
193 iv = smmp.var_i.idvr[i]
194 while iv < ifivr:
195 i += 1
196 iv = smmp.var_i.idvr[i]
197 for j in range(i, ifivr + smmp.mol_i.nvrml[self.__id]):
198 iv = smmp.var_i.idvr[j] - 1
199 name =''. join([str(smmp.var_c.nmvr[iv * 3 + k][0]) for k in range(0,3)])
200 if name == 'phi':
201 phi = smmp.var_r.vlvr[iv] * crd
202 psi = smmp.var_r.vlvr[smmp.var_i.idvr[j+1] - 1] * crd
203 if abs(phi - phiSheet) < maxDev and abs(psi - psiSheet) < maxDev:
204 strandCount += 1
205 return strandCount
206
207 def directionVector(self):
208 """Calculates the normalized end-to-end vector for this molecule.
209 The first and last c_alpha carbon is used as reference."""
210 atoms = self.atoms()
211 beginning = []
212 for a in atoms:
213 if a.name().strip() == 'ca':
214 if beginning:
215 end = a.position()
216 else:
217 beginning = a.position()
218 res = [end[i] - beginning[i] for i in range(0,3)]
219 sum = sqrt(reduce(lambda x, y: x + y ** 2, res))
220 res = map(lambda x: x / sum, res)
221 return res
222
223 def contacts(self):
224 """Returns a triplet containing the total number of contacts, the number
225 of native contacts, and the hemming distance. The latter two quantities
226 are only meaningful if a reference structure has been defined.
227 """
228 return smmp.contacts()
229
230 def hbond(self):
231 return smmp.hbond(self.__id + 1, 0)
232
233 def energy(self):
234 return smmp.energy()
235
236 def rgyr(self):
237 return smmp.rgyr(self.__id + 1)
238
239 def rmsdfun(self):
240 return smmp.rmsdfun()
241
242 def savePDB(self, file=""):
243 """Write the current configuration to file using PDB formatting."""
244 smmp.outpdb(self.__id, file)
245
246 def saveVariables(self, file=""):
247 """Output the current conformation in the variable format. The output
248 can be read in again and used to restart/continue a simulation.
249
250 @param file if empty write to standard out
251 """
252 # Same functionality as smmp.outvar(self.__id + 1, 0)
253 if not file:
254 out = sys.stdout
255 else:
256 out = open(file, 'w')
257 for i in range(smmp.mol_i.ivrml1[self.__id] - 1 , smmp.mol_i.ivrml1[self.__id] + smmp.mol_i.nvrml[self.__id] - 1):
258 if not smmp.var_l.fxvr[i]:
259 name = ''.join([str(smmp.var_c.nmvr[i * 3 + j][0]) for j in range(0,3)])
260 print >>out, nursvr(i), ':', name, ':', smmp.var_r.vlvr[i] * crd
261 else:
262 it=smmp.var_i.ityvr[i]
263 if it == 3:
264 print >>out, nursvr(i), ':', name, ':', smmp.var_r.vlvr[i] * crd, ' &'
265
266 def zimmer(self):
267 first = smmp.mol_i.irsml1[self.__id] - 1
268 last = smmp.mol_i.irsml2[self.__id]
269 nrs = smmp.mol_i.irsml2[smmp.mol_i.ntlml - 1] - 1
270 smmp.zimmer(last)
271 return smmp.zimme.zimm[first:last]
272
273 def __len__(self):
274 return smmp.mol_i.irsml2[self.__id] - smmp.mol_i.irsml1[self.__id] + 1
275
276
277if __name__ == "__main__":
278 libdir = 'SMMP/'
279
280 smmp.epar_l.flex = 0
281 smmp.epar_l.sh2 = 0
282 smmp.epar_l.epsd = 0
283 smmp.epar_l.tesgrd = 0
284 smmp.isolty.itysol = 0
285 smmp.init_energy(libdir)
286
287 p1 = Protein('EXAMPLES/1bdd.pdb')
288 for i in p1.atoms():
289 print i
290 print p1.getDihedral('phi', 11)
Note: See TracBrowser for help on using the repository browser.