1 | #!/usr/bin/env python
|
---|
2 | # protein.py
|
---|
3 | #
|
---|
4 | # Copyright 2007 Frank Eisenmenger, U.H.E. Hansmann,
|
---|
5 | # Jan H. Meinke, Sandipan Mohanty
|
---|
6 | #
|
---|
7 | import sys
|
---|
8 | import numpy as np
|
---|
9 | import smmp_p as smmp
|
---|
10 | from math import *
|
---|
11 |
|
---|
12 | crd = 57.2957795130823
|
---|
13 |
|
---|
14 | def nursvr(ivr):
|
---|
15 | res = 0
|
---|
16 | for i in range(smmp.mol_i.ntlml - 1, -1, -1):
|
---|
17 | ifirs = smmp.mol_i.irsml1[i] - 1
|
---|
18 | if ivr + 1 > smmp.res_i.ivrrs1[ifirs] and smmp.mol_i.nvrml[i] > 0:
|
---|
19 | for j in range(smmp.mol_i.irsml2[i] - 1, ifirs - 1, -1):
|
---|
20 | if ivr + 1 > smmp.res_i.ivrrs1[j] and smmp.res_i.nvrrs[j] > 0:
|
---|
21 | res = j + 1
|
---|
22 | return res
|
---|
23 | return res
|
---|
24 |
|
---|
25 | ## Provides read-only access to SMMP's atoms. Usually, you won't initialize
|
---|
26 | # an Atom directly since you need to know the internal array index of the
|
---|
27 | # Atom in SMMP. You can get a list of all the Atoms in a Protein by using
|
---|
28 | # protein.Protein.getAtoms().
|
---|
29 | # Various properties are accessible. See the functions' documentations for
|
---|
30 | # details.
|
---|
31 | class Atom:
|
---|
32 | """Atoms are the basic chemical units."""
|
---|
33 | #TODO: insert namespace
|
---|
34 |
|
---|
35 | ## Initialize the atom with its ID. The ID is the array index of the atom
|
---|
36 | # in SMMP.
|
---|
37 | # @param id index of atom in SMMP
|
---|
38 | def __init__(self, id):
|
---|
39 | self.__id = id
|
---|
40 |
|
---|
41 | def position(self):
|
---|
42 | """Cartesian coordinates of this atom."""
|
---|
43 | res = (smmp.atm_r.xat[self.__id], smmp.atm_r.yat[self.__id], smmp.atm_r.zat[self.__id])
|
---|
44 | return res
|
---|
45 |
|
---|
46 | def charge(self):
|
---|
47 | """Partial charge of this atom."""
|
---|
48 | return smmp.atm_r.cgat[self.__id]
|
---|
49 |
|
---|
50 | def radius(self):
|
---|
51 | """Van-der-Waals Radius of this atom."""
|
---|
52 | return smmp.solvent.rvdw[self.__id]
|
---|
53 |
|
---|
54 | def name(self):
|
---|
55 | return ''.join(smmp.atm_c.nmat[:,0].reshape([-1,4])[self.__id]).strip()
|
---|
56 |
|
---|
57 | def __str__(self):
|
---|
58 | return "%s at (%s, %s, %s)" % (self.name(), self.position()[0], self.position()[1], self.position()[2])
|
---|
59 |
|
---|
60 | ## Amino acids bound within a protein are often called residues. Strictly
|
---|
61 | # speaking the residue is the part that's connected to the Calpha and not part
|
---|
62 | # of the backbone, but we'll include the backbone atoms as well. Just like Atoms
|
---|
63 | # Residues reference the corresponding data in SMMP and are not independent.
|
---|
64 | class Residue:
|
---|
65 | """An amino acid within a Protein."""
|
---|
66 |
|
---|
67 | ## Initialize the residue with the corresponding index in SMMP.
|
---|
68 | # @param id index of residue in SMMP
|
---|
69 | def __init__(self, id):
|
---|
70 | self.__id = id
|
---|
71 |
|
---|
72 | def atoms(self):
|
---|
73 | """Returns the list of atoms belonging to this residue."""
|
---|
74 | first = smmp.res_i.iatrs1[self.__id]
|
---|
75 | last = smmp.res_i.iatrs2[self.__id]
|
---|
76 | res = [Atom(i) for i in range(first - 1, last - 1)]
|
---|
77 | return res
|
---|
78 |
|
---|
79 |
|
---|
80 | ## Proteins are constructed from a sequence file and a configuration file or
|
---|
81 | # read from a PDB file.
|
---|
82 | class Protein:
|
---|
83 | """A protein is a polypeptide made of the 20 naturally occuring amino
|
---|
84 | acids.
|
---|
85 | """
|
---|
86 |
|
---|
87 | def __init__(self, seq, var=' '):
|
---|
88 | """The structure of the protein is read in from a file.
|
---|
89 | Pass the name of a textfile with a sequence string and--optionally--the name of a
|
---|
90 | variable file that describes the conformation. Or give the name of pdb
|
---|
91 | file---it must end with 'pdb'---that contains both information."""
|
---|
92 |
|
---|
93 | grpn = 'nh2'
|
---|
94 | grpc = 'cooh'
|
---|
95 | prevID = smmp.mol_i.ntlml - 1
|
---|
96 | if seq[-3:].lower() == 'pdb':
|
---|
97 | if smmp.mol_i.ntlml:
|
---|
98 | print "Sorry, but I can only read the first protein from a pdb file."
|
---|
99 | raise ValueError
|
---|
100 | smmp.init_molecule(0, grpn, grpc, seq)
|
---|
101 | else:
|
---|
102 | smmp.init_molecule(1, grpn, grpc, seq, var)
|
---|
103 | self.__id = smmp.mol_i.ntlml - 1
|
---|
104 | if self.__id - prevID > 1:
|
---|
105 | print "Sorry, I don't know how to deal with multiple proteins in one sequence file, yet."
|
---|
106 | raise NotImplementedError
|
---|
107 |
|
---|
108 | def setOrigin(self, x, a):
|
---|
109 | """Sets the position of the N terminal of the protein in the global
|
---|
110 | coordinate system and the orientation of the protein. Expects two
|
---|
111 | tuples. The first contains the coordinates of the first atom (x, y,
|
---|
112 | and z) and the second contains three angles.
|
---|
113 | """
|
---|
114 | #TODO: Implement type checking
|
---|
115 | smmp.mol_r.gbpr[0][self.__id] = x[0]
|
---|
116 | smmp.mol_r.gbpr[1][self.__id] = x[1]
|
---|
117 | smmp.mol_r.gbpr[2][self.__id] = x[2]
|
---|
118 | smmp.mol_r.gbpr[3][self.__id] = a[0]
|
---|
119 | smmp.mol_r.gbpr[4][self.__id] = a[1]
|
---|
120 | smmp.mol_r.gbpr[5][self.__id] = a[2]
|
---|
121 |
|
---|
122 | def origin(self):
|
---|
123 | r = [smmp.mol_r.gbpr[i][self.__id] for i in range(0, 3)]
|
---|
124 | a = [smmp.mol_r.gbpr[i][self.__id] for i in range(3, 6)]
|
---|
125 | return r, a
|
---|
126 |
|
---|
127 | def getDihedral(self, angle, residue):
|
---|
128 | try:
|
---|
129 | res = int(residue)
|
---|
130 | idx = res
|
---|
131 | except ValueError:
|
---|
132 | res = residue.split()
|
---|
133 | res[0] = res[0].lower()
|
---|
134 | res[1] = int(res[1])
|
---|
135 | idx = self.findResidue(res[0], res[1])
|
---|
136 | varIdx = ''
|
---|
137 | for i in range(smmp.res_i.ivrrs1[idx], smmp.res_i.ivrrs1[idx] + smmp.res_i.nvrrs[idx]):
|
---|
138 | varName = ''.join(list([str(smmp.var_c.nmvr[j][i]) for j in range(0, 3)]))
|
---|
139 | if varName == angle:
|
---|
140 | varIdx = i
|
---|
141 | break
|
---|
142 | if varIdx:
|
---|
143 | return smmp.var_r.vlvr[varIdx] * crd
|
---|
144 | raise KeyError, '%s not found for %s.' % (angle, residue)
|
---|
145 |
|
---|
146 | ## Residues are given by their 3-letter abbreviation and their index.
|
---|
147 | # For example, findResidue('gly', 5) returns the index of the fifth occurence
|
---|
148 | # of glycine in the protein. If the residue is not found an exception is raised.
|
---|
149 | #
|
---|
150 | # @param name 3-letter abbreviation of the residue
|
---|
151 | # @param idx index of occurence of the amino acid given in name.
|
---|
152 |
|
---|
153 | def findResidue(self, name, idx = 1):
|
---|
154 | """Find the idxth residue of type name and return the index."""
|
---|
155 | res = ''.join(self.residues()).lower()
|
---|
156 | name = name.lower()
|
---|
157 | j = 0
|
---|
158 | try:
|
---|
159 | for i in range(0, idx):
|
---|
160 | k = res.index(name, j)
|
---|
161 | j = j + k
|
---|
162 | except:
|
---|
163 | raise KeyError, "%s%s not found." % (name, idx)
|
---|
164 | if j >=len(res):
|
---|
165 | raise KeyError, "%s%s not found." % (name, idx)
|
---|
166 | return j / 3
|
---|
167 |
|
---|
168 | def atoms(self):
|
---|
169 | """Returns the list of atoms belonging to this protein."""
|
---|
170 | first = smmp.res_i.iatrs1[smmp.mol_i.irsml1[self.__id] - 1]
|
---|
171 | last = smmp.res_i.iatrs2[smmp.mol_i.irsml2[self.__id] - 1]
|
---|
172 | res = [Atom(i) for i in range(first - 1, last - 1)]
|
---|
173 | return res
|
---|
174 |
|
---|
175 | def residues(self):
|
---|
176 | """Returns a list of the amino acids in this protein."""
|
---|
177 | seq = [''.join(l) for l in np.array([str(c[0]) for c in smmp.res_c.seq]).reshape([-1, 4])]
|
---|
178 | first = smmp.mol_i.irsml1[self.__id]- 1
|
---|
179 | last = smmp.mol_i.irsml2[self.__id] - 1
|
---|
180 | return seq[first:last]
|
---|
181 |
|
---|
182 | def id(self):
|
---|
183 | return self.__id
|
---|
184 |
|
---|
185 | def strand(self):
|
---|
186 | """Returns the number of residues in a beta-strand-like configuration."""
|
---|
187 | maxDev = 30.0
|
---|
188 | phiSheet = -150.0
|
---|
189 | psiSheet = 150.0
|
---|
190 | strandCount = 0
|
---|
191 | ifivr = smmp.mol_i.ivrml1[self.__id]
|
---|
192 | i = 0
|
---|
193 | iv = smmp.var_i.idvr[i]
|
---|
194 | while iv < ifivr:
|
---|
195 | i += 1
|
---|
196 | iv = smmp.var_i.idvr[i]
|
---|
197 | for j in range(i, ifivr + smmp.mol_i.nvrml[self.__id]):
|
---|
198 | iv = smmp.var_i.idvr[j] - 1
|
---|
199 | name =''. join([str(smmp.var_c.nmvr[iv * 3 + k][0]) for k in range(0,3)])
|
---|
200 | if name == 'phi':
|
---|
201 | phi = smmp.var_r.vlvr[iv] * crd
|
---|
202 | psi = smmp.var_r.vlvr[smmp.var_i.idvr[j+1] - 1] * crd
|
---|
203 | if abs(phi - phiSheet) < maxDev and abs(psi - psiSheet) < maxDev:
|
---|
204 | strandCount += 1
|
---|
205 | return strandCount
|
---|
206 |
|
---|
207 | def directionVector(self):
|
---|
208 | """Calculates the normalized end-to-end vector for this molecule.
|
---|
209 | The first and last c_alpha carbon is used as reference."""
|
---|
210 | atoms = self.atoms()
|
---|
211 | beginning = []
|
---|
212 | for a in atoms:
|
---|
213 | if a.name().strip() == 'ca':
|
---|
214 | if beginning:
|
---|
215 | end = a.position()
|
---|
216 | else:
|
---|
217 | beginning = a.position()
|
---|
218 | res = [end[i] - beginning[i] for i in range(0,3)]
|
---|
219 | sum = sqrt(reduce(lambda x, y: x + y ** 2, res))
|
---|
220 | res = map(lambda x: x / sum, res)
|
---|
221 | return res
|
---|
222 |
|
---|
223 | def contacts(self):
|
---|
224 | """Returns a triplet containing the total number of contacts, the number
|
---|
225 | of native contacts, and the hemming distance. The latter two quantities
|
---|
226 | are only meaningful if a reference structure has been defined.
|
---|
227 | """
|
---|
228 | return smmp.contacts()
|
---|
229 |
|
---|
230 | def hbond(self):
|
---|
231 | return smmp.hbond(self.__id + 1, 0)
|
---|
232 |
|
---|
233 | def energy(self):
|
---|
234 | return smmp.energy()
|
---|
235 |
|
---|
236 | def rgyr(self):
|
---|
237 | return smmp.rgyr(self.__id + 1)
|
---|
238 |
|
---|
239 | def rmsdfun(self):
|
---|
240 | return smmp.rmsdfun()
|
---|
241 |
|
---|
242 | def savePDB(self, file=""):
|
---|
243 | """Write the current configuration to file using PDB formatting."""
|
---|
244 | smmp.outpdb(self.__id, file)
|
---|
245 |
|
---|
246 | def saveVariables(self, file=""):
|
---|
247 | """Output the current conformation in the variable format. The output
|
---|
248 | can be read in again and used to restart/continue a simulation.
|
---|
249 |
|
---|
250 | @param file if empty write to standard out
|
---|
251 | """
|
---|
252 | # Same functionality as smmp.outvar(self.__id + 1, 0)
|
---|
253 | if not file:
|
---|
254 | out = sys.stdout
|
---|
255 | else:
|
---|
256 | out = open(file, 'w')
|
---|
257 | for i in range(smmp.mol_i.ivrml1[self.__id] - 1 , smmp.mol_i.ivrml1[self.__id] + smmp.mol_i.nvrml[self.__id] - 1):
|
---|
258 | if not smmp.var_l.fxvr[i]:
|
---|
259 | name = ''.join([str(smmp.var_c.nmvr[i * 3 + j][0]) for j in range(0,3)])
|
---|
260 | print >>out, nursvr(i), ':', name, ':', smmp.var_r.vlvr[i] * crd
|
---|
261 | else:
|
---|
262 | it=smmp.var_i.ityvr[i]
|
---|
263 | if it == 3:
|
---|
264 | print >>out, nursvr(i), ':', name, ':', smmp.var_r.vlvr[i] * crd, ' &'
|
---|
265 |
|
---|
266 | def zimmer(self):
|
---|
267 | first = smmp.mol_i.irsml1[self.__id] - 1
|
---|
268 | last = smmp.mol_i.irsml2[self.__id]
|
---|
269 | nrs = smmp.mol_i.irsml2[smmp.mol_i.ntlml - 1] - 1
|
---|
270 | smmp.zimmer(last)
|
---|
271 | return smmp.zimme.zimm[first:last]
|
---|
272 |
|
---|
273 | def __len__(self):
|
---|
274 | return smmp.mol_i.irsml2[self.__id] - smmp.mol_i.irsml1[self.__id] + 1
|
---|
275 |
|
---|
276 |
|
---|
277 | if __name__ == "__main__":
|
---|
278 | libdir = 'SMMP/'
|
---|
279 |
|
---|
280 | smmp.epar_l.flex = 0
|
---|
281 | smmp.epar_l.sh2 = 0
|
---|
282 | smmp.epar_l.epsd = 0
|
---|
283 | smmp.epar_l.tesgrd = 0
|
---|
284 | smmp.isolty.itysol = 0
|
---|
285 | smmp.init_energy(libdir)
|
---|
286 |
|
---|
287 | p1 = Protein('EXAMPLES/1bdd.pdb')
|
---|
288 | for i in p1.atoms():
|
---|
289 | print i
|
---|
290 | print p1.getDihedral('phi', 11)
|
---|