1 | #!/usr/bin/env python
|
---|
2 | # protein.py
|
---|
3 | #
|
---|
4 | # Copyright 2007 Frank Eisenmenger, U.H.E. Hansmann,
|
---|
5 | # Jan H. Meinke, Sandipan Mohanty
|
---|
6 | #
|
---|
7 | import sys
|
---|
8 | import smmp
|
---|
9 | from math import *
|
---|
10 |
|
---|
11 | crd = 57.2957795130823
|
---|
12 |
|
---|
13 | def nursvr(ivr):
|
---|
14 | res = 0
|
---|
15 | for i in range(smmp.mol_i.ntlml - 1, -1, -1):
|
---|
16 | ifirs = smmp.mol_i.irsml1[i] - 1
|
---|
17 | if ivr + 1 > smmp.res_i.ivrrs1[ifirs] and smmp.mol_i.nvrml[i] > 0:
|
---|
18 | for j in range(smmp.mol_i.irsml2[i] - 1, ifirs - 1, -1):
|
---|
19 | if ivr + 1 > smmp.res_i.ivrrs1[j] and smmp.res_i.nvrrs[j] > 0:
|
---|
20 | res = j + 1
|
---|
21 | return res
|
---|
22 | return res
|
---|
23 |
|
---|
24 | ## Provides read-only access to SMMP's atoms. Usually, you won't initialize
|
---|
25 | # an Atom directly since you need to know the internal array index of the
|
---|
26 | # Atom in SMMP. You can get a list of all the Atoms in a Protein by using
|
---|
27 | # protein.Protein.getAtoms().
|
---|
28 | # Various properties are accessible. See the functions' documentations for
|
---|
29 | # details.
|
---|
30 | class Atom:
|
---|
31 | """Atoms are the basic chemical units."""
|
---|
32 | #TODO: insert namespace
|
---|
33 |
|
---|
34 | ## Initialize the atom with its ID. The ID is the array index of the atom
|
---|
35 | # in SMMP.
|
---|
36 | # @param id index of atom in SMMP
|
---|
37 | def __init__(self, id):
|
---|
38 | self.__id = id
|
---|
39 |
|
---|
40 | def position(self):
|
---|
41 | """Cartesian coordinates of this atom."""
|
---|
42 | res = (smmp.atm_r.xat[self.__id], smmp.atm_r.yat[self.__id], smmp.atm_r.zat[self.__id])
|
---|
43 | return res
|
---|
44 |
|
---|
45 | def charge(self):
|
---|
46 | """Partial charge of this atom."""
|
---|
47 | return smmp.atm_r.cgat[self.__id]
|
---|
48 |
|
---|
49 | def radius(self):
|
---|
50 | """Van-der-Waals Radius of this atom."""
|
---|
51 | return smmp.solvent.rvdw[self.__id]
|
---|
52 |
|
---|
53 | def name(self):
|
---|
54 | return ''.join([smmp.atm_c.nmat[i][self.__id] for i in range(0,4)]).strip()
|
---|
55 |
|
---|
56 | def __str__(self):
|
---|
57 | return "%s at (%s, %s, %s)" % (self.name(), self.position()[0], self.position()[1], self.position()[2])
|
---|
58 |
|
---|
59 | ## Amino acids bound within a protein are often called residues. Strictly
|
---|
60 | # speaking the residue is the part that's connected to the Calpha and not part
|
---|
61 | # of the backbone, but we'll include the backbone atoms as well. Just like Atoms
|
---|
62 | # Residues reference the corresponding data in SMMP and are not independent.
|
---|
63 | class Residue:
|
---|
64 | """An amino acid within a Protein."""
|
---|
65 |
|
---|
66 | ## Initialize the residue with the corresponding index in SMMP.
|
---|
67 | # @param id index of residue in SMMP
|
---|
68 | def __init__(self, id):
|
---|
69 | self.__id = id
|
---|
70 |
|
---|
71 | def atoms(self):
|
---|
72 | """Returns the list of atoms belonging to this residue."""
|
---|
73 | first = smmp.res_i.iatrs1[self.__id]
|
---|
74 | last = smmp.res_i.iatrs2[self.__id]
|
---|
75 | res = [Atom(i) for i in range(first - 1, last - 1)]
|
---|
76 | return res
|
---|
77 |
|
---|
78 |
|
---|
79 | ## Proteins are constructed from a sequence file and a configuration file or
|
---|
80 | # read from a PDB file.
|
---|
81 | class Protein:
|
---|
82 | """A protein is a polypeptide made of the 20 naturally occuring amino
|
---|
83 | acids.
|
---|
84 | """
|
---|
85 |
|
---|
86 | def __init__(self, seq, var=' '):
|
---|
87 | """The structure of the protein is read in from a file.
|
---|
88 | Pass the name of a textfile with a sequence string and--optionally--the name of a
|
---|
89 | variable file that describes the conformation. Or give the name of pdb
|
---|
90 | file---it must end with 'pdb'---that contains both information."""
|
---|
91 |
|
---|
92 | grpn = 'nh2'
|
---|
93 | grpc = 'cooh'
|
---|
94 | prevID = smmp.mol_i.ntlml - 1
|
---|
95 | if seq[-3:].lower() == 'pdb':
|
---|
96 | if smmp.mol_i.ntlml:
|
---|
97 | print "Sorry, but I can only read the first protein from a pdb file."
|
---|
98 | raise ValueError
|
---|
99 | smmp.init_molecule(0, grpn, grpc, seq)
|
---|
100 | else:
|
---|
101 | smmp.init_molecule(1, grpn, grpc, seq, var)
|
---|
102 | self.__id = smmp.mol_i.ntlml - 1
|
---|
103 | if self.__id - prevID > 1:
|
---|
104 | print "Sorry, I don't know how to deal with multiple proteins in one sequence file, yet."
|
---|
105 | raise NotImplementedError
|
---|
106 |
|
---|
107 | def setOrigin(self, x, a):
|
---|
108 | """Sets the position of the N terminal of the protein in the global
|
---|
109 | coordinate system and the orientation of the protein. Expects two
|
---|
110 | tuples. The first contains the coordinates of the first atom (x, y,
|
---|
111 | and z) and the second contains three angles.
|
---|
112 | """
|
---|
113 | #TODO: Implement type checking
|
---|
114 | smmp.mol_r.gbpr[0][self.__id] = x[0]
|
---|
115 | smmp.mol_r.gbpr[1][self.__id] = x[1]
|
---|
116 | smmp.mol_r.gbpr[2][self.__id] = x[2]
|
---|
117 | smmp.mol_r.gbpr[3][self.__id] = a[0]
|
---|
118 | smmp.mol_r.gbpr[4][self.__id] = a[1]
|
---|
119 | smmp.mol_r.gbpr[5][self.__id] = a[2]
|
---|
120 |
|
---|
121 | def origin(self):
|
---|
122 | r = [smmp.mol_r.gbpr[i][self.__id] for i in range(0, 3)]
|
---|
123 | a = [smmp.mol_r.gbpr[i][self.__id] for i in range(3, 6)]
|
---|
124 | return r, a
|
---|
125 |
|
---|
126 | def getDihedral(self, angle, residue):
|
---|
127 | try:
|
---|
128 | res = int(residue)
|
---|
129 | idx = res
|
---|
130 | except ValueError:
|
---|
131 | res = residue.split()
|
---|
132 | res[0] = res[0].lower()
|
---|
133 | res[1] = int(res[1])
|
---|
134 | idx = self.findResidue(res[0], res[1])
|
---|
135 | varIdx = ''
|
---|
136 | for i in range(smmp.res_i.ivrrs1[idx], smmp.res_i.ivrrs1[idx] + smmp.res_i.nvrrs[idx]):
|
---|
137 | varName = ''.join(list([str(smmp.var_c.nmvr[j][i]) for j in range(0, 3)]))
|
---|
138 | if varName == angle:
|
---|
139 | varIdx = i
|
---|
140 | break
|
---|
141 | if varIdx:
|
---|
142 | return smmp.var_r.vlvr[varIdx] * crd
|
---|
143 | raise KeyError, '%s not found for %s.' % (angle, residue)
|
---|
144 |
|
---|
145 | ## Residues are given by their 3-letter abbreviation and their index.
|
---|
146 | # For example, findResidue('gly', 5) returns the index of the fifth occurence
|
---|
147 | # of glycine in the protein. If the residue is not found an exception is raised.
|
---|
148 | #
|
---|
149 | # @param name 3-letter abbreviation of the residue
|
---|
150 | # @param idx index of occurence of the amino acid given in name.
|
---|
151 |
|
---|
152 | def findResidue(self, name, idx = 1):
|
---|
153 | """Find the idxth residue of type name and return the index."""
|
---|
154 | res = ''.join(self.residues()).lower()
|
---|
155 | name = name.lower()
|
---|
156 | j = 0
|
---|
157 | try:
|
---|
158 | for i in range(0, idx):
|
---|
159 | k = res.index(name, j)
|
---|
160 | j = j + k
|
---|
161 | except:
|
---|
162 | raise KeyError, "%s%s not found." % (name, idx)
|
---|
163 | if j >=len(res):
|
---|
164 | raise KeyError, "%s%s not found." % (name, idx)
|
---|
165 | return j / 3
|
---|
166 |
|
---|
167 | def atoms(self):
|
---|
168 | """Returns the list of atoms belonging to this protein."""
|
---|
169 | first = smmp.res_i.iatrs1[smmp.mol_i.irsml1[self.__id] - 1]
|
---|
170 | last = smmp.res_i.iatrs2[smmp.mol_i.irsml2[self.__id] - 1]
|
---|
171 | res = [Atom(i) for i in range(first - 1, last - 1)]
|
---|
172 | return res
|
---|
173 |
|
---|
174 | def residues(self):
|
---|
175 | """Returns a list of the amino acids in this protein."""
|
---|
176 | seq = [str(c[0]) for c in smmp.res_c.seq]
|
---|
177 | first = smmp.mol_i.irsml1[self.__id]- 1
|
---|
178 | last = smmp.mol_i.irsml2[self.__id] - 1
|
---|
179 | return ''.join(seq).split()[first:last]
|
---|
180 |
|
---|
181 | def id(self):
|
---|
182 | return self.__id
|
---|
183 |
|
---|
184 | def strand(self):
|
---|
185 | """Returns the number of residues in a beta-strand-like configuration."""
|
---|
186 | maxDev = 30.0
|
---|
187 | phiSheet = -150.0
|
---|
188 | psiSheet = 150.0
|
---|
189 | strandCount = 0
|
---|
190 | ifivr = smmp.mol_i.ivrml1[self.__id]
|
---|
191 | i = 0
|
---|
192 | iv = smmp.var_i.idvr[i]
|
---|
193 | while iv < ifivr:
|
---|
194 | i += 1
|
---|
195 | iv = smmp.var_i.idvr[i]
|
---|
196 | for j in range(i, ifivr + smmp.mol_i.nvrml[self.__id]):
|
---|
197 | iv = smmp.var_i.idvr[j] - 1
|
---|
198 | name =''. join([str(smmp.var_c.nmvr[iv * 3 + k][0]) for k in range(0,3)])
|
---|
199 | if name == 'phi':
|
---|
200 | phi = smmp.var_r.vlvr[iv] * crd
|
---|
201 | psi = smmp.var_r.vlvr[smmp.var_i.idvr[j+1] - 1] * crd
|
---|
202 | if abs(phi - phiSheet) < maxDev and abs(psi - psiSheet) < maxDev:
|
---|
203 | strandCount += 1
|
---|
204 | return strandCount
|
---|
205 |
|
---|
206 | def directionVector(self):
|
---|
207 | """Calculates the normalized end-to-end vector for this molecule.
|
---|
208 | The first and last c_alpha carbon is used as reference."""
|
---|
209 | atoms = self.atoms()
|
---|
210 | beginning = []
|
---|
211 | for a in atoms:
|
---|
212 | if a.name().strip() == 'ca':
|
---|
213 | if beginning:
|
---|
214 | end = a.position()
|
---|
215 | else:
|
---|
216 | beginning = a.position()
|
---|
217 | res = [end[i] - beginning[i] for i in range(0,3)]
|
---|
218 | sum = sqrt(reduce(lambda x, y: x + y ** 2, res))
|
---|
219 | res = map(lambda x: x / sum, res)
|
---|
220 | return res
|
---|
221 |
|
---|
222 | def contacts(self):
|
---|
223 | """Returns a triplet containing the total number of contacts, the number
|
---|
224 | of native contacts, and the hemming distance. The latter two quantities
|
---|
225 | are only meaningful if a reference structure has been defined.
|
---|
226 | """
|
---|
227 | return smmp.contacts()
|
---|
228 |
|
---|
229 | def hbond(self):
|
---|
230 | return smmp.hbond(self.__id + 1, 0)
|
---|
231 |
|
---|
232 | def energy(self):
|
---|
233 | return smmp.energy()
|
---|
234 |
|
---|
235 | def rgyr(self):
|
---|
236 | return smmp.rgyr(self.__id + 1)
|
---|
237 |
|
---|
238 | def rmsdfun(self):
|
---|
239 | return smmp.rmsdfun()
|
---|
240 |
|
---|
241 | def savePDB(self, file=""):
|
---|
242 | """Write the current configuration to file using PDB formatting."""
|
---|
243 | smmp.outpdb(self.__id, file)
|
---|
244 |
|
---|
245 | def saveVariables(self, file=""):
|
---|
246 | """Output the current conformation in the variable format. The output
|
---|
247 | can be read in again and used to restart/continue a simulation.
|
---|
248 |
|
---|
249 | @param file if empty write to standard out
|
---|
250 | """
|
---|
251 | # Same functionality as smmp.outvar(self.__id + 1, 0)
|
---|
252 | if not file:
|
---|
253 | out = sys.stdout
|
---|
254 | else:
|
---|
255 | out = open(file, 'w')
|
---|
256 | for i in range(smmp.mol_i.ivrml1[self.__id] - 1 , smmp.mol_i.ivrml1[self.__id] + smmp.mol_i.nvrml[self.__id] - 1):
|
---|
257 | if not smmp.var_l.fxvr[i]:
|
---|
258 | name = ''.join([str(smmp.var_c.nmvr[i * 3 + j][0]) for j in range(0,3)])
|
---|
259 | print >>out, nursvr(i), ':', name, ':', smmp.var_r.vlvr[i] * crd
|
---|
260 | else:
|
---|
261 | it=smmp.var_i.ityvr[i]
|
---|
262 | if it == 3:
|
---|
263 | print >>out, nursvr(i), ':', name, ':', smmp.var_r.vlvr[i] * crd, ' &'
|
---|
264 |
|
---|
265 | def zimmer(self):
|
---|
266 | first = smmp.mol_i.irsml1[self.__id] - 1
|
---|
267 | last = smmp.mol_i.irsml2[self.__id]
|
---|
268 | nrs = smmp.mol_i.irsml2[smmp.mol_i.ntlml - 1] - 1
|
---|
269 | smmp.zimmer(last)
|
---|
270 | return smmp.zimme.zimm[first:last]
|
---|
271 |
|
---|
272 | def __len__(self):
|
---|
273 | return smmp.mol_i.irsml2[self.__id] - smmp.mol_i.irsml1[self.__id] + 1
|
---|
274 |
|
---|
275 |
|
---|
276 | if __name__ == "__main__":
|
---|
277 | libdir = 'SMMP/'
|
---|
278 |
|
---|
279 | smmp.epar_l.flex = 0
|
---|
280 | smmp.epar_l.sh2 = 0
|
---|
281 | smmp.epar_l.epsd = 0
|
---|
282 | smmp.epar_l.tesgrd = 0
|
---|
283 | smmp.isolty.itysol = 0
|
---|
284 | smmp.init_energy(libdir)
|
---|
285 |
|
---|
286 | p1 = Protein('EXAMPLES/1bdd.pdb')
|
---|
287 | for i in p1.atoms():
|
---|
288 | print i
|
---|
289 | print p1.getDihedral('phi', 11)
|
---|