[e40e335] | 1 | #!/usr/bin/env python
|
---|
| 2 | # protein.py
|
---|
| 3 | #
|
---|
| 4 | # Copyright 2007 Frank Eisenmenger, U.H.E. Hansmann,
|
---|
| 5 | # Jan H. Meinke, Sandipan Mohanty
|
---|
| 6 | #
|
---|
| 7 | import sys
|
---|
[3fbbfbb] | 8 | import smmp_p as smmp
|
---|
[e40e335] | 9 | from math import *
|
---|
| 10 |
|
---|
| 11 | crd = 57.2957795130823
|
---|
| 12 |
|
---|
| 13 | def nursvr(ivr):
|
---|
| 14 | res = 0
|
---|
| 15 | for i in range(smmp.mol_i.ntlml - 1, -1, -1):
|
---|
| 16 | ifirs = smmp.mol_i.irsml1[i] - 1
|
---|
| 17 | if ivr + 1 > smmp.res_i.ivrrs1[ifirs] and smmp.mol_i.nvrml[i] > 0:
|
---|
| 18 | for j in range(smmp.mol_i.irsml2[i] - 1, ifirs - 1, -1):
|
---|
| 19 | if ivr + 1 > smmp.res_i.ivrrs1[j] and smmp.res_i.nvrrs[j] > 0:
|
---|
| 20 | res = j + 1
|
---|
| 21 | return res
|
---|
| 22 | return res
|
---|
| 23 |
|
---|
| 24 | ## Provides read-only access to SMMP's atoms. Usually, you won't initialize
|
---|
| 25 | # an Atom directly since you need to know the internal array index of the
|
---|
| 26 | # Atom in SMMP. You can get a list of all the Atoms in a Protein by using
|
---|
| 27 | # protein.Protein.getAtoms().
|
---|
| 28 | # Various properties are accessible. See the functions' documentations for
|
---|
| 29 | # details.
|
---|
| 30 | class Atom:
|
---|
| 31 | """Atoms are the basic chemical units."""
|
---|
| 32 | #TODO: insert namespace
|
---|
| 33 |
|
---|
| 34 | ## Initialize the atom with its ID. The ID is the array index of the atom
|
---|
| 35 | # in SMMP.
|
---|
| 36 | # @param id index of atom in SMMP
|
---|
| 37 | def __init__(self, id):
|
---|
| 38 | self.__id = id
|
---|
| 39 |
|
---|
| 40 | def position(self):
|
---|
| 41 | """Cartesian coordinates of this atom."""
|
---|
| 42 | res = (smmp.atm_r.xat[self.__id], smmp.atm_r.yat[self.__id], smmp.atm_r.zat[self.__id])
|
---|
| 43 | return res
|
---|
| 44 |
|
---|
| 45 | def charge(self):
|
---|
| 46 | """Partial charge of this atom."""
|
---|
| 47 | return smmp.atm_r.cgat[self.__id]
|
---|
| 48 |
|
---|
| 49 | def radius(self):
|
---|
| 50 | """Van-der-Waals Radius of this atom."""
|
---|
| 51 | return smmp.solvent.rvdw[self.__id]
|
---|
| 52 |
|
---|
| 53 | def name(self):
|
---|
| 54 | return ''.join([smmp.atm_c.nmat[i][self.__id] for i in range(0,4)]).strip()
|
---|
| 55 |
|
---|
| 56 | def __str__(self):
|
---|
| 57 | return "%s at (%s, %s, %s)" % (self.name(), self.position()[0], self.position()[1], self.position()[2])
|
---|
| 58 |
|
---|
| 59 | ## Amino acids bound within a protein are often called residues. Strictly
|
---|
| 60 | # speaking the residue is the part that's connected to the Calpha and not part
|
---|
| 61 | # of the backbone, but we'll include the backbone atoms as well. Just like Atoms
|
---|
| 62 | # Residues reference the corresponding data in SMMP and are not independent.
|
---|
| 63 | class Residue:
|
---|
| 64 | """An amino acid within a Protein."""
|
---|
| 65 |
|
---|
| 66 | ## Initialize the residue with the corresponding index in SMMP.
|
---|
| 67 | # @param id index of residue in SMMP
|
---|
| 68 | def __init__(self, id):
|
---|
| 69 | self.__id = id
|
---|
| 70 |
|
---|
| 71 | def atoms(self):
|
---|
| 72 | """Returns the list of atoms belonging to this residue."""
|
---|
| 73 | first = smmp.res_i.iatrs1[self.__id]
|
---|
| 74 | last = smmp.res_i.iatrs2[self.__id]
|
---|
| 75 | res = [Atom(i) for i in range(first - 1, last - 1)]
|
---|
| 76 | return res
|
---|
| 77 |
|
---|
| 78 |
|
---|
| 79 | ## Proteins are constructed from a sequence file and a configuration file or
|
---|
| 80 | # read from a PDB file.
|
---|
| 81 | class Protein:
|
---|
| 82 | """A protein is a polypeptide made of the 20 naturally occuring amino
|
---|
| 83 | acids.
|
---|
| 84 | """
|
---|
| 85 |
|
---|
| 86 | def __init__(self, seq, var=' '):
|
---|
| 87 | """The structure of the protein is read in from a file.
|
---|
| 88 | Pass the name of a textfile with a sequence string and--optionally--the name of a
|
---|
| 89 | variable file that describes the conformation. Or give the name of pdb
|
---|
| 90 | file---it must end with 'pdb'---that contains both information."""
|
---|
| 91 |
|
---|
| 92 | grpn = 'nh2'
|
---|
| 93 | grpc = 'cooh'
|
---|
| 94 | prevID = smmp.mol_i.ntlml - 1
|
---|
| 95 | if seq[-3:].lower() == 'pdb':
|
---|
| 96 | if smmp.mol_i.ntlml:
|
---|
| 97 | print "Sorry, but I can only read the first protein from a pdb file."
|
---|
| 98 | raise ValueError
|
---|
| 99 | smmp.init_molecule(0, grpn, grpc, seq)
|
---|
| 100 | else:
|
---|
| 101 | smmp.init_molecule(1, grpn, grpc, seq, var)
|
---|
| 102 | self.__id = smmp.mol_i.ntlml - 1
|
---|
| 103 | if self.__id - prevID > 1:
|
---|
| 104 | print "Sorry, I don't know how to deal with multiple proteins in one sequence file, yet."
|
---|
| 105 | raise NotImplementedError
|
---|
| 106 |
|
---|
| 107 | def setOrigin(self, x, a):
|
---|
| 108 | """Sets the position of the N terminal of the protein in the global
|
---|
| 109 | coordinate system and the orientation of the protein. Expects two
|
---|
| 110 | tuples. The first contains the coordinates of the first atom (x, y,
|
---|
| 111 | and z) and the second contains three angles.
|
---|
| 112 | """
|
---|
| 113 | #TODO: Implement type checking
|
---|
| 114 | smmp.mol_r.gbpr[0][self.__id] = x[0]
|
---|
| 115 | smmp.mol_r.gbpr[1][self.__id] = x[1]
|
---|
| 116 | smmp.mol_r.gbpr[2][self.__id] = x[2]
|
---|
| 117 | smmp.mol_r.gbpr[3][self.__id] = a[0]
|
---|
| 118 | smmp.mol_r.gbpr[4][self.__id] = a[1]
|
---|
| 119 | smmp.mol_r.gbpr[5][self.__id] = a[2]
|
---|
| 120 |
|
---|
| 121 | def origin(self):
|
---|
| 122 | r = [smmp.mol_r.gbpr[i][self.__id] for i in range(0, 3)]
|
---|
| 123 | a = [smmp.mol_r.gbpr[i][self.__id] for i in range(3, 6)]
|
---|
| 124 | return r, a
|
---|
| 125 |
|
---|
| 126 | def getDihedral(self, angle, residue):
|
---|
| 127 | try:
|
---|
| 128 | res = int(residue)
|
---|
| 129 | idx = res
|
---|
| 130 | except ValueError:
|
---|
| 131 | res = residue.split()
|
---|
| 132 | res[0] = res[0].lower()
|
---|
| 133 | res[1] = int(res[1])
|
---|
| 134 | idx = self.findResidue(res[0], res[1])
|
---|
| 135 | varIdx = ''
|
---|
| 136 | for i in range(smmp.res_i.ivrrs1[idx], smmp.res_i.ivrrs1[idx] + smmp.res_i.nvrrs[idx]):
|
---|
| 137 | varName = ''.join(list([str(smmp.var_c.nmvr[j][i]) for j in range(0, 3)]))
|
---|
| 138 | if varName == angle:
|
---|
| 139 | varIdx = i
|
---|
| 140 | break
|
---|
| 141 | if varIdx:
|
---|
| 142 | return smmp.var_r.vlvr[varIdx] * crd
|
---|
| 143 | raise KeyError, '%s not found for %s.' % (angle, residue)
|
---|
| 144 |
|
---|
| 145 | ## Residues are given by their 3-letter abbreviation and their index.
|
---|
| 146 | # For example, findResidue('gly', 5) returns the index of the fifth occurence
|
---|
| 147 | # of glycine in the protein. If the residue is not found an exception is raised.
|
---|
| 148 | #
|
---|
| 149 | # @param name 3-letter abbreviation of the residue
|
---|
| 150 | # @param idx index of occurence of the amino acid given in name.
|
---|
| 151 |
|
---|
| 152 | def findResidue(self, name, idx = 1):
|
---|
| 153 | """Find the idxth residue of type name and return the index."""
|
---|
| 154 | res = ''.join(self.residues()).lower()
|
---|
| 155 | name = name.lower()
|
---|
| 156 | j = 0
|
---|
| 157 | try:
|
---|
| 158 | for i in range(0, idx):
|
---|
| 159 | k = res.index(name, j)
|
---|
| 160 | j = j + k
|
---|
| 161 | except:
|
---|
| 162 | raise KeyError, "%s%s not found." % (name, idx)
|
---|
| 163 | if j >=len(res):
|
---|
| 164 | raise KeyError, "%s%s not found." % (name, idx)
|
---|
| 165 | return j / 3
|
---|
| 166 |
|
---|
| 167 | def atoms(self):
|
---|
| 168 | """Returns the list of atoms belonging to this protein."""
|
---|
| 169 | first = smmp.res_i.iatrs1[smmp.mol_i.irsml1[self.__id] - 1]
|
---|
| 170 | last = smmp.res_i.iatrs2[smmp.mol_i.irsml2[self.__id] - 1]
|
---|
| 171 | res = [Atom(i) for i in range(first - 1, last - 1)]
|
---|
| 172 | return res
|
---|
| 173 |
|
---|
| 174 | def residues(self):
|
---|
| 175 | """Returns a list of the amino acids in this protein."""
|
---|
| 176 | seq = [str(c[0]) for c in smmp.res_c.seq]
|
---|
| 177 | first = smmp.mol_i.irsml1[self.__id]- 1
|
---|
| 178 | last = smmp.mol_i.irsml2[self.__id] - 1
|
---|
| 179 | return ''.join(seq).split()[first:last]
|
---|
| 180 |
|
---|
| 181 | def id(self):
|
---|
| 182 | return self.__id
|
---|
| 183 |
|
---|
| 184 | def strand(self):
|
---|
| 185 | """Returns the number of residues in a beta-strand-like configuration."""
|
---|
| 186 | maxDev = 30.0
|
---|
| 187 | phiSheet = -150.0
|
---|
| 188 | psiSheet = 150.0
|
---|
| 189 | strandCount = 0
|
---|
| 190 | ifivr = smmp.mol_i.ivrml1[self.__id]
|
---|
| 191 | i = 0
|
---|
| 192 | iv = smmp.var_i.idvr[i]
|
---|
| 193 | while iv < ifivr:
|
---|
| 194 | i += 1
|
---|
| 195 | iv = smmp.var_i.idvr[i]
|
---|
| 196 | for j in range(i, ifivr + smmp.mol_i.nvrml[self.__id]):
|
---|
| 197 | iv = smmp.var_i.idvr[j] - 1
|
---|
| 198 | name =''. join([str(smmp.var_c.nmvr[iv * 3 + k][0]) for k in range(0,3)])
|
---|
| 199 | if name == 'phi':
|
---|
| 200 | phi = smmp.var_r.vlvr[iv] * crd
|
---|
| 201 | psi = smmp.var_r.vlvr[smmp.var_i.idvr[j+1] - 1] * crd
|
---|
| 202 | if abs(phi - phiSheet) < maxDev and abs(psi - psiSheet) < maxDev:
|
---|
| 203 | strandCount += 1
|
---|
| 204 | return strandCount
|
---|
| 205 |
|
---|
| 206 | def directionVector(self):
|
---|
| 207 | """Calculates the normalized end-to-end vector for this molecule.
|
---|
| 208 | The first and last c_alpha carbon is used as reference."""
|
---|
| 209 | atoms = self.atoms()
|
---|
| 210 | beginning = []
|
---|
| 211 | for a in atoms:
|
---|
| 212 | if a.name().strip() == 'ca':
|
---|
| 213 | if beginning:
|
---|
| 214 | end = a.position()
|
---|
| 215 | else:
|
---|
| 216 | beginning = a.position()
|
---|
| 217 | res = [end[i] - beginning[i] for i in range(0,3)]
|
---|
| 218 | sum = sqrt(reduce(lambda x, y: x + y ** 2, res))
|
---|
| 219 | res = map(lambda x: x / sum, res)
|
---|
| 220 | return res
|
---|
| 221 |
|
---|
| 222 | def contacts(self):
|
---|
| 223 | """Returns a triplet containing the total number of contacts, the number
|
---|
| 224 | of native contacts, and the hemming distance. The latter two quantities
|
---|
| 225 | are only meaningful if a reference structure has been defined.
|
---|
| 226 | """
|
---|
| 227 | return smmp.contacts()
|
---|
| 228 |
|
---|
| 229 | def hbond(self):
|
---|
| 230 | return smmp.hbond(self.__id + 1, 0)
|
---|
| 231 |
|
---|
| 232 | def energy(self):
|
---|
| 233 | return smmp.energy()
|
---|
| 234 |
|
---|
| 235 | def rgyr(self):
|
---|
| 236 | return smmp.rgyr(self.__id + 1)
|
---|
| 237 |
|
---|
| 238 | def rmsdfun(self):
|
---|
| 239 | return smmp.rmsdfun()
|
---|
| 240 |
|
---|
| 241 | def savePDB(self, file=""):
|
---|
| 242 | """Write the current configuration to file using PDB formatting."""
|
---|
| 243 | smmp.outpdb(self.__id, file)
|
---|
| 244 |
|
---|
| 245 | def saveVariables(self, file=""):
|
---|
| 246 | """Output the current conformation in the variable format. The output
|
---|
| 247 | can be read in again and used to restart/continue a simulation.
|
---|
| 248 |
|
---|
| 249 | @param file if empty write to standard out
|
---|
| 250 | """
|
---|
| 251 | # Same functionality as smmp.outvar(self.__id + 1, 0)
|
---|
| 252 | if not file:
|
---|
| 253 | out = sys.stdout
|
---|
| 254 | else:
|
---|
| 255 | out = open(file, 'w')
|
---|
| 256 | for i in range(smmp.mol_i.ivrml1[self.__id] - 1 , smmp.mol_i.ivrml1[self.__id] + smmp.mol_i.nvrml[self.__id] - 1):
|
---|
| 257 | if not smmp.var_l.fxvr[i]:
|
---|
| 258 | name = ''.join([str(smmp.var_c.nmvr[i * 3 + j][0]) for j in range(0,3)])
|
---|
| 259 | print >>out, nursvr(i), ':', name, ':', smmp.var_r.vlvr[i] * crd
|
---|
| 260 | else:
|
---|
| 261 | it=smmp.var_i.ityvr[i]
|
---|
| 262 | if it == 3:
|
---|
| 263 | print >>out, nursvr(i), ':', name, ':', smmp.var_r.vlvr[i] * crd, ' &'
|
---|
| 264 |
|
---|
| 265 | def zimmer(self):
|
---|
[4e219a3] | 266 | first = smmp.mol_i.irsml1[self.__id] - 1
|
---|
| 267 | last = smmp.mol_i.irsml2[self.__id]
|
---|
| 268 | nrs = smmp.mol_i.irsml2[smmp.mol_i.ntlml - 1] - 1
|
---|
[e40e335] | 269 | smmp.zimmer(last)
|
---|
| 270 | return smmp.zimme.zimm[first:last]
|
---|
| 271 |
|
---|
| 272 | def __len__(self):
|
---|
[4e219a3] | 273 | return smmp.mol_i.irsml2[self.__id] - smmp.mol_i.irsml1[self.__id] + 1
|
---|
[e40e335] | 274 |
|
---|
| 275 |
|
---|
| 276 | if __name__ == "__main__":
|
---|
| 277 | libdir = 'SMMP/'
|
---|
| 278 |
|
---|
| 279 | smmp.epar_l.flex = 0
|
---|
| 280 | smmp.epar_l.sh2 = 0
|
---|
| 281 | smmp.epar_l.epsd = 0
|
---|
| 282 | smmp.epar_l.tesgrd = 0
|
---|
| 283 | smmp.isolty.itysol = 0
|
---|
| 284 | smmp.init_energy(libdir)
|
---|
| 285 |
|
---|
| 286 | p1 = Protein('EXAMPLES/1bdd.pdb')
|
---|
| 287 | for i in p1.atoms():
|
---|
| 288 | print i
|
---|
| 289 | print p1.getDihedral('phi', 11)
|
---|