001/* 002 * BioJava development code 003 * 004 * This code may be freely distributed and modified under the 005 * terms of the GNU Lesser General Public Licence. This should 006 * be distributed with the code. If you do not have a copy, 007 * see: 008 * 009 * http://www.gnu.org/copyleft/lesser.html 010 * 011 * Copyright for this code is held jointly by the individual 012 * authors. These should be listed in @author doc comments. 013 * 014 * For more information on the BioJava project and its aims, 015 * or to join the biojava-l mailing list, visit the home page 016 * at: 017 * 018 * http://www.biojava.org/ 019 * 020 * Created on Jan 4, 2006 021 * 022 */ 023package org.biojava.bio.structure; 024 025import java.io.StringWriter; 026import java.util.ArrayList; 027import java.util.HashMap; 028import java.util.HashSet; 029import java.util.Iterator; 030import java.util.LinkedHashSet; 031import java.util.List; 032import java.util.Map; 033import java.util.Set; 034 035import java.util.logging.Level; 036import java.util.logging.Logger; 037import java.util.regex.Matcher; 038import java.util.regex.Pattern; 039 040import org.biojava.bio.structure.align.util.AtomCache; 041 042 043/** 044 * A class that provides some tool methods. 045 * 046 * @author Andreas Prlic, Jules Jacobsen 047 * @since 1.0 048 * @version %I% %G% 049 */ 050public class StructureTools { 051 052 /** The Atom name of C-alpha atoms. 053 * 054 */ 055 public static final String caAtomName = " CA "; 056 057 public static final String nAtomName = "N"; 058 059 public static final String oAtomName = "O"; 060 061 public static final String cbAtomName = "CB"; 062 063 064 /** The names of the Atoms that form the backbone. 065 * 066 */ 067 public static final String[] backboneAtomNames = {nAtomName,caAtomName,"C",oAtomName, cbAtomName}; 068 069 public static final Character UNKNOWN_GROUP_LABEL = new Character('x');; 070 071 072 //private static final String insertionCodeRegExp = "([0-9]+)([a-zA-Z]*)"; 073 //private static final Pattern insertionCodePattern = Pattern.compile(insertionCodeRegExp); 074 075 076 // there is a file format change in PDB 3.0 and nucleotides are being renamed 077 static private Map<String, Integer> nucleotides30 ; 078 static private Map<String, Integer> nucleotides23 ; 079 080 //amino acid 3 and 1 letter code definitions 081 private static final Map<String, Character> aminoAcids; 082 083 private static final Set<Element> hBondDonorAcceptors; 084 // // for conversion 3code 1code 085 // private static SymbolTokenization threeLetter ; 086 // private static SymbolTokenization oneLetter ; 087 088 public static Logger logger = Logger.getLogger("org.biojava.bio.structure"); 089 090 /** 091 * Pattern to describe subranges. Matches "A", "A:", "A:7-53","A_7-53", etc. 092 * @see #getSubRanges(Structure, String) 093 */ 094 public static final Pattern pdbNumRangeRegex = Pattern.compile( 095 "^\\s*(\\w)" + //chain ID 096 "(?:" + //begin range, this is a "non-capturing group" 097 "(?::|_|:$|_$|$)" + //colon or underscore, could be at the end of a line, another non-capt. group. 098 "(?:"+ // another non capturing group for the residue range 099 "([-+]?[0-9]+[A-Za-z]?)" + // first residue 100 "\\s*-\\s*" + // - 101 "([-+]?[0-9]+[A-Za-z]?)" + // second residue 102 ")?+"+ 103 ")?" + //end range 104 "\\s*"); 105 106 107 static { 108 nucleotides30 = new HashMap<String,Integer>(); 109 nucleotides30.put("DA",1); 110 nucleotides30.put("DC",1); 111 nucleotides30.put("DG",1); 112 nucleotides30.put("DT",1); 113 nucleotides30.put("DI",1); 114 nucleotides30.put("A",1); 115 nucleotides30.put("G",1); 116 nucleotides30.put("C",1); 117 nucleotides30.put("U",1); 118 nucleotides30.put("I",1); 119 120 //TODO: check if they are always HETATMs, in that case this will not be necessary 121 // the DNA linkers - the +C , +G, +A +T +U and +I have been replaced with these: 122 nucleotides30.put("TAF",1); // 2'-DEOXY-2'-FLUORO-ARABINO-FURANOSYL THYMINE-5'-PHOSPHATE 123 nucleotides30.put("TC1",1); // 3-(5-PHOSPHO-2-DEOXY-BETA-D-RIBOFURANOSYL)-2-OXO-1,3-DIAZA-PHENOTHIAZINE 124 nucleotides30.put("TFE",1); // 2'-O-[2-(TRIFLUORO)ETHYL] THYMIDINE-5'-MONOPHOSPHATE 125 nucleotides30.put("TFO",1); // [2-(6-AMINO-9H-PURIN-9-YL)-1-METHYLETHOXY]METHYLPHOSPHONIC ACID" 126 nucleotides30.put("TGP",1); // 5'-THIO-2'-DEOXY-GUANOSINE PHOSPHONIC ACID 127 nucleotides30.put("THX",1); // PHOSPHONIC ACID 6-({6-[6-(6-CARBAMOYL-3,6,7,8-TETRAHYDRO-3,6-DIAZA-AS-INDACENE-2-CARBONYL)-3,6,7,8-TETRAHYDRO-3,6-DIAZA-AS-INDOCENE-2-CARBONYL]-3,6,7,8-TETRAHYDRO-3,6-DIAZA-AS-INDACENE-2-CARBONL}-AMINO)-HEXYL ESTER 5-(5-METHYL-2,4-DIOXO-3,4-DIHYDRO-2H-PYRIMIDIN-1-YL)-TETRAHYDRO-FURAN-2-YLMETHYL ESTER 128 nucleotides30.put("TLC",1); // 2-O,3-ETHDIYL-ARABINOFURANOSYL-THYMINE-5'-MONOPHOSPHATE 129 nucleotides30.put("TLN",1); // [(1R,3R,4R,7S)-7-HYDROXY-3-(THYMIN-1-YL)-2,5-DIOXABICYCLO[2.2.1]HEPT-1-YL]METHYL DIHYDROGEN PHOSPHATE" 130 nucleotides30.put("TP1",1); // 2-(METHYLAMINO)-ETHYLGLYCINE-CARBONYLMETHYLENE-THYMINE 131 nucleotides30.put("TPC",1); // 5'-THIO-2'-DEOXY-CYTOSINE PHOSPHONIC ACID 132 nucleotides30.put("TPN",1); // 2-AMINOETHYLGLYCINE-CARBONYLMETHYLENE-THYMINE 133 134 135 136 // store nucleic acids (C, G, A, T, U, and I), and 137 // the modified versions of nucleic acids (+C, +G, +A, +T, +U, and +I), and 138 nucleotides23 = new HashMap<String,Integer>(); 139 String[] names = {"C","G","A","T","U","I","+C","+G","+A","+T","+U","+I"}; 140 for (int i = 0; i < names.length; i++) { 141 String n = names[i]; 142 nucleotides23.put(n,1); 143 } 144 145 aminoAcids = new HashMap<String, Character>(); 146 aminoAcids.put("GLY", new Character('G')); 147 aminoAcids.put("ALA", new Character('A')); 148 aminoAcids.put("VAL", new Character('V')); 149 aminoAcids.put("LEU", new Character('L')); 150 aminoAcids.put("ILE", new Character('I')); 151 aminoAcids.put("PHE", new Character('F')); 152 aminoAcids.put("TYR", new Character('Y')); 153 aminoAcids.put("TRP", new Character('W')); 154 aminoAcids.put("PRO", new Character('P')); 155 aminoAcids.put("HIS", new Character('H')); 156 aminoAcids.put("LYS", new Character('K')); 157 aminoAcids.put("ARG", new Character('R')); 158 aminoAcids.put("SER", new Character('S')); 159 aminoAcids.put("THR", new Character('T')); 160 aminoAcids.put("GLU", new Character('E')); 161 aminoAcids.put("GLN", new Character('Q')); 162 aminoAcids.put("ASP", new Character('D')); 163 aminoAcids.put("ASN", new Character('N')); 164 aminoAcids.put("CYS", new Character('C')); 165 aminoAcids.put("MET", new Character('M')); 166 //MSE is only found as a molecular replacement for MET 167 aminoAcids.put("MSE", new Character('M')); 168 //'non-standard', genetically encoded 169 //http://www.chem.qmul.ac.uk/iubmb/newsletter/1999/item3.html 170 //IUBMB recommended name is 'SEC' but the wwPDB currently use 'CSE' 171 //likewise 'PYL' (IUBMB) and 'PYH' (PDB) 172 aminoAcids.put("CSE", new Character('U')); 173 aminoAcids.put("SEC", new Character('U')); 174 aminoAcids.put("PYH", new Character('O')); 175 aminoAcids.put("PYL", new Character('O')); 176 177 hBondDonorAcceptors = new HashSet<Element>(); 178 hBondDonorAcceptors.add(Element.N); 179 hBondDonorAcceptors.add(Element.O); 180 hBondDonorAcceptors.add(Element.S); 181 182 } 183 184 185 /** Count how many number of Atoms are contained within a Structure object. 186 * 187 * @param s the structure object 188 * @return the number of Atoms in this Structure 189 */ 190 public static final int getNrAtoms(Structure s){ 191 192 int nrAtoms = 0; 193 194 Iterator<Group> iter = new GroupIterator(s); 195 196 while ( iter.hasNext()){ 197 Group g = (Group) iter.next(); 198 nrAtoms += g.size(); 199 } 200 201 return nrAtoms; 202 } 203 204 205 /** Count how many groups are contained within a structure object. 206 * 207 * @param s the structure object 208 * @return the number of groups in the structure 209 */ 210 public static final int getNrGroups(Structure s){ 211 int nrGroups = 0; 212 213 List<Chain> chains = s.getChains(0); 214 Iterator<Chain> iter = chains.iterator(); 215 while (iter.hasNext()){ 216 Chain c = (Chain) iter.next(); 217 nrGroups += c.getAtomLength(); 218 } 219 return nrGroups; 220 } 221 222 223 /** Returns an array of the requested Atoms from the Structure object. Iterates over all groups 224 * and checks if the requested atoms are in this group, no matter if this is a {@link AminoAcid} or {@link HetatomImpl} group. 225 * For structures with more than one model, only model 0 will be used. 226 * 227 * @param s the structure to get the atoms from 228 * 229 * @param atomNames contains the atom names to be used. 230 * @return an Atom[] array 231 */ 232 public static final Atom[] getAtomArray(Structure s, String[] atomNames){ 233 List<Chain> chains = s.getModel(0); 234 235 List<Atom> atoms = new ArrayList<Atom>(); 236 237 extractCAatoms(atomNames, chains, atoms); 238 return (Atom[]) atoms.toArray(new Atom[atoms.size()]); 239 240 } 241 242 /** Returns an array of the requested Atoms from the Structure object. 243 * In contrast to {@link #getAtomArray(Structure, String[])} this method iterates over all chains. 244 * Iterates over all chains and groups 245 * and checks if the requested atoms are in this group, no matter if this is a {@link AminoAcid} or {@link HetatomImpl} group. 246 * For structures with more than one model, only model 0 will be used. 247 * 248 * @param s the structure to get the atoms from 249 * 250 * @param atomNames contains the atom names to be used. 251 * @return an Atom[] array 252 */ 253 public static final Atom[] getAtomArrayAllModels(Structure s, String[] atomNames){ 254 255 List<Atom> atoms = new ArrayList<Atom>(); 256 257 for (int i =0 ; i < s.nrModels(); i++ ) { 258 List<Chain> chains = s.getModel(i); 259 extractCAatoms(atomNames, chains, atoms); 260 } 261 return (Atom[]) atoms.toArray(new Atom[atoms.size()]); 262 263 } 264 265 266 267 private static void extractCAatoms(String[] atomNames, List<Chain> chains, 268 List<Atom> atoms) { 269 for ( Chain c : chains) { 270 271 for ( Group g : c.getAtomGroups()) { 272 273 // a temp container for the atoms of this group 274 List<Atom> thisGroupAtoms = new ArrayList<Atom>(); 275 // flag to check if this group contains all the requested atoms. 276 boolean thisGroupAllAtoms = true; 277 for ( int i = 0 ; i < atomNames.length; i++){ 278 String atomName = atomNames[i]; 279 try { 280 Atom a = g.getAtom(atomName); 281 thisGroupAtoms.add(a); 282 } catch (StructureException e){ 283 // this group does not have a required atom, skip it... 284 thisGroupAllAtoms = false; 285 break; 286 } 287 } 288 if ( thisGroupAllAtoms){ 289 // add the atoms of this group to the array. 290 Iterator<Atom> aIter = thisGroupAtoms.iterator(); 291 while(aIter.hasNext()){ 292 Atom a = (Atom) aIter.next(); 293 atoms.add(a); 294 } 295 } 296 297 } 298 } 299 } 300 301 /** Returns an array of the requested Atoms from the Structure object. Iterates over all groups 302 * and checks if the requested atoms are in this group, no matter if this is a AminoAcid or Hetatom group. 303 * 304 * 305 * @param c the Chain to get the atoms from 306 * 307 * @param atomNames contains the atom names to be used. 308 * @return an Atom[] array 309 */ 310 public static final Atom[] getAtomArray(Chain c, String[] atomNames){ 311 312 List<Group> groups = c.getAtomGroups(); 313 314 List<Atom> atoms = new ArrayList<Atom>(); 315 316 for (Group g : groups){ 317 318 // a temp container for the atoms of this group 319 List<Atom> thisGroupAtoms = new ArrayList<Atom>(); 320 // flag to check if this group contains all the requested atoms. 321 boolean thisGroupAllAtoms = true; 322 for ( int i = 0 ; i < atomNames.length; i++){ 323 String atomName = atomNames[i]; 324 try { 325 Atom a = g.getAtom(atomName); 326 thisGroupAtoms.add(a); 327 } catch (StructureException e){ 328 // this group does not have a required atom, skip it... 329 thisGroupAllAtoms = false; 330 break; 331 } 332 } 333 if ( thisGroupAllAtoms){ 334 // add the atoms of this group to the array. 335 Iterator<Atom> aIter = thisGroupAtoms.iterator(); 336 while(aIter.hasNext()){ 337 Atom a = (Atom) aIter.next(); 338 atoms.add(a); 339 } 340 } 341 342 } 343 return (Atom[]) atoms.toArray(new Atom[atoms.size()]); 344 345 } 346 347 /** Returns an Atom array of the CA atoms. 348 * @param c the structure object 349 * @return an Atom[] array 350 */ 351 public static final Atom[] getAtomCAArray(Chain c){ 352 String[] atomNames = {" CA " }; 353 return getAtomArray(c,atomNames); 354 } 355 356 /** Provides an equivalent copy of Atoms in a new array. Clones everything, starting with parent 357 * groups and chains. The chain will only contain groups that are part of the CA array. 358 * 359 * @param ca array of CA atoms 360 * @return Atom array 361 */ 362 public static final Atom[] cloneCAArray(Atom[] ca) throws StructureException{ 363 Atom[] newCA = new Atom[ca.length]; 364 365 List<Chain> model = new ArrayList<Chain>(); 366 int apos = -1; 367 for(Atom a: ca){ 368 apos++; 369 Group parentG = a.getGroup(); 370 Chain parentC = parentG.getChain(); 371 372 Chain newChain = null; 373 for ( Chain c : model){ 374 if ( c.getChainID().equals(parentC.getChainID())){ 375 newChain = c; 376 break; 377 } 378 } 379 if ( newChain == null){ 380 newChain = new ChainImpl(); 381 newChain.setChainID(parentC.getChainID()); 382 model.add(newChain); 383 } 384 385 Group parentN = (Group)parentG.clone(); 386 387 newCA[apos] = parentN.getAtom(" CA "); 388 newChain.addGroup(parentN); 389 } 390 return newCA; 391 } 392 393 /** Clone a set of CA Atoms, but returns the parent groups 394 * 395 * @param ca Atom array 396 * @return Group array 397 */ 398 public static Group[] cloneGroups(Atom[] ca) { 399 Group[] newGroup = new Group[ca.length]; 400 401 List<Chain> model = new ArrayList<Chain>(); 402 int apos = -1; 403 for(Atom a: ca){ 404 apos++; 405 Group parentG = a.getGroup(); 406 Chain parentC = parentG.getChain(); 407 408 Chain newChain = null; 409 for ( Chain c : model){ 410 if ( c.getChainID().equals(parentC.getChainID())){ 411 newChain = c; 412 break; 413 } 414 } 415 if ( newChain == null){ 416 newChain = new ChainImpl(); 417 newChain.setChainID(parentC.getChainID()); 418 model.add(newChain); 419 } 420 421 Group ng = (Group)parentG.clone(); 422 newGroup[apos] = ng; 423 newChain.addGroup(ng); 424 } 425 return newGroup; 426 } 427 428 /** Utility method for working with circular permutations. Creates a duplicated and cloned set of Calpha atoms from the input array. 429 * 430 * @param ca2 atom array 431 * @return cloned and duplicated set of input array 432 * @throws StructureException 433 */ 434 public static Atom[] duplicateCA2(Atom[] ca2) throws StructureException{ 435 // we don't want to rotate input atoms, do we? 436 Atom[] ca2clone = new Atom[ca2.length*2]; 437 438 int pos = 0; 439 440 Chain c = null; 441 String prevChainId = ""; 442 for (Atom a : ca2){ 443 Group g = (Group) a.getGroup().clone(); // works because each group has only a CA atom 444 445 if (c == null ) { 446 c = new ChainImpl(); 447 Chain orig= a.getGroup().getChain(); 448 c.setChainID(orig.getChainID()); 449 } else { 450 Chain orig= a.getGroup().getChain(); 451 if ( ! orig.getChainID().equals(prevChainId)){ 452 c = new ChainImpl(); 453 c.setChainID(orig.getChainID()); 454 } 455 } 456 457 c.addGroup(g); 458 ca2clone[pos] = g.getAtom(StructureTools.caAtomName); 459 460 pos++; 461 } 462 463 // Duplicate ca2! 464 c = null; 465 prevChainId = ""; 466 for (Atom a : ca2){ 467 Group g = (Group)a.getGroup().clone(); 468 469 if (c == null ) { 470 c = new ChainImpl(); 471 Chain orig= a.getGroup().getChain(); 472 c.setChainID(orig.getChainID()); 473 } else { 474 Chain orig= a.getGroup().getChain(); 475 if ( ! orig.getChainID().equals(prevChainId)){ 476 c = new ChainImpl(); 477 c.setChainID(orig.getChainID()); 478 } 479 } 480 481 c.addGroup(g); 482 ca2clone[pos] = g.getAtom(StructureTools.caAtomName); 483 484 pos++; 485 } 486 487 return ca2clone; 488 489 } 490 491 492 493 /** Returns an Atom array of the CA atoms. 494 * @param s the structure object 495 * @return an Atom[] array 496 */ 497 public static Atom[] getAtomCAArray(Structure s){ 498 String[] atomNames = {" CA "}; 499 return getAtomArray(s,atomNames); 500 } 501 502 /** Returns an Atom array of the MainChain atoms. 503 504 * @param s the structure object 505 * @return an Atom[] array 506 */ 507 public static Atom[] getBackboneAtomArray(Structure s){ 508 String[] atomNames = backboneAtomNames; 509 return getAtomArray(s,atomNames); 510 } 511 512 513 /** convert three character amino acid codes into single character 514 * e.g. convert CYS to C 515 * @return a character 516 * @param code3 a three character amino acid representation String 517 * @throws IllegalSymbolException 518 */ 519 520 public static final Character convert_3code_1code(String code3) 521 throws UnknownPdbAminoAcidException { 522 // { 523 // Symbol sym = threeLetter.parseToken(code3) ; 524 // String code1 = oneLetter.tokenizeSymbol(sym); 525 // 526 // return new Character(code1.charAt(0)) ; 527 Character code1 = null; 528 code1 = aminoAcids.get(code3); 529 530 if (code1 == null) { 531 throw new UnknownPdbAminoAcidException(code3 + " not a standard amino acid"); 532 } else { 533 return code1; 534 } 535 536 } 537 538 /** convert a three letter code into single character. 539 * catches for unusual characters 540 * 541 * @param groupCode3 three letter representation 542 * @return null if group is a nucleotide code 543 */ 544 public static final Character get1LetterCode(String groupCode3){ 545 546 Character aminoCode1 = null; 547 try { 548 // is it a standard amino acid ? 549 aminoCode1 = convert_3code_1code(groupCode3); 550 } catch (UnknownPdbAminoAcidException e){ 551 // hm groupCode3 is not standard 552 // perhaps it is an nucleotide? 553 if ( isNucleotide(groupCode3) ) { 554 //System.out.println("nucleotide, aminoCode1:"+aminoCode1); 555 aminoCode1= null; 556 } else { 557 // does not seem to be so let's assume it is 558 // nonstandard aminoacid and label it "X" 559 //logger.warning("unknown group name "+groupCode3 ); 560 aminoCode1 = UNKNOWN_GROUP_LABEL; 561 } 562 } 563 564 return aminoCode1; 565 566 } 567 568 569 /* Test if the threelettercode of an ATOM entry corresponds to a 570 * nucleotide or to an aminoacid. 571 * @param a 3-character code for a group. 572 * 573 */ 574 public static final boolean isNucleotide(String groupCode3){ 575 576 String code = groupCode3.trim(); 577 if ( nucleotides30.containsKey(code)){ 578 return true; 579 } 580 581 if ( nucleotides23.containsKey(code)){ 582 return true; 583 } 584 585 return false ; 586 } 587 588 /** Reduce a structure to provide a smaller representation . Only takes the first model of the structure. If chainId is provided only return a structure containing that Chain ID. 589 * Converts lower case chain IDs to upper case if structure does not contain a chain with that ID. 590 * 591 * @param s 592 * @param chainId 593 * @return Structure 594 * @since 3.0 595 */ 596 @SuppressWarnings("deprecation") 597 public static final Structure getReducedStructure(Structure s, String chainId) throws StructureException{ 598 // since we deal here with structure alignments, 599 // only use Model 1... 600 601 Structure newS = new StructureImpl(); 602 newS.setHeader(s.getHeader()); 603 newS.setPDBCode(s.getPDBCode()); 604 newS.setPDBHeader(s.getPDBHeader()); 605 newS.setName(s.getName()); 606 newS.setSSBonds(s.getSSBonds()); 607 newS.setDBRefs(s.getDBRefs()); 608 newS.setSites(s.getSites()); 609 newS.setNmr(s.isNmr()); 610 newS.setBiologicalAssembly(s.isBiologicalAssembly()); 611 newS.setCompounds(s.getCompounds()); 612 newS.setConnections(s.getConnections()); 613 newS.setSSBonds(s.getSSBonds()); 614 newS.setSites(s.getSites()); 615 616 if ( chainId != null) 617 chainId = chainId.trim(); 618 619 if ( chainId == null || chainId.equals("")){ 620 // only get model 0 621 List<Chain> model0 = s.getModel(0); 622 for (Chain c : model0){ 623 newS.addChain(c); 624 } 625 return newS; 626 627 } 628 629 Chain c = null; 630 try { 631 c = s.getChainByPDB(chainId); 632 } catch (StructureException e){ 633 System.err.println(e.getMessage() + " trying upper case Chain id..."); 634 c = s.getChainByPDB(chainId.toUpperCase()); 635 636 637 } 638 if ( c != null) { 639 newS.addChain(c); 640 for ( Compound comp : s.getCompounds()){ 641 if ( comp.getChainId().contains(c.getChainID())){ 642 // found matching compound. set description... 643 newS.getPDBHeader().setDescription("Chain " + c.getChainID() + " of " + s.getPDBCode() + " " + comp.getMolName()); 644 } 645 } 646 } 647 648 649 return newS; 650 } 651 652 653 /** Reduce a structure to provide a smaller representation. 654 * Only takes the first model of the structure. If chainNr >=0 only takes 655 * the chain at that position into account. 656 * 657 * @param s 658 * @param chainNr can be -1 to request all chains of model 0, otherwise will only add chain at this position 659 * @return Structure object 660 * @since 3.0 661 */ 662 @SuppressWarnings("deprecation") 663 public static final Structure getReducedStructure(Structure s, int chainNr) throws StructureException{ 664 // since we deal here with structure alignments, 665 // only use Model 1... 666 667 Structure newS = new StructureImpl(); 668 newS.setHeader(s.getHeader()); 669 newS.setPDBCode(s.getPDBCode()); 670 newS.setPDBHeader(s.getPDBHeader()); 671 newS.setName(s.getName()); 672 newS.setSSBonds(s.getSSBonds()); 673 newS.setDBRefs(s.getDBRefs()); 674 newS.setSites(s.getSites()); 675 newS.setNmr(s.isNmr()); 676 newS.setBiologicalAssembly(s.isBiologicalAssembly()); 677 newS.setCompounds(s.getCompounds()); 678 newS.setConnections(s.getConnections()); 679 newS.setSSBonds(s.getSSBonds()); 680 newS.setSites(s.getSites()); 681 newS.getPDBHeader().setDescription("subset of " + s.getPDBCode() + " " + s.getPDBHeader().getDescription() ); 682 683 if ( chainNr < 0 ) { 684 685 // only get model 0 686 List<Chain> model0 = s.getModel(0); 687 for (Chain c : model0){ 688 newS.addChain(c); 689 } 690 return newS; 691 } 692 Chain c = null; 693 694 c = s.getChain(0, chainNr); 695 696 newS.addChain(c); 697 698 return newS; 699 } 700 701 702 703 /** In addition to the functionality provided by getReducedStructure also provides a way to specify sub-regions of a structure with the following 704 * specification: 705 * 706 * 707 * ranges can be surrounded by ( and ). (but will be removed). 708 * ranges are specified as 709 * PDBresnum1 : PDBresnum2 710 * 711 * a list of ranges is separated by , 712 * 713 * Example 714 * 4GCR (A:1-83) 715 * 1CDG (A:407-495,A:582-686) 716 * 1CDG (A_407-495,A_582-686) 717 * 718 * @param s The full structure 719 * @param ranges A comma-seperated list of ranges, optionally surrounded by parentheses 720 * @return Substructure of s specified by ranges 721 */ 722 723 @SuppressWarnings("deprecation") 724 public static final Structure getSubRanges(Structure s, String ranges ) 725 throws StructureException 726 { 727 Structure struc = getReducedStructure(s, null); 728 729 if ( ranges == null || ranges.equals("")) 730 throw new IllegalArgumentException("ranges can't be null or empty"); 731 732 ranges = ranges.trim(); 733 734 if ( ranges.startsWith("(")) 735 ranges = ranges.substring(1); 736 if ( ranges.endsWith(")")) { 737 ranges = ranges.substring(0,ranges.length()-1); 738 } 739 740 //special case: '-' means 'everything' 741 if ( ranges.equals("-") ) { 742 return s; 743 } 744 745 Structure newS = new StructureImpl(); 746 747 newS.setHeader(s.getHeader()); 748 newS.setPDBCode(s.getPDBCode()); 749 newS.setPDBHeader(s.getPDBHeader()); 750 newS.setName(s.getName()); 751 newS.setDBRefs(s.getDBRefs()); 752 newS.setNmr(s.isNmr()); 753 newS.setBiologicalAssembly(s.isBiologicalAssembly()); 754 newS.getPDBHeader().setDescription("sub-range " + ranges + " of " + newS.getPDBCode() + " " + s.getPDBHeader().getDescription()); 755 756 // TODO The following should be only copied for atoms which are present in the range. 757 //newS.setCompounds(s.getCompounds()); 758 //newS.setConnections(s.getConnections()); 759 //newS.setSSBonds(s.getSSBonds()); 760 //newS.setSites(s.getSites()); 761 762 String[] rangS =ranges.split(","); 763 764 StringWriter name = new StringWriter(); 765 name.append(s.getName()); 766 boolean firstRange = true; 767 String prevChainId = null; 768 769 // parse the ranges, adding the specified residues to newS 770 for ( String r: rangS){ 771 //System.out.println(">"+r+"<"); 772 // Match a single range, eg "A_4-27" 773 774 Matcher matcher = pdbNumRangeRegex.matcher(r); 775 if( ! matcher.matches() ){ 776 throw new StructureException("wrong range specification, should be provided as chainID_pdbResnum1-pdbRensum2: "+ranges); 777 } 778 String chainId = matcher.group(1); 779 Chain chain; 780 781 if(chainId.equals("_") && struc.size() == 1) { 782 // Handle special case of "_" chain for single-chain proteins 783 chain = struc.getChain(0); 784 } else { 785 // Explicit chain 786 chain = struc.getChainByPDB(chainId); 787 } 788 789 Group[] groups; 790 791 String pdbresnumStart = matcher.group(2); 792 String pdbresnumEnd = matcher.group(3); 793 794 if ( ! firstRange){ 795 name.append( ","); 796 } else { 797 name.append(AtomCache.CHAIN_SPLIT_SYMBOL); 798 } 799 if( pdbresnumStart != null && pdbresnumEnd != null) { 800 // not a full chain 801 //since Java doesn't allow '+' before integers, fix this up. 802 if(pdbresnumStart.charAt(0) == '+') 803 pdbresnumStart = pdbresnumStart.substring(1); 804 if(pdbresnumEnd.charAt(0) == '+') 805 pdbresnumEnd = pdbresnumEnd.substring(1); 806 groups = chain.getGroupsByPDB(pdbresnumStart, pdbresnumEnd); 807 808 name.append( chainId + AtomCache.UNDERSCORE + pdbresnumStart+"-" + pdbresnumEnd); 809 810 } else { 811 // full chain 812 groups = chain.getAtomGroups().toArray(new Group[chain.getAtomGroups().size()]); 813 name.append(chainId); 814 } 815 firstRange = true; 816 817 // Create new chain, if needed 818 Chain c = null; 819 if ( prevChainId == null) { 820 // first chain... 821 c = new ChainImpl(); 822 c.setChainID(chain.getChainID()); 823 newS.addChain(c); 824 } else if ( prevChainId.equals(chain.getChainID())) { 825 c = newS.getChainByPDB(prevChainId); 826 827 } else { 828 try { 829 c = newS.getChainByPDB(chain.getChainID()); 830 } catch (StructureException e){ 831 // chain not in structure yet... 832 c = new ChainImpl(); 833 c.setChainID(chain.getChainID()); 834 newS.addChain(c); 835 } 836 } 837 838 // add the groups to the chain: 839 for ( Group g: groups) { 840 c.addGroup(g); 841 } 842 843 prevChainId = c.getChainID(); 844 } 845 846 newS.setName(name.toString()); 847 848 return newS; 849 } 850 851 public static final String convertAtomsToSeq(Atom[] atoms) { 852 853 StringBuffer buf = new StringBuffer(); 854 Group prevGroup = null; 855 for (Atom a : atoms){ 856 Group g = a.getGroup(); 857 if ( prevGroup != null) { 858 if ( prevGroup.equals(g)) { 859 // we add each group only once. 860 continue; 861 } 862 } 863 String code3 = g.getPDBName(); 864 try { 865 buf.append(convert_3code_1code(code3) ); 866 } catch (UnknownPdbAminoAcidException e){ 867 buf.append('X'); 868 } 869 prevGroup = g; 870 871 } 872 return buf.toString(); 873 } 874 875 /** get a PDB residue number object for this group 876 * 877 * @param g Group object 878 * @return a ResidueNumber object 879 * @deprecated replaced by Group.getResidueNumber() 880 */ 881 public static final ResidueNumber getPDBResidueNumber(Group g){ 882 883 return g.getResidueNumber(); 884 885 } 886 887 /** Get a group represented by a ResidueNumber. 888 * 889 * @param struc a {@link Structure} 890 * @param pdbResNum a {@link ResidueNumber} 891 * @return a group in the structure that is represented by the pdbResNum. 892 * @throws StructureException if the group cannot be found. 893 */ 894 public static final Group getGroupByPDBResidueNumber(Structure struc, 895 ResidueNumber pdbResNum) throws StructureException { 896 if (struc == null || pdbResNum==null) { 897 throw new IllegalArgumentException("Null argument(s)."); 898 } 899 900 Chain chain = struc.findChain(pdbResNum.getChainId()); 901 902// String numIns = "" + pdbResNum.getSeqNum(); 903// if (pdbResNum.getInsCode() != null) { 904// numIns += pdbResNum.getInsCode(); 905// } 906 907 908 return chain.getGroupByPDB(pdbResNum); 909 } 910 911 /* 912 * Returns a List of Groups in a structure within the distance specified of a given group. 913 */ 914 public static List<Group> getGroupsWithinShell(Structure structure, Group group, double distance, boolean includeWater) { 915 Set<Group> returnSet = new LinkedHashSet<Group>(); 916 917 //square the distance to use as a comparison against getDistanceFast which returns the square of a distance. 918 distance = distance * distance; 919 920 for (Atom atomA : group.getAtoms()) { 921 for (Chain chain : structure.getChains()) { 922 for (Group chainGroup : chain.getAtomGroups()) { 923// System.out.println("Checking group: " + chainGroup); 924 if (chainGroup.getResidueNumber().equals(group.getResidueNumber())) { 925 continue; 926 } 927 else if (!includeWater && chainGroup.getPDBName().equals("HOH")) { 928 continue; 929 } 930 else { 931 for (Atom atomB : chainGroup.getAtoms()) { 932 try { 933 //use getDistanceFast as we are doing a lot of comparisons 934 double dist = Calc.getDistanceFast(atomA, atomB); 935 if (dist <= distance) { 936 returnSet.add(chainGroup); 937// System.out.println(String.format("%s within %s of %s", atomB, dist, atomA)); 938 break; 939 } 940 941 } catch (StructureException ex) { 942 Logger.getLogger(StructureTools.class.getName()).log(Level.SEVERE, null, ex); 943 } 944 945 } 946 } 947 } 948 } 949 } 950 List<Group> returnList = new ArrayList<Group>(); 951 returnList.addAll(returnSet); 952 return returnList; 953 } 954 955 /* 956 * Very simple distance-based bond calculator. Will give approximations, 957 * but do not rely on this to be chemically correct. 958 */ 959 public static List<Bond> findBonds(Group group, List<Group> groups) { 960 List<Bond> bondList = new ArrayList<Bond>(); 961 for (Atom atomA : group.getAtoms()) { 962 for (Group groupB : groups) { 963 if (groupB.getType().equals(GroupType.HETATM)) { 964 continue; 965 } 966 for (Atom atomB : groupB.getAtoms()) { 967 try { 968 double dist = Calc.getDistance(atomA, atomB); 969 BondType bondType = BondType.UNDEFINED; 970 if (dist <= 2) { 971 bondType = BondType.COVALENT; 972 Bond bond = new Bond(dist, bondType, group, atomA, groupB, atomB); 973 bondList.add(bond); 974// System.out.println(String.format("%s within %s of %s", atomB, dist, atomA)); 975 } 976 else if (dist <= 3.25) { 977 978 if (isHbondDonorAcceptor(atomA) && isHbondDonorAcceptor(atomB)) { 979 bondType = BondType.HBOND; 980 } 981 else if (atomA.getElement().isMetal() && isHbondDonorAcceptor(atomB)) { 982 bondType = BondType.METAL; 983 } 984 else if (atomA.getElement().equals(Element.C) && atomB.getElement().equals(Element.C)) { 985 bondType = BondType.HYDROPHOBIC; 986 } 987 //not really interested in 'undefined' types 988 if (bondType != BondType.UNDEFINED) { 989 Bond bond = new Bond(dist, bondType, group, atomA, groupB, atomB); 990 bondList.add(bond); 991 } 992// System.out.println(String.format("%s within %s of %s", atomB, dist, atomA)); 993 } else if (dist <= 3.9) { 994 if (atomA.getElement().equals(Element.C) && atomB.getElement().equals(Element.C)) { 995 bondType = BondType.HYDROPHOBIC; 996 } 997 //not really interested in 'undefined' types 998 if (bondType != BondType.UNDEFINED) { 999 Bond bond = new Bond(dist, bondType, group, atomA, groupB, atomB); 1000 bondList.add(bond); 1001 } 1002 } 1003 1004 } catch (StructureException ex) { 1005 Logger.getLogger(StructureTools.class.getName()).log(Level.SEVERE, null, ex); 1006 } 1007 1008 } 1009 } 1010 } 1011 1012 1013 return bondList; 1014 } 1015 1016 private static boolean isHbondDonorAcceptor(Atom atom) { 1017 if (hBondDonorAcceptors.contains(atom.getElement())) { 1018 return true; 1019 } 1020 return false; 1021 } 1022}