001/*
002 *                  BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 * Created on Jan 4, 2006
021 *
022 */
023package org.biojava.bio.structure;
024
025import java.io.StringWriter;
026import java.util.ArrayList;
027import java.util.HashMap;
028import java.util.HashSet;
029import java.util.Iterator;
030import java.util.LinkedHashSet;
031import java.util.List;
032import java.util.Map;
033import java.util.Set;
034
035import java.util.logging.Level;
036import java.util.logging.Logger;
037import java.util.regex.Matcher;
038import java.util.regex.Pattern;
039
040import org.biojava.bio.structure.align.util.AtomCache;
041
042
043/**
044 * A class that provides some tool methods.
045 *
046 * @author Andreas Prlic, Jules Jacobsen
047 * @since 1.0
048 * @version %I% %G%
049 */
050public class StructureTools {
051
052        /** The Atom name of C-alpha atoms.
053         *
054         */
055        public static final String caAtomName = " CA ";
056
057        public static final String nAtomName = "N";
058
059        public static final String oAtomName = "O";
060
061        public static final String cbAtomName = "CB";
062
063
064        /** The names of the Atoms that form the backbone.
065         *
066         */
067        public static final String[] backboneAtomNames = {nAtomName,caAtomName,"C",oAtomName, cbAtomName};
068
069        public static final Character UNKNOWN_GROUP_LABEL = new Character('x');;
070
071
072        //private static final String insertionCodeRegExp = "([0-9]+)([a-zA-Z]*)";
073        //private static final Pattern insertionCodePattern = Pattern.compile(insertionCodeRegExp);
074
075
076        // there is a file format change in PDB 3.0 and nucleotides are being renamed
077        static private Map<String, Integer> nucleotides30 ;
078        static private Map<String, Integer> nucleotides23 ;
079
080        //amino acid 3 and 1 letter code definitions
081        private static final Map<String, Character> aminoAcids;
082
083        private static final Set<Element> hBondDonorAcceptors;
084        //      // for conversion 3code 1code
085        //      private static  SymbolTokenization threeLetter ;
086        //      private static  SymbolTokenization oneLetter ;
087
088        public static Logger logger =  Logger.getLogger("org.biojava.bio.structure");
089
090        /**
091         * Pattern to describe subranges. Matches "A", "A:", "A:7-53","A_7-53", etc.
092         * @see #getSubRanges(Structure, String)
093         */
094        public static final Pattern pdbNumRangeRegex = Pattern.compile(
095                        "^\\s*(\\w)" + //chain ID
096                        "(?:" + //begin range, this is a "non-capturing group"
097                          "(?::|_|:$|_$|$)" + //colon or underscore, could be at the end of a line, another non-capt. group.
098                                "(?:"+ // another non capturing group for the residue range
099                                 "([-+]?[0-9]+[A-Za-z]?)" + // first residue
100                                 "\\s*-\\s*" + // -
101                                 "([-+]?[0-9]+[A-Za-z]?)" + // second residue
102                                ")?+"+
103                        ")?" + //end range
104                        "\\s*");
105        
106        
107        static {
108                nucleotides30 = new HashMap<String,Integer>();
109                nucleotides30.put("DA",1);
110                nucleotides30.put("DC",1);
111                nucleotides30.put("DG",1);
112                nucleotides30.put("DT",1);
113                nucleotides30.put("DI",1);
114                nucleotides30.put("A",1);
115                nucleotides30.put("G",1);
116                nucleotides30.put("C",1);
117                nucleotides30.put("U",1);
118                nucleotides30.put("I",1);
119
120                //TODO: check if they are always HETATMs, in that case this will not be necessary
121                // the DNA linkers - the +C , +G, +A  +T +U and +I have been replaced with these:
122                nucleotides30.put("TAF",1); // 2'-DEOXY-2'-FLUORO-ARABINO-FURANOSYL THYMINE-5'-PHOSPHATE
123                nucleotides30.put("TC1",1); // 3-(5-PHOSPHO-2-DEOXY-BETA-D-RIBOFURANOSYL)-2-OXO-1,3-DIAZA-PHENOTHIAZINE
124                nucleotides30.put("TFE",1); // 2'-O-[2-(TRIFLUORO)ETHYL] THYMIDINE-5'-MONOPHOSPHATE
125                nucleotides30.put("TFO",1); // [2-(6-AMINO-9H-PURIN-9-YL)-1-METHYLETHOXY]METHYLPHOSPHONIC ACID"
126                nucleotides30.put("TGP",1); // 5'-THIO-2'-DEOXY-GUANOSINE PHOSPHONIC ACID
127                nucleotides30.put("THX",1); // PHOSPHONIC ACID 6-({6-[6-(6-CARBAMOYL-3,6,7,8-TETRAHYDRO-3,6-DIAZA-AS-INDACENE-2-CARBONYL)-3,6,7,8-TETRAHYDRO-3,6-DIAZA-AS-INDOCENE-2-CARBONYL]-3,6,7,8-TETRAHYDRO-3,6-DIAZA-AS-INDACENE-2-CARBONL}-AMINO)-HEXYL ESTER 5-(5-METHYL-2,4-DIOXO-3,4-DIHYDRO-2H-PYRIMIDIN-1-YL)-TETRAHYDRO-FURAN-2-YLMETHYL ESTER
128                nucleotides30.put("TLC",1); // 2-O,3-ETHDIYL-ARABINOFURANOSYL-THYMINE-5'-MONOPHOSPHATE
129                nucleotides30.put("TLN",1); //  [(1R,3R,4R,7S)-7-HYDROXY-3-(THYMIN-1-YL)-2,5-DIOXABICYCLO[2.2.1]HEPT-1-YL]METHYL DIHYDROGEN PHOSPHATE"
130                nucleotides30.put("TP1",1); // 2-(METHYLAMINO)-ETHYLGLYCINE-CARBONYLMETHYLENE-THYMINE
131                nucleotides30.put("TPC",1); // 5'-THIO-2'-DEOXY-CYTOSINE PHOSPHONIC ACID
132                nucleotides30.put("TPN",1); // 2-AMINOETHYLGLYCINE-CARBONYLMETHYLENE-THYMINE
133
134
135
136                // store nucleic acids (C, G, A, T, U, and I), and
137                // the modified versions of nucleic acids (+C, +G, +A, +T, +U, and +I), and
138                nucleotides23  = new HashMap<String,Integer>();
139                String[] names = {"C","G","A","T","U","I","+C","+G","+A","+T","+U","+I"};
140                for (int i = 0; i < names.length; i++) {
141                        String n = names[i];
142                        nucleotides23.put(n,1);
143                }
144
145                aminoAcids = new HashMap<String, Character>();
146                aminoAcids.put("GLY", new Character('G'));
147                aminoAcids.put("ALA", new Character('A'));
148                aminoAcids.put("VAL", new Character('V'));
149                aminoAcids.put("LEU", new Character('L'));
150                aminoAcids.put("ILE", new Character('I'));
151                aminoAcids.put("PHE", new Character('F'));
152                aminoAcids.put("TYR", new Character('Y'));
153                aminoAcids.put("TRP", new Character('W'));
154                aminoAcids.put("PRO", new Character('P'));
155                aminoAcids.put("HIS", new Character('H'));
156                aminoAcids.put("LYS", new Character('K'));
157                aminoAcids.put("ARG", new Character('R'));
158                aminoAcids.put("SER", new Character('S'));
159                aminoAcids.put("THR", new Character('T'));
160                aminoAcids.put("GLU", new Character('E'));
161                aminoAcids.put("GLN", new Character('Q'));
162                aminoAcids.put("ASP", new Character('D'));
163                aminoAcids.put("ASN", new Character('N'));
164                aminoAcids.put("CYS", new Character('C'));
165                aminoAcids.put("MET", new Character('M'));
166                //MSE is only found as a molecular replacement for MET
167                aminoAcids.put("MSE", new Character('M'));
168                //'non-standard', genetically encoded
169                //http://www.chem.qmul.ac.uk/iubmb/newsletter/1999/item3.html
170                //IUBMB recommended name is 'SEC' but the wwPDB currently use 'CSE'
171                //likewise 'PYL' (IUBMB) and 'PYH' (PDB)
172                aminoAcids.put("CSE", new Character('U'));
173                aminoAcids.put("SEC", new Character('U'));
174                aminoAcids.put("PYH", new Character('O'));
175                aminoAcids.put("PYL", new Character('O'));
176
177                hBondDonorAcceptors = new HashSet<Element>();
178                hBondDonorAcceptors.add(Element.N);
179                hBondDonorAcceptors.add(Element.O);
180                hBondDonorAcceptors.add(Element.S);
181
182        }
183
184
185        /** Count how many number of Atoms are contained within a Structure object.
186         *
187         * @param s the structure object
188         * @return the number of Atoms in this Structure
189         */
190        public static final int getNrAtoms(Structure s){
191
192                int nrAtoms = 0;
193
194                Iterator<Group> iter = new GroupIterator(s);
195
196                while ( iter.hasNext()){
197                        Group g = (Group) iter.next();
198                        nrAtoms += g.size();
199                }
200
201                return nrAtoms;
202        }
203
204
205        /** Count how many groups are contained within a structure object.
206         *
207         * @param s the structure object
208         * @return the number of groups in the structure
209         */
210        public static final int getNrGroups(Structure s){
211                int nrGroups = 0;
212
213                List<Chain> chains = s.getChains(0);
214                Iterator<Chain> iter = chains.iterator();
215                while (iter.hasNext()){
216                        Chain c = (Chain) iter.next();
217                        nrGroups += c.getAtomLength();
218                }
219                return nrGroups;
220        }
221
222
223        /** Returns an array of the requested Atoms from the Structure object. Iterates over all groups
224         * and checks if the requested atoms are in this group, no matter if this is a  {@link AminoAcid} or {@link HetatomImpl} group.
225         * For structures with more than one model, only model 0 will be used.
226         *
227         * @param s the structure to get the atoms from
228         *
229         * @param atomNames  contains the atom names to be used.
230         * @return an Atom[] array
231         */
232        public static final Atom[] getAtomArray(Structure s, String[] atomNames){
233                List<Chain> chains = s.getModel(0);
234
235                List<Atom> atoms = new ArrayList<Atom>();
236
237                extractCAatoms(atomNames, chains, atoms);
238                return (Atom[]) atoms.toArray(new Atom[atoms.size()]);
239
240        }
241        
242        /** Returns an array of the requested Atoms from the Structure object. 
243         * In contrast to {@link #getAtomArray(Structure, String[])} this method iterates over all chains.
244         * Iterates over all chains and groups
245         * and checks if the requested atoms are in this group, no matter if this is a {@link AminoAcid} or {@link HetatomImpl} group.
246         * For structures with more than one model, only model 0 will be used.
247         *
248         * @param s the structure to get the atoms from
249         *
250         * @param atomNames  contains the atom names to be used.
251         * @return an Atom[] array
252         */
253        public static final Atom[] getAtomArrayAllModels(Structure s, String[] atomNames){
254                
255                List<Atom> atoms = new ArrayList<Atom>();
256
257                for (int i =0 ; i < s.nrModels(); i++ ) {
258                        List<Chain> chains = s.getModel(i);
259                        extractCAatoms(atomNames, chains, atoms);
260                }
261                return (Atom[]) atoms.toArray(new Atom[atoms.size()]);
262
263        }
264
265
266
267        private static void extractCAatoms(String[] atomNames, List<Chain> chains,
268                        List<Atom> atoms) {
269                for ( Chain c : chains) {
270                        
271                        for ( Group g : c.getAtomGroups()) {
272
273                                // a temp container for the atoms of this group
274                                List<Atom> thisGroupAtoms = new ArrayList<Atom>();
275                                // flag to check if this group contains all the requested atoms.
276                                boolean thisGroupAllAtoms = true;
277                                for ( int i = 0 ; i < atomNames.length; i++){
278                                        String atomName = atomNames[i];
279                                        try {
280                                                Atom a = g.getAtom(atomName);
281                                                thisGroupAtoms.add(a);
282                                        } catch (StructureException e){
283                                                // this group does not have a required atom, skip it...
284                                                thisGroupAllAtoms = false;
285                                                break;
286                                        }
287                                }
288                                if ( thisGroupAllAtoms){
289                                        // add the atoms of this group to the array.
290                                        Iterator<Atom> aIter = thisGroupAtoms.iterator();
291                                        while(aIter.hasNext()){
292                                                Atom a = (Atom) aIter.next();
293                                                atoms.add(a);
294                                        }
295                                }
296
297                        }
298                }
299        }
300
301        /** Returns an array of the requested Atoms from the Structure object. Iterates over all groups
302         * and checks if the requested atoms are in this group, no matter if this is a AminoAcid or Hetatom group.
303         *
304         *
305         * @param c the Chain to get the atoms from
306         *
307         * @param atomNames  contains the atom names to be used.
308         * @return an Atom[] array
309         */
310        public static final Atom[] getAtomArray(Chain c, String[] atomNames){
311
312                List<Group> groups = c.getAtomGroups();
313
314                List<Atom> atoms = new ArrayList<Atom>();
315
316                for (Group g : groups){
317
318                        // a temp container for the atoms of this group
319                        List<Atom> thisGroupAtoms = new ArrayList<Atom>();
320                        // flag to check if this group contains all the requested atoms.
321                        boolean thisGroupAllAtoms = true;
322                        for ( int i = 0 ; i < atomNames.length; i++){
323                                String atomName = atomNames[i];
324                                try {
325                                        Atom a = g.getAtom(atomName);
326                                        thisGroupAtoms.add(a);
327                                } catch (StructureException e){
328                                        // this group does not have a required atom, skip it...
329                                        thisGroupAllAtoms = false;
330                                        break;
331                                }
332                        }
333                        if ( thisGroupAllAtoms){
334                                // add the atoms of this group to the array.
335                                Iterator<Atom> aIter = thisGroupAtoms.iterator();
336                                while(aIter.hasNext()){
337                                        Atom a = (Atom) aIter.next();
338                                        atoms.add(a);
339                                }
340                        }
341
342                }
343                return (Atom[]) atoms.toArray(new Atom[atoms.size()]);
344
345        }
346
347        /** Returns an Atom array of the CA atoms.
348         * @param c the structure object
349         * @return an Atom[] array
350         */
351        public static final Atom[] getAtomCAArray(Chain c){
352                String[] atomNames = {" CA " };
353                return getAtomArray(c,atomNames);
354        }
355
356        /** Provides an equivalent copy of Atoms in a new array. Clones everything, starting with parent 
357         * groups and chains. The chain will only contain groups that are part of the CA array.
358         * 
359         * @param ca array of CA atoms
360         * @return Atom array
361         */
362        public static final Atom[] cloneCAArray(Atom[] ca) throws StructureException{
363                Atom[] newCA = new Atom[ca.length];
364
365                List<Chain> model = new ArrayList<Chain>();
366                int apos = -1;
367                for(Atom a: ca){
368                        apos++;
369                        Group parentG = a.getGroup();
370                        Chain parentC = parentG.getChain();
371
372                        Chain newChain = null;
373                        for ( Chain c : model){
374                                if ( c.getChainID().equals(parentC.getChainID())){
375                                        newChain = c;
376                                        break;
377                                }
378                        }
379                        if ( newChain == null){
380                                newChain = new ChainImpl();
381                                newChain.setChainID(parentC.getChainID());
382                                model.add(newChain);
383                        }
384
385                        Group parentN = (Group)parentG.clone();
386
387                        newCA[apos] = parentN.getAtom(" CA ");
388                        newChain.addGroup(parentN);
389                }
390                return newCA;
391        }
392
393        /** Clone a set of CA Atoms, but returns the parent groups
394         *  
395         * @param ca Atom array
396         * @return Group array
397         */
398        public static Group[] cloneGroups(Atom[] ca) {
399                Group[] newGroup = new Group[ca.length]; 
400
401                List<Chain> model = new ArrayList<Chain>();
402                int apos = -1;
403                for(Atom a: ca){
404                        apos++;
405                        Group parentG = a.getGroup();
406                        Chain parentC = parentG.getChain();
407
408                        Chain newChain = null;
409                        for ( Chain c : model){
410                                if ( c.getChainID().equals(parentC.getChainID())){
411                                        newChain = c;
412                                        break;
413                                }
414                        }
415                        if ( newChain == null){
416                                newChain = new ChainImpl();
417                                newChain.setChainID(parentC.getChainID());
418                                model.add(newChain);
419                        }
420
421                        Group ng = (Group)parentG.clone();
422                        newGroup[apos] = ng;
423                        newChain.addGroup(ng);
424                }
425                return newGroup;
426        }
427        
428        /** Utility method for working with circular permutations. Creates a duplicated and cloned set of Calpha atoms from the input array.
429         * 
430         * @param ca2 atom array
431         * @return cloned and duplicated set of input array
432         * @throws StructureException
433         */
434        public static Atom[] duplicateCA2(Atom[] ca2) throws StructureException{
435                // we don't want to rotate input atoms, do we?
436                Atom[] ca2clone = new Atom[ca2.length*2];
437
438                int pos = 0;
439
440                Chain c = null;
441                String prevChainId = "";
442                for (Atom a : ca2){                     
443                        Group g = (Group) a.getGroup().clone(); // works because each group has only a CA atom
444                        
445                        if (c == null ) {
446                                c = new ChainImpl();
447                                Chain orig= a.getGroup().getChain();
448                                c.setChainID(orig.getChainID());
449                        } else {
450                                Chain orig= a.getGroup().getChain();
451                                if ( ! orig.getChainID().equals(prevChainId)){
452                                        c = new ChainImpl();
453                                        c.setChainID(orig.getChainID());
454                                }
455                        }
456                        
457                        c.addGroup(g);
458                        ca2clone[pos] = g.getAtom(StructureTools.caAtomName);
459
460                        pos++;
461                }
462
463                // Duplicate ca2!
464                c = null;
465                prevChainId = "";
466                for (Atom a : ca2){
467                        Group g = (Group)a.getGroup().clone();
468                        
469                        if (c == null ) {
470                                c = new ChainImpl();
471                                Chain orig= a.getGroup().getChain();
472                                c.setChainID(orig.getChainID());
473                        } else {
474                                Chain orig= a.getGroup().getChain();
475                                if ( ! orig.getChainID().equals(prevChainId)){
476                                        c = new ChainImpl();
477                                        c.setChainID(orig.getChainID());
478                                }
479                        }
480                        
481                        c.addGroup(g);
482                        ca2clone[pos] = g.getAtom(StructureTools.caAtomName);
483
484                        pos++;
485                }
486
487                return ca2clone;
488
489        }
490
491        
492
493        /** Returns an Atom array of the CA atoms.
494         * @param s the structure object
495         * @return an Atom[] array
496         */
497        public static Atom[] getAtomCAArray(Structure s){
498                String[] atomNames = {" CA "};
499                return getAtomArray(s,atomNames);
500        }
501
502        /** Returns an Atom array of the MainChain atoms.
503
504         * @param s the structure object
505         * @return an Atom[] array
506         */
507        public static Atom[] getBackboneAtomArray(Structure s){
508                String[] atomNames = backboneAtomNames;
509                return getAtomArray(s,atomNames);
510        }
511
512
513        /** convert three character amino acid codes into single character
514         *  e.g. convert CYS to C
515         *  @return a character
516         *  @param code3 a three character amino acid representation String
517         *  @throws IllegalSymbolException
518         */
519
520        public static final Character convert_3code_1code(String code3)
521        throws UnknownPdbAminoAcidException {
522                //      {
523                //              Symbol sym   =  threeLetter.parseToken(code3) ;
524                //              String code1 =  oneLetter.tokenizeSymbol(sym);
525                //
526                //              return new Character(code1.charAt(0)) ;
527                Character code1 = null;
528                code1 = aminoAcids.get(code3);
529
530                if (code1 == null) {
531                        throw new UnknownPdbAminoAcidException(code3 + " not a standard amino acid");
532                } else {
533                        return code1;
534                }
535
536        }
537
538        /** convert a three letter code into single character.
539         * catches for unusual characters
540         *
541         * @param groupCode3 three letter representation
542         * @return null if group is a nucleotide code
543         */
544        public static final Character get1LetterCode(String groupCode3){
545
546                Character aminoCode1 = null;
547                try {
548                        // is it a standard amino acid ?
549                        aminoCode1 = convert_3code_1code(groupCode3);
550                } catch (UnknownPdbAminoAcidException e){
551                        // hm groupCode3 is not standard
552                        // perhaps it is an nucleotide?
553                        if ( isNucleotide(groupCode3) ) {
554                                //System.out.println("nucleotide, aminoCode1:"+aminoCode1);
555                                aminoCode1= null;
556                        } else {
557                                // does not seem to be so let's assume it is
558                                //  nonstandard aminoacid and label it "X"
559                                //logger.warning("unknown group name "+groupCode3 );
560                                aminoCode1 = UNKNOWN_GROUP_LABEL;
561                        }
562                }
563
564                return aminoCode1;
565
566        }
567
568
569        /* Test if the threelettercode of an ATOM entry corresponds to a
570         * nucleotide or to an aminoacid.
571         * @param a 3-character code for a group.
572         *
573         */
574        public static final boolean isNucleotide(String groupCode3){
575
576                String code = groupCode3.trim();
577                if ( nucleotides30.containsKey(code)){
578                        return true;
579                }
580
581                if ( nucleotides23.containsKey(code)){
582                        return true;
583                }
584
585                return false ;
586        }
587
588        /** Reduce a structure to provide a smaller representation . Only takes the first model of the structure. If chainId is provided only return a structure containing that Chain ID. 
589         * Converts lower case chain IDs to upper case if structure does not contain a chain with that ID. 
590         * 
591         * @param s
592         * @param chainId
593         * @return Structure
594         * @since 3.0
595         */
596        @SuppressWarnings("deprecation")
597        public static final Structure getReducedStructure(Structure s, String chainId) throws StructureException{
598                // since we deal here with structure alignments,
599                // only use Model 1...
600
601                Structure newS = new StructureImpl();
602                newS.setHeader(s.getHeader());
603                newS.setPDBCode(s.getPDBCode());
604                newS.setPDBHeader(s.getPDBHeader());
605                newS.setName(s.getName());
606                newS.setSSBonds(s.getSSBonds());
607                newS.setDBRefs(s.getDBRefs());
608                newS.setSites(s.getSites());
609                newS.setNmr(s.isNmr());
610                newS.setBiologicalAssembly(s.isBiologicalAssembly());
611                newS.setCompounds(s.getCompounds());
612                newS.setConnections(s.getConnections());
613                newS.setSSBonds(s.getSSBonds());
614                newS.setSites(s.getSites());
615                
616                if ( chainId != null)
617                        chainId = chainId.trim();
618
619                if ( chainId == null || chainId.equals("")){
620                        // only get model 0
621                        List<Chain> model0 = s.getModel(0);
622                        for (Chain c : model0){
623                                newS.addChain(c);
624                        }
625                        return newS;
626
627                }
628
629                Chain c =  null;
630                try {
631                        c = s.getChainByPDB(chainId);
632                } catch (StructureException e){
633                        System.err.println(e.getMessage() + " trying upper case Chain id...");
634                        c = s.getChainByPDB(chainId.toUpperCase());
635                        
636                        
637                }
638                if ( c != null) {
639                        newS.addChain(c);
640                        for ( Compound comp : s.getCompounds()){
641                                if ( comp.getChainId().contains(c.getChainID())){
642                                        // found matching compound. set description...
643                                        newS.getPDBHeader().setDescription("Chain " + c.getChainID() + " of " + s.getPDBCode() + " " + comp.getMolName());
644                                }
645                        }
646                }
647
648
649                return newS;
650        }
651
652
653        /** Reduce a structure to provide a smaller representation.
654         * Only takes the first model of the structure. If chainNr >=0 only takes
655         * the chain at that position into account.
656         * 
657         * @param s
658         * @param chainNr can be -1 to request all chains of model 0, otherwise will only add chain at this position 
659         * @return Structure object
660         * @since 3.0
661         */
662        @SuppressWarnings("deprecation")
663        public static final Structure getReducedStructure(Structure s, int chainNr) throws StructureException{
664                // since we deal here with structure alignments,
665                // only use Model 1...
666
667                Structure newS = new StructureImpl();
668                newS.setHeader(s.getHeader());
669                newS.setPDBCode(s.getPDBCode());
670                newS.setPDBHeader(s.getPDBHeader());
671                newS.setName(s.getName());
672                newS.setSSBonds(s.getSSBonds());
673                newS.setDBRefs(s.getDBRefs());
674                newS.setSites(s.getSites());
675                newS.setNmr(s.isNmr());
676                newS.setBiologicalAssembly(s.isBiologicalAssembly());
677                newS.setCompounds(s.getCompounds());
678                newS.setConnections(s.getConnections());
679                newS.setSSBonds(s.getSSBonds());
680                newS.setSites(s.getSites());
681                newS.getPDBHeader().setDescription("subset of " + s.getPDBCode() + " " + s.getPDBHeader().getDescription() );
682                
683                if ( chainNr < 0 ) {
684
685                        // only get model 0
686                        List<Chain> model0 = s.getModel(0);
687                        for (Chain c : model0){
688                                newS.addChain(c);
689                        }
690                        return newS;
691                }
692                Chain c =  null;
693
694                c = s.getChain(0, chainNr);
695
696                newS.addChain(c);
697
698                return newS;
699        }
700
701        
702        
703        /** In addition to the functionality provided by getReducedStructure also provides a way to specify sub-regions of a structure with the following 
704         * specification:
705         * 
706         * 
707         * ranges can be surrounded by ( and ). (but will be removed).
708         * ranges are specified as
709         * PDBresnum1 : PDBresnum2
710         * 
711         *  a list of ranges is separated by ,
712         *  
713         *  Example
714         *  4GCR (A:1-83)
715         *  1CDG (A:407-495,A:582-686)
716         *  1CDG (A_407-495,A_582-686)
717         * 
718         * @param s The full structure
719         * @param ranges A comma-seperated list of ranges, optionally surrounded by parentheses
720         * @return Substructure of s specified by ranges
721         */
722        
723        @SuppressWarnings("deprecation")
724        public static final Structure getSubRanges(Structure s, String ranges ) 
725        throws StructureException
726        {
727                Structure struc = getReducedStructure(s, null);
728
729                if ( ranges == null || ranges.equals(""))
730                        throw new IllegalArgumentException("ranges can't be null or empty");
731
732                ranges = ranges.trim();
733
734                if ( ranges.startsWith("("))
735                        ranges = ranges.substring(1);
736                if ( ranges.endsWith(")")) {
737                        ranges = ranges.substring(0,ranges.length()-1);
738                }
739
740                //special case: '-' means 'everything'
741                if ( ranges.equals("-") ) {
742                        return s;
743                }
744
745                Structure newS = new StructureImpl();
746                
747                newS.setHeader(s.getHeader());
748                newS.setPDBCode(s.getPDBCode());
749                newS.setPDBHeader(s.getPDBHeader());
750                newS.setName(s.getName());
751                newS.setDBRefs(s.getDBRefs());
752                newS.setNmr(s.isNmr());
753                newS.setBiologicalAssembly(s.isBiologicalAssembly());
754                newS.getPDBHeader().setDescription("sub-range " + ranges + " of "  + newS.getPDBCode() + " " + s.getPDBHeader().getDescription());
755                
756                // TODO The following should be only copied for atoms which are present in the range.
757                //newS.setCompounds(s.getCompounds());
758                //newS.setConnections(s.getConnections());
759                //newS.setSSBonds(s.getSSBonds());
760                //newS.setSites(s.getSites());
761                
762                String[] rangS =ranges.split(",");
763
764                StringWriter name = new StringWriter();
765                name.append(s.getName());
766                boolean firstRange = true;
767                String prevChainId = null;
768
769                // parse the ranges, adding the specified residues to newS
770                for ( String r: rangS){
771                        //System.out.println(">"+r+"<");
772                        // Match a single range, eg "A_4-27"
773                        
774                        Matcher matcher = pdbNumRangeRegex.matcher(r);
775                        if( ! matcher.matches() ){
776                                throw new StructureException("wrong range specification, should be provided as chainID_pdbResnum1-pdbRensum2: "+ranges);
777                        }
778                        String chainId = matcher.group(1);
779                        Chain chain;
780                        
781                        if(chainId.equals("_") && struc.size() == 1) {
782                                // Handle special case of "_" chain for single-chain proteins
783                                chain = struc.getChain(0);
784                        } else {
785                                // Explicit chain
786                                chain = struc.getChainByPDB(chainId);
787                        }
788                        
789                        Group[] groups;
790                        
791                        String pdbresnumStart = matcher.group(2);
792                        String pdbresnumEnd   = matcher.group(3);
793                        
794                        if ( ! firstRange){
795                                name.append( ",");
796                        } else {
797                                name.append(AtomCache.CHAIN_SPLIT_SYMBOL);
798                        }
799                        if( pdbresnumStart != null && pdbresnumEnd != null) {
800                                // not a full chain
801                                //since Java doesn't allow '+' before integers, fix this up.
802                                if(pdbresnumStart.charAt(0) == '+')
803                                        pdbresnumStart = pdbresnumStart.substring(1);
804                                if(pdbresnumEnd.charAt(0) == '+')
805                                        pdbresnumEnd = pdbresnumEnd.substring(1);
806                                groups = chain.getGroupsByPDB(pdbresnumStart, pdbresnumEnd);
807                                
808                                name.append( chainId + AtomCache.UNDERSCORE + pdbresnumStart+"-" + pdbresnumEnd);
809                                
810                        } else {
811                                // full chain
812                                groups = chain.getAtomGroups().toArray(new Group[chain.getAtomGroups().size()]);
813                                name.append(chainId);
814                        }
815                        firstRange = true;
816                        
817                        // Create new chain, if needed
818                        Chain c = null;
819                        if ( prevChainId == null) {
820                                // first chain...
821                                c = new ChainImpl();
822                                c.setChainID(chain.getChainID());
823                                newS.addChain(c);
824                        } else if ( prevChainId.equals(chain.getChainID())) {
825                                c = newS.getChainByPDB(prevChainId);
826
827                        } else {
828                                try {
829                                        c = newS.getChainByPDB(chain.getChainID());
830                                } catch (StructureException e){
831                                        // chain not in structure yet...
832                                        c = new ChainImpl();
833                                        c.setChainID(chain.getChainID());
834                                        newS.addChain(c);
835                                }
836                        }
837
838                        // add the groups to the chain:
839                        for ( Group g: groups) {
840                                c.addGroup(g);
841                        }
842
843                        prevChainId = c.getChainID();
844                }
845                
846                newS.setName(name.toString());
847                
848                return newS;
849        }
850
851        public static final String convertAtomsToSeq(Atom[] atoms) {
852
853                StringBuffer buf = new StringBuffer();
854                Group prevGroup  = null;
855                for (Atom a : atoms){
856                        Group g = a.getGroup();
857                        if ( prevGroup != null) {
858                                if ( prevGroup.equals(g)) {
859                                        // we add each group only once.
860                                        continue;
861                                }
862                        }
863                        String code3 = g.getPDBName();
864                        try {
865                                buf.append(convert_3code_1code(code3) );
866                        } catch (UnknownPdbAminoAcidException e){
867                                buf.append('X');
868                        }
869                        prevGroup = g;
870
871                }
872                return buf.toString();
873        }
874
875        /** get a PDB residue number object for this group
876         * 
877         * @param g Group object
878         * @return a ResidueNumber object
879         * @deprecated replaced by  Group.getResidueNumber()
880         */
881        public static final ResidueNumber getPDBResidueNumber(Group g){
882
883                return g.getResidueNumber();
884
885        }
886
887        /** Get a group represented by a ResidueNumber.
888         * 
889         * @param struc a {@link Structure}
890         * @param pdbResNum a {@link ResidueNumber}
891         * @return a group in the structure that is represented by the pdbResNum. 
892         * @throws StructureException if the group cannot be found.
893         */
894        public static final Group getGroupByPDBResidueNumber(Structure struc, 
895                        ResidueNumber pdbResNum) throws StructureException {
896                if (struc == null || pdbResNum==null) {
897                        throw new IllegalArgumentException("Null argument(s).");
898                }
899
900                Chain chain = struc.findChain(pdbResNum.getChainId());
901
902//              String numIns = "" + pdbResNum.getSeqNum();
903//              if (pdbResNum.getInsCode() != null) {
904//                      numIns += pdbResNum.getInsCode();
905//              }
906
907                
908                return chain.getGroupByPDB(pdbResNum);
909        }
910
911        /*
912         * Returns a List of Groups in a structure within the distance specified of a given group.
913         */
914        public static List<Group> getGroupsWithinShell(Structure structure, Group group, double distance, boolean includeWater) {
915            Set<Group> returnSet = new LinkedHashSet<Group>();
916
917            //square the distance to use as a comparison against getDistanceFast which returns the square of a distance.
918            distance = distance * distance;
919
920            for (Atom atomA : group.getAtoms()) {
921                for (Chain chain : structure.getChains()) {
922                    for (Group chainGroup : chain.getAtomGroups()) {
923//                        System.out.println("Checking group: " + chainGroup);
924                        if (chainGroup.getResidueNumber().equals(group.getResidueNumber())) {
925                            continue;
926                        }
927                        else if (!includeWater && chainGroup.getPDBName().equals("HOH")) {
928                            continue;
929                        }
930                        else {
931                            for (Atom atomB : chainGroup.getAtoms()) {
932                            try {
933                                //use getDistanceFast as we are doing a lot of comparisons
934                                double dist = Calc.getDistanceFast(atomA, atomB);
935                                if (dist <= distance) {
936                                    returnSet.add(chainGroup);
937//                                    System.out.println(String.format("%s within %s of %s", atomB, dist, atomA));
938                                    break;
939                                }
940                                
941                            } catch (StructureException ex) {
942                                Logger.getLogger(StructureTools.class.getName()).log(Level.SEVERE, null, ex);
943                            }
944                                
945                            }
946                        }
947                    }
948                }
949            }
950            List<Group> returnList = new ArrayList<Group>();
951            returnList.addAll(returnSet);
952            return returnList;
953        }
954
955        /*
956         * Very simple distance-based bond calculator. Will give approximations,
957         * but do not rely on this to be chemically correct.
958         */
959        public static List<Bond> findBonds(Group group, List<Group> groups) {
960            List<Bond> bondList = new ArrayList<Bond>();
961            for (Atom atomA : group.getAtoms()) {
962                for (Group groupB : groups) {
963                    if (groupB.getType().equals(GroupType.HETATM)) {
964                        continue;
965                    }
966                    for (Atom atomB : groupB.getAtoms()) {
967                        try {
968                            double dist = Calc.getDistance(atomA, atomB);
969                            BondType bondType = BondType.UNDEFINED;
970                            if (dist <= 2) {
971                                bondType = BondType.COVALENT;
972                                Bond bond = new Bond(dist, bondType, group, atomA, groupB, atomB);
973                                bondList.add(bond);
974//                                    System.out.println(String.format("%s within %s of %s", atomB, dist, atomA));
975                            }
976                            else if (dist <= 3.25) {
977                                
978                                if (isHbondDonorAcceptor(atomA) && isHbondDonorAcceptor(atomB)) {
979                                    bondType = BondType.HBOND;
980                                }
981                                else if (atomA.getElement().isMetal() && isHbondDonorAcceptor(atomB)) {
982                                    bondType = BondType.METAL;
983                                }
984                                else if (atomA.getElement().equals(Element.C) && atomB.getElement().equals(Element.C)) {
985                                    bondType = BondType.HYDROPHOBIC;
986                                }
987                                //not really interested in 'undefined' types
988                                if (bondType != BondType.UNDEFINED) {
989                                    Bond bond = new Bond(dist, bondType, group, atomA, groupB, atomB);
990                                    bondList.add(bond);
991                                }
992//                                    System.out.println(String.format("%s within %s of %s", atomB, dist, atomA));
993                            } else if (dist <= 3.9) {
994                                if (atomA.getElement().equals(Element.C) && atomB.getElement().equals(Element.C)) {
995                                    bondType = BondType.HYDROPHOBIC;
996                                }
997                                //not really interested in 'undefined' types
998                                if (bondType != BondType.UNDEFINED) {
999                                    Bond bond = new Bond(dist, bondType, group, atomA, groupB, atomB);
1000                                    bondList.add(bond);
1001                                }
1002                            }
1003
1004                        } catch (StructureException ex) {
1005                            Logger.getLogger(StructureTools.class.getName()).log(Level.SEVERE, null, ex);
1006                        }
1007
1008                    }
1009                }
1010            }
1011
1012
1013            return bondList;
1014        }
1015
1016        private static boolean isHbondDonorAcceptor(Atom atom) {
1017            if (hBondDonorAcceptors.contains(atom.getElement())) {
1018                return true;
1019            }
1020            return false;
1021        }
1022}