001/*
002 *                    BioJava development code
003 *
004 * This code may be freely distributed and modified under the
005 * terms of the GNU Lesser General Public Licence.  This should
006 * be distributed with the code.  If you do not have a copy,
007 * see:
008 *
009 *      http://www.gnu.org/copyleft/lesser.html
010 *
011 * Copyright for this code is held jointly by the individual
012 * authors.  These should be listed in @author doc comments.
013 *
014 * For more information on the BioJava project and its aims,
015 * or to join the biojava-l mailing list, visit the home page
016 * at:
017 *
018 *      http://www.biojava.org/
019 *
020 * Created on June 7, 2010
021 * Author: Mark Chapman
022 */
023
024package org.biojava3.alignment;
025
026import java.util.ArrayList;
027import java.util.List;
028import java.util.concurrent.ExecutionException;
029import java.util.concurrent.Future;
030
031import org.biojava3.alignment.template.*;
032import org.biojava3.core.sequence.compound.AmbiguityDNACompoundSet;
033import org.biojava3.core.sequence.compound.AminoAcidCompoundSet;
034import org.biojava3.core.sequence.compound.DNACompoundSet;
035import org.biojava3.core.sequence.template.Compound;
036import org.biojava3.core.sequence.template.CompoundSet;
037import org.biojava3.core.sequence.template.Sequence;
038import org.biojava3.core.util.ConcurrencyTools;
039
040/**
041 * Static utility to easily run alignment routines.  To exit cleanly after running any parallel method that mentions
042 * use of the {@link ConcurrencyTools} utility, {@link ConcurrencyTools#shutdown()} or
043 * {@link ConcurrencyTools#shutdownAndAwaitTermination()} must be called.
044 *
045 * @author Mark Chapman
046 */
047public class Alignments {
048
049    /**
050     * List of implemented sequence pair in a profile scoring routines.
051     */
052    public static enum PairInProfileScorerType {
053        IDENTITIES,  // similar to MUSCLE
054        SIMILARITIES
055    }
056
057    /**
058     * List of implemented pairwise sequence alignment routines.
059     */
060    public static enum PairwiseSequenceAlignerType {
061        GLOBAL,              // Needleman-Wunsch/Gotoh
062        GLOBAL_LINEAR_SPACE, // Guan-Uberbacher
063        LOCAL,               // Smith-Waterman/Gotoh
064        LOCAL_LINEAR_SPACE   // Smith-Waterman/Gotoh with smart traceback at each maximum
065    }
066
067    /**
068     * List of implemented pairwise sequence scoring routines.
069     */
070    public static enum PairwiseSequenceScorerType {
071        GLOBAL,
072        GLOBAL_IDENTITIES,   // similar to CLUSTALW and CLUSTALW2
073        GLOBAL_SIMILARITIES,
074        LOCAL,
075        LOCAL_IDENTITIES,
076        LOCAL_SIMILARITIES,
077        KMERS,               // similar to CLUSTAL and MUSCLE
078        WU_MANBER            // similar to KALIGN
079    }
080
081    /**
082     * List of implemented profile-profile alignment routines.
083     */
084    public static enum ProfileProfileAlignerType {
085        GLOBAL,              // similar to MUSCLE and KALIGN
086        GLOBAL_LINEAR_SPACE, // similar to CLUSTALW and CLUSTALW2
087        GLOBAL_CONSENSUS,    // similar to CLUSTAL
088        LOCAL,
089        LOCAL_LINEAR_SPACE,
090        LOCAL_CONSENSUS
091    }
092
093    /**
094     * List of implemented profile refinement routines.
095     */
096    public static enum RefinerType {
097        PARTITION_SINGLE,     // similar to CLUSTALW2
098        PARTITION_SINGLE_ALL, // similar to CLUSTALW2
099        PARTITION_TREE,       // similar to MUSCLE
100        PARTITION_TREE_ALL,
101        RESCORE_IDENTITIES,   // similar to MUSCLE
102        RESCORE_SIMILARITIES
103    }
104
105    // prevents instantiation
106    private Alignments() { }
107
108    // public factory methods
109
110    /**
111     * Factory method which computes a sequence alignment for all {@link Sequence} pairs in the given {@link List}.
112     * This method runs the alignments in parallel by submitting all of the alignments to the shared thread pool of the
113     * {@link ConcurrencyTools} utility.
114     *
115     * @param <S> each {@link Sequence} of an alignment pair is of type S
116     * @param <C> each element of an {@link AlignedSequence} is a {@link Compound} of type C
117     * @param sequences the {@link List} of {@link Sequence}s to align
118     * @param type chosen type from list of pairwise sequence alignment routines
119     * @param gapPenalty the gap penalties used during alignment
120     * @param subMatrix the set of substitution scores used during alignment
121     * @return list of sequence alignment pairs
122     */
123    public static <S extends Sequence<C>, C extends Compound> List<SequencePair<S, C>> getAllPairsAlignments(
124            List<S> sequences, PairwiseSequenceAlignerType type, GapPenalty gapPenalty,
125            SubstitutionMatrix<C> subMatrix) {
126        return runPairwiseAligners(getAllPairsAligners(sequences, type, gapPenalty, subMatrix));
127    }
128
129    /**
130     * Factory method which computes a multiple sequence alignment for the given {@link List} of {@link Sequence}s.
131     *
132     * @param <S> each {@link Sequence} of the {@link List} is of type S
133     * @param <C> each element of a {@link Sequence} is a {@link Compound} of type C
134     * @param sequences the {@link List} of {@link Sequence}s to align
135     * @param settings optional settings that adjust the alignment
136     * @return multiple sequence alignment {@link Profile}
137     */
138    public static <S extends Sequence<C>, C extends Compound> Profile<S, C> getMultipleSequenceAlignment(
139            List<S> sequences, Object... settings) { // TODO convert other factories to this parameter style?
140        CompoundSet<C> cs = sequences.get(0).getCompoundSet();
141        PairwiseSequenceScorerType ps = PairwiseSequenceScorerType.GLOBAL_IDENTITIES;
142        GapPenalty gapPenalty = new SimpleGapPenalty();
143        SubstitutionMatrix<C> subMatrix = null;
144        if (cs == AminoAcidCompoundSet.getAminoAcidCompoundSet()) {
145            @SuppressWarnings("unchecked") // compound types must be equal since compound sets are equal
146            SubstitutionMatrix<C> temp = (SubstitutionMatrix<C>) SubstitutionMatrixHelper.getBlosum62();
147            subMatrix = temp;
148        } else if (cs == DNACompoundSet.getDNACompoundSet()) {
149            @SuppressWarnings("unchecked") // compound types must be equal since compound sets are equal
150            SubstitutionMatrix<C> temp = (SubstitutionMatrix<C>) SubstitutionMatrixHelper.getNuc4_4();
151            subMatrix = temp;
152            
153        } else if (cs == AmbiguityDNACompoundSet.getDNACompoundSet()) {
154            @SuppressWarnings("unchecked") // compound types must be equal since compound sets are equal
155            SubstitutionMatrix<C> temp = (SubstitutionMatrix<C>) SubstitutionMatrixHelper.getNuc4_4();
156            subMatrix = temp;
157            
158        }
159        ProfileProfileAlignerType pa = ProfileProfileAlignerType.GLOBAL;
160        for (Object o : settings) {
161            if (o instanceof PairwiseSequenceScorerType) {
162                ps = (PairwiseSequenceScorerType) o;
163            } else if (o instanceof GapPenalty) {
164                gapPenalty = (GapPenalty) o;
165            } else if (o instanceof SubstitutionMatrix<?>) {
166                if (cs != ((SubstitutionMatrix<?>) o).getCompoundSet()) {
167                    throw new IllegalArgumentException(
168                            "Compound sets of the sequences and substitution matrix must match.");
169                }
170                @SuppressWarnings("unchecked") // compound types must be equal since compound sets are equal
171                SubstitutionMatrix<C> temp = (SubstitutionMatrix<C>) o;
172                subMatrix = temp;
173            } else if (o instanceof ProfileProfileAlignerType) {
174                pa = (ProfileProfileAlignerType) o;
175            }
176        }
177
178        // stage 1: pairwise similarity calculation
179        List<PairwiseSequenceScorer<S, C>> scorers = getAllPairsScorers(sequences, ps, gapPenalty, subMatrix);
180        runPairwiseScorers(scorers);
181
182        // stage 2: hierarchical clustering into a guide tree
183        GuideTree<S, C> tree = new GuideTree<S, C>(sequences, scorers);
184        scorers = null;
185
186        // stage 3: progressive alignment
187        Profile<S, C> msa = getProgressiveAlignment(tree, pa, gapPenalty, subMatrix);
188
189        // TODO stage 4: refinement
190        return msa;
191    }
192
193    /**
194     * Factory method which computes a sequence alignment for the given {@link Sequence} pair.
195     *
196     * @param <S> each {@link Sequence} of the pair is of type S
197     * @param <C> each element of an {@link AlignedSequence} is a {@link Compound} of type C
198     * @param query the first {@link Sequence}s to align
199     * @param target the second {@link Sequence}s to align
200     * @param type chosen type from list of pairwise sequence alignment routines
201     * @param gapPenalty the gap penalties used during alignment
202     * @param subMatrix the set of substitution scores used during alignment
203     * @return sequence alignment pair
204     */
205    public static <S extends Sequence<C>, C extends Compound> SequencePair<S, C> getPairwiseAlignment(
206            S query, S target, PairwiseSequenceAlignerType type, GapPenalty gapPenalty,
207            SubstitutionMatrix<C> subMatrix) {
208        return getPairwiseAligner(query, target, type, gapPenalty, subMatrix).getPair();
209    }
210
211    // default access (package private) factory methods
212
213    /**
214     * Factory method which sets up a sequence alignment for all {@link Sequence} pairs in the given {@link List}.
215     *
216     * @param <S> each {@link Sequence} of an alignment pair is of type S
217     * @param <C> each element of an {@link AlignedSequence} is a {@link Compound} of type C
218     * @param sequences the {@link List} of {@link Sequence}s to align
219     * @param type chosen type from list of pairwise sequence alignment routines
220     * @param gapPenalty the gap penalties used during alignment
221     * @param subMatrix the set of substitution scores used during alignment
222     * @return list of pairwise sequence aligners
223     */
224    static <S extends Sequence<C>, C extends Compound> List<PairwiseSequenceAligner<S, C>> getAllPairsAligners(
225            List<S> sequences, PairwiseSequenceAlignerType type, GapPenalty gapPenalty,
226            SubstitutionMatrix<C> subMatrix) {
227        List<PairwiseSequenceAligner<S, C>> allPairs = new ArrayList<PairwiseSequenceAligner<S, C>>();
228        for (int i = 0; i < sequences.size(); i++) {
229            for (int j = i+1; j < sequences.size(); j++) {
230                allPairs.add(getPairwiseAligner(sequences.get(i), sequences.get(j), type, gapPenalty, subMatrix));
231            }
232        }
233        return allPairs;
234    }
235
236    /**
237     * Factory method which sets up a sequence pair scorer for all {@link Sequence} pairs in the given {@link List}.
238     *
239     * @param <S> each {@link Sequence} of a pair is of type S
240     * @param <C> each element of a {@link Sequence} is a {@link Compound} of type C
241     * @param sequences the {@link List} of {@link Sequence}s to align
242     * @param type chosen type from list of pairwise sequence scoring routines
243     * @param gapPenalty the gap penalties used during alignment
244     * @param subMatrix the set of substitution scores used during alignment
245     * @return list of sequence pair scorers
246     */
247    public static <S extends Sequence<C>, C extends Compound> List<PairwiseSequenceScorer<S, C>> getAllPairsScorers(
248            List<S> sequences, PairwiseSequenceScorerType type, GapPenalty gapPenalty,
249            SubstitutionMatrix<C> subMatrix) {
250        List<PairwiseSequenceScorer<S, C>> allPairs = new ArrayList<PairwiseSequenceScorer<S, C>>();
251        for (int i = 0; i < sequences.size(); i++) {
252            for (int j = i+1; j < sequences.size(); j++) {
253                allPairs.add(getPairwiseScorer(sequences.get(i), sequences.get(j), type, gapPenalty, subMatrix));
254            }
255        }
256        return allPairs;
257    }
258
259    /**
260     * Factory method which computes a sequence pair score for all {@link Sequence} pairs in the given {@link List}.
261     * This method runs the scorings in parallel by submitting all of the scorings to the shared thread pool of the
262     * {@link ConcurrencyTools} utility.
263     *
264     * @param <S> each {@link Sequence} of a pair is of type S
265     * @param <C> each element of a {@link Sequence} is a {@link Compound} of type C
266     * @param sequences the {@link List} of {@link Sequence}s to align
267     * @param type chosen type from list of pairwise sequence scoring routines
268     * @param gapPenalty the gap penalties used during alignment
269     * @param subMatrix the set of substitution scores used during alignment
270     * @return list of sequence pair scores
271     */
272    public static <S extends Sequence<C>, C extends Compound> double[] getAllPairsScores( List<S> sequences,
273            PairwiseSequenceScorerType type, GapPenalty gapPenalty, SubstitutionMatrix<C> subMatrix) {
274        return runPairwiseScorers(getAllPairsScorers(sequences, type, gapPenalty, subMatrix));
275    }
276
277    /**
278     * Factory method which retrieves calculated elements from a list of tasks on the concurrent execution queue.
279     *
280     * @param <E> each task calculates a value of type E
281     * @param futures list of tasks
282     * @return calculated elements
283     */
284    static <E> List<E> getListFromFutures(List<Future<E>> futures) {
285        List<E> list = new ArrayList<E>();
286        for (Future<E> f : futures) {
287            // TODO when added to ConcurrencyTools, log completions and exceptions instead of printing stack traces
288            try {
289                list.add(f.get());
290            } catch (InterruptedException e) {
291                e.printStackTrace();
292            } catch (ExecutionException e) {
293                e.printStackTrace();
294            }
295        }
296        return list;
297    }
298
299    /**
300     * Factory method which constructs a pairwise sequence aligner.
301     *
302     * @param <S> each {@link Sequence} of an alignment pair is of type S
303     * @param <C> each element of an {@link AlignedSequence} is a {@link Compound} of type C
304     * @param query the first {@link Sequence} to align
305     * @param target the second {@link Sequence} to align
306     * @param type chosen type from list of pairwise sequence alignment routines
307     * @param gapPenalty the gap penalties used during alignment
308     * @param subMatrix the set of substitution scores used during alignment
309     * @return pairwise sequence aligner
310     */
311    public static <S extends Sequence<C>, C extends Compound> PairwiseSequenceAligner<S, C> getPairwiseAligner(
312            S query, S target, PairwiseSequenceAlignerType type, GapPenalty gapPenalty,
313            SubstitutionMatrix<C> subMatrix) {
314        if (!query.getCompoundSet().equals(target.getCompoundSet())) {
315                throw new IllegalArgumentException("Sequence compound sets must be the same");
316        }
317        switch (type) {
318        default:
319        case GLOBAL:
320            return new NeedlemanWunsch<S, C>(query, target, gapPenalty, subMatrix);
321        case LOCAL:
322            return new SmithWaterman<S, C>(query, target, gapPenalty, subMatrix);
323        case GLOBAL_LINEAR_SPACE:
324        case LOCAL_LINEAR_SPACE:
325            // TODO other alignment options (Myers-Miller, Thompson)
326            return null;
327        }
328    }
329
330    /**
331     * Factory method which computes a similarity score for the given {@link Sequence} pair.
332     *
333     * @param <S> each {@link Sequence} of the pair is of type S
334     * @param <C> each element of a {@link Sequence} is a {@link Compound} of type C
335     * @param query the first {@link Sequence} to score
336     * @param target the second {@link Sequence} to score
337     * @param type chosen type from list of pairwise sequence scoring routines
338     * @param gapPenalty the gap penalties used during alignment
339     * @param subMatrix the set of substitution scores used during alignment
340     * @return sequence pair score
341     */
342    static <S extends Sequence<C>, C extends Compound> double getPairwiseScore(S query, S target,
343            PairwiseSequenceScorerType type, GapPenalty gapPenalty, SubstitutionMatrix<C> subMatrix) {
344        return getPairwiseScorer(query, target, type, gapPenalty, subMatrix).getScore();
345    }
346
347    /**
348     * Factory method which constructs a pairwise sequence scorer.
349     *
350     * @param <S> each {@link Sequence} of a pair is of type S
351     * @param <C> each element of a {@link Sequence} is a {@link Compound} of type C
352     * @param query the first {@link Sequence} to score
353     * @param target the second {@link Sequence} to score
354     * @param type chosen type from list of pairwise sequence scoring routines
355     * @param gapPenalty the gap penalties used during alignment
356     * @param subMatrix the set of substitution scores used during alignment
357     * @return sequence pair scorer
358     */
359    static <S extends Sequence<C>, C extends Compound> PairwiseSequenceScorer<S, C> getPairwiseScorer(
360            S query, S target, PairwiseSequenceScorerType type, GapPenalty gapPenalty,
361            SubstitutionMatrix<C> subMatrix) {
362        switch (type) {
363        default:
364        case GLOBAL:
365            return getPairwiseAligner(query, target, PairwiseSequenceAlignerType.GLOBAL, gapPenalty, subMatrix);
366        case GLOBAL_IDENTITIES:
367            return new FractionalIdentityScorer<S, C>(getPairwiseAligner(query, target,
368                    PairwiseSequenceAlignerType.GLOBAL, gapPenalty, subMatrix));
369        case GLOBAL_SIMILARITIES:
370            return new FractionalSimilarityScorer<S, C>(getPairwiseAligner(query, target,
371                    PairwiseSequenceAlignerType.GLOBAL, gapPenalty, subMatrix));
372        case LOCAL:
373            return getPairwiseAligner(query, target, PairwiseSequenceAlignerType.LOCAL, gapPenalty, subMatrix);
374        case LOCAL_IDENTITIES:
375            return new FractionalIdentityScorer<S, C>(getPairwiseAligner(query, target,
376                    PairwiseSequenceAlignerType.LOCAL, gapPenalty, subMatrix));
377        case LOCAL_SIMILARITIES:
378            return new FractionalSimilarityScorer<S, C>(getPairwiseAligner(query, target,
379                    PairwiseSequenceAlignerType.LOCAL, gapPenalty, subMatrix));
380        case KMERS:
381        case WU_MANBER:
382            // TODO other scoring options
383            return null;
384        }
385    }
386
387    /**
388     * Factory method which constructs a profile-profile aligner.
389     *
390     * @param <S> each {@link Sequence} of an alignment profile is of type S
391     * @param <C> each element of an {@link AlignedSequence} is a {@link Compound} of type C
392     * @param profile1 the first {@link Profile} to align
393     * @param profile2 the second {@link Profile} to align
394     * @param type chosen type from list of profile-profile alignment routines
395     * @param gapPenalty the gap penalties used during alignment
396     * @param subMatrix the set of substitution scores used during alignment
397     * @return profile-profile aligner
398     */
399    static <S extends Sequence<C>, C extends Compound> ProfileProfileAligner<S, C> getProfileProfileAligner(
400            Profile<S, C> profile1, Profile<S, C> profile2, ProfileProfileAlignerType type, GapPenalty gapPenalty,
401            SubstitutionMatrix<C> subMatrix) {
402        switch (type) {
403        default:
404        case GLOBAL:
405            return new SimpleProfileProfileAligner<S, C>(profile1, profile2, gapPenalty, subMatrix);
406        case GLOBAL_LINEAR_SPACE:
407        case GLOBAL_CONSENSUS:
408        case LOCAL:
409        case LOCAL_LINEAR_SPACE:
410        case LOCAL_CONSENSUS:
411            // TODO other alignment options (Myers-Miller, consensus, local)
412            return null;
413        }
414    }
415
416    /**
417     * Factory method which constructs a profile-profile aligner.
418     *
419     * @param <S> each {@link Sequence} of an alignment profile is of type S
420     * @param <C> each element of an {@link AlignedSequence} is a {@link Compound} of type C
421     * @param profile1 the first {@link Profile} to align
422     * @param profile2 the second {@link Profile} to align
423     * @param type chosen type from list of profile-profile alignment routines
424     * @param gapPenalty the gap penalties used during alignment
425     * @param subMatrix the set of substitution scores used during alignment
426     * @return profile-profile aligner
427     */
428    static <S extends Sequence<C>, C extends Compound> ProfileProfileAligner<S, C> getProfileProfileAligner(
429            Future<ProfilePair<S, C>> profile1, Future<ProfilePair<S, C>> profile2, ProfileProfileAlignerType type,
430            GapPenalty gapPenalty, SubstitutionMatrix<C> subMatrix) {
431        switch (type) {
432        default:
433        case GLOBAL:
434            return new SimpleProfileProfileAligner<S, C>(profile1, profile2, gapPenalty, subMatrix);
435        case GLOBAL_LINEAR_SPACE:
436        case GLOBAL_CONSENSUS:
437        case LOCAL:
438        case LOCAL_LINEAR_SPACE:
439        case LOCAL_CONSENSUS:
440            // TODO other alignment options (Myers-Miller, consensus, local)
441            return null;
442        }
443    }
444
445    /**
446     * Factory method which constructs a profile-profile aligner.
447     *
448     * @param <S> each {@link Sequence} of an alignment profile is of type S
449     * @param <C> each element of an {@link AlignedSequence} is a {@link Compound} of type C
450     * @param profile1 the first {@link Profile} to align
451     * @param profile2 the second {@link Profile} to align
452     * @param type chosen type from list of profile-profile alignment routines
453     * @param gapPenalty the gap penalties used during alignment
454     * @param subMatrix the set of substitution scores used during alignment
455     * @return profile-profile aligner
456     */
457    static <S extends Sequence<C>, C extends Compound> ProfileProfileAligner<S, C> getProfileProfileAligner(
458            Profile<S, C> profile1, Future<ProfilePair<S, C>> profile2, ProfileProfileAlignerType type,
459            GapPenalty gapPenalty, SubstitutionMatrix<C> subMatrix) {
460        switch (type) {
461        default:
462        case GLOBAL:
463            return new SimpleProfileProfileAligner<S, C>(profile1, profile2, gapPenalty, subMatrix);
464        case GLOBAL_LINEAR_SPACE:
465        case GLOBAL_CONSENSUS:
466        case LOCAL:
467        case LOCAL_LINEAR_SPACE:
468        case LOCAL_CONSENSUS:
469            // TODO other alignment options (Myers-Miller, consensus, local)
470            return null;
471        }
472    }
473
474    /**
475     * Factory method which constructs a profile-profile aligner.
476     *
477     * @param <S> each {@link Sequence} of an alignment profile is of type S
478     * @param <C> each element of an {@link AlignedSequence} is a {@link Compound} of type C
479     * @param profile1 the first {@link Profile} to align
480     * @param profile2 the second {@link Profile} to align
481     * @param type chosen type from list of profile-profile alignment routines
482     * @param gapPenalty the gap penalties used during alignment
483     * @param subMatrix the set of substitution scores used during alignment
484     * @return profile-profile aligner
485     */
486    static <S extends Sequence<C>, C extends Compound> ProfileProfileAligner<S, C> getProfileProfileAligner(
487            Future<ProfilePair<S, C>> profile1, Profile<S, C> profile2, ProfileProfileAlignerType type,
488            GapPenalty gapPenalty, SubstitutionMatrix<C> subMatrix) {
489        switch (type) {
490        default:
491        case GLOBAL:
492            return new SimpleProfileProfileAligner<S, C>(profile1, profile2, gapPenalty, subMatrix);
493        case GLOBAL_LINEAR_SPACE:
494        case GLOBAL_CONSENSUS:
495        case LOCAL:
496        case LOCAL_LINEAR_SPACE:
497        case LOCAL_CONSENSUS:
498            // TODO other alignment options (Myers-Miller, consensus, local)
499            return null;
500        }
501    }
502
503    /**
504     * Factory method which computes a profile alignment for the given {@link Profile} pair.
505     *
506     * @param <S> each {@link Sequence} of the {@link Profile} pair is of type S
507     * @param <C> each element of an {@link AlignedSequence} is a {@link Compound} of type C
508     * @param profile1 the first {@link Profile} to align
509     * @param profile2 the second {@link Profile} to align
510     * @param type chosen type from list of profile-profile alignment routines
511     * @param gapPenalty the gap penalties used during alignment
512     * @param subMatrix the set of substitution scores used during alignment
513     * @return alignment profile
514     */
515    static <S extends Sequence<C>, C extends Compound> ProfilePair<S, C> getProfileProfileAlignment(
516            Profile<S, C> profile1, Profile<S, C> profile2, ProfileProfileAlignerType type, GapPenalty gapPenalty,
517            SubstitutionMatrix<C> subMatrix) {
518        return getProfileProfileAligner(profile1, profile2, type, gapPenalty, subMatrix).getPair();
519    }
520
521    /**
522     * Factory method to run the profile-profile alignments of a progressive multiple sequence alignment concurrently.
523     * This method runs the alignments in parallel by submitting all of the alignment tasks to the shared thread pool
524     * of the {@link ConcurrencyTools} utility.
525     *
526     * @param <S> each {@link Sequence} of the {@link Profile} pair is of type S
527     * @param <C> each element of an {@link AlignedSequence} is a {@link Compound} of type C
528     * @param tree guide tree to follow aligning profiles from leaves to root
529     * @param type chosen type from list of profile-profile alignment routines
530     * @param gapPenalty the gap penalties used during alignment
531     * @param subMatrix the set of substitution scores used during alignment
532     * @return multiple sequence alignment
533     */
534    public static <S extends Sequence<C>, C extends Compound> Profile<S, C> getProgressiveAlignment(GuideTree<S, C> tree,
535            ProfileProfileAlignerType type, GapPenalty gapPenalty, SubstitutionMatrix<C> subMatrix) {
536
537        // find inner nodes in post-order traversal of tree (each leaf node has a single sequence profile)
538        List<GuideTreeNode<S, C>> innerNodes = new ArrayList<GuideTreeNode<S, C>>();
539        for (GuideTreeNode<S, C> n : tree) {
540            if (n.getProfile() == null) {
541                innerNodes.add(n);
542            }
543        }
544
545        // submit alignment tasks to the shared thread pool
546        int i = 1, all = innerNodes.size();
547        for (GuideTreeNode<S, C> n : innerNodes) {
548            Profile<S, C> p1 = n.getChild1().getProfile(), p2 = n.getChild2().getProfile();
549            Future<ProfilePair<S, C>> pf1 = n.getChild1().getProfileFuture(), pf2 = n.getChild2().getProfileFuture();
550            ProfileProfileAligner<S, C> aligner =
551                    (p1 != null) ? ((p2 != null) ? getProfileProfileAligner(p1, p2, type, gapPenalty, subMatrix) :
552                            getProfileProfileAligner(p1, pf2, type, gapPenalty, subMatrix)) :
553                    ((p2 != null) ? getProfileProfileAligner(pf1, p2, type, gapPenalty, subMatrix) :
554                            getProfileProfileAligner(pf1, pf2, type, gapPenalty, subMatrix));
555            n.setProfileFuture(ConcurrencyTools.submit(new CallableProfileProfileAligner<S, C>(aligner), String.format(
556                    "Aligning pair %d of %d", i++, all)));
557        }
558
559        // retrieve the alignment results
560        for (GuideTreeNode<S, C> n : innerNodes) {
561            // TODO when added to ConcurrencyTools, log completions and exceptions instead of printing stack traces
562            try {
563                n.setProfile(n.getProfileFuture().get());
564            } catch (InterruptedException e) {
565                e.printStackTrace();
566            } catch (ExecutionException e) {
567                e.printStackTrace();
568            }
569        }
570
571        // the alignment profile at the root of the tree is the full multiple sequence alignment
572        return tree.getRoot().getProfile();
573    }
574
575    /**
576     * Factory method to run a list of alignments concurrently.  This method runs the alignments in parallel by
577     * submitting all of the alignment tasks to the shared thread pool of the {@link ConcurrencyTools} utility.
578     *
579     * @param <S> each {@link Sequence} of an alignment pair is of type S
580     * @param <C> each element of an {@link AlignedSequence} is a {@link Compound} of type C
581     * @param aligners list of alignments to run
582     * @return list of {@link SequencePair} results from running alignments
583     */
584    static <S extends Sequence<C>, C extends Compound> List<SequencePair<S, C>>
585            runPairwiseAligners(List<PairwiseSequenceAligner<S, C>> aligners) {
586        int n = 1, all = aligners.size();
587        List<Future<SequencePair<S, C>>> futures = new ArrayList<Future<SequencePair<S, C>>>();
588        for (PairwiseSequenceAligner<S, C> aligner : aligners) {
589            futures.add(ConcurrencyTools.submit(new CallablePairwiseSequenceAligner<S, C>(aligner),
590                    String.format("Aligning pair %d of %d", n++, all)));
591        }
592        return getListFromFutures(futures);
593    }
594
595    /**
596     * Factory method to run a list of scorers concurrently.  This method runs the scorers in parallel by submitting
597     * all of the scoring tasks to the shared thread pool of the {@link ConcurrencyTools} utility.
598     *
599     * @param <S> each {@link Sequence} of an alignment pair is of type S
600     * @param <C> each element of an {@link AlignedSequence} is a {@link Compound} of type C
601     * @param scorers list of scorers to run
602     * @return list of score results from running scorers
603     */
604    public static <S extends Sequence<C>, C extends Compound> double[] runPairwiseScorers(
605            List<PairwiseSequenceScorer<S, C>> scorers) {
606        int n = 1, all = scorers.size();
607        List<Future<Double>> futures = new ArrayList<Future<Double>>();
608        for (PairwiseSequenceScorer<S, C> scorer : scorers) {
609            futures.add(ConcurrencyTools.submit(new CallablePairwiseSequenceScorer<S, C>(scorer),
610                    String.format("Scoring pair %d of %d", n++, all)));
611        }
612        List<Double> results = getListFromFutures(futures);
613        double[] scores = new double[results.size()];
614        for (int i = 0; i < scores.length; i++) {
615            scores[i] = results.get(i);
616        }
617        return scores;
618    }
619
620    /**
621     * Factory method to run a list of alignments concurrently.  This method runs the alignments in parallel by
622     * submitting all of the alignment tasks to the shared thread pool of the {@link ConcurrencyTools} utility.
623     *
624     * @param <S> each {@link Sequence} of the {@link Profile} pair is of type S
625     * @param <C> each element of an {@link AlignedSequence} is a {@link Compound} of type C
626     * @param aligners list of alignments to run
627     * @return list of {@link ProfilePair} results from running alignments
628     */
629    static <S extends Sequence<C>, C extends Compound> List<ProfilePair<S, C>>
630            runProfileAligners(List<ProfileProfileAligner<S, C>> aligners) {
631        int n = 1, all = aligners.size();
632        List<Future<ProfilePair<S, C>>> futures = new ArrayList<Future<ProfilePair<S, C>>>();
633        for (ProfileProfileAligner<S, C> aligner : aligners) {
634            futures.add(ConcurrencyTools.submit(new CallableProfileProfileAligner<S, C>(aligner),
635                    String.format("Aligning pair %d of %d", n++, all)));
636        }
637        return getListFromFutures(futures);
638    }
639
640}