/****************************************************************\
*                                                                *
*  Analysis module for exonerate                                 *
*                                                                *
*  Guy St.C. Slater..   mailto:guy@ebi.ac.uk                     *
*  Copyright (C) 2000-2006.  All Rights Reserved.                *
*                                                                *
*  This source code is distributed under the terms of the        *
*  GNU Lesser General Public License. See the file COPYING       *
*  or http://www.fsf.org/copyleft/lesser.html for details        *
*                                                                *
*  If you use this code, please keep this notice intact.         *
*                                                                *
\****************************************************************/

#include "analysis.h"
#include "ungapped.h"
#include "compoundfile.h"

/* Returns the module's single, statically allocated option structure.
 *
 * arg : when non-NULL, an "Analysis Options" set is created, each
 *       option is bound to a field of the static structure, and the
 *       set is absorbed into arg.
 *       When NULL, nothing is registered and the existing structure
 *       is simply handed back.
 *
 * The returned pointer refers to static storage; it must not be freed.
 */
Analysis_ArgumentSet *Analysis_ArgumentSet_create(Argument *arg){
    static Analysis_ArgumentSet analysis_options;
    ArgumentSet *option_set;
    if(!arg)
        return &analysis_options;
    option_set = ArgumentSet_create("Analysis Options");
    ArgumentSet_add_option(option_set, 'E', "exhaustive", NULL,
        "Perform exhaustive alignment (slow)", "FALSE",
        Argument_parse_boolean, &analysis_options.use_exhaustive);
    ArgumentSet_add_option(option_set, 'B', "bigseq", NULL,
        "Allow rapid comparison between big sequences", "FALSE",
        Argument_parse_boolean, &analysis_options.use_bigseq);
    /* The remaining options have no single-letter form */
    ArgumentSet_add_option(option_set, '\0', "forcescan", "[q|t]",
        "Force FSM scan on query or target sequences", "none",
        Argument_parse_string, &analysis_options.force_scan);
    ArgumentSet_add_option(option_set, '\0', "saturatethreshold", "int",
        "Word saturation threshold", "0",
        Argument_parse_int, &analysis_options.saturate_threshold);
    Argument_absorb_ArgumentSet(arg, option_set);
    return &analysis_options;
    }

/**/

/* Turns a comparison that contains HSPs into a GAM result and submits it.
 *
 * comparison : must have HSPs (asserted).
 * user_data  : the Analysis owning the GAM to report through.
 *
 * A heuristic result is built for gapped model types and an ungapped
 * result otherwise.  If no result is produced, nothing is submitted.
 */
static void Analysis_report_func(Comparison *comparison,
                                 gpointer user_data){
    Analysis *state = user_data;
    GAM_Result *result;
    g_assert(Comparison_has_hsps(comparison));
    result = Model_Type_is_gapped(state->gam->gas->type)
           ? GAM_Result_heuristic_create(state->gam, comparison)
           : GAM_Result_ungapped_create(state->gam, comparison);
    if(!result)
        return;
    GAM_Result_submit(result);
    GAM_Result_destroy(result);
    }

/**/

/* FastaPipe initialisation callback for pairwise comparison.
 * Only checks (via assertion) that no query is currently held.
 */
static void Analysis_FastaPipe_Pair_init_func(gpointer user_data){
    Analysis *state = user_data;
    g_assert(!state->curr_query);
    }
/* Called before query pipeline loading */

/* FastaPipe preparation callback for pairwise comparison.
 * Only checks (via assertion) that a query has been stored.
 */
static void Analysis_FastaPipe_Pair_prep_func(gpointer user_data){
    Analysis *state = user_data;
    g_assert(state->curr_query);
    }
/* Called after query pipeline loading */

/* FastaPipe termination callback for pairwise comparison.
 * Destroys the currently held query and clears the slot.
 */
static void Analysis_FastaPipe_Pair_term_func(gpointer user_data){
    Analysis *state = user_data;
    FastaDB_Seq_destroy(state->curr_query);
    state->curr_query = NULL;
    }
/* Called after query pipeline analysis */

/* FastaPipe query callback for pairwise comparison.
 *
 * fdbs      : the query sequence that has just been loaded.
 * user_data : the Analysis; it must not already hold a query (asserted).
 *
 * Stores the result of FastaDB_Seq_share(fdbs) in analysis->curr_query,
 * where the pairwise target callback reads it.
 * When verbosity is above 1, the query id and length are logged.
 *
 * Always returns TRUE.
 */
static gboolean Analysis_FastaPipe_Pair_query_func(FastaDB_Seq *fdbs,
                                                   gpointer user_data){
    register Analysis *analysis = user_data;
    g_assert(!analysis->curr_query);
    if(analysis->verbosity > 1)
        g_message("Load query for pairwise comparison [%s] (%d)",
                  fdbs->seq->id, fdbs->seq->len);
    analysis->curr_query = FastaDB_Seq_share(fdbs);
    return TRUE; /* take queries one at a time */
    }
/* Called on query loading */

/* Compares one query against one target using the analysis' BSAM.
 * If BSAM_compare yields a comparison, it is reported when it has HSPs
 * and is destroyed afterwards in either case.
 */
static void Analysis_BSAM_compare(Analysis *analysis,
                                  FastaDB_Seq *query,
                                  FastaDB_Seq *target){
    Comparison *outcome = BSAM_compare(analysis->bsam,
                                       query->seq, target->seq);
    if(!outcome)
        return;
    if(Comparison_has_hsps(outcome))
        Analysis_report_func(outcome, analysis);
    Comparison_destroy(outcome);
    }

/**/

/* Compares the currently held query against the target fdbs.
 *
 * Without the exhaustive option the work is delegated to
 * Analysis_BSAM_compare().  With it, an exhaustive GAM result is
 * created and, when one is returned, submitted and destroyed.
 */
static void Analysis_Pair_compare(Analysis *analysis,
                                  FastaDB_Seq *fdbs){
    GAM_Result *result;
    if(!analysis->aas->use_exhaustive){
        Analysis_BSAM_compare(analysis, analysis->curr_query, fdbs);
        return;
        }
    result = GAM_Result_exhaustive_create(analysis->gam,
                 analysis->curr_query->seq, fdbs->seq);
    if(result){
        GAM_Result_submit(result);
        GAM_Result_destroy(result);
        }
    }

/* FastaPipe target callback for pairwise comparison.
 *
 * fdbs      : the target sequence that has just been loaded.
 * user_data : the Analysis; it must have a GAM and a current query
 *             (both asserted).
 *
 * Logs the target id and length when verbosity is above 1, then
 * compares the current query against this target.
 *
 * Always returns FALSE.
 */
static gboolean Analysis_FastaPipe_Pair_target_func(FastaDB_Seq *fdbs,
                                                    gpointer user_data){
    Analysis *state = user_data;
    g_assert(state->gam);
    g_assert(state->curr_query);
    if(state->verbosity > 1){
        g_message("Load target for pairwise comparison [%s] (%d)",
                fdbs->seq->id, fdbs->seq->len);
        }
    Analysis_Pair_compare(state, fdbs);
    return FALSE;
    }
/* Called on target loading */

/**/

/* FastaPipe initialisation callback for the Seeder pipeline.
 * Creates a new Seeder from the analysis settings, with
 * Analysis_report_func as its report callback, and stores it in
 * curr_seeder.  No Seeder may already be present (asserted).
 */
static void Analysis_FastaPipe_Seeder_init_func(gpointer user_data){
    Analysis *state = user_data;
    g_assert(!state->curr_seeder);
    state->curr_seeder = Seeder_create(state->verbosity,
                                       state->comparison_param,
                                       state->aas->saturate_threshold,
                                       state->scan_query,
                                       Analysis_report_func, state);
    }
/* Called before query pipeline loading */

/* FastaPipe preparation callback for the Seeder pipeline.
 * Deliberately a no-op; user_data is not used.
 */
static void Analysis_FastaPipe_Seeder_prep_func(gpointer user_data){
    /* nothing to prepare */
    }
/* Called after query pipeline loading */

/* FastaPipe termination callback for the Seeder pipeline.
 * Destroys the current Seeder and clears the slot.  When verbosity is
 * above 2, a message is logged and RecycleBin_profile() is called
 * once the Seeder has been destroyed.
 */
static void Analysis_FastaPipe_Seeder_term_func(gpointer user_data){
    Analysis *state = user_data;
    Seeder_destroy(state->curr_seeder);
    if(state->verbosity > 2){
        /* Profiling is done only after the Seeder is gone */
        g_message("### Seeder destroyed ###");
        RecycleBin_profile();
        }
    state->curr_seeder = NULL;
    }
/* Called after query pipeline analysis */

/* FastaPipe query callback for the Seeder pipeline.
 * Logs the query id and length when verbosity is above 1, then adds
 * the sequence to the current Seeder.
 *
 * Returns whatever Seeder_add_query() returns.
 */
static gboolean Analysis_FastaPipe_Seeder_query_func(FastaDB_Seq *fdbs,
                                                    gpointer user_data){
    Analysis *state = user_data;
    if(state->verbosity > 1){
        g_message("Load query for Seeder [%s] (%d)",
                fdbs->seq->id, fdbs->seq->len);
        }
    return Seeder_add_query(state->curr_seeder, fdbs->seq);
    }
/* Called on query loading */

/* FastaPipe target callback for the Seeder pipeline.
 * Logs the target id and length when verbosity is above 1, then hands
 * the sequence to the current Seeder.
 *
 * Always returns FALSE.
 */
static gboolean Analysis_FastaPipe_Seeder_target_func(FastaDB_Seq *fdbs,
                                                    gpointer user_data){
    Analysis *state = user_data;
    if(state->verbosity > 1){
        g_message("Load target for Seeder [%s] (%d)",
                fdbs->seq->id, fdbs->seq->len);
        }
    Seeder_add_target(state->curr_seeder, fdbs->seq);
    return FALSE;
    }
/* Called on target loading */

/**/

/* Decides whether the FSM scan is applied to the query (TRUE)
 * or to the target (FALSE).
 *
 * force_scan is compared case-insensitively:
 *   "query"  or "q" : returns TRUE
 *   "target" or "t" : returns FALSE
 *   "none"          : decided from the database lengths; TRUE unless
 *                     one sixteenth of the query length (query
 *                     length >> 4) is less than the target length
 *   anything else   : reported with g_error()
 */
static gboolean Analysis_decide_scan_query(FastaDB *query_fdb,
                                           FastaDB *target_fdb,
                                           gchar *force_scan){
    CompoundFile_Pos query_total, target_total;
    /* Explicit requests first */
    if((!g_strcasecmp(force_scan, "query"))
    || (!g_strcasecmp(force_scan, "q")))
        return TRUE;
    if((!g_strcasecmp(force_scan, "target"))
    || (!g_strcasecmp(force_scan, "t")))
        return FALSE;
    /* Anything other than "none" is not a recognised setting */
    if(g_strcasecmp(force_scan, "none")){
        g_error("Unknown force_scan command [%s]", force_scan);
        return FALSE; /* not reached */
        }
    /* No preference given: choose according to database size */
    query_total = CompoundFile_get_length(query_fdb->cf);
    target_total = CompoundFile_get_length(target_fdb->cf);
    return ((query_total >> 4) < target_total) ? FALSE : TRUE;
    }

/* Sorts the Match objects attached to the MATCH transitions of the
 * analysis model into DNA, protein and codon slots.
 *
 * dna_match, protein_match, codon_match are in/out parameters:
 * a slot that is already set is accepted only when the model yields
 * the very same Match for that category; a different one is reported
 * with g_error().  Slots for which no match is found are left as
 * they were.  Transitions without label data, and match types not
 * listed below, are skipped.
 */
static void Analysis_find_matches(Analysis *analysis,
                       Match **dna_match, Match **protein_match,
                       Match **codon_match){
    GPtrArray *match_transitions
        = C4_Model_select_transitions(analysis->gam->model,
                                      C4_Label_MATCH);
    C4_Transition *curr;
    Match *found;
    gint pos;
    for(pos = 0; pos < match_transitions->len; pos++){
        curr = match_transitions->pdata[pos];
        g_assert(curr);
        g_assert(curr->label == C4_Label_MATCH);
        found = curr->label_data;
        if(!found)
            continue;
        switch(found->type){
            case Match_Type_DNA2DNA:
                if((*dna_match) && ((*dna_match) != found))
                    g_error("Multiple DNA matches not implemented");
                (*dna_match) = found;
                break;
            /* Any match involving protein shares a single slot */
            case Match_Type_PROTEIN2PROTEIN:
            case Match_Type_DNA2PROTEIN:
            case Match_Type_PROTEIN2DNA:
                if((*protein_match) && ((*protein_match) != found))
                    g_error("Multiple protein matches not supported");
                (*protein_match) = found;
                break;
            case Match_Type_CODON2CODON:
                if((*codon_match) && ((*codon_match) != found))
                    g_error("Multiple codon matches not implemented");
                (*codon_match) = found;
                break;
            default:
                break;
            }
        }
    g_ptr_array_free(match_transitions, TRUE);
    }

/* Builds an Analysis for the given query and target FASTA path lists.
 *
 * query_path_list, target_path_list :
 *     path lists; both must be non-NULL and non-empty (asserted).
 * query_type, target_type :
 *     alphabet types; Alphabet_Type_UNKNOWN causes the type to be
 *     guessed from the first path of the corresponding list.
 *     After guessing, each must be DNA or PROTEIN (asserted).
 * *_chunk_id, *_chunk_total :
 *     passed through to FastaDB_open_list_with_limit().
 * verbosity :
 *     stored in the analysis; values above 1 enable progress messages.
 *
 * Depending on the parsed analysis options, the FastaPipe is set up
 * in one of three ways:
 *   - exhaustive : pairwise callbacks only
 *   - bigseq     : a BSAM plus the pairwise callbacks
 *   - otherwise  : Seeder callbacks, with the query and target
 *                  databases swapped when the scan is on the query
 *
 * Returns a newly allocated Analysis
 * (see Analysis_destroy() in this file).
 */
Analysis *Analysis_create(
              GPtrArray *query_path_list, Alphabet_Type query_type,
              gint query_chunk_id, gint query_chunk_total,
              GPtrArray *target_path_list, Alphabet_Type target_type,
              gint target_chunk_id, gint target_chunk_total,
              gint verbosity){
    /* g_new0 zero-fills, so all members not set below start as 0/NULL */
    register Analysis *analysis = g_new0(Analysis, 1);
    register FastaDB *query_fdb, *target_fdb,
                     *seeder_query_fdb, *seeder_target_fdb;
    register Match *match;
    Match *dna_match = NULL, *protein_match = NULL,
          *codon_match = NULL;
    register HSP_Param *dna_hsp_param, *protein_hsp_param,
                       *codon_hsp_param;
    /* NOTE(review): presumably returns the already-parsed match options
     * when called with NULL, as Analysis_ArgumentSet_create() does
     * in this file -- confirm against match.c
     */
    register Match_ArgumentSet *mas = Match_ArgumentSet_create(NULL);
    register gboolean use_horizon;
    g_assert(query_path_list);
    g_assert(target_path_list);
    g_assert(query_path_list->len);
    g_assert(target_path_list->len);
    /* NULL argument: fetch the static option structure
     * without registering anything
     */
    analysis->aas = Analysis_ArgumentSet_create(NULL);
    analysis->verbosity = verbosity;
    /* Guess any unspecified alphabet type from the first file only */
    if(query_type == Alphabet_Type_UNKNOWN){
        query_type = FastaDB_guess_type(
                          (gchar*)query_path_list->pdata[0]);
        if(verbosity > 1)
            g_message("Guessed query type [%s]",
                    Alphabet_Type_get_name(query_type));
        }
    if(target_type == Alphabet_Type_UNKNOWN){
        target_type = FastaDB_guess_type(
                          (gchar*)target_path_list->pdata[0]);
        if(verbosity > 1)
            g_message("Guessed target type [%s]",
                    Alphabet_Type_get_name(target_type));
        }
    g_assert((query_type == Alphabet_Type_DNA)
           ||(query_type == Alphabet_Type_PROTEIN));
    g_assert((target_type == Alphabet_Type_DNA)
           ||(target_type == Alphabet_Type_PROTEIN));
    if(verbosity > 1)
        g_message("Creating analysis with query[%s] target[%s]",
                Alphabet_Type_get_name(query_type),
                Alphabet_Type_get_name(target_type));
    analysis->gam = GAM_create(query_type, target_type,
                               mas->dna_submat,
                               mas->protein_submat,
                               mas->translate,
                               analysis->aas->use_exhaustive,
                               verbosity);
    Analysis_find_matches(analysis, &dna_match, &protein_match,
                                    &codon_match);
    /* Pick one match (DNA, then protein, then codon) to supply
     * the alphabets used for opening the databases
     */
    match = dna_match;
    if(!match)
        match = protein_match;
    if(!match)
        match = codon_match;
    g_assert(match);
    query_fdb = FastaDB_open_list_with_limit(query_path_list,
            match->query->alphabet, query_chunk_id, query_chunk_total);
    target_fdb = FastaDB_open_list_with_limit(target_path_list,
            match->target->alphabet, target_chunk_id, target_chunk_total);
    if(analysis->aas->use_exhaustive){
        analysis->fasta_pipe = FastaPipe_create(
                                  query_fdb, target_fdb,
                                  Analysis_FastaPipe_Pair_init_func,
                                  Analysis_FastaPipe_Pair_prep_func,
                                  Analysis_FastaPipe_Pair_term_func,
                                  Analysis_FastaPipe_Pair_query_func,
                                  Analysis_FastaPipe_Pair_target_func,
                                  FastaDB_Mask_ALL,
                                  analysis->gam->translate_both);
        analysis->curr_query = NULL;
    } else { /* Not exhaustive */
        /* NOTE(review): this looks redundant -- the same call was made
         * above with the same output slots, and nothing visible here
         * changes the model in between.  Confirm before removing.
         */
        Analysis_find_matches(analysis, &dna_match, &protein_match,
                                        &codon_match);
        /**/
        use_horizon = !analysis->aas->use_bigseq;
        /* Create HSP parameters only for the match kinds present */
        dna_hsp_param = dna_match
                      ? HSP_Param_create(dna_match, use_horizon)
                      : NULL;
        protein_hsp_param = protein_match
                          ? HSP_Param_create(protein_match, use_horizon)
                          : NULL;
        codon_hsp_param = codon_match
                        ? HSP_Param_create(codon_match, use_horizon)
                        : NULL;
        analysis->comparison_param = Comparison_Param_create(
                query_type, target_type,
                dna_hsp_param, protein_hsp_param, codon_hsp_param);
        /* The local handles are released here, yet the HSP parameters
         * are still read through comparison_param below; presumably
         * Comparison_Param_create() keeps its own reference
         * -- TODO confirm
         */
        if(dna_hsp_param)
            HSP_Param_destroy(dna_hsp_param);
        if(protein_hsp_param)
            HSP_Param_destroy(protein_hsp_param);
        if(codon_hsp_param)
            HSP_Param_destroy(codon_hsp_param);
        /* Raise HSP thresholds to score if ungapped */
        if(!Model_Type_is_gapped(analysis->gam->gas->type)){
            if(analysis->comparison_param->dna_hsp_param
            && (analysis->comparison_param->dna_hsp_param->threshold
              < analysis->gam->gas->threshold))
                analysis->comparison_param->dna_hsp_param->threshold
                    = analysis->gam->gas->threshold;
            if(analysis->comparison_param->protein_hsp_param
            && (analysis->comparison_param->protein_hsp_param->threshold
              < analysis->gam->gas->threshold))
                analysis->comparison_param->protein_hsp_param->threshold
                    = analysis->gam->gas->threshold;
            if(analysis->comparison_param->codon_hsp_param
            && (analysis->comparison_param->codon_hsp_param->threshold
              < analysis->gam->gas->threshold))
                analysis->comparison_param->codon_hsp_param->threshold
                    = analysis->gam->gas->threshold;
            }
        /* Don't need HSP horizon for bigseq comparison */
        if(analysis->aas->use_bigseq){
            analysis->bsam = BSAM_create(analysis->comparison_param,
                    analysis->aas->saturate_threshold,
                    verbosity);
            /* Same pairwise callbacks as the exhaustive case;
             * Analysis_Pair_compare() routes to the BSAM
             * when use_exhaustive is not set
             */
            analysis->fasta_pipe = FastaPipe_create(
                                  query_fdb, target_fdb,
                                  Analysis_FastaPipe_Pair_init_func,
                                  Analysis_FastaPipe_Pair_prep_func,
                                  Analysis_FastaPipe_Pair_term_func,
                                  Analysis_FastaPipe_Pair_query_func,
                                  Analysis_FastaPipe_Pair_target_func,
                                  FastaDB_Mask_ALL,
                                  analysis->gam->translate_both);
            analysis->curr_query = NULL;
        } else { /* Use Seeder */
            analysis->scan_query = Analysis_decide_scan_query(query_fdb,
                                                    target_fdb,
                                             analysis->aas->force_scan);
            if(verbosity > 1)
                g_message("Applying FSM scan to [%s]",
                          analysis->scan_query?"query":"target");
            /* Swap the query and target databases
             * handed to the pipeline when the scan is on the query
             */
            if(analysis->scan_query){
                seeder_query_fdb = target_fdb;
                seeder_target_fdb = query_fdb;
            } else {
                seeder_query_fdb = query_fdb;
                seeder_target_fdb = target_fdb;
                }
            analysis->curr_seeder = NULL;
            analysis->fasta_pipe = FastaPipe_create(
                seeder_query_fdb, seeder_target_fdb,
                Analysis_FastaPipe_Seeder_init_func,
                Analysis_FastaPipe_Seeder_prep_func,
                Analysis_FastaPipe_Seeder_term_func,
                Analysis_FastaPipe_Seeder_query_func,
                Analysis_FastaPipe_Seeder_target_func,
                FastaDB_Mask_ALL, analysis->gam->translate_both);
            }
        }
    /* Local database handles are closed on every path; presumably
     * the FastaPipe retains what it needs -- TODO confirm
     */
    FastaDB_close(query_fdb);
    FastaDB_close(target_fdb);
    return analysis;
    }

/* Releases an Analysis together with the objects it holds.
 * The FastaPipe and the GAM are always destroyed; the current query,
 * Seeder, BSAM and comparison parameters only when they are set.
 * analysis itself is freed last and must not be used afterwards.
 */
void Analysis_destroy(Analysis *analysis){
    FastaPipe_destroy(analysis->fasta_pipe);
    /* Members below are optional */
    if(analysis->curr_query){
        FastaDB_Seq_destroy(analysis->curr_query);
        }
    if(analysis->curr_seeder){
        Seeder_destroy(analysis->curr_seeder);
        }
    if(analysis->bsam){
        BSAM_destroy(analysis->bsam);
        }
    if(analysis->comparison_param){
        Comparison_Param_destroy(analysis->comparison_param);
        }
    GAM_destroy(analysis->gam);
    g_free(analysis);
    }

/* Runs the analysis: calls FastaPipe_process() repeatedly until it
 * returns a false value, then calls GAM_report() on the analysis' GAM.
 */
void Analysis_process(Analysis *analysis){
    for(;;){
        if(!FastaPipe_process(analysis->fasta_pipe, analysis))
            break;
        }
    GAM_report(analysis->gam);
    }

