
/*******************************************************************************
* Copyright (c) 2016 Olivier Langella <Olivier.Langella@moulon.inra.fr>.
*
* This file is part of peptider.
*
*     peptider is free software: you can redistribute it and/or modify
*     it under the terms of the GNU General Public License as published by
*     the Free Software Foundation, either version 3 of the License, or
*     (at your option) any later version.
*
*     peptider is distributed in the hope that it will be useful,
*     but WITHOUT ANY WARRANTY; without even the implied warranty of
*     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
*     GNU General Public License for more details.
*
*     You should have received a copy of the GNU General Public License
*     along with peptider.  If not, see <http://www.gnu.org/licenses/>.
*
* Contributors:
*     Olivier Langella <Olivier.Langella@moulon.inra.fr> - initial API and implementation
******************************************************************************/
#include "spectrumdatacollector.h"
#include <QDebug>
#include "fastasource.h"
#include <pappsomspp/peptide/peptide.h>
#include <pappsomspp/psm/xtandem/xtandemhyperscorebis.h>
#include "../reporter/basepsmreporter.h"
#include <algorithm>
#include <cmath>
#include <functional>
#include <mutex>
#include <pappsomspp/exception/exceptionnotfound.h>
#include "../utils/peptiderparams.h"


/**
 * Compares the size of the collector's original spectrum with the size of
 * the original spectrum held by this scan.
 *
 * @param p_sdatac collector whose qualified spectrum is examined (must not be null)
 * @return true when the collector's original spectrum is strictly smaller
 *         than this scan's original spectrum, false otherwise
 */
bool OriginalScan::isPhysikronPair(const SpectrumDataCollector * p_sdatac) const {
    const auto collector_spectrum_size = p_sdatac->getQualifiedSpectrum().getOriginalSpectrumSp().get()->size();
    const auto scan_spectrum_size = _original_spectrum_sp.get()->size();
    return collector_spectrum_size < scan_spectrum_size;
}

/**
 * Builds a collector for one spectrum.
 *
 * The precursor mass range is derived from the spectrum precursor m/z and the
 * two precisions; scoring settings are read from the PeptiderParams singleton.
 *
 * @param q_spectrum      spectrum to collect scores for
 * @param precision_lower lower precision used to build the precursor mass range
 * @param precision_upper upper precision used to build the precursor mass range
 */
SpectrumDataCollector::SpectrumDataCollector(const CustomSpectrum & q_spectrum, pappso::PrecisionP precision_lower, pappso::PrecisionP precision_upper)
    : _q_spectrum(q_spectrum),
      _mass_range(q_spectrum.getPrecursorMz(), precision_lower, precision_upper)
{
    const PeptiderParams & settings = PeptiderParams::Instance();

    // settings taken from the global parameter set
    _msms_precision = settings.getMsmsPrecision();
    _maximum_evalue = settings.get(PeptiderParamPappsoDouble::MaximumPeptideEvalueThreshold);
    _minimum_ion_match_to_keep_score = settings.get(PeptiderParamPappsoDouble::MinimumIonMatch);
    _refine_peptide_model = settings.get(PeptiderParamBool::TandemSpectrumModelRefinePeptideModel);
    _ion_list = settings.getTandemIonScoreList();

    // hard-coded limits used by scorePeptideSp():
    // maximum number of kept results, and minimum matched ions / spectrum size ratio
    _maximum_peptides = 20;
    _minimum_ratio = 0.08;
}

/**
 * Copy constructor: duplicates the spectrum, the mass range, the per-charge
 * processed spectra and every scoring setting, but starts with empty scoring
 * state (no already-scored peptide, no cumulated score, no regression).
 *
 * @param other collector to copy the configuration from
 */
SpectrumDataCollector::SpectrumDataCollector(const SpectrumDataCollector& other):_q_spectrum(other._q_spectrum), _mass_range(other._mass_range) {
    // scoring settings
    _msms_precision = other._msms_precision;
    _ion_list = other._ion_list;
    // these two were previously left uncopied although _refine_peptide_model
    // is read by scorePeptideSp() and completeSpectrum()
    _refine_peptide_model = other._refine_peptide_model;
    _maximum_evalue = other._maximum_evalue;
    _minimum_ion_match_to_keep_score = other._minimum_ion_match_to_keep_score;
    _maximum_peptides = other._maximum_peptides;
    _minimum_ratio = other._minimum_ratio;

    // processed spectra are shared with the source collector
    _spectrum_by_charge = other._spectrum_by_charge;

    // scoring state is deliberately not inherited
    _already_scored.clear();
    _cumul_scores.clear();

    // the regression is owned (deleted in the destructor): never share the
    // source pointer, and never leave it indeterminate
    _linear_regression = nullptr;
}


/**
 * Stores a processed spectrum in the slot indexed by its charge, growing the
 * per-charge table when needed.
 *
 * @param new_sp spectrum to store
 * @param charge index of the slot to fill
 */
void SpectrumDataCollector::addSpectrumSpCharge(pappso::SpectrumSp new_sp, unsigned int charge) {
    const unsigned int slots_needed = charge+1;
    if (slots_needed > _spectrum_by_charge.size()) {
        // make sure the slot for this charge exists
        _spectrum_by_charge.resize(slots_needed);
    }
    _spectrum_by_charge[charge] = new_sp;

    qDebug() << "SpectrumDataCollector::addSpectrumSpCharge " << charge << " " << _spectrum_by_charge.size();
}

/**
 * Releases the linear regression owned by this collector, if any.
 */
SpectrumDataCollector::~SpectrumDataCollector()
{
    // deleting a null pointer is a no-op, so no explicit test is required
    delete _linear_regression;
}


/**
 * Creates a shared pointer holding a copy of this collector (built with the
 * copy constructor).
 *
 * @return shared pointer to the new copy
 */
SpectrumDataCollectorSp SpectrumDataCollector::makeSpectrumDataCollectorSp() const {
    SpectrumDataCollectorSp copy_sp = std::make_shared<SpectrumDataCollector>(*this);
    return copy_sp;
}


/**
 * Returns the first non-null processed spectrum of the per-charge table,
 * scanning slots in increasing charge order.
 *
 * @return reference to the stored shared pointer of that spectrum
 * @throws pappso::ExceptionNotFound when no slot holds a spectrum
 */
const pappso::SpectrumSp & SpectrumDataCollector::getProcessedSpectrum() const {
    for (const auto & spectrum_sp : _spectrum_by_charge) {
        if (spectrum_sp.get() != nullptr) {
            return spectrum_sp;
        }
    }
    // the message needs a %1 place marker, otherwise arg() cannot insert the scan number
    throw pappso::ExceptionNotFound(QObject::tr("no processed spectrum found for scan number %1").arg(this->getQualifiedSpectrum().getSpectrumId().getScanNum()));
}

void SpectrumDataCollector::scorePeptideSp (const DigestProduct & digest_peptide_product) {
    //if (_mass_range.contains(digest_peptide_product.mz)) {
    //qDebug() << " SpectrumDataCollector::scorePeptideSp _p_mass_range " << _spectrum_by_charge.size() << " " << digest_peptide_product.charge+1;
    //}
    if (_spectrum_by_charge.size() < digest_peptide_product.charge+1) {
        return;
    }
    const pappso::Spectrum * p_spectrum = _spectrum_by_charge[digest_peptide_product.charge].get();
    if (p_spectrum == nullptr) {
        return;
    }
//qDebug() << " SpectrumDataCollector::scorePeptideSp 2";
    //QMutexLocker lock(&_mutex);

    //QString li_sequence = digest_peptide_product.peptide_sp.get()->getLiAbsoluteString();
    size_t sequence_li_crc = _hash_fn ( digest_peptide_product.peptide_sp.get()->getLiAbsoluteString().toStdString());

    _mutex.lock();
    std::unordered_set< size_t >::iterator it_scend = _already_scored.end();
    bool already_scored = false;
    if (_already_scored.find(sequence_li_crc) != it_scend) {
        //this peptide was already scored
        //perhaps there is a good enough score ?
        already_scored = true;
    }
    if (already_scored) {
        _mutex.unlock();
        return;
    }
    else {
        _already_scored.insert(sequence_li_crc);
    }
    _mutex.unlock();

    //qDebug() << "SpectrumDataCollector::scorePeptideSp _msms_precision="<<_msms_precision->toString() << " " <<  _ion_list.size();
    pappso::XtandemHyperscoreBis hyperscore_withxtspectrum (_refine_peptide_model, _msms_precision, _ion_list);

    //qDebug() << "SpectrumDataCollector::scorePeptideSp digest_peptide_product.charge="<<digest_peptide_product.charge << " " <<  spectrum_simple.size() << " "<< digest_peptide_product.peptide_sp.get()->toString();

    if (hyperscore_withxtspectrum.computeXtandemHyperscore(*p_spectrum, *(digest_peptide_product.peptide_sp.get()),digest_peptide_product.charge)) {
        //qDebug() << "SpectrumDataCollector::scorePeptideSp ok "<<hyperscore_withxtspectrum.getHyperscore();
        /// if (test_tandem > 40) {
        //  qDebug() << " peptide match " <<  digest_peptide_product.peptide_sp.get()->toString() << " scan="  << _q_spectrum.getSpectrumId().getScanNum() << " score=" << test_tandem ;
        //}
        unsigned int total_ions = hyperscore_withxtspectrum.getTotalMatchedIons();


        _mutex.lock();
        //if ((!already_scored) && (total_ions > 0)) {
        //_cumul_scores.push_back(hyperscore_withxtspectrum.getHyperscore());
        //}
        if (total_ions >= _minimum_ion_match_to_keep_score) {

            unsigned int spectrum_size = p_spectrum->size();
            if (((double)total_ions/(double)spectrum_size)> _minimum_ratio) {

                //qDebug() << "SpectrumDataCollector::getProcessedSpectrum push_back "<<  total_ions << " " << spectrum_size;
                _result_list.push_back( {digest_peptide_product,hyperscore_withxtspectrum.getHyperscore(), total_ions
                                        });
                //}
                if (_result_list.size() > _maximum_peptides) {
                    std::sort(_result_list.begin(), _result_list.end(),[](const PsmScore & a, const PsmScore & b)
                    {
                        return a.hyperscore > b.hyperscore;
                    });
                    _result_list.pop_back();

                }
            }
        }
        _mutex.unlock();
    }
    else {
        //qDebug() << "SpectrumDataCollector::scorePeptideSp NOT ok "<<hyperscore_withxtspectrum.getHyperscore();
        /// if (test_tandem > 40) {

    }
}



/**
 * @return the spectrum this collector was built for
 */
const CustomSpectrum & SpectrumDataCollector::getQualifiedSpectrum() const {
    return this->_q_spectrum;
}


/**
 * Converts a score into an e-value using the linear regression:
 * 10 raised to the regression Y value at X = score.
 *
 * @param score score to convert
 * @return 10^(regression Y at score)
 * @throws pappso::PappsoException when no regression exists yet
 *         (same error as getLinearRegression())
 */
pappso::pappso_double SpectrumDataCollector::getEvalue (pappso::pappso_double score) const {
    if (_linear_regression == nullptr) {
        // fail explicitly instead of dereferencing a null pointer
        throw pappso::PappsoException(QObject::tr("_linear_regression == nullptr"));
    }
    return (std::pow(10,_linear_regression->getYfromX(score)));
}

/**
 * Collects the stored results whose e-value (see getEvalue()) is strictly
 * below the given threshold, in storage order.
 *
 * @param evalue_threshold exclusive upper bound on the e-value
 * @return copies of the selected results
 */
std::vector<PsmScore> SpectrumDataCollector::getPsmScoreListBeneathEvalue(pappso::pappso_double evalue_threshold) const {
    std::vector<PsmScore> selected;
    for (std::vector<PsmScore>::const_iterator it = _result_list.begin(); it != _result_list.end(); ++it) {
        const bool is_beneath = getEvalue(it->hyperscore) < evalue_threshold;
        if (is_beneath) {
            selected.push_back(*it);
        }
    }

    // disabled experiment kept for reference: try to keep only best prediction
    /*
        if (psm_score_list.size() > 1) {
            std::sort(psm_score_list.begin(), psm_score_list.end(),
            [](const PsmScore & a, const PsmScore & b) {
                return (a.hyperscore > b.hyperscore);
            });

            pappso::pappso_double best_hyperscore = psm_score_list[0].hyperscore;

            std::vector<PsmScore> psm_score_list_b;
            for (const PsmScore & score: psm_score_list) {
                if ((best_hyperscore - score.hyperscore) < 1) {
                    psm_score_list_b.push_back(score);
                }
                else {
                    if ((score.hyperscore - _last_ccdr_score) > 7) {
                        psm_score_list_b.push_back(score);
                    }
                }
            }

            return psm_score_list_b;
        }*/

    return selected;
}

std::vector<PsmScore> SpectrumDataCollector::getPsmScoreList() const {
    std::vector<PsmScore> psm_score_list;
    for (const PsmScore & score: _result_list) {
        psm_score_list.push_back(score);
    }
    return psm_score_list;
}

void SpectrumDataCollector::removeDecoyWithSameSequenceLi() {
    QStringList sequence_li_collection;
    for (const PsmScore & result : _result_list) {
        if (!result.digest_product.reverse) {
            sequence_li_collection << result.digest_product.peptide_sp.get()->getLiAbsoluteString();
        }
    }


    _result_list.erase(std::remove_if(_result_list.begin(),
                                      _result_list.end(),
    [this,sequence_li_collection](const PsmScore & a) {
        if (!a.digest_product.reverse) return false;
        return (sequence_li_collection.contains(a.digest_product.peptide_sp.get()->getLiAbsoluteString()));
    }),
    _result_list.end());
}

/**
 * Replaces the peptide pointer of every stored result by the instance
 * returned by the peptide store for that peptide.
 *
 * @param peptide_store store queried (through getInstance) for each peptide
 */
void SpectrumDataCollector::peptideDeduplication(PeptideStore & peptide_store) {
    for (PsmScore & psm : _result_list) {
        psm.digest_product.peptide_sp = peptide_store.getInstance(psm.digest_product.peptide_sp);
    }

    qDebug() << "SpectrumDataCollector::peptideDeduplication 2 " << _result_list.size();
}


std::vector<pappso::pappso_double> SpectrumDataCollector::getBestScoreList() const {
    return _best_scores;
}

std::vector<pappso::pappso_double> SpectrumDataCollector::getFakeScoreList() const {
    return _cumul_scores;
}


/**
 * Gives access to the linear regression built by computeSlope().
 *
 * @return the current regression
 * @throws pappso::PappsoException when no regression exists
 */
const LinearRegression & SpectrumDataCollector::getLinearRegression() const {
    if (_linear_regression != nullptr) {
        return *_linear_regression;
    }
    throw pappso::PappsoException(QObject::tr("_linear_regression == nullptr"));
}

std::vector<size_t> SpectrumDataCollector::getFakeScoreHistogram() const {


    std::vector<size_t> histogram;
    std::size_t total_values = _cumul_scores.size();

    size_t count_values=0;
    pappso::pappso_double range_i=0;
    while ((count_values < total_values) && (range_i < 150)) {
        size_t count_range = count_if(_cumul_scores.begin(), _cumul_scores.end(), [range_i](pappso::pappso_double score) {
            if ((score >= range_i) && (score < range_i+1)) {
                return true;
            }
            return false;
        });
        histogram.push_back(count_range);
        count_values += count_range;
        range_i++;
    }

    return histogram;
}

/**
 * Turns the score histogram into a complementary cumulative distribution
 * (tail distribution): each bin receives the sum of its own count and of the
 * counts of the bins above it.
 *
 * The running sum is restarted when a gap is met in the upper part of the
 * histogram: if the bin just processed has an index above 10 and the bin
 * right below it is empty, the processed bin is set to 0 and the running sum
 * is reset to 0.
 *
 * @return the cumulated histogram (same size as getFakeScoreHistogram())
 */
std::vector<size_t> SpectrumDataCollector::getFakeCcdr() const {

    std::vector<size_t> histogram = getFakeScoreHistogram();

    std::size_t total_values = 0;
    // walk from the last bin down to the first one
    for (std::size_t index = histogram.size(); index > 0; ) {
        --index;
        total_values += histogram[index];
        histogram[index] = total_values;

        // The index test comes first so that histogram[index - 1] is only
        // read when it exists; the previous iterator-based code read one
        // element before the beginning of the vector on its last iteration.
        if ((index > 10) && (histogram[index - 1] == 0)) {
            total_values = 0;
            histogram[index] = 0;
        }
    }

    return histogram;
}

/**
 * Trims the tail of a pair of parallel series: while more than 4 points
 * remain and the last Y value is below a threshold derived from the first Y
 * value, the last point is removed from both vectors.
 *
 * The threshold is 5% of the first Y value, both numbers being truncated to
 * unsigned integers (unchanged historical behaviour).
 *
 * @param x_vec X values, modified in place
 * @param y_vec Y values, modified in place; nothing is done when empty
 */
void cutCcdr(std::vector< pappso::pappso_double > & x_vec, std::vector< pappso::pappso_double > & y_vec) {
    if (y_vec.empty()) {
        // no reference value to derive the threshold from
        return;
    }

    const std::size_t first_value = static_cast<std::size_t>(y_vec[0]);
    const std::size_t min_nb = static_cast<std::size_t>(first_value * 0.05);
    // NOTE(review): because of the two integer truncations, min_nb is 0
    // whenever y_vec[0] < 20; computeSlope() passes log10 counts here, so the
    // loop below may never trim anything in practice - confirm the intent.

    while ((x_vec.size() > 4) && (!y_vec.empty()) && (y_vec.back() < min_nb) ) {
        x_vec.pop_back();
        y_vec.pop_back();
    }
}

void SpectrumDataCollector::computeSlope() {


    _best_scores.resize(0);

    std::vector<size_t>  histogram  = getFakeCcdr();
    //std::vector<size_t>  histogram  = getFakeScoreHistogram();

    std::vector<size_t>::iterator it = histogram.begin();
    std::vector<size_t>::iterator itend = histogram.end();
    std::vector<size_t>::iterator itmax = std::max_element(histogram.begin(),histogram.end());
    //itmax +=2;

    std::vector<pappso::pappso_double> x_vec;
    std::vector<pappso::pappso_double> y_vec;

    //cerr <<" histogram.size() > 2" << endl;

    _last_ccdr_score = 0;

    if (histogram.size() > 2) {
        bool start = false;
        pappso::pappso_double indice_x=0.5;
        while (it != itend) {
            if (it == itmax) {
                start = true;
            }
            pappso::pappso_double nbscore = (pappso::pappso_double)*it;
            if ((nbscore > 0) && (start)) {
                y_vec.push_back(std::log10(nbscore));
                x_vec.push_back(indice_x);

                //cerr << indice_x << " " << std::log(nbscore) << endl;
            }
            it++;
            indice_x+=1;
        }
        _last_ccdr_score = x_vec.back();
    }

    _linear_regression = new LinearRegression(x_vec, y_vec);

    if  (x_vec.size() > 5) {

        cutCcdr(x_vec, y_vec);
        delete _linear_regression;
        _linear_regression = new LinearRegression(x_vec, y_vec);

        x_vec.erase(x_vec.begin());
        y_vec.erase(y_vec.begin());
        LinearRegression * p_linear_regression_better_mega_gruge =  new LinearRegression(x_vec, y_vec);

        while ((x_vec.size() > 4) && (p_linear_regression_better_mega_gruge->getSlope() < _linear_regression->getSlope())) {
            delete _linear_regression;
            _linear_regression = p_linear_regression_better_mega_gruge;

            x_vec.erase(x_vec.begin());
            y_vec.erase(y_vec.begin());
            //delete p_linear_regression_better_mega_gruge;
            p_linear_regression_better_mega_gruge =  new LinearRegression(x_vec, y_vec);
        }
        delete p_linear_regression_better_mega_gruge;


        /*
                x_vec.pop_back();
                y_vec.pop_back();

                p_linear_regression_better_mega_gruge =  new LinearRegression(x_vec, y_vec);
                while ((x_vec.size() > 5) && (p_linear_regression_better_mega_gruge->getIntercept() > _linear_regression->getIntercept())) {
                    delete _linear_regression;
                    _linear_regression = p_linear_regression_better_mega_gruge;


                    x_vec.pop_back();
                    y_vec.pop_back();
                    //delete p_linear_regression_better_mega_gruge;
                    p_linear_regression_better_mega_gruge =  new LinearRegression(x_vec, y_vec);
                }
                delete p_linear_regression_better_mega_gruge;
        */

    }

}


void SpectrumDataCollector::setOriginalScan(const OriginalScan * original_scan) {
    _p_original_scan = original_scan;
}


/**
 * @return the pointer recorded by setOriginalScan()
 */
const OriginalScan * SpectrumDataCollector::getOriginalScan() const {
    return this->_p_original_scan;
}

void SpectrumDataCollector::completeSpectrum(const SpectrumCompletor * p_completor) {
    //qDebug() << "SpectrumDataCollector::completeSpectrum begin " << _result_list.size();
    _best_complete_spectrum_sp = nullptr;
    if (!_p_original_scan->isPhysikronPair(this)) return;
    //qDebug() << "SpectrumDataCollector::completeSpectrum is a physikron pair " << _result_list.size();
    for (PsmScore & score :_result_list) {
        pappso::SpectrumSp spectrum_sp = p_completor->completeSpectrum(score.digest_product.peptide_sp, _p_original_scan->_original_spectrum_sp.get(), this->_q_spectrum.getOriginalSpectrumSp().get(), score.digest_product.charge);

        if (_best_complete_spectrum_sp.get() == nullptr) {
            _best_complete_spectrum_sp = spectrum_sp;
            _best_psm_score = score;
        }
        else {
            if (_best_complete_spectrum_sp.get()->size() < spectrum_sp.get()->size()) {
                _best_complete_spectrum_sp = spectrum_sp;
                _best_psm_score = score;
            }
        }
    }

    if (_best_complete_spectrum_sp.get() != nullptr) {
        pappso::XtandemHyperscoreBis hyperscore_withxtspectrum (_refine_peptide_model, _msms_precision, _ion_list);

        if (hyperscore_withxtspectrum.computeXtandemHyperscore(*_best_complete_spectrum_sp.get(), *(_best_psm_score.digest_product.peptide_sp.get()),_best_psm_score.digest_product.charge)) {
            _best_psm_score.hyperscore = hyperscore_withxtspectrum.getHyperscore();
            _best_psm_score.total_ion_match = hyperscore_withxtspectrum.getTotalMatchedIons();
        }
    }
    //qDebug() << "SpectrumDataCollector::completeSpectrum end";

}

/**
 * @return the completed spectrum kept by completeSpectrum() (may hold a null pointer)
 */
const pappso::SpectrumSp & SpectrumDataCollector::getBestCompletedSpectrum() const {
    return this->_best_complete_spectrum_sp;
}
/**
 * @return a copy of the result kept by completeSpectrum()
 */
PsmScore SpectrumDataCollector::getBestCompletedSpectrumScore() const {
    return this->_best_psm_score;
}

