
/*******************************************************************************
* Copyright (c) 2017 Olivier Langella <olivier.langella@u-psud.fr>.
*
* This file is part of XTPcpp.
*
*     XTPcpp is free software: you can redistribute it and/or modify
*     it under the terms of the GNU General Public License as published by
*     the Free Software Foundation, either version 3 of the License, or
*     (at your option) any later version.
*
*     XTPcpp is distributed in the hope that it will be useful,
*     but WITHOUT ANY WARRANTY; without even the implied warranty of
*     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
*     GNU General Public License for more details.
*
*     You should have received a copy of the GNU General Public License
*     along with XTPcpp.  If not, see <http://www.gnu.org/licenses/>.
*
* Contributors:
*     Olivier Langella <olivier.langella@u-psud.fr> - initial API and implementation
******************************************************************************/

#include "proteinmatch.h"
#include <pappsomspp/grouping/grpprotein.h>
#include <pappsomspp/pappsoexception.h>
#include <algorithm>
#include <cmath>
#include <set>
#include <vector>


// Background color used by getHtmlSequence() for residues covered by valid and checked peptide matches.
QColor ProteinMatch::_color_peptide_background = QColor("yellow");
// Background color used by getHtmlSequence() for the residues of the peptide match to locate.
QColor ProteinMatch::_color_highlighted_peptide_background = QColor("#ff7878");


/** Builds a protein match; no member is explicitly set here. */
ProteinMatch::ProteinMatch() = default;

/**
 * Destroys the protein match and deletes every PeptideMatch whose pointer is
 * stored in _peptide_match_list.
 */
ProteinMatch::~ProteinMatch()
{
    for (PeptideMatch * p_stored_peptide_match : _peptide_match_list) {
        delete p_stored_peptide_match;
    }
}
/**
 * Summarizes the state of this protein match as a single ValidationState.
 *
 * The tests are made in this order: grouped, valid and checked, valid.
 * When none applies, ValidationState::notValid is returned.
 */
ValidationState ProteinMatch::getValidationState() const {
    if (isGrouped()) {
        return ValidationState::grouped;
    }
    if (isValidAndChecked()) {
        return ValidationState::validAndChecked;
    }
    return isValid() ? ValidationState::valid : ValidationState::notValid;
}
/**
 * Tells whether the given peptide match pointer is stored in this protein match.
 *
 * @param peptide_match pointer to look for (compared by address)
 * @return false for a null pointer or a pointer that is not in the list
 */
bool ProteinMatch::contains(PeptideMatch * peptide_match) const {
    if (peptide_match == nullptr) {
        return false;
    }
    const auto list_end = _peptide_match_list.end();
    return std::find(_peptide_match_list.begin(), list_end, peptide_match) != list_end;
}

void ProteinMatch::updateAutomaticFilters(const AutomaticFilterParameters & automatic_filter_parameters) {
    //qDebug() <<"ProteinMatch::updateAutomaticFilters begin " ;
    _proxy_valid = false;
    unsigned int number_of_valid_peptides =0;
    bool cross_sample = automatic_filter_parameters.getFilterCrossSamplePeptideNumber();

    if (cross_sample) {
        for (auto & p_peptide_match : _peptide_match_list) {
            p_peptide_match->updateAutomaticFilters(automatic_filter_parameters);
        }
        number_of_valid_peptides= countSequenceLi(ValidationState::validAndChecked,nullptr);
    }
    else {
        std::set<const MsRun *> msrun_set;
        for (auto & p_peptide_match : _peptide_match_list) {
            p_peptide_match->updateAutomaticFilters(automatic_filter_parameters);
            if (p_peptide_match->isValidAndChecked()) {
                msrun_set.insert(p_peptide_match->getMsRunP());
            }
        }
        for (const MsRun * p_msrun : msrun_set) {
            unsigned int count = countSequenceLi(ValidationState::validAndChecked,p_msrun);
            if (count > number_of_valid_peptides) {
                number_of_valid_peptides = count;
            }
        }
    }

    if (number_of_valid_peptides == 0) {

        _proxy_valid = false;
    } else {

        if (number_of_valid_peptides < automatic_filter_parameters.getFilterMinimumPeptidePerMatch()) {
        }
        else {
            if (getEvalue() <= automatic_filter_parameters.getFilterProteinEvalue()) {
                _proxy_valid = true;
            }
        }
    }
    //qDebug() <<"ProteinMatch::updateAutomaticFilters end " << number_of_valid_peptides ;
}

/** @return a reference to the protein shared pointer stored in this match */
const ProteinXtpSp & ProteinMatch::getProteinXtpSp() const {
    return _protein_sp;
}

/** Stores the given protein shared pointer in this match, replacing the previous one. */
void ProteinMatch::setProteinXtpSp(ProteinXtpSp protein_sp) {
    _protein_sp = protein_sp;
}


/** @return the automatic validity flag, as last computed by updateAutomaticFilters() */
bool ProteinMatch::isValid() const {
    return _proxy_valid;
}
/** @return the checked flag, as last set by setChecked() */
bool ProteinMatch::isChecked() const {
    return _checked;
}

/** @return true only when both the automatic validity flag and the checked flag are set */
bool ProteinMatch::isValidAndChecked() const {
    return _proxy_valid && _checked;
}

bool ProteinMatch::isGrouped() const {
    if (_sp_grp_protein.get() == nullptr) {
        return false;
    }
    if (_sp_grp_protein.get()->getGroupNumber() == 0) {
        return false;
    }
    return true;
}

/** Sets the checked flag returned by isChecked(). */
void ProteinMatch::setChecked(bool arg1) {
    _checked = arg1;
}
/**
 * Appends a peptide match pointer to the list of this protein match.
 * The destructor of ProteinMatch deletes every pointer stored in this list.
 */
void ProteinMatch::addPeptideMatch(PeptideMatch * peptide_match) {
    _peptide_match_list.push_back(peptide_match);
}


/** @return a mutable reference to the internal list of peptide match pointers */
std::vector<PeptideMatch *> & ProteinMatch::getPeptideMatchList() {
    return _peptide_match_list;
}

/** @return a read only reference to the internal list of peptide match pointers */
const std::vector<PeptideMatch *> & ProteinMatch::getPeptideMatchList() const {
    return _peptide_match_list;
}

/**
 * Registers this protein match and its peptide matches in a grouping experiment.
 *
 * The grouping protein of this match is always reset first. Nothing else is
 * done unless the match is valid and checked and its protein is not a decoy.
 * Otherwise a grouping protein is obtained from the experiment, each peptide
 * match has its grouping peptide reset and, when valid and checked, set from
 * the experiment. A contaminant protein is additionally registered for post
 * grouping removal.
 *
 * @param p_grp_experiment grouping experiment to feed
 */
void ProteinMatch::setGroupingExperiment(GroupingExperiment * p_grp_experiment) {
    _sp_grp_protein = nullptr;

    const bool take_part_in_grouping = isValidAndChecked() && (!_protein_sp.get()->isDecoy());
    if (!take_part_in_grouping) {
        return;
    }

    _sp_grp_protein = p_grp_experiment->getGrpProteinSp(this);

    for (auto & p_current_peptide_match : _peptide_match_list) {
        // forget any previous grouping peptide before deciding on a new one
        p_current_peptide_match->setGrpPeptideSp(nullptr);
        if (p_current_peptide_match->isValidAndChecked()) {
            p_current_peptide_match->setGrpPeptideSp(p_grp_experiment->setGrpPeptide(_sp_grp_protein, p_current_peptide_match));
        }
    }

    if (_protein_sp.get()->isContaminant()) {
        p_grp_experiment->addPostGroupingGrpProteinSpRemoval(_sp_grp_protein);
    }
}

/** @return a reference to the grouping protein shared pointer (null when setGroupingExperiment() did not assign one) */
const pappso::GrpProteinSp & ProteinMatch::getGrpProteinSp() const {
    return _sp_grp_protein;
}


void ProteinMatch::countPeptideMassSample(std::vector<size_t> & count_peptide_mass_sample, ValidationState state) const {
    for (auto & p_peptide_match : _peptide_match_list) {
        if (p_peptide_match->getValidationState() >= state) {
            count_peptide_mass_sample.push_back(p_peptide_match->getHashPeptideMassSample());
        }
    }
}
/**
 * Builds the list of peptide matches whose validation state is at least `state`.
 *
 * @param state minimum validation state required
 * @return a new vector of pointers, in the same order as the internal list
 */
std::vector<PeptideMatch *> ProteinMatch::getPeptideMatchList(ValidationState state) const  {
    std::vector<PeptideMatch *> selected_peptide_match_list;
    for (PeptideMatch * p_candidate : _peptide_match_list) {
        const bool state_reached = (p_candidate->getValidationState() >= state);
        if (state_reached) {
            selected_peptide_match_list.push_back(p_candidate);
        }
    }
    return selected_peptide_match_list;
}

/**
 * Counts the distinct getHashSampleScan() values among the peptide matches
 * whose validation state is at least `state`.
 *
 * @param state minimum validation state required
 * @param p_msrun_id when not null, only peptide matches of this MS run are counted
 * @return number of distinct hash values
 */
unsigned int ProteinMatch::countSampleScan(ValidationState state, const MsRun * p_msrun_id) const {
    std::vector<std::size_t> sample_scan_hash_list;
    for (PeptideMatch * p_candidate : _peptide_match_list) {
        if (p_candidate->getValidationState() >= state) {
            // a null MS run means "all samples"
            if ((p_msrun_id == nullptr) || (p_candidate->getMsRunP() == p_msrun_id)) {
                sample_scan_hash_list.push_back(p_candidate->getHashSampleScan());
            }
        }
    }
    // sort then drop duplicates to count each hash only once
    std::sort(sample_scan_hash_list.begin(), sample_scan_hash_list.end());
    const auto unique_end = std::unique(sample_scan_hash_list.begin(), sample_scan_hash_list.end());
    return std::distance(sample_scan_hash_list.begin(), unique_end);
}

/**
 * Counts the peptide matches whose validation state is at least `state`.
 *
 * @param state minimum validation state required
 */
unsigned int ProteinMatch::countPeptideMatch(ValidationState state) const {
    const auto reaches_state = [state](const PeptideMatch * p_candidate) {
        return p_candidate->getValidationState() >= state;
    };
    return std::count_if(_peptide_match_list.begin(), _peptide_match_list.end(), reaches_state);
}

/**
 * Counts the distinct getSequenceLi() strings among the peptide matches whose
 * validation state is at least `state`.
 *
 * @param state minimum validation state required
 * @param p_msrun_id when not null, only peptide matches of this MS run are
 * counted (within sample); when null, all samples are considered
 * @return number of distinct sequences
 */
size_t ProteinMatch::countSequenceLi(ValidationState state, const MsRun * p_msrun_id) const {
    std::set<QString> distinct_sequence_li;
    const bool overall_samples = (p_msrun_id == nullptr);
    for (PeptideMatch * p_candidate : _peptide_match_list) {
        if (p_candidate->getValidationState() >= state) {
            if (overall_samples || (p_candidate->getMsRunP() == p_msrun_id)) {
                distinct_sequence_li.insert(p_candidate->getPeptideXtpSp().get()->getSequenceLi());
            }
        }
    }
    return distinct_sequence_li.size();
}

/**
 * Counts the distinct MS run XML identifiers among the peptide matches whose
 * validation state is at least `state`.
 *
 * @param state minimum validation state required
 */
unsigned int ProteinMatch::countDistinctMsSamples(ValidationState state) const {
    std::set<QString> distinct_msrun_id;
    for (PeptideMatch * p_candidate : _peptide_match_list) {
        const bool state_reached = (p_candidate->getValidationState() >= state);
        if (state_reached) {
            distinct_msrun_id.insert(QString("%1").arg(p_candidate->getMsRunP()->getXmlId()));
        }
    }
    return distinct_msrun_id.size();
}

/**
 * Counts the distinct "sequenceLi-mass-charge" keys among the peptide matches
 * whose validation state is at least `state`. Sequence and mass are read from
 * the native peptide, the charge from the peptide match.
 *
 * @param state minimum validation state required
 * @param sp_msrun_id when not null, only peptide matches of this MS run are
 * counted (within sample); when null, all samples are considered
 * @return number of distinct keys
 */
unsigned int ProteinMatch::countPeptideMassCharge(ValidationState state, const MsRun * sp_msrun_id) const {
    std::set<QString> distinct_key;
    const bool overall_samples = (sp_msrun_id == nullptr);
    for (PeptideMatch * p_candidate : _peptide_match_list) {
        if (p_candidate->getValidationState() >= state) {
            if (overall_samples || (p_candidate->getMsRunP() == sp_msrun_id)) {
                distinct_key.insert(
                    QString("%1-%2-%3")
                    .arg(p_candidate->getPeptideXtpSp().get()->getNativePeptideP()->getSequenceLi())
                    .arg(p_candidate->getPeptideXtpSp().get()->getNativePeptideP()->getMass())
                    .arg(p_candidate->getCharge()));
            }
        }
    }
    return distinct_key.size();
}

/**
 * Protein e-value, computed as 10 raised to getLogEvalue().
 *
 * @param sp_msrun_id forwarded unchanged to getLogEvalue()
 */
pappso::pappso_double ProteinMatch::getEvalue(const MsRun * sp_msrun_id) const {
    const pappso::pappso_double log_evalue = getLogEvalue(sp_msrun_id);
    return std::pow(10.0, log_evalue);
}

/**
 * Log10 based protein score built from the peptide e-values.
 *
 * For every distinct peptide sequence among the valid and checked peptide
 * matches, the lowest e-value is kept. The result is 1 plus the sum of the
 * log10 of these best e-values.
 *
 * NOTE(review): the accumulator starts at 1 although it sums logarithms; this
 * looks inherited from the former product form (kept as a comment in the
 * original code), for which 0 would be the neutral value. Left untouched here
 * because it changes every protein e-value - to be confirmed.
 *
 * @param sp_msrun_id when not null, only peptide matches of this MS run are
 * used (within sample); when null, all samples are considered
 */
pappso::pappso_double ProteinMatch::getLogEvalue(const MsRun * sp_msrun_id) const {
    std::map<QString, pappso::pappso_double> best_evalue_by_sequence;
    const bool overall_samples = (sp_msrun_id == nullptr);

    for (auto & p_candidate : _peptide_match_list) {
        if (!p_candidate->isValidAndChecked()) {
            continue;
        }
        QString sequence(p_candidate->getPeptideXtpSp().get()->getSequence());
        pappso::pappso_double evalue = p_candidate->getEvalue();
        if (overall_samples || (p_candidate->getMsRunP() == sp_msrun_id)) {
            auto found = best_evalue_by_sequence.find(sequence);
            if (found == best_evalue_by_sequence.end()) {
                best_evalue_by_sequence.insert(std::pair<QString, pappso::pappso_double>(sequence, evalue));
            }
            else if (found->second > evalue) {
                // keep the best (lowest) e-value seen for this sequence
                found->second = evalue;
            }
        }
    }

    pappso::pappso_double log_evalue_prot = 1;
    for (auto && sequence_evalue : best_evalue_by_sequence) {
        log_evalue_prot += std::log10(sequence_evalue.second);
    }
    return log_evalue_prot;
}

/**
 * Protein abundance index: number of distinct valid and checked
 * "sequence-mass-charge" keys (countPeptideMassCharge) divided by the value
 * returned by countTrypticPeptidesForPAI() for the protein.
 *
 * @param sp_msrun_id forwarded unchanged to countPeptideMassCharge()
 * @return the ratio as a floating point value
 * @throws pappso::PappsoException any PappsoException raised during the
 * computation is rethrown with the protein accession prepended to its message
 */
pappso::pappso_double ProteinMatch::getPAI(const MsRun * sp_msrun_id) const {
    try {
        const pappso::pappso_double observed = static_cast<pappso::pappso_double>(countPeptideMassCharge(ValidationState::validAndChecked, sp_msrun_id));
        const pappso::pappso_double observable = static_cast<pappso::pappso_double>(_protein_sp.get()->countTrypticPeptidesForPAI());
        return observed / observable;
    }
    // caught by const reference: no copy and no slicing of a derived exception
    catch (const pappso::PappsoException & error) {
        throw pappso::PappsoException(QObject::tr("Error computing PAI for protein %1 :\n%2").arg(this->getProteinXtpSp().get()->getAccession()).arg(error.qwhat()));
    }
}

/**
 * emPAI (Ishihama 2005): 10 raised to getPAI(), minus 1.
 *
 * @param sp_msrun_id forwarded unchanged to getPAI()
 */
pappso::pappso_double ProteinMatch::getEmPAI(const MsRun * sp_msrun_id) const {
    const pappso::pappso_double pai = getPAI(sp_msrun_id);
    return std::pow(10.0, pai) - (pappso::pappso_double) 1.0;
}

/**
 * Builds an HTML rendering of the protein sequence with colored backgrounds.
 *
 * Residues covered by the peptide match to locate are wrapped in a span using
 * _color_highlighted_peptide_background; the other residues covered by valid
 * and checked peptide matches are wrapped in a span using
 * _color_peptide_background. Highlighting takes precedence over coverage.
 * Each run of identically marked residues is wrapped in exactly one span.
 *
 * @param peptide_match_to_locate peptide match to highlight; ignored when it
 * is null or not contained in this protein match
 * @return the HTML string, or an empty string when the protein size is 0
 */
const QString ProteinMatch::getHtmlSequence(PeptideMatch * peptide_match_to_locate) const
{
    const size_t prot_size = _protein_sp.get()->size();
    if (prot_size == 0) return QString();

    // one flag per residue (heap allocated: variable length arrays are not standard C++)
    std::vector<bool> cover_bool(prot_size, false);
    std::vector<bool> highlight_bool(prot_size, false);

    if (this->contains(peptide_match_to_locate)) {
        const size_t size = peptide_match_to_locate->getPeptideXtpSp().get()->size();
        size_t offset = peptide_match_to_locate->getStart();
        // the offset test keeps the marking inside the protein bounds
        for (size_t i = 0; (i < size) && (offset < prot_size); i++, offset++) {
            highlight_bool[offset] = true;
        }
    }

    for (auto & p_peptide_match : _peptide_match_list) {
        if (p_peptide_match->isValidAndChecked()) {
            const size_t size = p_peptide_match->getPeptideXtpSp().get()->size();
            size_t offset = p_peptide_match->getStart();
            for (size_t i = 0; (i < size) && (offset < prot_size); i++, offset++) {
                cover_bool[offset] = true;
            }
        }
    }

    enum class Marking { none, covered, highlighted };

    const QString sequence = getProteinXtpSp().get()->getSequence();
    QString sequence_html;
    Marking open_marking = Marking::none; // kind of span currently open, if any
    for (unsigned int i = 0; i < prot_size; i++) {
        Marking wanted_marking = Marking::none;
        if (highlight_bool[i]) {
            wanted_marking = Marking::highlighted;
        }
        else if (cover_bool[i]) {
            wanted_marking = Marking::covered;
        }

        if (wanted_marking != open_marking) {
            // close the previous run once, then open the span of the new run
            if (open_marking != Marking::none) {
                sequence_html.append(QString("</span>"));
            }
            if (wanted_marking == Marking::highlighted) {
                sequence_html.append(QString("<span style=\"background-color:%1;\">").arg(_color_highlighted_peptide_background.name()));
            }
            else if (wanted_marking == Marking::covered) {
                sequence_html.append(QString("<span style=\"background-color:%1;\">").arg(_color_peptide_background.name()));
            }
            open_marking = wanted_marking;
        }
        sequence_html.append(sequence[i]);
    }
    if (open_marking != Marking::none) {
        sequence_html.append(QString("</span>"));
    }
    return sequence_html;
}

/**
 * Fraction of the protein residues covered by at least one valid and checked
 * peptide match.
 *
 * @return covered residues divided by protein size, or 0 when the protein
 * size is 0
 */
pappso::pappso_double ProteinMatch::getCoverage() const {
    const size_t prot_size = _protein_sp.get()->size();
    if (prot_size == 0) return 0;

    // one flag per residue (heap allocated: variable length arrays are not standard C++)
    std::vector<bool> cover_bool(prot_size, false);
    for (auto & p_peptide_match : _peptide_match_list) {
        if (p_peptide_match->isValidAndChecked()) {
            const size_t size = p_peptide_match->getPeptideXtpSp().get()->size();
            size_t offset = p_peptide_match->getStart();
            // the offset test keeps the marking inside the protein bounds
            for (size_t i = 0; (i < size) && (offset < prot_size); i++, offset++) {
                cover_bool[offset] = true;
            }
        }
    }

    const auto count = std::count(cover_bool.begin(), cover_bool.end(), true);
    return static_cast<pappso::pappso_double>(count) / static_cast<pappso::pappso_double>(prot_size);
}
/** @return a reference to the group shared pointer (null when setGroupInstance() did not assign one) */
const GroupingGroupSp & ProteinMatch::getGroupingGroupSp() const {
    return _sp_group;
}

/**
 * Attaches this protein match to its group instance.
 *
 * The current group is always reset first. When a grouping protein is set and
 * its group number is greater than zero, the group instance is obtained from
 * the store and this protein match is added to it.
 *
 * @param group_store store providing group instances by group number
 */
void ProteinMatch::setGroupInstance(GroupStore & group_store) {
    _sp_group = nullptr;
    if (_sp_grp_protein == nullptr) {
        return;
    }
    const unsigned int group_number = _sp_grp_protein.get()->getGroupNumber();
    if (group_number == 0) {
        return;
    }
    _sp_group = group_store.getInstance(group_number);
    _sp_group.get()->add(this);
}


/**
 * Appends to delta_list the delta mass of every peptide match whose validation
 * state is at least `state`.
 *
 * When the unit is ppm, pappso::DIFFC12C13 is added to the delta as long as it
 * is lower than -0.5, then the delta is divided by the peptide getMz(1) value
 * and multiplied by pappso::ONEMILLION. For any other unit the value returned
 * by getDeltaMass() is stored unchanged.
 *
 * @param delta_list output vector, existing content is kept
 * @param unit precision unit of the stored values
 * @param state minimum validation state required
 */
void ProteinMatch::collectMhDelta(std::vector< pappso::pappso_double> & delta_list, pappso::PrecisionUnit unit, ValidationState state) const {
    const bool convert_to_ppm = (unit == pappso::PrecisionUnit::ppm);
    for (PeptideMatch * p_candidate : _peptide_match_list) {
        if (p_candidate->getValidationState() >= state) {
            pappso::pappso_double delta = p_candidate->getDeltaMass();
            if (convert_to_ppm) {
                while (delta < -0.5) {
                    delta = delta + pappso::DIFFC12C13;
                }
                const auto mz = p_candidate->getPeptideXtpSp().get()->getMz(1);
                delta = (delta / mz) * pappso::ONEMILLION;
            }
            delta_list.push_back(delta);
        }
    }
}
