
/*******************************************************************************
 * Copyright (c) 2015 Olivier Langella <Olivier.Langella@moulon.inra.fr>.
 *
 * This file is part of the PAPPSOms++ library.
 *
 *     PAPPSOms++ is free software: you can redistribute it and/or modify
 *     it under the terms of the GNU General Public License as published by
 *     the Free Software Foundation, either version 3 of the License, or
 *     (at your option) any later version.
 *
 *     PAPPSOms++ is distributed in the hope that it will be useful,
 *     but WITHOUT ANY WARRANTY; without even the implied warranty of
 *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *     GNU General Public License for more details.
 *
 *     You should have received a copy of the GNU General Public License
 *     along with PAPPSOms++.  If not, see <http://www.gnu.org/licenses/>.
 *
 * Contributors:
 *     Olivier Langella <Olivier.Langella@moulon.inra.fr> - initial API and implementation
 ******************************************************************************/

#include "grpexperiment.h"
#include "grpprotein.h"
#include "grppeptide.h"

#include "grpgroup.h"
#include "grpsubgroup.h"
#include "../pappsoexception.h"

using namespace pappso;

/**
 * Builds an experiment bound to an optional progress monitor.
 *
 * @param p_monitor monitor notified by the grouping methods of this class.
 *        Those methods compare it against nullptr before each call, so a
 *        null pointer is accepted. Only the pointer is stored.
 */
GrpExperiment::GrpExperiment(GrpGroupingMonitorInterface * p_monitor)
    : _p_monitor(p_monitor)
{
}

/** Destructor: performs no explicit clean-up of its own. */
GrpExperiment::~GrpExperiment() = default;
void GrpExperiment::setRemoveNonInformativeSubgroups(bool ok) {
    _remove_non_informative_subgroups=ok;
}

/**
 * Registers a protein for post-grouping removal.
 *
 * A peptide set is built from the protein and merged into
 * _grp_post_grouping_protein_list_removal. startGrouping() later drops every
 * group whose containsAny() returns true for that accumulated set.
 *
 * @param sp_protein protein whose peptide set is added to the removal set.
 */
void GrpExperiment::addPostGroupingGrpProteinSpRemoval(GrpProteinSp sp_protein)
{
    GrpPeptideSet peptides_to_remove(sp_protein.get());
    this->_grp_post_grouping_protein_list_removal.addAll(peptides_to_remove);
}


std::vector <GrpGroupSpConst> GrpExperiment::getGrpGroupSpList() const {
    std::vector <GrpGroupSpConst> grp_list;
    for (GrpGroupSp group: _grpGroupSpList) {
        grp_list.push_back(group);
    }
    return grp_list;
}

/**
 * Returns the shared protein registered under an accession, registering a new
 * one on first use.
 *
 * A candidate protein is always built from (accession, description), but it
 * is only kept when _map_proteins has no entry for @p accession yet. In that
 * case the new shared pointer is also appended to _grpProteinList and, as a
 * raw pointer, to _remainingGrpProteinList. When the accession is already
 * known, the existing entry is returned and @p description is not applied.
 *
 * @param accession   key used in the protein map.
 * @param description description given to a newly created protein.
 * @return reference to the shared pointer stored inside _map_proteins.
 *         startGrouping() clears that map, so the reference must not be used
 *         after startGrouping() has been called.
 */
GrpProteinSp & GrpExperiment::getGrpProteinSp(const QString & accession, const QString & description) {
    GrpProtein candidate(accession, description);
    auto insertion = _map_proteins.insert(
                         std::pair<QString, GrpProteinSp>(accession, std::make_shared<GrpProtein>(candidate)));

    GrpProteinSp & registered_sp = insertion.first->second;
    const bool is_new_accession = insertion.second;
    if (is_new_accession) {
        // first time this accession is seen: track it in both protein lists
        _grpProteinList.push_back(registered_sp);
        _remainingGrpProteinList.push_back(registered_sp.get());
    }
    return registered_sp;
}

/**
 * Attaches a peptide observation to a protein, reusing an already registered
 * peptide when one matches.
 *
 * Peptides are indexed in _map_peptides first by the _sequence member of the
 * freshly built peptide (not by the raw @p sequence argument), then by
 * mass * 100 converted to unsigned long (the fractional part is discarded).
 * A peptide is appended to _grpPeptideList only the first time its
 * (sequence, mass key) pair is seen.
 *
 * On every call, whether or not the peptide was new, the protein's
 * countPlus() is invoked and the registered peptide pointer is pushed onto
 * the protein.
 *
 * @param proteinSp protein receiving the peptide.
 * @param sequence  peptide sequence.
 * @param mass      peptide mass used to build the secondary key.
 * @return reference to the shared pointer stored inside _map_peptides.
 *         startGrouping() clears that map, so the reference must not be used
 *         after startGrouping() has been called.
 */
GrpPeptideSp & GrpExperiment::setGrpPeptide(const GrpProteinSp & proteinSp, const QString & sequence, pappso_double mass) {
    proteinSp->countPlus();
    GrpPeptideSp candidate_sp = std::make_shared<GrpPeptide>(GrpPeptide(sequence, mass));

    // first level: one (possibly empty) mass map per sequence
    auto sequence_slot = _map_peptides.insert(
                             std::pair<QString, std::map<unsigned long, GrpPeptideSp> >(candidate_sp->_sequence, std::map<unsigned long, GrpPeptideSp>()));
    auto & peptides_by_mass = sequence_slot.first->second;

    // second level: the candidate is kept only if its mass key is new
    const unsigned long mass_key = static_cast<unsigned long>(mass * 100);
    auto mass_slot = peptides_by_mass.insert(
                         std::pair<unsigned long, GrpPeptideSp>(mass_key, candidate_sp));
    GrpPeptideSp & registered_sp = mass_slot.first->second;

    if (mass_slot.second) {
        _grpPeptideList.push_back(registered_sp);
    }
    proteinSp->push_back(registered_sp.get());
    return registered_sp;
}

void GrpExperiment::startGrouping() {
    qDebug() << "GrpExperiment::startGrouping begin";
    if (_p_monitor != nullptr) _p_monitor->startGrouping(_remainingGrpProteinList.size(), _grpPeptideList.size());
    _groupingStarted = true;
    _map_peptides.clear();
    _map_proteins.clear();
    qDebug() << "GrpExperiment::startGrouping sort protein list _remainingGrpProteinList.size() " << _remainingGrpProteinList.size();
    //_remainingGrpProteinList.sort();
    //_remainingGrpProteinList.unique();


    GrpMapPeptideToGroup grp_map_peptide_to_group;
    qDebug() << "GrpExperiment::startGrouping grouping begin";
    for (auto  p_grpProtein : _remainingGrpProteinList) {
        p_grpProtein->strip();
        if (p_grpProtein->_count == 0) {
            //no peptides : do not group this protein
        }
        else {
            GrpSubGroupSp grpSubGroupSp = GrpSubGroup(p_grpProtein).makeGrpSubGroupSp();

            if (_p_monitor != nullptr) _p_monitor->groupingProtein();
            this->addSubGroupSp(grp_map_peptide_to_group, grpSubGroupSp);
        }
    }
    grp_map_peptide_to_group.clear(_grpGroupSpList);
    qDebug() << "GrpExperiment::startGrouping grouping end";

    qDebug() << "GrpExperiment::startGrouping grouping  _grpGroupSpList.size() " << _grpGroupSpList.size();

    if (_remove_non_informative_subgroups) {
        this->removeNonInformativeSubGroups();
    }

    //post grouping protein group removal
    // remove any group containing contaminants
    _grpGroupSpList.remove_if([this](GrpGroupSp & groupSp) {
        return (groupSp.get()->containsAny(this->_grp_post_grouping_protein_list_removal)) ;
    });


    numbering();
    if (_p_monitor != nullptr) _p_monitor->stopGrouping();
    //GrpGroup(this, *_remainingGrpProteinList.begin());
    qDebug() << "GrpExperiment::startGrouping end";
}

struct ContainsAny
{
    ContainsAny(const GrpPeptideSet& peptide_set)
        : _peptide_set(peptide_set) { }

    typedef bool result_type;

    bool operator()(const GrpGroupSp &testGroupSp)
    {
        return testGroupSp.get()->containsAny(_peptide_set);
    }

    GrpPeptideSet _peptide_set;
};

/**
 * Dispatches one sub-group into the groups tracked by a peptide-to-group map.
 *
 * The map is asked for the groups associated with the sub-group's peptide
 * set:
 *  - none: a new group is created from the sub-group and recorded in the map
 *    under the sub-group's peptide set;
 *  - one or more: the first group of the list is kept, receives the
 *    sub-group, then absorbs every other listed group through addGroup();
 *    the map is updated so that the peptide sets of the sub-group and of
 *    each absorbed group point to the kept group.
 *
 * The method only works on its arguments; it reads no member of the
 * experiment.
 *
 * @param grp_map_peptide_to_group map queried and updated by this call.
 * @param grpSubGroupSp            sub-group to place.
 */
void GrpExperiment::addSubGroupSp(GrpMapPeptideToGroup & grp_map_peptide_to_group, GrpSubGroupSp & grpSubGroupSp) const {
    qDebug() << "GrpExperiment::addSubGroupSp begin " << grpSubGroupSp.get()->getFirstAccession();

    std::list<GrpGroupSp> sharing_groups;
    grp_map_peptide_to_group.getGroupList(grpSubGroupSp->getPeptideSet(), sharing_groups);

    if (!sharing_groups.empty()) {
        qDebug() << "GrpExperiment::addSubGroupSp fusion groupList.size() " << sharing_groups.size();
        // the first listed group survives and receives the sub-group
        auto it_absorbed = sharing_groups.begin();
        GrpGroupSp surviving_group_sp = *it_absorbed;
        qDebug() << "GrpExperiment::addSubGroupSp p_keepGroup->addSubGroupSp(grpSubGroupSp) " << surviving_group_sp.get();
        surviving_group_sp->addSubGroupSp(grpSubGroupSp);
        grp_map_peptide_to_group.set(grpSubGroupSp->getPeptideSet(), surviving_group_sp);

        // every other listed group is merged into the surviving one and its
        // peptides are redirected to it
        for (++it_absorbed; it_absorbed != sharing_groups.end(); ++it_absorbed) {
            qDebug() << "GrpExperiment::addSubGroupSp p_keepGroup->addGroup(*itGroup) " << it_absorbed->get() ;
            surviving_group_sp->addGroup(it_absorbed->get());
            grp_map_peptide_to_group.set((*it_absorbed)->getGrpPeptideSet(), surviving_group_sp);
        }
    }
    else {
        qDebug() << "GrpExperiment::addSubGroupSp create a new group";
        // nothing shares a peptide with this sub-group: it starts its own group
        GrpGroupSp fresh_group_sp = GrpGroup(grpSubGroupSp).makeGrpGroupSp();
        grp_map_peptide_to_group.set(grpSubGroupSp->getPeptideSet(), fresh_group_sp);
    }

    qDebug() << "GrpExperiment::addSubGroupSp end";
}

void GrpExperiment::numbering() {
    qDebug() << "GrpExperiment::numbering begin";
    if (_p_monitor != nullptr) _p_monitor->startNumberingAllGroups(_grpGroupSpList.size());
    for (auto && group_sp : _grpGroupSpList) {
        group_sp.get()->numbering();
    }
    _grpGroupSpList.sort([](GrpGroupSp & first, GrpGroupSp & second) {
        return ((* first.get()) < (* second.get())) ;
    });
    unsigned int i = 1;
    for (auto && group_sp : _grpGroupSpList) {
        group_sp.get()->setGroupNumber(i);
        i++;
    }

    qDebug() << "GrpExperiment::numbering end";
}

std::vector<GrpProteinSpConst> GrpExperiment::getGrpProteinSpList() const {
    std::vector<GrpProteinSpConst> grouped_protein_list;
    if (!_groupingStarted) {
        throw PappsoException(QObject::tr("unable to get grouped protein list before grouping"));
    }
    for (auto && protein_sp : _grpProteinList) {
        if (protein_sp.get()->getGroupNumber() > 0) {
            grouped_protein_list.push_back(protein_sp);
        }
    }
    return grouped_protein_list;
}

void GrpExperiment::removeNonInformativeSubGroups() {
    qDebug() << "GrpExperiment::removeNonInformativeSubGroups begin";
    if (_p_monitor != nullptr) _p_monitor->startRemovingNonInformativeSubGroupsInAllGroups(_grpGroupSpList.size());

    std::list <GrpGroupSp> old_grpGroupSpList (_grpGroupSpList);
    _grpGroupSpList.clear();
    auto it_group = old_grpGroupSpList.begin();
    while (it_group !=  old_grpGroupSpList.end()) {
        if (_p_monitor != nullptr) _p_monitor->removingNonInformativeSubGroupsInGroup();
        if(it_group->get()->removeNonInformativeSubGroups()) {
            //need to regroup it
            GrpGroupSp old_group_sp = *it_group;
            GrpMapPeptideToGroup grp_map_peptide_to_group;

            std::list<GrpSubGroupSp> dispatch_sub_group_set = old_group_sp.get()->getSubGroupSpList();
            for (GrpSubGroupSp & grp_subgroup:dispatch_sub_group_set) {
                addSubGroupSp(grp_map_peptide_to_group, grp_subgroup);
            }
            grp_map_peptide_to_group.clear(_grpGroupSpList);
        }
        else {
            qDebug() << "GrpExperiment::removeNonInformativeSubGroups  no removeNonInformativeSubGroups";
            _grpGroupSpList.push_back(*it_group);
        }
        it_group++;
    }
    if (_p_monitor != nullptr) _p_monitor->stopRemovingNonInformativeSubGroupsInAllGroups(_grpGroupSpList.size());

    qDebug() << "GrpExperiment::removeNonInformativeSubGroups end";
}

