
/*******************************************************************************
 * Copyright (c) 2015 Olivier Langella <Olivier.Langella@moulon.inra.fr>.
 *
 * This file is part of the PAPPSOms++ library.
 *
 *     PAPPSOms++ is free software: you can redistribute it and/or modify
 *     it under the terms of the GNU General Public License as published by
 *     the Free Software Foundation, either version 3 of the License, or
 *     (at your option) any later version.
 *
 *     PAPPSOms++ is distributed in the hope that it will be useful,
 *     but WITHOUT ANY WARRANTY; without even the implied warranty of
 *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *     GNU General Public License for more details.
 *
 *     You should have received a copy of the GNU General Public License
 *     along with PAPPSOms++.  If not, see <http://www.gnu.org/licenses/>.
 *
 * Contributors:
 *     Olivier Langella <Olivier.Langella@moulon.inra.fr> - initial API and implementation
 ******************************************************************************/

#include "enzyme.h"
#include <QStringList>
#include <QDebug>
#include "../exception/exceptionnotpossible.h"
//#include <iostream>

namespace pappso {
/**
 * Default constructor.
 *
 * Builds an enzyme whose recognition site is "([KR])([^P])": a K or R
 * (first capture group) followed by any residue except P (presumably the
 * usual trypsin rule). All remaining initialisation (missed cleavage count
 * and wildcard replacement tables) is shared with the QString constructor,
 * to which this one delegates.
 */
Enzyme::Enzyme()
    : Enzyme(QString("([KR])([^P])"))
{
}

/**
 * Build an enzyme from a custom recognition site.
 *
 * @param recognition_site regular expression pattern handed to the
 *        recognition site matcher. Enzyme::eat cuts the sequence right
 *        after the text captured by the first group of each match.
 *
 * The missed cleavage count starts at 0 and the three wildcard replacement
 * tables are filled:
 *  - X : the twenty residues listed below
 *  - B : N or D
 *  - Z : Q or E
 */
Enzyme::Enzyme(const QString & recognition_site)
{
    _miscleavage = 0;
    _recognition_site.setPattern(recognition_site);

    // replacement residues, in the order they are tried by replaceWildcards
    const std::string x_residues("ARNDCQEGHILKMFPSTWYV");
    const std::string b_residues("ND");
    const std::string z_residues("QE");

    _wildcard_x.assign(x_residues.begin(), x_residues.end());
    _wildcard_b.assign(b_residues.begin(), b_residues.end());
    _wildcard_z.assign(z_residues.begin(), z_residues.end());
}

/** Destructor: nothing to release explicitly, members clean up themselves. */
Enzyme::~Enzyme() = default;
void Enzyme::setMaxPeptideVariantListSize(std::size_t max_peptide_variant_list_size) {
    _max_peptide_variant_list_size = max_peptide_variant_list_size;
}

void Enzyme::eat(std::int8_t sequence_database_id, const ProteinSp & protein_sp, bool is_decoy, EnzymeProductInterface & enzyme_product) const {
    /*
     *        for aa in self.aa_to_cut:
              seq = seq.replace(aa, aa + ' ')
          seq_stack = []
          for s in seq.strip().split(' '):
              seq_stack.append(s)
              if len(seq_stack) > self.misscleavage + 1:
                  seq_stack.pop(0)
              s2 = ""
              for s_miss in seq_stack[::-1]:
                  s2 = s_miss + s2
                  yield s2
    */
    qDebug() << "Enzyme::eat begin ";
    const QString sequence = protein_sp.get()->getSequence();
    qDebug() << sequence;
    QStringList peptide_list;
    int pos = 0;
    int peptide_start = 0;
    int peptide_size = sequence.size();
    while ((pos = _recognition_site.indexIn(sequence, pos)) != -1) {
        peptide_size = pos + _recognition_site.cap(1).length()-peptide_start;
        //qDebug() << "pos=" << pos << " peptide_start=" << peptide_start << " peptide_size=" << peptide_size << " " << sequence.mid(peptide_start,peptide_size);
        if (peptide_size > 0) {
            peptide_list.append(sequence.mid(peptide_start,peptide_size));
        }
        peptide_start+= peptide_size;
        pos= peptide_start;//all peptides MUST be consecutive
    }
    peptide_size = sequence.size()- peptide_start;
    if (peptide_size > 0) {
        peptide_list.append(sequence.mid(peptide_start,peptide_size));
    }

    unsigned int start=1;
    bool is_nter = true;
    foreach (const QString &peptide, peptide_list) {
        //enzyme_product.setPeptide(sequence_database_id, protein_sp,is_decoy, peptide, start,is_nter,0, false);
        sanityCheck(enzyme_product,sequence_database_id, protein_sp,is_decoy, peptide, start,is_nter,0, false);
        is_nter = false;
        start += peptide.size();
    }

    unsigned int miscleavage_i = 0;
    while (miscleavage_i < _miscleavage) {
        miscleavage_i++;
        qDebug() << "miscleavage_i=" << miscleavage_i;
        unsigned int chunk_number= miscleavage_i+1;
        unsigned int start=1;
        bool is_nter = true;

        for (unsigned int i = 0; i < peptide_list.size(); ++i) {
            qDebug() << "start=" << start;
            QStringList peptide_mis_list;
            for (unsigned int j = 0; (j < chunk_number) && ((i+j) < peptide_list.size()); j++) {
                peptide_mis_list << peptide_list.at(i+j);
            }
            if(peptide_mis_list.size() == chunk_number) {
                //enzyme_product.setPeptide(sequence_database_id, protein_sp,is_decoy, peptide_mis_list.join(""), start,is_nter, miscleavage_i, false);
                sanityCheck(enzyme_product,sequence_database_id, protein_sp,is_decoy, peptide_mis_list.join(""), start,is_nter, miscleavage_i, false);
            }
            is_nter = false;
            start += peptide_list.at(i).size();
        }
    }
}

void Enzyme::replaceWildcards(std::vector<std::string> * p_peptide_variant_list) const {
    std::string new_peptide = p_peptide_variant_list->at(0);
    qDebug() << "Enzyme::replaceWildcards begin " << new_peptide.c_str();
    std::vector<std::string> old_peptide_variant_list;
    old_peptide_variant_list.assign(p_peptide_variant_list->begin(), p_peptide_variant_list->end());


    for (char wildcard : {
                'X', 'B', 'Z'
            }) {

        std::size_t position = new_peptide.find(wildcard);
        if (position == std::string::npos) {
            continue;
        }
        else {
            p_peptide_variant_list->clear();
            /*
            new_peptide[position] = 'A';
            p_peptide_variant_list->push_back(new_peptide);
            break;
            */

            const std::vector<char> * p_x_replace_wildcard = nullptr;
            if (wildcard == 'X') {
                p_x_replace_wildcard = &_wildcard_x;
            } else if (wildcard == 'B') {
                p_x_replace_wildcard = &_wildcard_b;
            }
            else if (wildcard == 'Z') {
                p_x_replace_wildcard = &_wildcard_z;
            }

            if (p_x_replace_wildcard != nullptr) {
                for (std::string orig_peptide : old_peptide_variant_list) {
                    for (char replace: *p_x_replace_wildcard) {
                        orig_peptide[position] = replace;
                        p_peptide_variant_list->push_back(orig_peptide);
                    }
                }
            }
            else {
                throw ExceptionNotPossible(QObject::tr("x_replace_wildcard is empty"));
            }
            //new_peptide[position] = 'A';
            //p_peptide_variant_list->push_back(new_peptide);
            //p_peptide_variant_list->resize(1);
            //std::cerr << "Enzyme::replaceWildcards begin p_peptide_variant_list.size()=" << p_peptide_variant_list->size() << endl;
            break;
        }
    }
    std::vector<std::string>().swap(old_peptide_variant_list);   // clear old_peptide_variant_list reallocating



    qDebug() << "Enzyme::replaceWildcards end " << new_peptide.c_str();
}

void Enzyme::setTakeOnlyFirstWildcard(bool take_only_first_wildcard) {
    _take_only_first_wildcard = take_only_first_wildcard;
}


/**
 * Report a peptide to @p enzyme_product, expanding wildcards if needed.
 *
 * A peptide without X, B or Z is forwarded as is. Otherwise wildcards are
 * expanded one position at a time with Enzyme::replaceWildcards, the
 * variant list being truncated to the configured maximum size after each
 * round. Then either only the first variant is reported (when the
 * "take only first wildcard" flag is set) or every remaining variant,
 * from the last one to the first one.
 *
 * All other arguments (database id, protein, decoy flag, start position,
 * N-terminal flag, missed cleavage number, semi enzyme flag) are passed
 * unchanged to EnzymeProductInterface::setPeptide.
 */
void Enzyme::sanityCheck(EnzymeProductInterface & enzyme_product, std::int8_t sequence_database_id,const ProteinSp & protein_sp, bool is_decoy, const PeptideStr & peptide, unsigned int start, bool is_nter, unsigned int missed_cleavage_number, bool semi_enzyme) const {
    const bool has_wildcard = peptide.contains('X') || peptide.contains('B') || peptide.contains('Z');
    if (!has_wildcard) {
        // common case: nothing to expand
        enzyme_product.setPeptide(sequence_database_id, protein_sp, is_decoy, peptide, start, is_nter, missed_cleavage_number, semi_enzyme);
        return;
    }

    // true while a sequence still holds at least one wildcard symbol
    const auto still_ambiguous = [](const std::string & candidate) {
        return candidate.find_first_of("XBZ") != std::string::npos;
    };

    std::vector<std::string> variant_list(1, peptide.toStdString());

    // every variant carries the same remaining wildcards, so looking at the
    // first one is enough to know whether another round is required
    while (still_ambiguous(variant_list.at(0))) {
        replaceWildcards(&variant_list);
        if (variant_list.size() > _max_peptide_variant_list_size) {
            variant_list.resize(_max_peptide_variant_list_size);
            variant_list.shrink_to_fit();
        }
    }

    if (_take_only_first_wildcard) {
        enzyme_product.setPeptide(sequence_database_id, protein_sp, is_decoy,
                                  QString(variant_list.at(0).c_str()), start, is_nter, missed_cleavage_number, semi_enzyme);
        return;
    }

    // report all variants, last one first
    for (auto it = variant_list.rbegin(); it != variant_list.rend(); ++it) {
        enzyme_product.setPeptide(sequence_database_id, protein_sp, is_decoy, QString(it->c_str()), start, is_nter, missed_cleavage_number, semi_enzyme);
    }
}
}
