/**
 * \file pappsomspp/peptide/peptideproformaparser.cpp
 * \date 27/11/2023
 * \author Olivier Langella
 * \brief parse peptide string in ProForma to pappso::Peptide
 */

/*******************************************************************************
 * Copyright (c) 2023 Olivier Langella
 *<Olivier.Langella@universite-paris-saclay.fr>.
 *
 * This file is part of the PAPPSOms++ library.
 *
 *     PAPPSOms++ is free software: you can redistribute it and/or modify
 *     it under the terms of the GNU General Public License as published by
 *     the Free Software Foundation, either version 3 of the License, or
 *     (at your option) any later version.
 *
 *     PAPPSOms++ is distributed in the hope that it will be useful,
 *     but WITHOUT ANY WARRANTY; without even the implied warranty of
 *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *     GNU General Public License for more details.
 *
 *     You should have received a copy of the GNU General Public License
 *     along with PAPPSOms++.  If not, see <http://www.gnu.org/licenses/>.
 *
 ******************************************************************************/

#include "peptideproformaparser.h"
#include "../obo/filterobopsimodtermlabel.h"
#include "../obo/filterobopsimodsink.h"
#include "../exception/exceptionnotpossible.h"
#include "../utils.h"

namespace pappso
{


// Static regular expressions of PeptideProFormaParser.

// (disabled) pattern matching one whole "[...]" bracketed modification:
// QRegularExpression PeptideProFormaParser::_mod_parser("\\[[^\\]]*\\]");

// Matches the literal "MOD:" followed by one or more decimal digits
// (the accession form used in the examples of this file, e.g. MOD:00397).
QRegularExpression PeptideProFormaParser::_rx_psimod("MOD:[0-9]+");

// Matches an optionally signed decimal number: optional '+' or '-', one or
// more digits, an optional '.', then zero or more digits.
QRegularExpression PeptideProFormaParser::_rx_modmass("[-+]?[0-9]+\\.?[0-9]*");

// Matches a leading global modification block "<...>" at the very start of the
// string, for instance "<13C>" or "<[MOD:01090]@C>".
// Capture 1 is the text between the angle brackets, capture 2 is the remainder
// of the string after the closing '>'.
// NOTE(review): inside the character class the commas are literal members,
// so ",-," parses as a range from ',' to ',' and a literal '-' does not appear
// to be part of the class; likewise the trailing "^>" is not a negation but the
// literal characters '^' and '>'. Characters such as '+', '.' or '-' inside the
// angle brackets therefore prevent a match - confirm whether this is intended.
//[MOD:01090]@C
QRegularExpression
  PeptideProFormaParser::m_firstGlobalMod("^<([\\[,\\],\\,,0-9,A-Z,a-z,:,@,-,^>]*)>(.*)$");

void
PeptideProFormaParser::parseStringToPeptide(const QString &pepstr, Peptide &peptide)
{
  // Peptide
  // peptide2("C[MOD:00397][MOD:01160]C[MOD:00397]AADDKEAC[MOD:00397]FAVEGPK");
  // CCAADDKEACFAVEGPK
  /*
  <psimod position="1"  accession="MOD:00397"/>
    <psimod position="2"  accession="MOD:00397"/>
    <psimod position="10"  accession="MOD:00397"/>
    <psimod position="1"  accession="MOD:01160"/>
    */


  QString peptide_str = pepstr;

  QStringList res_split = peptide_str.split("?");
  if(res_split.size() == 2)
    {
      peptide_str = res_split.at(1);
    }
  QRegularExpressionMatch match_global_mod = m_firstGlobalMod.match(peptide_str);

  QStringList global_mod_list;
  while(match_global_mod.hasMatch())
    {
      QStringList pline = match_global_mod.capturedTexts();
      qDebug() << pline[1];
      if(pline[1] == "13C")
        {
          // Carbon 13: <13C>ATPEILTVNSIGQLK
          peptide.setGlobalModification(Isotope::C13);
        }
      else if(pline[1] == "15N")
        {
          // Nitrogen 15: <15N>ATPEILTVNSIGQLK
          peptide.setGlobalModification(Isotope::N15);
        }
      else if(pline[1] == "D")
        {
          // Deuterium: <D>ATPEILTVNSIGQLK
          peptide.setGlobalModification(Isotope::H2);
        }
      else
        {
          //<[Oxidation]@C,M>
          global_mod_list << pline[1];
        }
      peptide_str      = pline[2];
      match_global_mod = m_firstGlobalMod.match(peptide_str);
    }


  std::size_t i                 = 0;
  std::size_t end               = peptide_str.size();
  AminoAcidChar last_amino_acid = AminoAcidChar::alanine;
  bool in_cter                  = false;
  while(i < end)
    {
      QChar aa_char = peptide_str[i];
      if(aa_char == '[')
        {
          QString mod;
          i++;
          aa_char = peptide_str[i];
          while((i < end) && (aa_char != ']'))
            {
              mod.append(aa_char);
              i++;
              if(i < end)
                aa_char = peptide_str[i];
            }

          qDebug() << aa_char;
          if(aa_char != ']')
            {
              throw pappso::ExceptionNotPossible(
                QObject::tr("modification string is malformed %1").arg(mod));
            }
          // we have a mod
          // is it a double ?
          bool is_double    = false;
          double mass_modif = mod.toDouble(&is_double);
          AaModificationP aamod;
          if(is_double)
            {
              aamod =
                Utils::guessAaModificationPbyMonoisotopicMassDelta(last_amino_acid, mass_modif);
            }
          else
            {
              aamod = AaModification::getInstance(mod);
            }
          if(peptide.m_aaVec.size() == 0)
            {
              if(is_double)
                {
                  AaModificationP better_mod = Utils::guessAaModificationPbyMonoisotopicMassDelta(
                    AminoAcidChar::alanine, mass_modif);
                  if(better_mod != nullptr)
                    {
                      aamod = better_mod;
                    }
                }
              peptide.setNterModification(aamod);
            }
          else
            {
              if(in_cter)
                {
                  if(is_double)
                    {
                      AaModificationP better_mod =
                        Utils::guessAaModificationPbyMonoisotopicMassDelta(AminoAcidChar::alanine,
                                                                           mass_modif);
                      if(better_mod != nullptr)
                        {
                          aamod = better_mod;
                        }
                    }
                  peptide.setCterModification(aamod);
                }
              else
                {
                  peptide.m_aaVec.back().addAaModification(aamod);
                }
            }
        }
      else
        {
          if(aa_char.isLetter())
            {
              qDebug() << aa_char;
              Aa pappso_aa(aa_char.toLatin1());
              last_amino_acid = pappso_aa.getAminoAcidChar();
              peptide.m_aaVec.push_back(pappso_aa);
            }
          else if(aa_char == '-')
            {
              if(peptide.m_aaVec.size() > 0)
                in_cter = true;
            }
          else
            {
              throw pappso::ExceptionNotPossible(
                QObject::tr("%1 is not an amino acid").arg(aa_char));
            }
        }
      i++;
    }


  for(QString &global_label_str : global_mod_list)
    {
      qDebug() << global_label_str;

      QRegularExpression global_label_reg("^\\[(.*)\\]@(.*)$");

      QRegularExpressionMatch match_global = global_label_reg.match(global_label_str);

      if(match_global.hasMatch())
        {
          QStringList pline = match_global.capturedTexts();
          qDebug() << pline[1];
          AaModificationP aamod = AaModification::getInstance(pline[1]);
          qDebug() << aamod->getAccession();
          qDebug() << pline[2];
          for(QString &aa_str : pline[2].split(","))
            {
              qDebug() << aa_str;
              peptide.addAaModificationOnAllAminoAcid(aamod, (AminoAcidChar)aa_str[0].toLatin1());
            }
        }
    }
  // qDebug() << peptide.toProForma();
  peptide.m_proxyMass = -1;
  peptide.getMass();
}

/**
 * \brief build a peptide from a ProForma string
 *
 * Starts from a Peptide constructed with an empty string, fills it with
 * parseStringToPeptide and returns the result of Peptide::makePeptideSp.
 *
 * \param pepstr the peptide string to parse
 * \return the value returned by makePeptideSp() on the parsed peptide
 */
PeptideSp
PeptideProFormaParser::parseString(const QString &pepstr)
{
  // no locking is done here (a QMutexLocker on _mutex was disabled)
  qDebug();

  Peptide parsed_peptide("");
  parseStringToPeptide(pepstr, parsed_peptide);

  // qDebug() << parsed_peptide.toProForma();
  return parsed_peptide.makePeptideSp();
}

/**
 * \brief build a peptide from a ProForma string, non const flavour
 *
 * Starts from a Peptide constructed with an empty string, fills it with
 * parseStringToPeptide and returns the result of
 * Peptide::makeNoConstPeptideSp.
 *
 * \param pepstr the peptide string to parse
 * \return the value returned by makeNoConstPeptideSp() on the parsed peptide
 */
NoConstPeptideSp
PeptideProFormaParser::parseNoConstString(const QString &pepstr)
{
  // no locking is done here (a QMutexLocker on _mutex was disabled)
  Peptide parsed_peptide("");
  parseStringToPeptide(pepstr, parsed_peptide);

  return parsed_peptide.makeNoConstPeptideSp();
}
} // namespace pappso
