/**
 * \file test/test_proforma.cpp
 * \date 21/01/2025
 * \author Olivier Langella
 * \brief test proforma notation
 */

/*******************************************************************************
 * Copyright (c) 2025 Olivier Langella
 *<Olivier.Langella@universite-paris-saclay.fr>.
 *
 * This file is part of the PAPPSOms++ library.
 *
 *     PAPPSOms++ is free software: you can redistribute it and/or modify
 *     it under the terms of the GNU General Public License as published by
 *     the Free Software Foundation, either version 3 of the License, or
 *     (at your option) any later version.
 *
 *     PAPPSOms++ is distributed in the hope that it will be useful,
 *     but WITHOUT ANY WARRANTY; without even the implied warranty of
 *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *     GNU General Public License for more details.
 *
 *     You should have received a copy of the GNU General Public License
 *     along with PAPPSOms++.  If not, see <http://www.gnu.org/licenses/>.
 *
 ******************************************************************************/

// Run only this test case with: ./tests/catch2-only-tests [proforma] -s

#include <catch2/catch_test_macros.hpp>
#include <catch2/catch_approx.hpp>
#include <pappsomspp/peptide/peptideproformaparser.h>
#include <pappsomspp/exception/exceptionnotpossible.h>


#include "common.h"
#include "tests/config.h"
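// Pasted log line (format: file@line, function(): message), apparently from i2masschroq,
// showing a ProForma string with an N-terminal mass shift:
//   /home/langella/developpement/git/i2masschroq/src/input/sage/sagetsvhandler.cpp@457,
//   SageTsvHandler::parsePeptide(): "[+42.0106]-M[MOD:00719]QNDAGEFVDLYVPR"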

// https://pmc.ncbi.nlm.nih.gov/articles/PMC7612572/
// https://github.com/HUPO-PSI/ProForma/blob/master/SpecDocument/ProForma_v2_draft15_February2022.pdf

TEST_CASE("Test proforma notation", "[proforma]")
{
  // Qt log lines are formatted as "<file>@<line>, <function>(): <message>".
  qSetMessagePattern(QString("%{file}@%{line}, %{function}(): %{message}"));

  SECTION("..:: Test ProForma parser ::..", "[proforma]")
  {
    // The last '[' is never closed: parsing is expected to throw.
    REQUIRE_THROWS_AS(
      PeptideProFormaParser::parseString("C[MOD:00397][MOD:01160]C[MOD:00397]AADDKEAC[FAVEGPK"),
      pappso::ExceptionNotPossible);
    qDebug();

    // Accession-only input: check the mass, then the parse -> serialise round trip.
    const PeptideSp psimod_peptide = PeptideProFormaParser::parseString(
      "C[MOD:00397][MOD:01160]C[MOD:00397]AADDKEAC[MOD:00397]FAVEGPK");
    REQUIRE(psimod_peptide->getMass() == Catch::Approx(1909.764470427));
    REQUIRE(psimod_peptide->toProForma().toStdString() ==
            "C[MOD:00397][MOD:01160]C[MOD:00397]AADDKEAC[MOD:00397]FAVEGPK");

    // Numeric mass shift on K: the serialised form is expected to use [MOD:00704]
    // in place of the raw "-18.02" value.
    const PeptideSp mass_shift_peptide = PeptideProFormaParser::parseString(
      "C[MOD:00397][MOD:01160]C[MOD:00397]AADDK[-18.02]EAC[MOD:00397]FAVEGPK");
    // Disabled expectation, kept for reference (raw mass shift written back):
    //   REQUIRE(mass_shift_peptide->toProForma().toStdString() ==
    //           "C[MOD:00397][MOD:01160]C[MOD:00397]AADDK[-18.0200]EAC[MOD:00397]"
    //           "FAVEGPK");
    REQUIRE(mass_shift_peptide->getMass() == Catch::Approx(1891.744470427));
    REQUIRE(mass_shift_peptide->toProForma().toStdString() ==
            "C[MOD:00397][MOD:01160]C[MOD:00397]AADDK[MOD:00704]EAC[MOD:00397]FAVEGPK");

    // The next four inputs describe the same peptide mass (1810.8195998027) with the
    // +42.0106 modification written in different ways.
    const PeptideSp acetyl_on_first_residue =
      PeptideProFormaParser::parseString("M[MOD:00408][MOD:00719]QNDAGEFVDLYVPR");
    REQUIRE(acetyl_on_first_residue->getMass() == Catch::Approx(1810.8195998027));

    // N-terminal numeric mass shift: expected to be serialised as [MOD:00408].
    const PeptideSp nter_mass_shift =
      PeptideProFormaParser::parseString("[+42.0106]-M[MOD:00719]QNDAGEFVDLYVPR");
    REQUIRE(nter_mass_shift->getMass() == Catch::Approx(1810.8195998027));
    REQUIRE(nter_mass_shift->toProForma().toStdString() ==
            "[MOD:00408]-M[MOD:00719]QNDAGEFVDLYVPR");

    // N-terminal modification given by name: expected to be serialised as [UNIMOD:1].
    const PeptideSp nter_acetyl =
      PeptideProFormaParser::parseString("[Acetyl]-M[MOD:00719]QNDAGEFVDLYVPR");
    REQUIRE(nter_acetyl->getMass() == Catch::Approx(1810.8195998027));
    REQUIRE(nter_acetyl->toProForma().toStdString() == "[UNIMOD:1]-M[MOD:00719]QNDAGEFVDLYVPR");

    // Same name placed on the first residue instead of the N-terminus: same mass, but
    // the peptide must not compare equal to the N-terminal variant, whereas a second
    // parse of the N-terminal notation must.
    const PeptideSp residue_acetyl =
      PeptideProFormaParser::parseString("M[Acetyl][MOD:00719]QNDAGEFVDLYVPR");
    const PeptideSp nter_acetyl_again =
      PeptideProFormaParser::parseString("[Acetyl]-M[MOD:00719]QNDAGEFVDLYVPR");
    REQUIRE(residue_acetyl->getMass() == Catch::Approx(1810.8195998027));
    REQUIRE_FALSE(*residue_acetyl == *nter_acetyl);
    REQUIRE(*nter_acetyl_again == *nter_acetyl);

    // EM[Oxidation]EVEES[Phospho]PEK
    // Modifications given by name are expected to come back as UNIMOD accessions.
    const PeptideSp named_mods =
      PeptideProFormaParser::parseString("EM[Oxidation]EVEES[Phospho]PEK");
    REQUIRE(named_mods->getMass() == Catch::Approx(1301.4734300238));
    REQUIRE(named_mods->toProForma().toStdString() == "EM[UNIMOD:35]EVEES[UNIMOD:21]PEK");

    // EM[MOD:00719]EVEES[MOD:00046]PEK
    // Same expected mass as above, written with MOD accessions, which are kept as is.
    const PeptideSp accession_mods =
      PeptideProFormaParser::parseString("EM[MOD:00719]EVEES[MOD:00046]PEK");
    REQUIRE(accession_mods->getMass() == Catch::Approx(1301.4734300238));
    REQUIRE(accession_mods->toProForma().toStdString() == "EM[MOD:00719]EVEES[MOD:00046]PEK");

    // Example noted but not exercised here: SEQUEN[Formula:C12H20O2]CE

    // [iTRAQ4plex]-EMEVNESPEK
    const PeptideSp itraq_nter = PeptideProFormaParser::parseString("[iTRAQ4plex]-EMEVNESPEK");
    REQUIRE(itraq_nter->getMass() == Catch::Approx(1334.6145817573));
    REQUIRE(itraq_nter->toProForma().toStdString() == "[UNIMOD:214]-EMEVNESPEK");

    // Example noted but not exercised here: ELVIS[Phospho|INFO:newly discovered]K

    // [-48.0042]?F[+189.1164]INIVER
    // The serialised form is expected to omit the "[-48.0042]?" prefix.
    const PeptideSp prefixed_shift =
      PeptideProFormaParser::parseString("[-48.0042]?F[+189.1164]INIVER");
    REQUIRE(prefixed_shift->getMass() == Catch::Approx(1078.6185520281));
    REQUIRE(prefixed_shift->toProForma().toStdString() == "F[+189.1164]INIVER");
  }


  SECTION("..:: Test ProForma parser C13 N15 ::..", "[proforma]")
  {
    /*
     * Excerpt from the ProForma 2.0 specification (Proteoform and Peptidoform
     * Notation, February 3, 2022, page 21, http://psidev.info/proforma):
     *
     * 4.6.1 Use Case 1: Representation of isotopes
     *   This might be used in the case of synthetic peptides with 100% incorporation.
     *   Example: Consider extension for 13C on all residues:
     *     Carbon 13:   <13C>ATPEILTVNSIGQLK
     *     Nitrogen 15: <15N>ATPEILTVNSIGQLK
     *     Deuterium:   <D>ATPEILTVNSIGQLK
     *   The representation of multiple isotopes is also possible. They can be located
     *   in any order.
     *     Both Carbon 13 and Nitrogen 15: <13C><15N>ATPEILTVNSIGQLK
     *   Distributions of isotope masses could be supported in future work.
     *
     * 4.6.2 Use Case 2: Fixed protein modifications
     *   This mechanism can be useful especially in the case of full proteoforms. The
     *   affected amino acid MUST be indicated using @. If more than one residue were
     *   affected, they MUST be comma separated. Examples:
     *     <[S-carboxamidomethyl-L-cysteine]@C>ATPEILTCNSIGCLK
     *     <[MOD:01090]@C>ATPEILTCNSIGCLK
     *     <[Oxidation]@C,M>MTPEILTCNSIGCLK
     *   Fixed modifications MUST be written prior to ambiguous and labile
     *   modifications, and similar to ambiguity notation, N-terminal modifications
     *   MUST be the last ones written, just next to the sequence.
     *   The following examples would be valid:
     *     <[MOD:01090]@C>[Phospho]?EM[Oxidation]EVTSECSPEK
     *     <[MOD:01090]@C>[Acetyl]-EM[Oxidation]EVTSECSPEK
     */

    // Global 13C label: notation round trip, then formula text and formula mass.
    const PeptideSp c13_peptide = PeptideProFormaParser::parseString("<13C>ATPEILTVNSIGQLK");
    REQUIRE(c13_peptide->toProForma().toStdString() == "<13C>ATPEILTVNSIGQLK");
    REQUIRE(c13_peptide->getChemicalFormula().toString().toStdString() ==
            "(13)C 70 H 122 N 18 O 23");
    REQUIRE(c13_peptide->getChemicalFormula().getMass() == Catch::Approx(1653.1278609157));

    // Global 13C and 15N labels together.
    const PeptideSp c13n15_peptide =
      PeptideProFormaParser::parseString("<13C><15N>ATPEILTVNSIGQLK");
    REQUIRE(c13n15_peptide->toProForma().toStdString() == "<13C><15N>ATPEILTVNSIGQLK");
    REQUIRE(c13n15_peptide->getChemicalFormula().getMass() == Catch::Approx(1671.0744889969));
    REQUIRE(c13n15_peptide->getChemicalFormula().toString().toStdString() ==
            "(13)C 70 (15)N 18 H 122 O 23");

    // Fixed modification on C combined with an N-terminal one: the serialised form is
    // expected to carry the fixed modification on the C residue itself.
    const PeptideSp fixed_mod_with_nter =
      PeptideProFormaParser::parseString("<[MOD:01090]@C>[Acetyl]-EM[Oxidation]EVTSECSPEK");
    REQUIRE(fixed_mod_with_nter->toProForma().toStdString() ==
            "[UNIMOD:1]-EM[UNIMOD:35]EVTSEC[MOD:01090]SPEK");
  }

  SECTION("..:: Test ProForma parser fixed modifications ::..", "[proforma]")
  {
    // Specification examples for fixed modifications:
    //   <[S-carboxamidomethyl-L-cysteine]@C>ATPEILTCNSIGCLK
    //   <[MOD:01090]@C>ATPEILTCNSIGCLK
    //   <[Oxidation]@C,M>MTPEILTCNSIGCLK

    // Two target residues (C and M): every C and M is expected to be modified.
    const PeptideSp oxidation_on_c_and_m =
      PeptideProFormaParser::parseString("<[Oxidation]@C,M>MTPEILTCNSIGCLK");
    REQUIRE(oxidation_on_c_and_m->toProForma().toStdString() ==
            "M[UNIMOD:35]TPEILTC[UNIMOD:35]NSIGC[UNIMOD:35]LK");

    // Single target residue, modification given by accession.
    const PeptideSp fixed_on_c =
      PeptideProFormaParser::parseString("<[MOD:01090]@C>ATPEILTCNSIGCLK");
    REQUIRE(fixed_on_c->toProForma().toStdString() == "ATPEILTC[MOD:01090]NSIGC[MOD:01090]LK");

    // Disabled case, kept for reference (modification given by its full name).
    // NOTE(review): the expected string below does not match the parsed sequence and
    // looks copied from another test - fix it before enabling.
    //   parsed   = PeptideProFormaParser::parseString(
    //                "<[S-carboxamidomethyl-L-cysteine]@C>ATPEILTCNSIGCLK");
    //   REQUIRE(parsed->toProForma().toStdString() ==
    //           "[UNIMOD:1]-EM[UNIMOD:35]EVTSECSPEK");
  }

  SECTION("..:: Test ProForma parser Nter Cter ::..", "[proforma]")
  {
    // Specification example:
    //   [iTRAQ4plex]-EM[U:Oxidation]EVNES[Phospho]PEK[iTRAQ4plex]-[Methyl]
    // The strings parsed below write Oxidation without the "U:" prefix.
    const PeptideSp without_cter_mod =
      PeptideProFormaParser::parseString("[iTRAQ4plex]-EM[Oxidation]EVNES[Phospho]PEK[iTRAQ4plex]");
    const PeptideSp with_cter_mod = PeptideProFormaParser::parseString(
      "[iTRAQ4plex]-EM[Oxidation]EVNES[Phospho]PEK[iTRAQ4plex]-[Methyl]");

    REQUIRE(with_cter_mod->toProForma().toStdString() ==
            "[UNIMOD:214]-EM[UNIMOD:35]EVNES[UNIMOD:21]PEK[UNIMOD:214]-[UNIMOD:34]");
    REQUIRE(with_cter_mod->toAbsoluteString().toStdString() ==
            "E(internal:Nter_hydrolytic_cleavage_H,UNIMOD:214)M(UNIMOD:35)EVNES(UNIMOD:21)PEK("
            "internal:Cter_hydrolytic_cleavage_HO,UNIMOD:34,UNIMOD:214)");

    // The two peptides differ only by the C-terminal [Methyl].
    // xref: delta_mono_mass "14.01565"
    REQUIRE((with_cter_mod->getMass() - without_cter_mod->getMass()) ==
            Catch::Approx(14.0156500645));

    // Formula difference (with - without): expected to be C 1 H 2.
    pappso::ChemicalFormula base_formula = without_cter_mod->getChemicalFormula();
    pappso::ChemicalFormula cter_formula = with_cter_mod->getChemicalFormula();

    pappso::ChemicalFormula gained = cter_formula + -base_formula;
    REQUIRE(gained.toString().toStdString() == "C 1 H 2");
    REQUIRE(gained.getMass() == Catch::Approx(14.0156500645));

    // Opposite difference (without - with), built directly from the getter results:
    // expected to be the same elements with negative counts.
    pappso::ChemicalFormula lost =
      without_cter_mod->getChemicalFormula() + -with_cter_mod->getChemicalFormula();
    REQUIRE(lost.toString().toStdString() == "C -1 H -2");
    REQUIRE(lost.getMass() == Catch::Approx(-14.0156500645));
  }
}
