/**
 * \file input/psmcbor/cborpsmfilereader.cpp
 * \date 14/11/2024
 * \author Olivier Langella
 * \brief read data files from CBOR PSM
 */

/*******************************************************************************
 * Copyright (c) 2025 Olivier Langella
 *<Olivier.Langella@universite-paris-saclay.fr>.
 *
 * This file is part of i2MassChroQ.
 *
 *     i2MassChroQ is free software: you can redistribute it and/or modify
 *     it under the terms of the GNU General Public License as published by
 *     the Free Software Foundation, either version 3 of the License, or
 *     (at your option) any later version.
 *
 *     i2MassChroQ is distributed in the hope that it will be useful,
 *     but WITHOUT ANY WARRANTY; without even the implied warranty of
 *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *     GNU General Public License for more details.
 *
 *     You should have received a copy of the GNU General Public License
 *     along with i2MassChroQ.  If not, see <http://www.gnu.org/licenses/>.
 *
 ******************************************************************************/

#include "cborpsmfilereader.h"
#include <pappsomspp/core/peptide/peptideproformaparser.h>
#include <pappsomspp/core/exception/exceptionnotfound.h>

CborPsmFileReader::CborPsmFileReader(Project *p_project)
  : PsmFileReaderBase(), mp_project(p_project)
{
  qDebug();
}

/** \brief destructor: this class releases nothing explicitly */
CborPsmFileReader::~CborPsmFileReader() = default;

/**
 * \brief bind every known protein to a FASTA file
 *
 * Each protein stored in m_proteinXtpSpMap receives the first FASTA file of
 * m_allFastaFileSp.
 *
 * \throws pappso::ExceptionNotFound if m_allFastaFileSp is empty
 */
void
CborPsmFileReader::assignProteinXtpToFastaFiles()
{
  if(m_allFastaFileSp.empty())
    {
      throw pappso::ExceptionNotFound(QObject::tr(
        "ERROR assigning proteins to Fasta file : no fasta files found"));
    }

  // TODO : get the good fasta file if there are more than one
  const auto &first_fasta = m_allFastaFileSp.front();

  // iterate by const reference: copying each map entry would duplicate the
  // key and the smart pointer at every step for no benefit
  for(const auto &it_pair : m_proteinXtpSpMap)
    {
      qDebug() << it_pair.first;
      it_pair.second.get()->setFastaFileP(first_fasta.get());
    }
}

void
CborPsmFileReader::sampleListFinished(pappso::UiMonitorInterface &monitor
                                      [[maybe_unused]])
{
  assignProteinXtpToFastaFiles();
}


void
CborPsmFileReader::informationsReady(pappso::UiMonitorInterface &monitor
                                     [[maybe_unused]])
{
  qDebug();
}


void
CborPsmFileReader::parameterMapReady(pappso::UiMonitorInterface &monitor
                                     [[maybe_unused]])
{
  qDebug();
  for(auto it_engine : m_cborParameterMap)
    {
      qDebug() << it_engine.first.toString();
      // if (engine.toString()
      if(it_engine.first.toString() == "sage")
        {
          m_identificationEngineList.push_back(IdentificationEngine::sage);
          QJsonDocument doc;
          doc.setObject(it_engine.second.toJsonValue().toObject());
          m_identificationEngineParameterMap.insert(
            {IdentificationEngine::sage, doc});
        }
      else if(it_engine.first.toString() == "xtandem")
        {
          m_identificationEngineList.push_back(IdentificationEngine::XTandem);
          QJsonDocument doc;
          doc.setObject(it_engine.second.toJsonValue().toObject());
          m_identificationEngineParameterMap.insert(
            {IdentificationEngine::XTandem, doc});
        }
    }
}

void
CborPsmFileReader::fastaFilesReady(pappso::UiMonitorInterface &monitor
                                   [[maybe_unused]])
{
  for(auto fasta_str : m_targetFastaFiles)
    {
      FastaFileSp fastaFile =
        mp_project->getFastaFileStore().getInstance(FastaFile(fasta_str));
      m_allFastaFileSp.push_back(fastaFile);
    }
  for(auto fasta_str : m_decoyFastaFiles)
    {
      FastaFileSp fastaFile =
        mp_project->getFastaFileStore().getInstance(FastaFile(fasta_str));
      m_allFastaFileSp.push_back(fastaFile);
    }
}

/**
 * \brief reset the per sample state and register the identification sources
 *
 * Steps, in order:
 *  - clear the engine to identification data source map left by the previous
 *    sample and reset the current identification group
 *  - in ProjectMode::combined, select the first identification group of the
 *    project (created when the project has none) and fetch the MS run
 *    matching m_currentPeaklistFile.name
 *  - for the Sage engine, build one identification data source per
 *    identification file whose name ends with ".json", add it to the current
 *    identification group and, when Sage generated the decoys, configure the
 *    protein store decoy regular expression from the Sage parameters
 *  - add every known FASTA file to the data sources kept in the map
 *
 * Project modes other than combined are not implemented (see the disabled
 * draft below): no identification group is selected and msp_currentMsrun is
 * left untouched.
 *
 * \param monitor unused
 * \throws pappso::ExceptionNotFound if an identification data source has to
 * be registered while no identification group is selected
 */
void
CborPsmFileReader::sampleStarted(pappso::UiMonitorInterface &monitor
                                 [[maybe_unused]])
{
  qDebug() << m_currentSampleName;
  m_currentIdentificationEngineToIdentificationDataSourceSpMap.clear();

  mp_currentIdentificationGroup = nullptr;
  if(mp_project->getProjectMode() == ProjectMode::combined)
    {
      // a combined project works with one single identification group
      std::vector<IdentificationGroup *> identification_list =
        mp_project->getIdentificationGroupList();
      if(identification_list.empty())
        {
          mp_currentIdentificationGroup = mp_project->newIdentificationGroup();
        }
      else
        {
          mp_currentIdentificationGroup = identification_list.front();
        }

      qDebug() << m_currentPeaklistFile.name;
      msp_currentMsrun =
        mp_project->getMsRunStore().getInstance(m_currentPeaklistFile.name);
      /*
      for(auto &filePair : m_mapFilePath2MsRunSp)
        {
          m_mapFilePath2IdentificationGroupPtr.insert(
            {filePair.first, identification_group_p});
        }
        */
    }
  else
    { /*
       TODO: not implemented, disabled draft kept for reference

       for(auto &filePair : m_mapFilePath2MsRunSp)
         {
           IdentificationGroup *identification_group_p = nullptr;
           for(IdentificationGroup *identification_p_flist :
               identification_list)
             {
               if(identification_p_flist->containSample(
                    filePair.second.get()->getSampleName()))
                 {
                   identification_group_p = identification_p_flist;
                   break;
                 }
             }
           if(identification_group_p == nullptr)
             {
               identification_group_p = p_project->newIdentificationGroup();
             }

           m_mapFilePath2IdentificationGroupPtr.insert(
             {filePair.first, identification_group_p});
         }*/
    }

  for(auto engine : m_identificationEngineList)
    {
      if(engine != IdentificationEngine::sage)
        {
          // only Sage identification files are registered here
          continue;
        }

      for(auto &psm_file : m_currentIdentificationFileList)
        {
          if(!psm_file.name.endsWith(".json"))
            {
              continue;
            }

          // the group pointer is dereferenced below: fail with an explicit
          // error instead of dereferencing a null pointer when the project
          // mode did not select any group
          if(mp_currentIdentificationGroup == nullptr)
            {
              throw pappso::ExceptionNotFound(
                QObject::tr("no identification group available to register "
                            "identification file %1 : only the combined "
                            "project mode is handled")
                  .arg(psm_file.name));
            }

          auto sage_identification_source_sp =
            mp_project->getIdentificationDataSourceStore()
              .buildIdentificationSageJsonFileSp(
                QFileInfo(psm_file.name),
                m_indexIdentificationSource++,
                m_identificationEngineParameterMap.at(
                  IdentificationEngine::sage),
                msp_currentMsrun);

          // NOTE(review): if this container follows std::map::insert
          // semantics, only the first Sage source of a sample is kept here
          // when several ".json" files are listed - to be confirmed
          m_currentIdentificationEngineToIdentificationDataSourceSpMap.insert(
            {IdentificationEngine::sage, sage_identification_source_sp});

          mp_currentIdentificationGroup->addIdentificationDataSourceP(
            sage_identification_source_sp.get());


          pappso::ProjectParameters parameters;
          sage_identification_source_sp.get()->fillProjectParameters(
            parameters);

          if(parameters
               .getValue(pappso::ProjectParamCategory::identification,
                         "sage_database_generate_decoys")
               .toBool())
            {
              // decoys were generated by Sage: recognise them with a regular
              // expression anchored on the Sage decoy tag
              mp_project->getProteinStore().setDecoySelectionType(
                DecoySelectionType::regexp);

              mp_project->getProteinStore().setRegexpDecoyPattern(
                QString("^%1").arg(
                  parameters
                    .getValue(pappso::ProjectParamCategory::identification,
                              "sage_database_decoy_tag")
                    .toString()));
            }
        }
    }

  // every registered data source knows every FASTA file; entries are visited
  // by const reference to avoid copying the smart pointers
  for(const auto &it_map :
      m_currentIdentificationEngineToIdentificationDataSourceSpMap)
    {
      for(auto &fasta_file : m_allFastaFileSp)
        {
          it_map.second.get()->addFastaFile(fasta_file);
        }
    }

  // p_monitor->setTotalSteps(m_mapFilePath2IdentificationGroupPtr.size());
  /*
  std::size_t i = p_project->getIdentificationDataSourceStore()
                    .getIdentificationDataSourceList()
                    .size();
  for(auto &identGroupPair : m_mapFilePath2IdentificationGroupPtr)
    {
      auto sage_identification_source_sp =
        p_project->getIdentificationDataSourceStore()
          .buildIdentificationSageJsonFileSp(
            sage_json_file,
            i,
            m_jsonData,
            getMsRunSpWithFileName(identGroupPair.first));
      identGroupPair.second->addIdentificationDataSourceP(
        sage_identification_source_sp.get());
      i += 1;
      m_mapFilePath2IdentificationSageJsonFileSp.insert(
        {identGroupPair.first, sage_identification_source_sp});

      sage_identification_source_sp.get()->setIdentificationEngineVersion(
        m_sageVersion);*/
}

void
CborPsmFileReader::psmReady(pappso::UiMonitorInterface &monitor
                            [[maybe_unused]])
{

  pappso::PeptideSp parse_pep =
    pappso::PeptideProFormaParser::parseString(m_currentPsmProforma);

  PeptideXtpSp msp_peptide = PeptideXtp(*(parse_pep.get())).makePeptideXtpSp();
  qDebug();
  // variable modifications :
  /*
  setVariableModifications(peptide_sp,
                           peptide_line.peptide_string_list.at(6));
*/
  qDebug() << msp_peptide.get()->toProForma();


  msp_peptide = mp_project->getPeptideStore().getInstance(msp_peptide);


  std::size_t spectrum_index = m_cborScanId.value("index").toInteger();


  for(auto psm_eval : m_cborScanPsmEval)
    {
      PeptideEvidence pe(msp_currentMsrun.get(), spectrum_index, true);
      pe.setPeptideXtpSp(msp_peptide);

      fillPeptideEvidence(pe);
      IdentificationEngine identification_engine =
        IdentificationEngine::unknown;

      if(psm_eval.first.toString() == "sage")
        {
          identification_engine = IdentificationEngine::sage;
          storeSageEval(pe, psm_eval.second.toMap());
        }
      if(identification_engine != IdentificationEngine::unknown)
        {

          IdentificationDataSourceSp identification_data_source_sp;
          auto it =
            m_currentIdentificationEngineToIdentificationDataSourceSpMap.find(
              identification_engine);
          if(it ==
             m_currentIdentificationEngineToIdentificationDataSourceSpMap.end())
            {
              throw pappso::ExceptionNotFound(
                QObject::tr(
                  "indentification data source not found for engine %1")
                  .arg((int8_t)identification_engine));
            }
          identification_data_source_sp = it->second;

          pe.setIdentificationDataSource(identification_data_source_sp.get());
          pe.setIdentificationEngine(identification_engine);

          PeptideEvidenceSp pe_sp = identification_data_source_sp.get()
                                      ->getPeptideEvidenceStore()
                                      .getInstance(&pe);


          for(auto &protein_ref : m_currentPsmProteinRefList)
            {
              auto it_prot = m_proteinXtpSpMap.find(protein_ref.accession);
              if(it_prot == m_proteinXtpSpMap.end())
                {
                  throw pappso::ExceptionNotFound(
                    QObject::tr("protein accession %1 not found")
                      .arg(protein_ref.accession));
                }
              ProteinXtpSp protein_sp = it_prot->second;
              ProteinMatch *p_protein_match =
                mp_currentIdentificationGroup->getProteinMatchInstance(
                  protein_ref.accession);
              if(protein_ref.positions.size() == 0)
                {
                  protein_ref.positions.push_back(0);
                }
              for(auto position : protein_ref.positions)
                {
                  PeptideMatch peptide_match;
                  peptide_match.setStart(position);
                  peptide_match.setPeptideEvidenceSp(pe_sp);


                  p_protein_match->setChecked(true);
                  // qDebug() << "startElement_protein p_protein_match 3 " <<
                  // _p_protein_match;
                  p_protein_match->setProteinXtpSp(protein_sp);
                  p_protein_match->addPeptideMatch(peptide_match);
                  // p_protein_match->setParam(ProteinParam::q_score,
                  // m_line.protein_q);
                }
            }
        }
    }
}

/**
 * \brief copy the scan level values of the current PSM into a peptide
 * evidence
 *
 * Charge ("z") and experimental mass ("mz") come from m_cborScanPrecursor,
 * the retention time ("rt") from m_cborScanMs2; the evidence is also marked
 * as checked.
 *
 * \param pe peptide evidence to fill
 */
void
CborPsmFileReader::fillPeptideEvidence(PeptideEvidence &pe)
{
  const auto precursor_charge = m_cborScanPrecursor.value("z").toInteger();
  // NOTE(review): the "mz" entry is handed to setExperimentalMass as is;
  // confirm whether that setter expects an m/z or a mass
  const auto precursor_mz   = m_cborScanPrecursor.value("mz").toDouble();
  const auto retention_time = m_cborScanMs2.value("rt").toDouble();

  pe.setCharge(precursor_charge);
  pe.setChecked(true);
  pe.setExperimentalMass(precursor_mz);
  pe.setRetentionTime(retention_time);
}

/**
 * \brief store the Sage evaluation of a PSM as peptide evidence parameters
 *
 * The "hyperscore" entry is stored under
 * PeptideEvidenceParam::tandem_hyperscore; the other entries go to their
 * sage_* parameter. "isotope_error" is read as an integer, every other entry
 * as a double.
 *
 * \param pe peptide evidence receiving the parameters
 * \param sage_cbor_map CBOR map holding the Sage evaluation of the PSM
 */
void
CborPsmFileReader::storeSageEval(PeptideEvidence &pe,
                                 const QCborMap &sage_cbor_map)
{
  qDebug();

  // read every value first ...
  const auto hyperscore = sage_cbor_map.value("hyperscore").toDouble();
  const auto discriminant_score =
    sage_cbor_map.value("sage_discriminant_score").toDouble();
  const auto peptide_q       = sage_cbor_map.value("peptide_q").toDouble();
  const auto posterior_error = sage_cbor_map.value("posterior_error").toDouble();
  const auto spectrum_q      = sage_cbor_map.value("spectrum_q").toDouble();
  const auto predicted_rt    = sage_cbor_map.value("predicted_rt").toDouble();
  const auto isotope_error   = sage_cbor_map.value("isotope_error").toInteger();

  // ... then store them, in the same order
  pe.setParam(PeptideEvidenceParam::tandem_hyperscore, hyperscore);
  pe.setParam(PeptideEvidenceParam::sage_sage_discriminant_score,
              discriminant_score);
  pe.setParam(PeptideEvidenceParam::sage_peptide_q, peptide_q);
  pe.setParam(PeptideEvidenceParam::sage_posterior_error, posterior_error);
  pe.setParam(PeptideEvidenceParam::sage_spectrum_q, spectrum_q);
  pe.setParam(PeptideEvidenceParam::sage_predicted_rt, predicted_rt);
  pe.setParam(PeptideEvidenceParam::sage_isotope_error, isotope_error);

  // TODO: no progress report nor interruption check is done here (a disabled
  // draft based on a line counter and mp_monitor used to sit at this place)
}


void
CborPsmFileReader::proteinMapReady(pappso::UiMonitorInterface &monitor
                                   [[maybe_unused]])
{

  for(auto &cbor_protein : m_proteinMap.getProteinMap())
    {

      ProteinXtpSp protein_sp = ProteinXtp().makeProteinXtpSp();
      protein_sp.get()->setAccession(
        cbor_protein.second.protein_sp.get()->getAccession());
      protein_sp.get()->setDescription(
        cbor_protein.second.protein_sp.get()->getDescription());
      protein_sp.get()->setSequence(
        cbor_protein.second.protein_sp.get()->getSequence());
      // protein_sp.get()->setFastaFileP(m_sageReader.getFastaFileSp().get());
      protein_sp = mp_project->getProteinStore().getInstance(protein_sp);
      protein_sp.get()->setIsDecoy(!cbor_protein.second.isTarget);
      protein_sp.get()->setIsContaminant(cbor_protein.second.isContaminant);


      protein_sp = mp_project->getProteinStore().getInstance(protein_sp);

      m_proteinXtpSpMap.insert({cbor_protein.first, protein_sp});
    }

  // m_proteinMap.clear();
}
