/**
 * \file input/sage/sagereader.cpp
 * \date 21/08/2024
 * \author Olivier Langella
 * \brief read data files from Sage output
 */

/*******************************************************************************
 * Copyright (c) 2024 Olivier Langella
 *<Olivier.Langella@universite-paris-saclay.fr>.
 *
 * This file is part of i2MassChroQ.
 *
 *     i2MassChroQ is free software: you can redistribute it and/or modify
 *     it under the terms of the GNU General Public License as published by
 *     the Free Software Foundation, either version 3 of the License, or
 *     (at your option) any later version.
 *
 *     i2MassChroQ is distributed in the hope that it will be useful,
 *     but WITHOUT ANY WARRANTY; without even the implied warranty of
 *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *     GNU General Public License for more details.
 *
 *     You should have received a copy of the GNU General Public License
 *     along with i2MassChroQ.  If not, see <http://www.gnu.org/licenses/>.
 *
 ******************************************************************************/

#include "sagereader.h"
#include <QJsonObject>
#include <QJsonArray>
#include <odsstream/tsvreader.h>
#include <odsstream/odsexception.h>
#include "sagetsvhandler.h"
#include "pappsomspp/core/utils.h"
#include "pappsomspp/core/exception/exceptionnotfound.h"
#include "pappsomspp/core/pappsoexception.h"
#include "../../../../fasta/fastareader.h"

/**
 * \brief build a Sage reader from its collaborators
 *
 * \param p_monitor pointer stored in mp_monitor
 * \param p_output pointer stored in mp_cborWriter, the CBOR stream writer
 * this reader appends to
 * \param sage_file_reader used to initialise m_sageFileReader, whose JSON
 * document is queried by the other methods of this class
 * \param sage_json_file stored in m_jsonAbsoluteFilePath
 */
pappso::cbor::psm::SageReader::SageReader(pappso::UiMonitorInterface *p_monitor,
                                          pappso::cbor::CborStreamWriter *p_output,
                                          const pappso::cbor::psm::SageFileReader &sage_file_reader,
                                          const QString &sage_json_file)
  : m_sageFileReader(sage_file_reader)
{
  // plain, independent assignments: their relative order does not matter
  m_jsonAbsoluteFilePath = sage_json_file;
  mp_cborWriter          = p_output;
  mp_monitor             = p_monitor;
}

/** \brief destructor: performs no explicit cleanup */
pappso::cbor::psm::SageReader::~SageReader() = default;

/**
 * \brief accessor on the JSON file path given to the constructor
 * \return const reference to m_jsonAbsoluteFilePath
 */
const QString &
pappso::cbor::psm::SageReader::getmJsonAbsoluteFilePath() const
{
  return this->m_jsonAbsoluteFilePath;
}


void
pappso::cbor::psm::SageReader::read()
{
  readTsvFile();
}

/**
 * \brief build the FASTA callback object attached to a SageReader
 *
 * The decoy tag is read once from the owning reader (see
 * SageReader::getDecoyTag(), which may throw pappso::ExceptionNotFound) and
 * cached in m_decoyTag.
 *
 * \param self owning reader, stored in mp_self; must not be null because it
 * is dereferenced here
 */
pappso::cbor::psm::SageReader::FastaSeq::FastaSeq(SageReader *self)
{
  m_decoyTag = self->getDecoyTag();
  mp_self    = self;
}

void
pappso::cbor::psm::SageReader::FastaSeq::setSequence(const QString &description_in,
                                                     const QString &sequence_in)
{
  QString accession = description_in.split(" ", Qt::SkipEmptyParts).at(0);
  try
    {
      const PsmProtein &psm_protein = mp_self->m_psmProteinMap.getByAccession(accession);
      psm_protein.protein_sp.get()->setSequence(sequence_in);
      psm_protein.protein_sp.get()->setDescription(description_in);
    }
  catch(pappso::ExceptionNotFound &err)
    {
    }
  try
    {
      accession                     = accession.prepend(m_decoyTag);
      const PsmProtein &psm_protein = mp_self->m_psmProteinMap.getByAccession(accession);
      psm_protein.protein_sp.get()->setSequence(sequence_in);
      psm_protein.protein_sp.get()->setDescription(description_in);
      psm_protein.protein_sp.get()->reverse();
    }
  catch(pappso::ExceptionNotFound &err)
    {
    }
}

/**
 * \brief accessor on the CBOR writer given to the constructor
 * \return reference obtained by dereferencing mp_cborWriter (no null check)
 */
pappso::cbor::CborStreamWriter &
pappso::cbor::psm::SageReader::getCborStreamWriter() const
{
  return *(this->mp_cborWriter);
}


/**
 * \brief parse the Sage TSV results and write proteins and samples to CBOR
 *
 * Steps, in order:
 *  - collect the mzML paths of the Sage JSON document (extractMzmlPathList)
 *  - parse the TSV file named in the JSON document with a SageTsvHandler
 *  - parse the FASTA file named in the JSON document through a FastaSeq
 *  - append the "protein_map" key and the content of m_psmProteinMap
 *  - append the "sample_list" key and an array filled by the TSV handler
 *
 * An OdsException raised by the TSV parsing or by the sample list writing is
 * rethrown as a pappso::PappsoException mentioning the TSV file path.
 * The helpers reading the JSON document may throw pappso::ExceptionNotFound.
 */
void
pappso::cbor::psm::SageReader::readTsvFile()
{

  // fills m_mzmlPathList from the "mzml_paths" entry of the JSON document
  extractMzmlPathList(m_sageFileReader.getJsonDocument());
  // getTsvFilePath(mp_identificationDataSource->getJsonDocument().object());
  QFileInfo tsv_file_info(getTsvFilePath(m_sageFileReader.getJsonDocument()));
  // the handler receives this reader and the protein map; presumably it fills
  // the map while the TSV file is parsed (TODO confirm in SageTsvHandler)
  SageTsvHandler handler(mp_monitor, *this, m_psmProteinMap);
  try
    {
      TsvReader tsv_reader(handler);

      QFile tsv_file(tsv_file_info.absoluteFilePath());
      tsv_reader.parse(tsv_file);
      tsv_file.close();
    }
  catch(OdsException &error_ods)
    {
      // convert the odsstream error into the project exception type, adding
      // the path of the file being parsed
      throw pappso::PappsoException(QObject::tr("Error reading %1 file:\n %2")
                                      .arg(tsv_file_info.absoluteFilePath())
                                      .arg(error_ods.qwhat()));
    }


  // collect protein sequences
  QFile fastaFile(getFastaFilePath(m_sageFileReader.getJsonDocument()));
  // FastaSeq::setSequence sets sequence and description on the proteins of
  // m_psmProteinMap; presumably the FASTA reader calls it once per entry
  SageReader::FastaSeq seq(this);
  pappso::FastaReader reader(seq);
  reader.parse(fastaFile);

  qDebug();
  // CBOR output: the "protein_map" key followed by the protein map content
  mp_cborWriter->append("protein_map");
  m_psmProteinMap.writeMap(*mp_cborWriter);


  // CBOR output: the "sample_list" key followed by an array whose content is
  // produced by the TSV handler
  mp_cborWriter->append("sample_list");
  mp_cborWriter->startArray();
  try
    {
      handler.writeSampleList();
    }
  catch(OdsException &error_ods)
    {
      // same conversion as for the TSV parsing above
      throw pappso::PappsoException(QObject::tr("Error reading %1 file:\n %2")
                                      .arg(tsv_file_info.absoluteFilePath())
                                      .arg(error_ods.qwhat()));
    }

  mp_cborWriter->endArray();
}

void
pappso::cbor::psm::SageReader::extractMzmlPathList(const QJsonDocument &json_doc)
{

  QJsonObject sage_object        = json_doc.object();
  QJsonValue json_mzml_path_list = sage_object.value("mzml_paths");
  if(json_mzml_path_list.isUndefined())
    {
      throw pappso::ExceptionNotFound(QObject::tr("mzml_paths not found in Sage json document"));
    }
  m_mzmlPathList.clear();

  for(auto path_mzml : json_mzml_path_list.toArray())
    {
      m_mzmlPathList << path_mzml.toString();
    }
}

/**
 * \brief find the mzML path recorded in the Sage JSON document for an MS run
 *
 * The paths are those collected in m_mzmlPathList by extractMzmlPathList().
 *
 * \param file_msrun MS run file name, matched against the end of each path
 * \return const reference to the first path of m_mzmlPathList ending with
 * \p file_msrun
 * \throw pappso::ExceptionNotFound if no path ends with \p file_msrun
 */
const QString &
pappso::cbor::psm::SageReader::getMzmlPath(const QString &file_msrun) const
{
  for(const auto &file_path : m_mzmlPathList)
    {
      if(file_path.endsWith(file_msrun))
        {
          return file_path;
        }
    }
  // arg() substitutes the %1 placeholder; append() would leave "%1" in the
  // message and glue the run name at its end
  throw pappso::ExceptionNotFound(
    QObject::tr("MS run %1 not found in Sage json document").arg(file_msrun));
}


/**
 * \brief get the TSV result file path from the "output_paths" entry of a Sage
 * JSON document
 *
 * Only string elements ending with ".tsv" are considered. When several
 * elements match, the last one of the array is returned.
 *
 * \param json_doc Sage JSON document
 * \return the TSV path, or an empty string if no element ends with ".tsv"
 * \throw pappso::ExceptionNotFound if "output_paths" is missing or is not an
 * array
 */
QString
pappso::cbor::psm::SageReader::getTsvFilePath(const QJsonDocument &json_doc)
{
  const QJsonValue output_paths_value = json_doc.object().value("output_paths");
  if(output_paths_value.isUndefined())
    {
      throw pappso::ExceptionNotFound(QObject::tr("output_paths not found in Sage json document"));
    }
  if(!output_paths_value.isArray())
    {
      throw pappso::ExceptionNotFound(QObject::tr("output_paths is not an array"));
    }

  QString tsv_path;
  const QJsonArray output_paths = output_paths_value.toArray();
  for(const auto &entry : output_paths)
    {
      if(!entry.isString())
        {
          continue;
        }
      const QString candidate = entry.toString();
      if(candidate.endsWith(".tsv"))
        {
          // no break: a later ".tsv" element overrides an earlier one
          tsv_path = candidate;
        }
    }
  return tsv_path;
}

/**
 * \brief get the FASTA file path stored under "database" / "fasta" in a Sage
 * JSON document
 *
 * \param json_doc Sage JSON document
 * \return the non empty FASTA path
 * \throw pappso::ExceptionNotFound if the "database" entry is missing or if
 * its "fasta" value converts to an empty string
 */
QString
pappso::cbor::psm::SageReader::getFastaFilePath(const QJsonDocument &json_doc)
{
  const QJsonValue database_value = json_doc.object().value("database");
  if(database_value.isUndefined())
    {
      throw pappso::ExceptionNotFound(QObject::tr("database not found in Sage json document"));
    }

  QString fasta_path = database_value.toObject().value("fasta").toString();
  if(fasta_path.isEmpty())
    {
      throw pappso::ExceptionNotFound(QObject::tr("fasta value is empty"));
    }
  return fasta_path;
}


std::vector<pappso::cbor::psm::SageReader::SageModification>
pappso::cbor::psm::SageReader::getStaticModificationList() const
{
  std::vector<SageReader::SageModification> list;
  QJsonObject sage_object = m_sageFileReader.getJsonDocument().object();
  QJsonValue database     = sage_object.value("database");
  if(database.isUndefined())
    {
      throw pappso::ExceptionNotFound(QObject::tr("database not found in Sage json document"));
    }

  QJsonValue static_mods = database.toObject().value("static_mods");
  if(static_mods.isUndefined())
    {
      throw pappso::ExceptionNotFound(QObject::tr("static_mods not found in Sage json document"));
    }
  for(QString residue_str : static_mods.toObject().keys())
    {
      SageModification modif;
      modif.residue      = residue_str.at(0);
      modif.modification = pappso::Utils::guessAaModificationPbyMonoisotopicMassDelta(
        (Enums::AminoAcidChar)modif.residue.toLatin1(),
        static_mods.toObject().value(residue_str).toDouble());
      modif.strModification =
        QString::number(static_mods.toObject().value(residue_str).toDouble(), 'f', 6);
      if(modif.strModification.isEmpty())
        {
          throw pappso::PappsoException(QObject::tr(" modif.strModification is empty"));
        }
      if(modif.modification->getMass() < 0)
        {
          modif.strModification = QString("[%1]").arg(modif.strModification);
        }
      else
        {
          modif.strModification = QString("[+%1]").arg(modif.strModification);
        }
      list.push_back(modif);
    }
  return list;
}

std::vector<pappso::cbor::psm::SageReader::SageModification>
pappso::cbor::psm::SageReader::getVariableModificationList() const
{
  std::vector<SageReader::SageModification> list;
  QJsonObject sage_object = m_sageFileReader.getJsonDocument().object();
  QJsonValue database     = sage_object.value("database");
  if(database.isUndefined())
    {
      throw pappso::ExceptionNotFound(QObject::tr("database not found in Sage json document"));
    }

  QJsonValue var_mods = database.toObject().value("variable_mods");
  if(var_mods.isUndefined())
    {
      throw pappso::ExceptionNotFound(QObject::tr("static_mods not found in Sage json document"));
    }
  for(QString residue_str : var_mods.toObject().keys())
    {
      SageModification modif;
      modif.residue = residue_str.at(0);
      for(QJsonValue one_mass : var_mods.toObject().value(residue_str).toArray())
        {
          modif.modification = pappso::Utils::guessAaModificationPbyMonoisotopicMassDelta(
            (Enums::AminoAcidChar)modif.residue.toLatin1(), one_mass.toDouble());
          modif.strModification = QString::number(one_mass.toDouble(), 'f', 6);
          if(modif.strModification.isEmpty())
            {
              throw pappso::PappsoException(QObject::tr(" modif.strModification is empty"));
            }
          if(modif.modification->getMass() < 0)
            {
              modif.strModification = QString("[%1]").arg(modif.strModification);
            }
          else
            {
              modif.strModification = QString("[+%1]").arg(modif.strModification);
            }
          list.push_back(modif);
        }
    }
  return list;
}

/**
 * \brief get the decoy tag stored under "database" / "decoy_tag" in the Sage
 * JSON document
 *
 * \return the non empty decoy tag
 * \throw pappso::ExceptionNotFound if the "database" entry is missing or if
 * its "decoy_tag" value converts to an empty string
 */
QString
pappso::cbor::psm::SageReader::getDecoyTag() const
{
  const QJsonValue database_value = m_sageFileReader.getJsonDocument().object().value("database");
  if(database_value.isUndefined())
    {
      throw pappso::ExceptionNotFound(QObject::tr("database not found in Sage json document"));
    }

  QString decoy_tag = database_value.toObject().value("decoy_tag").toString();
  if(decoy_tag.isEmpty())
    {
      throw pappso::ExceptionNotFound(QObject::tr("decoy_tag value is empty"));
    }
  return decoy_tag;
}

/**
 * \brief accessor on the Sage file reader given to the constructor
 * \return const reference to m_sageFileReader
 */
const pappso::cbor::psm::SageFileReader &
pappso::cbor::psm::SageReader::getSageFileReader() const
{
  return this->m_sageFileReader;
}
