/**
 * \file pappsomspp/processing/cbor/mzcbor/mzmlconvert.cpp
 * \date 19/11/2025
 * \author Olivier Langella
 * \brief convert mzML to mzcbor
 */

/*******************************************************************************
 * Copyright (c) 2025 Olivier Langella <Olivier.Langella@universite-paris-saclay.fr>.
 *
 * This file is part of PAPPSOms-tools.
 *
 *     PAPPSOms-tools is free software: you can redistribute it and/or modify
 *     it under the terms of the GNU General Public License as published by
 *     the Free Software Foundation, either version 3 of the License, or
 *     (at your option) any later version.
 *
 *     PAPPSOms-tools is distributed in the hope that it will be useful,
 *     but WITHOUT ANY WARRANTY; without even the implied warranty of
 *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *     GNU General Public License for more details.
 *
 *     You should have received a copy of the GNU General Public License
 *     along with PAPPSOms-tools.  If not, see <http://www.gnu.org/licenses/>.
 *
 ******************************************************************************/

#include "mzmlconvert.h"
#include <QDebug>
#include <zlib.h>
#include "pappsomspp/core/pappsoexception.h"
#include "pappsomspp/config.h"
#include "binarydataarray.h"


/**
 * \brief Build a converter bound to a progress monitor and a CBOR writer.
 *
 * Both pointers are stored as given. The constructor also registers the names
 * of the XML elements that insideElement() groups into CBOR arrays when they
 * appear as consecutive siblings.
 *
 * \param p_monitor monitor pointer kept in mp_monitor
 * \param p_output CBOR stream writer receiving the converted document
 */
pappso::cbor::mzcbor::MzmlConvert::MzmlConvert(pappso::UiMonitorInterface *p_monitor,
                                               pappso::cbor::CborStreamWriter *p_output)
  : mp_monitor(p_monitor), mp_cborWriter(p_output)
{
  // parameter-like elements
  m_elementToStoreInArray << "cv" << "userParam" << "cvParam";

  // data-carrying elements
  m_elementToStoreInArray << "binaryDataArray" << "spectrum";

  // description / configuration elements
  m_elementToStoreInArray << "sourceFile" << "referenceableParamGroup" << "software"
                          << "instrumentConfiguration" << "processingMethod"
                          << "dataProcessing";

  // scan and precursor related elements
  m_elementToStoreInArray << "scan" << "scanWindow" << "precursor" << "selectedIon";
}

/**
 * \brief Destructor: performs no explicit work in this translation unit.
 */
pappso::cbor::mzcbor::MzmlConvert::~MzmlConvert() = default;

/**
 * \brief Read the XML document from the stream reader and write it as CBOR.
 *
 * The whole output is wrapped in one CBOR map. When the root element is
 * "indexedmzML", the reader is advanced to the next start element, which is
 * then expected to be "mzML". For an "mzML" element the function writes:
 *  - an "mzCBOR" entry: a map holding "mode" (0) followed by the entries
 *    produced by CborStreamWriter::writeInformations();
 *  - an entry named after the element, holding "xmlns" and its attributes;
 *  - one entry per child element, produced by insideElement().
 *
 * Any other root element raises the error "Not an mzML input file" on the
 * stream reader.
 */
void
pappso::cbor::mzcbor::MzmlConvert::readStream()
{
  mp_cborWriter->startMap();
  if(m_qxmlStreamReader.readNextStartElement())
    {
      qDebug() << m_qxmlStreamReader.name().toString();
      if(m_qxmlStreamReader.name().toString() == "indexedmzML")
        {
          // step into the wrapper: the element checked below is its first child
          m_qxmlStreamReader.readNextStartElement();
        }
      if(m_qxmlStreamReader.name().toString() == "mzML")
        {
          // write mzCBOR header
          mp_cborWriter->append("mzCBOR");
          mp_cborWriter->startMap();

          mp_cborWriter->append("mode");
          mp_cborWriter->append(0);

          mp_cborWriter->writeInformations(
            PAPPSOMSPP_NAME, PAPPSOMSPP_VERSION, "mzCBOR", "mzMLconvert");
          mp_cborWriter->endMap();

          // root element: only its namespace and attributes go in this map
          mp_cborWriter->append(m_qxmlStreamReader.name().toString());

          mp_cborWriter->startMap();
          mp_cborWriter->append("xmlns");
          mp_cborWriter->append(m_qxmlStreamReader.namespaceUri());
          attributeListToCbor(m_qxmlStreamReader.attributes());
          mp_cborWriter->endMap();


          bool array_started = false;
          QString last_element;
          while(m_qxmlStreamReader.readNextStartElement())
            {
              insideElement(last_element, array_started);
              // insideElement() returns with the reader on the matching end
              // element, so name() is still the name of the child just written
              last_element = m_qxmlStreamReader.name().toString();
            }

          // insideElement() may have left an array open for the last run of
          // repeated siblings: close it before closing the enclosing map
          if(array_started)
            {
              mp_cborWriter->endArray();
            }
        }
      else
        {
          m_qxmlStreamReader.raiseError(QObject::tr("Not an mzML input file"));
          m_qxmlStreamReader.skipCurrentElement();
        }
    }
  mp_cborWriter->endMap();
}


void
pappso::cbor::mzcbor::MzmlConvert::getCurrentSpectrumSize()
{
  // defaultArrayLength	1552

  m_currentSpectrumSize = m_qxmlStreamReader.attributes().value("defaultArrayLength").toULongLong();
}


void
pappso::cbor::mzcbor::MzmlConvert::readBinaryDataArrayList()
{
  // m_qxmlStreamReader.skipCurrentElement();
  /*
   *
          <binaryDataArray encodedLength="9092">
            <cvParam cvRef="MS" accession="MS:1000515" value="" name="intensity array"
   unitAccession="MS:1000131" unitName="number of counts" unitCvRef="MS" /> <cvParam cvRef="MS"
   accession="MS:1000523" value="" name="64-bit float" /> <cvParam cvRef="MS" accession="MS:1000574"
   value="" name="zlib compression" /> <binary>*/
  std::size_t count = m_qxmlStreamReader.attributes().value("count").toULongLong();

  mp_cborWriter->append("binaryDataArray");
  mp_cborWriter->startArray(count);
  while(m_qxmlStreamReader.readNext() && !m_qxmlStreamReader.isEndElement())
    {
      if(m_qxmlStreamReader.isStartElement())
        {
          BinaryDataArray binary_data_array;
          binary_data_array.fromMzml(m_qxmlStreamReader);
          binary_data_array.toCbor(*mp_cborWriter);
          // writeZlibDataArray();
        }
    }
  mp_cborWriter->endArray();
}


/**
 * \brief Recursively convert the current XML element and its content to CBOR.
 *
 * Must be called while the reader is on a start element. The element becomes
 * a CBOR map holding its attributes, an optional "@text@" entry for non blank
 * character data, and one entry per child element. Elements listed in
 * m_elementToStoreInArray are grouped: the first one of a run of consecutive
 * siblings opens an array named after the element, and the array is closed
 * when a sibling with another name is met (or by the caller after the last
 * child). A "binaryDataArrayList" element is delegated to
 * readBinaryDataArrayList().
 *
 * \param last_element_in name of the previous sibling element (empty for the
 * first child); read only here, the caller updates it after the call
 * \param array_started_in in/out flag telling whether an array of siblings is
 * currently open in the parent map
 */
void
pappso::cbor::mzcbor::MzmlConvert::insideElement(QString &last_element_in, bool &array_started_in)
{
  // keep a local copy: the stash may grow during the recursive calls below
  const QString current_element = m_qxmlStreamReader.name().toString();
  m_elementStash.push_back(current_element);


  if(current_element == "spectrum")
    {
      getCurrentSpectrumSize();
    }


  // stop an array ?
  // This has to happen for every kind of element, binaryDataArrayList
  // included: otherwise the "binaryDataArray" entry would be written inside
  // the array still open for the previous siblings (e.g. cvParam).
  if(array_started_in && (last_element_in != current_element))
    {
      mp_cborWriter->endArray();
      array_started_in = false;
    }


  if(current_element == "binaryDataArrayList")
    {
      readBinaryDataArrayList();
    }
  else
    {
      qDebug() << "current element=" << current_element;
      qDebug() << "last_element=" << last_element_in;

      // start an array ?
      if((!array_started_in) && m_elementToStoreInArray.contains(current_element) &&
         (last_element_in != current_element))
        {
          mp_cborWriter->append(current_element);
          mp_cborWriter->startArray();
          array_started_in = true;
        }


      // outside of an array, the map needs its own key
      if(!array_started_in)
        mp_cborWriter->append(current_element);


      mp_cborWriter->startMap();
      attributeListToCbor(m_qxmlStreamReader.attributes());

      // state of the sibling grouping for the children of this element
      bool array_started = false;
      QString last_element;

      // atEnd() becomes true once the reader has failed (truncated or
      // malformed input). Testing only the token returned by readNext() would
      // loop forever in that case.
      while(!m_qxmlStreamReader.atEnd())
        {
          m_qxmlStreamReader.readNext();
          if(m_qxmlStreamReader.isEndElement())
            {
              // end of the current element
              break;
            }

          if(m_qxmlStreamReader.isCharacters())
            {
              // text node: surrounding white space is dropped and blank
              // content (indentation, line breaks) is not written
              const QStringView content = m_qxmlStreamReader.text().trimmed();
              if(!content.isEmpty())
                {
                  qDebug() << "text isCharacters" << content.mid(0, 10);
                  mp_cborWriter->append("@text@");
                  mp_cborWriter->append(content);
                }
            }
          else if(m_qxmlStreamReader.isStartElement())
            {
              // remember the child name before the recursion moves the reader
              const QString child_element = m_qxmlStreamReader.name().toString();
              insideElement(last_element, array_started);
              last_element = child_element;
            }
        }

      // close the array left open by the last run of repeated children
      if(array_started)
        {
          mp_cborWriter->endArray();
        }

      mp_cborWriter->endMap();
    }
  m_elementStash.pop_back();
}


void
pappso::cbor::mzcbor::MzmlConvert::attributeValueToCbor(const QStringView &value_str)
{
  bool ok(false);
  double d = value_str.toDouble(&ok);
  if(ok)
    {
      if(value_str.contains('.'))
        {
          mp_cborWriter->append(d);
        }
      else
        {
          qint64 bigint = value_str.toLongLong(&ok);
          if(ok)
            {
              mp_cborWriter->append(bigint);
            }
        }
    }
  else
    {
      mp_cborWriter->append(value_str);
    }
}


void
pappso::cbor::mzcbor::MzmlConvert::attributeListToCbor(const QXmlStreamAttributes &xml_attributes)
{
  for(auto &xml_attribute : xml_attributes)
    {
      // qDebug() << xml_attribute.name() << " " << xml_attribute.value();
      mp_cborWriter->append(xml_attribute.name());
      attributeValueToCbor(xml_attribute.value());
    }
}
