/**
 * \file pappsomspp/processing/specpeptidoms/semiglobalalignment.h
 * \date 24/03/2025
 * \author Aurélien Berthier
 * \brief protein to spectrum alignment
 *
 * C++ implementation of the SpecPeptidOMS algorithm described in :
 * (1) Benoist, É.; Jean, G.; Rogniaux, H.; Fertin, G.; Tessier, D. SpecPeptidOMS Directly and
 * Rapidly Aligns Mass Spectra on Whole Proteomes and Identifies Peptides That Are Not Necessarily
 * Tryptic: Implications for Peptidomics. J. Proteome Res. 2025.
 * https://doi.org/10.1021/acs.jproteome.4c00870.
 */

/*
 * Copyright (c) 2025 Aurélien Berthier
 * <aurelien.berthier@ls2n.fr>
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#pragma once

#include <boost/numeric/ublas/matrix.hpp>
#include "spomsspectrum.h"
#include "../../protein/protein.h"
#include "scorevalues.h"
#include "locationsaver.h"
#include "scenario.h"

namespace pappso
{
namespace specpeptidoms
{

/**
 * @brief Plain record describing one cell of interest tracked during the alignment.
 *
 * Every field has a default member initializer so that a default-initialized
 * KeyCell (e.g. `KeyCell c;`) never carries indeterminate values. The defaults
 * are the same values that `KeyCell{}` yields, and the struct remains an
 * aggregate, so `KeyCell{row, score, beginning, tree_id}` keeps working.
 */
struct KeyCell
{
  /// Row number (presumably a row index of the alignment matrix -- confirm in the implementation).
  std::size_t n_row = 0;
  /// Score attached to the cell.
  int score = 0;
  /// Beginning position (presumably where the alignment leading to this cell starts -- confirm).
  std::size_t beginning = 0;
  /// Tree identifier (semantics are defined by the implementation file -- confirm).
  int tree_id = 0;
};

/**
 * @brief Plain record holding the outcome of one spectrum/sequence alignment.
 *
 * Scalar fields are zero-initialized by default so that a default-constructed
 * Alignment never exposes indeterminate values. Member order is unchanged.
 */
struct Alignment
{
  /// Peak indices (presumably the spectrum peaks used by the alignment -- confirm).
  std::vector<std::size_t> peaks;
  /// Textual interpretation of the alignment.
  QString interpretation;
  /// Alignment score.
  int score = 0;
  /// Shift at the beginning of the spectrum, i.e. at the end of the peptide
  /// sequence (in the N->C reading direction).
  double begin_shift = 0.0;
  /// Shift at the end of the spectrum, i.e. at the beginning of the peptide
  /// sequence (in the N->C reading direction).
  double end_shift = 0.0;
  /// Shift values recorded for the alignment.
  std::vector<double> shifts;
  /// NOTE(review): presumably the shared peak count -- confirm against the implementation.
  std::size_t SPC = 0;
  /// Beginning position of the alignment (presumably an index in the protein sequence -- confirm).
  std::size_t beginning = 0;
  /// End position of the alignment (presumably an index in the protein sequence -- confirm).
  std::size_t end = 0;
};

class SemiGlobalAlignment
{
  public:
  /**
   * Default constructor
   */
  SemiGlobalAlignment(ScoreValues &score_values,
                      const pappso::PrecisionPtr precision_ptr,
                      AaCode &aaCode);

  /**
   * Destructor
   */
  ~SemiGlobalAlignment();

  /**
   * @brief perform the first alignment search between a protein sequence and a spectrum. The member
   * location heap is filled with the candidates locations.
   * @param protein Protein to align
   * @param spectrum Spectrum to align
   */
  void fastAlign(const SpOMSSpectrumCsp &spectrum,
                 const QString &protein_seq,
                 const QString &protein_id);

  /**
   * @brief performs the second alignment search between a protein subsequence and a spectrum.
   * IMPLEMENTATION MATRICE DES ORIGINES => ARBRE ?
   */
  void preciseAlign(const SpOMSSpectrumCsp &spectrum,
                    const QString &protein_seq,
                    const QString &protein_id,
                    const std::size_t beginning,
                    const std::size_t length);

  /**
   * @brief performs the post-processing : generates corrected spectra and align them
   * @param shifts List of potential precursor mass errors to test
   */
  void postProcessingAlign(const SpOMSSpectrumCsp &spectrum,
                           const QString &protein_seq,
                           const QString &protein_id,
                           std::size_t beginning,
                           std::size_t length,
                           const std::vector<double> &shifts);

  LocationSaver getLocationSaver() const;
  Scenario getScenario() const;
  const Alignment &getBestAlignment(const SpOMSSpectrumCsp &spectrum) const;

  static std::vector<double> getPotentialMassErrors(const Alignment &alignment,
                                                    const QString &protein_seq);

  private:
  std::vector<KeyCell> m_interest_cells;
  std::vector<std::pair<std::size_t, KeyCell>> m_updated_cells;
  ScoreValues &m_scorevalues;
  const int min_score = 15;
  pappso::PrecisionPtr m_precision_ptr;
  AaCode &m_aaCode;
  LocationSaver m_location_saver;
  Scenario m_scenario;
  Alignment m_best_alignment, m_best_corrected_alignment, m_best_post_processed_alignment;

  /**
   * @brief Stores the best alignment from m_scenario in m_best_alignment
   */
  void
  saveBestAlignment(const QString sequence, const SpOMSSpectrumCsp &spectrum, std::size_t offset);

  void correctAlign(const QString &protein_seq,
                    const QString &protein_id,
                    const SpOMSSpectrumCsp &spectrum,
                    std::vector<std::size_t> peaks_to_remove,
                    std::size_t offset);

  /**
   * @brief updates the scores of the alignment matrix for a given amino acid as well as the
   * location heap/scenario.
   * @param sequence Reversed sequence of the protein being aligned
   * @param row_number number of the row to update (== index in sequence of the amino acid being
   * aligned)
   * @param aa_positions list of the AaPositions of the current amino acid
   * @param spectrum Spectrum being aligned
   * @param fast_align Whether to use the fast version of the algorithm (for 1st alignemnt step)
   */
  void updateAlignmentMatrix(const QString &sequence,
                             const std::size_t row_number,
                             const std::vector<AaPosition> aa_positions,
                             const SpOMSSpectrumCsp &spectrum,
                             const bool fast_align,
                             const QString &protein);

  /**
   * @brief indicates if a perfect shift is possible between the provided positions
   * @param sequence Reversed sequence of the protein being aligned
   * @param spectrum Spectrum being aligned
   * @param origin_row beginning row of the aa gap to verify (== index of the first missing aa in
   * sequence)
   * @param current_row row being processed (== index of the current AaPosition in sequence)
   * @param l_peak left peak index of the mz gap to verify
   * @param r_peak right peak index of the mz gap to verify
   */
  bool perfectShiftPossible(const QString &sequence,
                            const SpOMSSpectrumCsp &spectrum,
                            const std::size_t origin_row,
                            const std::size_t current_row,
                            const std::size_t l_peak,
                            const std::size_t r_peak) const;

  std::size_t perfectShiftPossibleFrom0(const QString &sequence,
                                        const SpOMSSpectrumCsp &spectrum,
                                        const std::size_t current_row,
                                        const std::size_t r_peak) const;

  std::size_t perfectShiftPossibleEnd(const QString &sequence,
                                      const SpOMSSpectrumCsp &spectrum,
                                      std::size_t end_row,
                                      std::size_t end_peak) const;
};
} // namespace specpeptidoms
} // namespace pappso
