/**
 * \file pappsomspp/protein/protein.cpp
 * \date 2/7/2015
 * \author Olivier Langella
 * \brief object to handle a protein
 */

/*******************************************************************************
 * Copyright (c) 2015 Olivier Langella <Olivier.Langella@moulon.inra.fr>.
 *
 * This file is part of the PAPPSOms++ library.
 *
 *     PAPPSOms++ is free software: you can redistribute it and/or modify
 *     it under the terms of the GNU General Public License as published by
 *     the Free Software Foundation, either version 3 of the License, or
 *     (at your option) any later version.
 *
 *     PAPPSOms++ is distributed in the hope that it will be useful,
 *     but WITHOUT ANY WARRANTY; without even the implied warranty of
 *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *     GNU General Public License for more details.
 *
 *     You should have received a copy of the GNU General Public License
 *     along with PAPPSOms++.  If not, see <http://www.gnu.org/licenses/>.
 *
 * Contributors:
 *     Olivier Langella <Olivier.Langella@moulon.inra.fr> - initial API and implementation
 ******************************************************************************/

#include "protein.h"
#include <QStringList>
#include <algorithm>

namespace pappso {

/** \brief pattern matching a single '*' (translation stop) located at the very
 * end of a string; used by Protein::removeTranslationStop()
 */
QRegExp   Protein::_remove_translation_stop("\\*$");

/*
 * http://www.ncbi.nlm.nih.gov/BLAST/blastcgihelp.shtml
 */
//    For those programs that use amino acid query sequences (BLASTP and TBLASTN), the accepted amino acid codes are:
//
// 		A  alanine               P  proline
// 		B  aspartate/asparagine  Q  glutamine
// 		C  cysteine              R  arginine
// 		D  aspartate             S  serine
// 		E  glutamate             T  threonine
// 		F  phenylalanine         U  selenocysteine
// 		G  glycine               V  valine
// 		H  histidine             W  tryptophan
// 		I  isoleucine            Y  tyrosine
// 		K  lysine                Z  glutamate/glutamine
// 		L  leucine               X  any
// 		M  methionine            *  translation stop
// 		N  asparagine            -  gap of indeterminate length

/** \brief build an empty protein
 *
 * The string members are default constructed (empty). The cached length is
 * explicitly set to 0 so that size() returns a defined value on a default
 * constructed object instead of reading an uninitialized integer.
 */
Protein::Protein():
    _length(0)
{
}
/** \brief build a protein from a header line and its sequence
 *
 * \param description header text; after whitespace normalization
 *        (QString::simplified) its first space separated token becomes the
 *        accession and the remainder becomes the description
 * \param sequence sequence, stored as given (a trailing translation stop is
 *        NOT removed here, see removeTranslationStop())
 */
Protein::Protein(const QString & description, const QString & sequence):
    // The accession is derived from the constructor parameter rather than from
    // the _description member: members are initialized in declaration order,
    // not in the order written here, so reading _description at this point is
    // only safe if it happens to be declared first in the class.
    _description(description.simplified()), _accession(description.simplified().split(" ").at(0)),_sequence(sequence)
{
    // drop the accession token from the front, keep the rest as description
    _description = _description.remove(0,_accession.size()).simplified();
    //_sequence.replace(_remove_translation_stop, "");
    _length = _sequence.size();
}
/** \brief copy constructor
 *
 * Copies description, accession, sequence and cached length from the source
 * protein.
 *
 * \param protein the protein to copy
 */
Protein::Protein(const Protein & protein)
    : _description(protein._description)
    , _accession(protein._accession)
    , _sequence(protein._sequence)
    , _length(protein._length)
{
}

/** \brief remove a trailing translation stop ('*') from the sequence
 *
 * Only a '*' located at the very end of the sequence is removed (pattern
 * "\\*$"). The cached length is refreshed afterwards so that size() keeps
 * agreeing with getSequence().size().
 *
 * \return reference to this protein, to allow call chaining
 */
Protein & Protein::removeTranslationStop() {
    _sequence.replace(_remove_translation_stop, "");
    // the sequence may have lost one character: keep the cached length in sync
    _length = _sequence.size();
    return (*this);
}

/** \brief reverse the order of the characters of the sequence, in place
 *
 * \return reference to this protein, to allow call chaining
 */
Protein & Protein::reverse() {
    QString::iterator first = _sequence.begin();
    QString::iterator last = _sequence.end();
    std::reverse(first, last);
    return *this;
}

/** \brief create a shared pointer holding a copy of this protein
 *
 * The copy is made with the copy constructor through std::make_shared; this
 * object itself is left untouched.
 *
 * \return a ProteinSp built from the newly allocated copy
 */
ProteinSp Protein::makeProteinSp() const {
    const Protein & self = *this;
    return std::make_shared<Protein>(self);
}


/** \brief equality test based on the accession only
 *
 * Description and sequence are not taken into account.
 *
 * \param other protein to compare with
 * \return true if both proteins carry the same accession
 */
bool Protein::operator==(const Protein& other) const {
    const bool same_accession = (_accession == other._accession);
    return same_accession;
}

void Protein::setSequence(const QString & sequence) {
    _sequence = sequence.simplified();
    _length = _sequence.size();
}
/** \brief get the cached sequence length
 *
 * \return the value of the _length member
 */
unsigned int Protein::size() const {
    const unsigned int cached_length = _length;
    return cached_length;
}

/** \brief read access to the sequence
 *
 * \return const reference to the stored sequence
 */
const QString & Protein::getSequence() const {
    return this->_sequence;
}
/** \brief read access to the accession
 *
 * \return const reference to the stored accession
 */
const QString & Protein::getAccession() const {
    return this->_accession;
}
void Protein::setAccession(const QString & accession) {
    _accession = accession.simplified();
}
/** \brief read access to the description
 *
 * \return const reference to the stored description
 */
const QString & Protein::getDescription() const {
    return this->_description;
}
void Protein::setDescription(const QString & description) {
    _description = description.simplified();
}
/** \brief destructor
 *
 * Performs no explicit work; members are destroyed automatically.
 */
Protein::~Protein() {}
}
