# Search peptides against a FASTA file digested with pt-fastadigestor and loaded into an in-memory SQLite table.
import logging
import os
import re
import sqlite3
import subprocess
import tempfile

# Root-logger configuration.
# logging.basicConfig() does nothing once the root logger has a handler, so
# the format has to be supplied on the first (and only) call; passing it in a
# later call would be silently ignored.
FORMAT = "[%(filename)s:%(lineno)s - %(funcName)20s() ] %(message)s"
logging.basicConfig(format=FORMAT, level=logging.NOTSET)
# Kept explicit: when the application has already installed handlers,
# basicConfig() above is a no-op and would not lower the root level itself.
logging.root.setLevel(logging.NOTSET)

# Module logger; only INFO and above are emitted from this module.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)


class fastaDigested:
    """Peptide lookup table built from a FASTA file digested by ``pt-fastadigestor``.

    Peptides are stored in an in-memory SQLite table named ``fastaDigested``
    with four text columns: ``peptide``, ``peptideJ`` (the peptide with every
    ``L`` and ``I`` replaced by ``J``), ``accession`` and ``description``.
    """

    # Matches the residues that are collapsed to "J" in the peptideJ column.
    _IL_PATTERN = re.compile(r"[LI]")
    _INSERT = '''INSERT INTO fastaDigested(peptide, peptideJ, accession, description) VALUES (?,?,?,?)'''

    def __init__(self):
        """Open the in-memory database and create the empty peptide table."""
        # check_same_thread=False allows the connection to be used from
        # threads other than the creating one; no locking is added here.
        self.db = sqlite3.connect(':memory:', check_same_thread=False)
        c = self.db.cursor()
        try:
            c.execute('''CREATE TABLE fastaDigested (peptide text, peptideJ text, accession text, description text)''')
            self.db.commit()
        finally:
            c.close()

    def populateDatabase(self, fastafile, min_pep_length, max_pep_length, cut):
        """Digest ``fastafile`` with ``pt-fastadigestor`` and load the peptides.

        Runs the equivalent of::

            pt-fastadigestor --min MIN --max MAX -i FASTA -f OUTPUT -e CUT

        then parses OUTPUT and inserts one row per peptide.

        Args:
            fastafile: Path of the FASTA file to digest (passed as ``-i``).
            min_pep_length: Value passed as ``--min``.
            max_pep_length: Value passed as ``--max``.
            cut: Value passed as ``-e`` (presumably the cleavage rule or
                enzyme; confirm against the pt-fastadigestor documentation).

        Returns:
            0 on success, -1 when ``pt-fastadigestor`` exits with a non-zero
            status (its stderr is logged).

        Raises:
            ValueError: If the digested file has peptides before any header.
        """
        # Only the path is needed: pt-fastadigestor writes the file itself,
        # so our own handle is closed right away instead of being leaked.
        with tempfile.NamedTemporaryFile(delete=False, mode='w+t') as tempoutput:
            digested_path = tempoutput.name
        try:
            subresult = subprocess.run(
                ['pt-fastadigestor',
                 "--min", str(min_pep_length),
                 "--max", str(max_pep_length),
                 "-i", fastafile,
                 "-f", digested_path,
                 "-e", cut],
                capture_output=True, text=True)
            if subresult.returncode != 0:
                logger.error(subresult.stderr)
                return -1
            logger.info(subresult.stdout)
            with open(digested_path, "r") as digestedFasta:
                distinct_count = self._loadDigestedFasta(digestedFasta)
        finally:
            # The file was created with delete=False, so it must be removed
            # explicitly on every exit path.
            try:
                os.remove(digested_path)
            except FileNotFoundError:
                pass
        logger.info(distinct_count)
        return 0

    def _loadDigestedFasta(self, lines):
        """Insert every peptide from digested-FASTA ``lines``; return the distinct peptide count.

        Lines starting with ``>`` are headers; every other line is read as
        space-separated peptides belonging to the most recent header.
        """
        accession = None
        description = None
        distinct = set()
        c = self.db.cursor()
        try:
            # One transaction for the whole file: committed on success,
            # rolled back if a line cannot be processed.
            with self.db:
                for raw in lines:
                    if raw.startswith('>'):
                        # "|" is replaced by "__" in both stored forms.
                        description = raw.strip()[1:].replace("|", "__")
                        accession = description.split(" ")[0]
                        continue
                    # Drop empty tokens so blank lines and doubled spaces
                    # do not create empty-peptide rows.
                    peptides = [pep for pep in raw.strip().split(" ") if pep]
                    if not peptides:
                        continue
                    if accession is None:
                        raise ValueError(
                            "digested FASTA contains peptides before the first '>' header line")
                    c.executemany(
                        self._INSERT,
                        [(pep, self._IL_PATTERN.sub('J', pep), accession, description)
                         for pep in peptides])
                    distinct.update(peptides)
        finally:
            c.close()
        return len(distinct)

    def getAccession(self, peptides):
        """Return the rows whose ``peptideJ`` value is one of ``peptides``.

        The comparison is made against the ``peptideJ`` column, so the given
        peptides must already have ``L``/``I`` written as ``J`` to match.

        Args:
            peptides: Iterable of peptide strings to look up.

        Returns:
            A list of ``(peptide, peptideJ, accession, description)`` tuples
            ordered by ``peptideJ``; empty when nothing matches or when
            ``peptides`` is empty. Each row is also logged.
        """
        peptides = list(peptides)
        if not peptides:
            return []
        placeholders = ','.join(['?'] * len(peptides))
        query = f"SELECT * FROM fastaDigested WHERE peptideJ in ({placeholders}) ORDER BY peptideJ"
        logger.info(query)
        c = self.db.cursor()
        try:
            c.execute(query, peptides)
            rows = c.fetchall()
        finally:
            c.close()
        for row in rows:
            logger.info(row)
        return rows
