#import "template.typ": *
#import "@preview/fletcher:0.5.8" as fletcher: diagram, node, edge
#import fletcher.shapes: diamond, brace, cylinder

// Wrap the whole document in the `conf` template (presumably provided by the
// `template.typ` import). `conf.with(...)` pre-applies the metadata and the
// first positional argument; the show rule then supplies the document body as
// the next positional argument, exactly like `doc => conf(..., doc)`.
#show: conf.with(
  // One-element array of author dictionaries; the trailing comma is what
  // makes it an array.
  authors: ((name: "Olivier Langella"),),
  toc: true,
  lang: "en",
  font: "Roboto",
  date: "11/07/2025",
  // NOTE(review): `(2025)` is a parenthesized integer, not a one-element
  // array (that would be `(2025,)`) — confirm which form `conf` expects.
  years: (2025),
  version: "0.1",
  // Positional argument preceding the body — presumably the document title.
  "PeptidOMS documentation",
)

// Display names of the tools, bound once so the text can refer to them as
// `#peptidoms`, `#protein-matcher`, `#specglobtool` and `#specoms`.
#let (peptidoms, protein-matcher, specglobtool, specoms) = (
  "PeptidOMS",
  "ProteinMatcher",
  "SpecGlobTool",
  "SpecOMS",
)

= Introduction

#peptidoms aligns MS2 fragmentation spectra directly to protein sequences. 
The algorithm is described in @benoist_specpeptidoms_2025.

= PSM CBOR file workflow

// Workflow diagram: the input files feed `peptidoms`, whose PSM CBOR output is
// consumed by `psmfeatures` (then `ODS export`) and by `deeplc mumble`.
// NOTE(review): this set rule sits at the top level, so it also applies to all
// text after the diagram, not only to the diagram labels — confirm intended.
#set text(10pt)
#diagram(
	// Defaults for every node; the file nodes override the gradient with `fill: none`.
	node-stroke: .1em,
	node-fill: gradient.radial(blue.lighten(80%), blue, center: (30%, 20%), radius: 80%),
	spacing: 4em,
	// Input files, drawn as unfilled diamonds to the left of the tool.
	node((-1,0.5), `fasta file`, shape: diamond, fill: none, name: <fastafile>),
	node((-1,-0.5), `mzML file`, shape: diamond, fill: none, name: <mzmlfile>),
	// Main chain along y = 0. The edges below give no endpoints, so each one
	// links the node listed before it to the node listed after it: the order of
	// these arguments is significant.
	node((0,0), `peptidoms`, radius: 3em, name: <peptidoms>),
	edge(``, "-|>"),
	node((1,0), `PSM cbor file`, shape: diamond, fill: none, name: <peptidomscbor>),
	edge(``, "-|>"),
	node((2,0), `psmfeatures`, radius: 3em),
	edge(``, "-|>"),
	node((3,0), `ODS export`, radius: 2em),
	// Second consumer of the PSM CBOR file, placed below the main chain and
	// connected with an explicit edge.
	node((2,1), `deeplc mumble`, radius: 2em, name: <deeplc>),
	edge(<peptidomscbor>, <deeplc>, ``, "-|>"),
	// Both input files point at the west anchor of `peptidoms`, bent in
	// opposite directions.
	edge(<fastafile>,<peptidoms.west>, "-|>", bend: 20deg),
	edge(<mzmlfile>,<peptidoms.west>, "-|>", bend: -20deg),
)


= Command line interface

#peptidoms takes a collection of MS/MS spectra (typically an mzML file) and a protein sequence database (a FASTA file) as input, and produces a PSM CBOR file as output.

#code-block("peptidoms -p params.json -m mzdata.mzml -f protein_sequences.fasta -o results.cbor", "bash")

To get human-readable results, use `peptidoms-export` to produce JSON or ODS files:

#code-block("peptidoms-export --ods results.ods -i results.cbor", "bash")

#code-block("peptidoms-export --json results.json -i results.cbor", "bash")




= JSON parameter file

#peptidoms parameters are specified in a JSON file:

#let code_psm = read("json/peptidoms_parameters.json")
#code-block(code_psm, "json", title: "JSON parameter file")


= CBOR peptidoms eval PSM structure

#peptidoms writes results in a PSM CBOR file.
For each PSM, #peptidoms uses the "eval" section to store its scores:

// Load the JSON sample of the PSM "eval" section and render it as a titled
// listing. Fix: the title previously misspelled the tool name as "peptidosm".
#let code_psm = read("json/peptidoms_psm_eval.json")
#code-block(code_psm, "json", title: "peptidoms PSM eval structure")



= Tests for #peptidoms


#code-block("./src/peptidoms -p ../doc/typst/json/peptidoms_parameters.json -f /gorgone/pappso/moulon/database/Genome_Z_mays_5a.fasta -m /gorgone/pappso/data_extraction_pappso/mzML/20120906_balliau_extract_1_A01_urnb-1.mzML -o /tmp/test.cbor", "bash")


== Smaller dataset


#code-block("./src/peptidoms -p ../doc/typst/json/peptidoms_parameters.json -f ../tests/data/fasta/small_zea_mays.fasta -m ../tests/data/peaklists/peaklist_15046.mgf -o /tmp/test.cbor", "bash")



#code-block("./src/peptidoms -p ../tests/data/parameters/DATNVGDEGGFAPNIIENK_parameters.json -f ../tests/data/fasta/databankThreeProteinsHuman.fasta -m ../tests/data/peaklists/DATNVGDEGGFAPNIIENK.mgf -o DATNVGDEGGFAPNIIENK.cbor -c 10", "bash")



= Tests for #protein-matcher


#code-block("./src/proteinmatcher -p ../doc/typst/json/proteinmatcher_parameters.json -f /gorgone/pappso/moulon/database/Genome_Z_mays_5a.fasta -m /gorgone/pappso/data_extraction_pappso/mzML/20120906_balliau_extract_1_A01_urnb-1.mzML -o /tmp/test.cbor", "bash")


Good match: `"index": 2605`, `"native_id": "controllerType=0 controllerNumber=1 scan=2606"`, `"proforma": "MESGTGNGDSEVQR"`.

`TPVDNALR` on index 4503 should be a good match.

`QQVM[MOD:00425]VGYSDSGK` on index 4291:

```json
    "accession": "GRMZM2G326111_P01",
    "positions": [
"eval":{
    "matcher": {
        "score": 107855
    },
    "peptidoms": {
        "bracket": "[G][N]GTGGESIYGEK",
        "nam": 0,
        "score": 69,
        "spc": 11
    }
}
```

== Smaller dataset


#code-block("./src/proteinmatcher -p ../doc/typst/json/proteinmatcher_parameters.json -f ../tests/data/fasta/small_zea_mays.fasta -m ../tests/data/peaklists/peaklist_15046.mgf -o /tmp/test.cbor", "bash")



#bibliography(style:"frontiers", "bibliography.bib")

