import pandas as pd
import pyhmmer
from functools import reduce
import os
import sys


# Thread budget for the workflow: 75% of the cores reported by `workflow.cores`.
# NOTE(review): the multiplication by 0.75 yields a float (e.g. 8 cores -> 6.0);
# confirm that every consumer of `threads_max` accepts a non-integer value.
threads_max = workflow.cores * 0.75


def load_blast(blastout, hasHeaders=False):
	"""Read a tab-separated BLAST result table into a pandas DataFrame.

	Args:
		blastout: Path or file-like object passed straight to ``pandas.read_csv``.
		hasHeaders: If true, the first row of the input supplies the column
			names. If false (default), every row is treated as data and the
			twelve column names listed below are assigned.

	Returns:
		The parsed table as a ``pandas.DataFrame``.
	"""
	read_options = {"sep": "\t"}
	if not hasHeaders:
		# Headerless input: label the columns explicitly.
		read_options["names"] = [
			"qseqid", "sseqid", "pident", "length", "mismatch", "gapopen",
			"qstart", "qend", "sstart", "send", "evalue", "bitscore",
		]
	return pd.read_csv(blastout, **read_options)

def load_hmm(hmmFile):
    """Return the first profile HMM stored in ``hmmFile``.

    The file is opened with ``pyhmmer.plan7.HMMFile`` and only the first
    item it yields is consumed; anything after it is ignored. Because the
    item is fetched with ``next``, an input that yields nothing propagates
    ``StopIteration`` to the caller.
    """
    with pyhmmer.plan7.HMMFile(hmmFile) as hmm_reader:
        return next(hmm_reader)

def build_hmm(alnFile):
	"""Build a profile HMM from the first alignment in ``alnFile``.

	The alignment is read in digital mode with the amino-acid alphabet, and
	the HMM is built against a ``pyhmmer.plan7.Background`` created from
	that same alphabet.

	Args:
		alnFile: Multiple sequence alignment file, opened with
			``pyhmmer.easel.MSAFile``.

	Returns:
		The HMM, i.e. the first element of the tuple returned by
		``Builder.build_msa``; the other two elements are discarded.

	Raises:
		StopIteration: propagated from ``next`` if the file yields no
			alignment.
	"""
	abc = pyhmmer.easel.Alphabet.amino()

	with pyhmmer.easel.MSAFile(alnFile) as msa_file:
		msa_file.set_digital(abc)
		# Only the first alignment in the file is used.
		msa = next(msa_file)

	# MSA must have a name, otherwise building will fail
	if msa.name is None:
		msa.name = b"alignment"

	# A single Builder is enough; the original created a second, unused one.
	builder = pyhmmer.plan7.Builder(abc)
	background = pyhmmer.plan7.Background(abc)
	hmm, _, _ = builder.build_msa(msa, background)

	return hmm

def run_hmmsearch(queryFile, hmmFile):
	"""Search the sequences in ``queryFile`` with the HMM stored in ``hmmFile``.

	Args:
		queryFile: Sequence file to search, forwarded to ``hmmsearch``.
		hmmFile: HMM file; its first profile is loaded with ``load_hmm``.

	Returns:
		A ``(hits, name)`` tuple: the result of the search and the HMM's
		name decoded from bytes to ``str``.
		NOTE(review): ``hmm.name.decode()`` assumes the loaded HMM has a
		name; an HMM whose name is ``None`` would raise ``AttributeError``.
	"""
	hmm = load_hmm(hmmFile)
	# Delegate to hmmsearch() so the digitize-and-search logic lives in one
	# place instead of being duplicated here.
	hits = hmmsearch(queryFile, hmm)

	return hits, hmm.name.decode()

def hmmsearch(queryFile, hmm):
	"""Search the sequences in ``queryFile`` with an already-loaded HMM.

	Every sequence in the file is digitized with the HMM's alphabet and
	collected into a list, then searched with a ``pyhmmer.plan7.Pipeline``
	built on the same alphabet.

	Args:
		queryFile: Sequence file, opened with ``pyhmmer.easel.SequenceFile``.
		hmm: HMM object providing ``alphabet``; passed to ``search_hmm``.

	Returns:
		Whatever ``Pipeline.search_hmm`` returns for this HMM and the
		digitized sequences.
	"""
	alphabet = hmm.alphabet

	digital_seqs = []
	with pyhmmer.easel.SequenceFile(queryFile) as handle:
		for record in handle:
			digital_seqs.append(record.digitize(alphabet))

	searcher = pyhmmer.plan7.Pipeline(alphabet)
	return searcher.search_hmm(hmm, digital_seqs)

# Load the workflow configuration from config.yaml, located via srcdir().
# NOTE(review): srcdir() presumably resolves the path relative to this
# Snakefile's directory — confirm against the Snakemake version in use.
configfile: srcdir('config.yaml')

# Collect the values of the {sample} wildcard from all paths matching
# "{sample}.fna". The trailing comma unpacks the single wildcard list from
# the object returned by glob_wildcards().
# NOTE(review): the pattern has no directory prefix, so matching is
# presumably relative to the working directory — confirm.
SAMPLES, = glob_wildcards("{sample}.fna")

# Target rule: its inputs are the three final CSV tables written under
# <outdir>/03_best_hits, where `outdir` comes from the loaded config.
# It is the first rule defined in this file, ahead of the includes below.
# NOTE(review): presumably this makes it the default target — confirm.
rule final:
	input:
		config["outdir"] + "/03_best_hits/best_immunity_protein_candidates.csv",
		config["outdir"] + "/03_best_hits/best_MFP_candidates.csv",
		config["outdir"] + "/03_best_hits/best_hit_contigs.csv"

# Rule modules pulled in from the rules/ directory. They are included after
# the helper functions, config and `rule final` above.
# NOTE(review): presumably these files define the rules that produce the
# inputs of `rule final` and rely on the helpers defined above — confirm
# before reordering.
include: "rules/setup_dbs.smk"
include: "rules/filter_input.smk"
include: "rules/prodigal.smk"
include: "rules/microcin.smk"
include: "rules/immunity_protein.smk"
include: "rules/CvaB.smk"
include: "rules/MFP.smk"
include: "rules/duomolog_components.smk"
include: "rules/best_hits.smk"
