import pandas as pd
import pyhmmer
from functools import reduce
import os
import sys

def load_blast(blastout, hasHeaders=False):
    """Read a tab-separated BLAST result table into a pandas DataFrame.

    Args:
        blastout: Path or file-like object holding the tab-delimited table.
        hasHeaders: If true, the first row of the input provides the column
            names. Otherwise every row is treated as data and the twelve
            column names listed below (``qseqid`` ... ``bitscore``) are
            assigned.

    Returns:
        The parsed table as a ``pandas.DataFrame``.
    """
    if hasHeaders:
        return pd.read_csv(blastout, sep="\t")

    column_names = [
        "qseqid",
        "sseqid",
        "pident",
        "length",
        "mismatch",
        "gapopen",
        "qstart",
        "qend",
        "sstart",
        "send",
        "evalue",
        "bitscore",
    ]
    return pd.read_csv(blastout, sep="\t", names=column_names)

def load_hmm(hmmFile):
    """Return the first profile HMM stored in ``hmmFile``.

    Args:
        hmmFile: Path of the HMM file, passed to ``pyhmmer.plan7.HMMFile``.

    Returns:
        The first HMM yielded by the file; any further profiles are ignored.
    """
    with pyhmmer.plan7.HMMFile(hmmFile) as hmm_reader:
        # Only the leading profile is wanted; the reader is closed on exit.
        return next(hmm_reader)

def build_hmm(alnFile):
    """Build a profile HMM from the first alignment in ``alnFile``.

    The alignment is read in digital mode with the amino-acid alphabet and
    handed to a ``pyhmmer.plan7.Builder`` together with a matching
    ``Background`` model.

    Args:
        alnFile: Path of the multiple sequence alignment file, passed to
            ``pyhmmer.easel.MSAFile``.

    Returns:
        The HMM produced by ``Builder.build_msa``; the other two values that
        call returns are discarded.
    """
    abc = pyhmmer.easel.Alphabet.amino()

    with pyhmmer.easel.MSAFile(alnFile) as msa_file:
        msa_file.set_digital(abc)
        # Only the first alignment in the file is used.
        msa = next(msa_file)

    # MSA must have a name, otherwise building will fail
    if msa.name is None:
        msa.name = b"alignment"

    # A single builder is enough; the original created one before reading the
    # alignment and then replaced it with an identical one here.
    builder = pyhmmer.plan7.Builder(abc)
    background = pyhmmer.plan7.Background(abc)
    hmm, _, _ = builder.build_msa(msa, background)

    return hmm

def run_hmmsearch(queryFile, hmmFile):
    """Search the sequences in ``queryFile`` with the HMM stored in ``hmmFile``.

    Args:
        queryFile: Path of the sequence file, passed to
            ``pyhmmer.easel.SequenceFile``.
        hmmFile: Path of the HMM file, loaded through ``load_hmm``.

    Returns:
        A ``(hits, name)`` tuple: the result of ``Pipeline.search_hmm`` and
        the HMM's name decoded from bytes to ``str``.
    """
    profile = load_hmm(hmmFile)
    alphabet = profile.alphabet

    # Digitize every query sequence with the profile's own alphabet.
    with pyhmmer.easel.SequenceFile(queryFile) as query_reader:
        digital_seqs = [record.digitize(alphabet) for record in query_reader]

    search_pipeline = pyhmmer.plan7.Pipeline(alphabet)
    top_hits = search_pipeline.search_hmm(profile, digital_seqs)

    return top_hits, profile.name.decode()

def hmmsearch(queryFile, hmm):
    """Search the sequences in ``queryFile`` with an already-loaded HMM.

    Args:
        queryFile: Path of the sequence file, passed to
            ``pyhmmer.easel.SequenceFile``.
        hmm: Profile HMM object; its ``alphabet`` is used both to digitize
            the sequences and to configure the search pipeline.

    Returns:
        The result of ``Pipeline.search_hmm`` for ``hmm`` against the
        digitized sequences.
    """
    alphabet = hmm.alphabet

    with pyhmmer.easel.SequenceFile(queryFile) as query_reader:
        digital_seqs = [record.digitize(alphabet) for record in query_reader]

    search_pipeline = pyhmmer.plan7.Pipeline(alphabet)
    return search_pipeline.search_hmm(hmm, digital_seqs)

# Collect the values of the {sample} wildcard for every path that matches
# the "{sample}.fna" pattern.
(SAMPLES,) = glob_wildcards("{sample}.fna")

# Thread budget: 75% of the cores given to the workflow, but never below 1.
# NOTE(review): presumably consumed by the included rule files - confirm.
threads_max = max(workflow.cores * 0.75, 1)

# Load the workflow settings (read below through the `config` dict) from the
# config.yaml located via srcdir().
# Disabled debugging aid that aborted the run and printed the resolved path:
# sys.exit("HE:P!!!!!!!!" + srcdir('config.yaml'))
configfile: srcdir('config.yaml')

# Target rule: it has inputs only, so requesting it asks Snakemake to produce
# the two best-hit tables under the configured output directory.
# NOTE(review): it is the first rule visible in this file, so presumably the
# default target - confirm.
rule final:
	input:
		config["outdir"] + "/03_best_hits/best_immunity_protein_candidates.csv",
		config["outdir"] + "/03_best_hits/best_hit_contigs.csv"
		# Alternative targets, currently disabled:
		# "cinfulOut/03_best_hits/best_hits.csv"
		# expand("cinfulOut/02_homology_results/{sample}.best_hits.csv", sample = SAMPLES)
		# expand("cinfulOut/02_homology_results/{sample}.all_merged.csv", sample = SAMPLES)




# Pull in the rule files from the rules/ directory.
# NOTE(review): the order may matter if later files refer to rules or names
# defined by earlier ones - confirm before reordering.
include: "rules/setup_dbs.smk"
include: "rules/filter_input.smk"
include: "rules/prodigal.smk"
include: "rules/microcin.smk"
include: "rules/immunity_protein.smk"
include: "rules/CvaB.smk"
include: "rules/duomolog_components.smk"
include: "rules/best_hits.smk"
