import pandas as pd
import pyhmmer
from functools import reduce
import os
import sys

def load_blast(blastout, hasHeaders=False):
    """Read a tab-separated BLAST result table into a pandas DataFrame.

    Args:
        blastout: Path or file-like object accepted by ``pd.read_csv``.
        hasHeaders: When true, the first row of the file supplies the column
            names. When false (the default), every row is treated as data and
            the twelve column names listed below are assigned.

    Returns:
        The parsed ``pandas.DataFrame``.
    """
    if hasHeaders:
        return pd.read_csv(blastout, sep="\t")

    # Column names applied to header-less input.
    blast_columns = [
        "qseqid", "sseqid", "pident", "length", "mismatch", "gapopen",
        "qstart", "qend", "sstart", "send", "evalue", "bitscore",
    ]
    return pd.read_csv(blastout, sep="\t", names=blast_columns)

def load_hmm(hmmFile):
    """Return the first profile read from an HMM file.

    Args:
        hmmFile: Value passed straight to ``pyhmmer.plan7.HMMFile``.

    Returns:
        The first item yielded by the opened ``HMMFile``; any further
        profiles in the file are ignored.

    Raises:
        StopIteration: Propagated from ``next`` when the file yields nothing.
    """
    with pyhmmer.plan7.HMMFile(hmmFile) as profile_reader:
        # Only the first profile is wanted; the file is closed on the way out.
        return next(profile_reader)

def build_hmm(alnFile):
    """Build a profile HMM from the first alignment in an MSA file.

    The alignment is read in digital mode with the amino-acid alphabet and
    handed to a ``pyhmmer.plan7.Builder`` together with a matching
    ``Background`` model.

    Args:
        alnFile: Value passed straight to ``pyhmmer.easel.MSAFile``.

    Returns:
        The HMM, i.e. the first element of the tuple returned by
        ``Builder.build_msa``; the other two elements are discarded.

    Raises:
        StopIteration: Propagated from ``next`` when the file yields no
            alignment.
    """
    abc = pyhmmer.easel.Alphabet.amino()

    with pyhmmer.easel.MSAFile(alnFile) as msa_file:
        msa_file.set_digital(abc)
        msa = next(msa_file)

    # MSA must have a name, otherwise building will fail
    if msa.name is None:
        msa.name = b"alignment"

    # A single builder is enough; the original constructed a second,
    # never-used Builder before reading the alignment.
    builder = pyhmmer.plan7.Builder(abc)
    background = pyhmmer.plan7.Background(abc)
    hmm, _, _ = builder.build_msa(msa, background)

    return hmm

def run_hmmsearch(queryFile, hmmFile):
    """Search a sequence file with the first profile stored in an HMM file.

    Loads the profile with ``load_hmm`` and delegates the search itself to
    ``hmmsearch`` so the two entry points cannot drift apart.

    Args:
        queryFile: Sequence file to search; forwarded to ``hmmsearch``.
        hmmFile: HMM file whose first profile is used as the query model.

    Returns:
        A ``(hits, name)`` tuple: the search result from ``hmmsearch`` and
        the profile name decoded to ``str``.

    Raises:
        AttributeError: If the profile name is not a bytes-like object with
            ``decode`` (for example when the profile has no name).
    """
    hmm = load_hmm(hmmFile)
    hits = hmmsearch(queryFile, hmm)
    return hits, hmm.name.decode()

def hmmsearch(queryFile, hmm):
    """Search every sequence of a file against an already-loaded HMM.

    Args:
        queryFile: Value passed straight to ``pyhmmer.easel.SequenceFile``.
        hmm: Profile to search with; its ``alphabet`` is used both to
            digitize the sequences and to configure the pipeline.

    Returns:
        Whatever ``Pipeline.search_hmm`` returns for the profile and the
        digitized sequences.
    """
    alphabet = hmm.alphabet

    # All sequences are digitized up front, before the search starts.
    digital_seqs = []
    with pyhmmer.easel.SequenceFile(queryFile) as seq_reader:
        for record in seq_reader:
            digital_seqs.append(record.digitize(alphabet))

    search_pipeline = pyhmmer.plan7.Pipeline(alphabet)
    return search_pipeline.search_hmm(hmm, digital_seqs)



# Thread ceiling derived from the core budget: 75% of workflow.cores,
# truncated to a whole number and never below 1. The original left a float
# here (e.g. 1.5 for two cores), which is not a meaningful thread count.
# NOTE(review): presumably consumed as `threads:` by the included rule files —
# confirm none of them relies on a fractional value.
threads_max = max(1, int(workflow.cores * 0.75))

configfile: srcdir('config.yaml')

# Resolve the sample list.
#   1. A non-empty config["SAMPLES"] wins.
#   2. Otherwise every "<sample>.fna" matched by glob_wildcards is used.
#   3. If that also yields nothing, abort with an explicit message.
# Fixes over the previous version:
#   - a present-but-empty config["SAMPLES"] left SAMPLES undefined (NameError);
#   - a null config["SAMPLES"] was accepted as the sample list;
#   - `glob_wildcards(...) != []` compared a namedtuple with a list and was
#     always true, so the "no FASTA files" exit could never trigger;
#   - a leftover debugging `sys.exit("Samples checked")` stopped every run
#     before any rule was declared.
print("Checking samples")
SAMPLES = config.get("SAMPLES") or []
if not SAMPLES:
	# glob_wildcards returns one field per wildcard; unpack the only one.
	SAMPLES, = glob_wildcards("{sample}.fna")
if not SAMPLES:
	sys.exit("No valid FASTA files found, exiting")
print(SAMPLES)
# Aggregate target rule. It has no output or action of its own; it only
# requests the two best-hit CSV files under config["outdir"].
# It is declared ahead of the `include:` statements, which makes it the first
# rule of this file (Snakemake uses the first rule as the default target).
# NOTE(review): the requested files are presumably produced by
# rules/best_hits.smk — the path strings must match those outputs exactly.
rule final:
	input:
		config["outdir"] + "/03_best_hits/best_immunity_protein_candidates.csv",
		config["outdir"] + "/03_best_hits/best_hit_contigs.csv"
		# Earlier targets, kept for reference:
		# "cinfulOut/03_best_hits/best_hits.csv"
		# expand("cinfulOut/02_homology_results/{sample}.best_hits.csv", sample = SAMPLES)
		# expand("cinfulOut/02_homology_results/{sample}.all_merged.csv", sample = SAMPLES)




# Rule files that make up the workflow. They are included after `rule final`
# so that `final` remains the first rule declared in this file.
# NOTE(review): the helper functions, SAMPLES and threads_max defined above
# are presumably used by these files — verify before renaming any of them.
include: "rules/setup_dbs.smk"
include: "rules/filter_input.smk"
include: "rules/prodigal.smk"
include: "rules/microcin.smk"
include: "rules/immunity_protein.smk"
include: "rules/CvaB.smk"
include: "rules/duomolog_components.smk"
include: "rules/best_hits.smk"

# import shutil
# onsuccess:
#     shutil.rmtree(".snakemake")
# rule prodigal:
# 	input:
# 		"{sample}.fna"
# 	output:
# 		"cinfulOut/01_orf_homology/{sample}_prodigal/{sample}.faa"
# 	shell:
# 		"""
# 		snakemake --snakefile ../cinful/rules/prodigal.smk --unlock
# 		snakemake --snakefile ../cinful/rules/prodigal.smk --cores 1 -p
# 		"""

# rule microcin:
# 	input:
# 		"cinfulOut/01_orf_homology/{sample}_prodigal/{sample}.faa"
# 	output:
# 		"cinfulOut/01_orf_homology/{sample}_prodigal/microcins/{sample}.filtered.fa"
# 	shell:
# 		"""
# 		snakemake --snakefile ../cinful/rules/microcin.smk --unlock
# 		snakemake --snakefile ../cinful/rules/microcin.smk --cores 1 -p
# 		"""
# 
# rule immunity_protein:
	# input:
		# "cinfulOut/01_orf_homology/{sample}_prodigal/{sample}.faa"
	# output:
		# "cinfulOut/01_orf_homology/{sample}_prodigal/immunity_proteins/{sample}.filtered.fa"
	# shell:
		# """
		# snakemake --snakefile ../cinful/rules/immunity_protein.smk --unlock
		# snakemake --snakefile ../cinful/rules/immunity_protein.smk --cores 1 -p
		# """

# rule CvaB:
	# input:
		# "cinfulOut/01_orf_homology/{sample}_prodigal/{sample}.faa"
	# output:
		# "cinfulOut/01_orf_homology/{sample}_prodigal/CvaB/{sample}.filtered.fa"
	# shell:
		# """
		# snakemake --snakefile ../cinful/rules/CvaB.smk --unlock
		# snakemake --snakefile ../cinful/rules/CvaB.smk --cores 1 -p
		# """

# rule duomolog:
	# input:
		# expand("cinfulOut/01_orf_homology/{sample}_prodigal/{component}/{sample}.filtered.fa", sample = SAMPLES, component = ["microcins","CvaB","immunity_proteins"])
	# output:
		# "cinfulOut/02_homology_results/{sample}.all_merged.csv"
	# shell:
		# """
		# snakemake  --snakefile ../cinful/rules/duomolog.smk --unlock
		# snakemake  --snakefile ../cinful/rules/duomolog.smk --cores 1 -p
		# """
