#ifndef PREFILTERING_H
#define PREFILTERING_H

#include <mmseqs/commons/baseMatrix.h>
#include <mmseqs/commons/dBReader.h>
#include <mmseqs/prefiltering/indexTable.h>
#include <mmseqs/commons/parameters.h>
#include <mmseqs/prefiltering/prefilteringIndexReader.h>
#include <mmseqs/prefiltering/queryMatcher.h>
#include <mmseqs/commons/scoreMatrix.h>
#include <mmseqs/output.h>

#include <list>
#include <string>
#include <utility>
#include <vector>

// Prefiltering: interface for running a prefilter of a query database against
// a target database and writing the results to a result database.
//
// This header only declares the class; all behaviour lives in the
// implementation file. Where a comment below describes what a function does
// beyond what its signature shows, it is marked as an assumption.
//
// Every entry point takes an `mmseqs_output *out` as its first argument
// (presumably the logging/output sink -- confirm against mmseqs/output.h).
class Prefiltering {
 public:
  // Builds a prefilter from the paths of the query/target databases and their
  // index files, the sequence types of both sides, and the run parameters.
  Prefiltering(mmseqs_output *out, const std::string &queryDB,
               const std::string &queryDBIndex, const std::string &targetDB,
               const std::string &targetDBIndex, int querySeqType,
               int targetSeqType, const Parameters &par);

  ~Prefiltering();

  // Non-copyable. The class declares its own destructor and stores raw
  // pointers (qdbr, tdbr, tidxdbr, kmerSubMat, ungappedSubMat, indexTable,
  // sequenceLookup). A compiler-generated copy would duplicate those pointers
  // member-wise, leaving two objects referring to the same pointees.
  // NOTE(review): if the destructor releases them (confirm in the .cpp), such
  // a copy would end in a double free.
  // Copy assignment was already unavailable because of the const data
  // members; it is spelled out here so the intent is explicit.
  Prefiltering(const Prefiltering &) = delete;
  Prefiltering &operator=(const Prefiltering &) = delete;

  // Runs the prefilter and writes to resultDB / resultDBIndex.
  // NOTE(review): name suggests every split is processed in this call --
  // confirm against the implementation.
  void runAllSplits(mmseqs_output *out, const std::string &resultDB,
                    const std::string &resultDBIndex);

#ifdef HAVE_MPI
  // MPI variant, only declared when HAVE_MPI is defined. Takes a local
  // temporary path and a run id in addition to the result database paths.
  void runMpiSplits(mmseqs_output *out, const std::string &resultDB,
                    const std::string &resultDBIndex,
                    const std::string &localTmpPath, const int runRandomId);
#endif

  // Runs a range of splits described by fromSplit and splitProcessCount.
  // NOTE(review): presumably processes splitProcessCount splits starting at
  // fromSplit, with `merge` controlling whether per-split outputs are merged;
  // the meaning of the int return value is not visible here.
  int runSplits(mmseqs_output *out, const std::string &resultDB,
                const std::string &resultDBIndex, size_t fromSplit,
                size_t splitProcessCount, bool merge);

  // Merges the per-split result files into outDb / outDBIndex. Each entry of
  // splitFiles is a pair of file names (presumably data file and index file).
  void mergePrefilterSplits(
      mmseqs_output* out,
      const std::string &outDb, const std::string &outDBIndex,
      const std::vector<std::pair<std::string, std::string>> &splitFiles);

  // Returns a substitution matrix for the given scoring-matrix file(s),
  // alphabet size(s), bit factor and sequence kind flags.
  // NOTE(review): a raw pointer is returned; who must release it is not
  // visible in this header -- confirm in the implementation.
  static BaseMatrix *getSubstitutionMatrix(
      mmseqs_output* out,
      const MultiParam<char *> &scoringMatrixFile, MultiParam<int> alphabetSize,
      float bitFactor, bool profileState, bool isNucl);

  // Split setup helper. maxResListLen, kmerSize, split and splitMode are
  // taken by non-const reference, i.e. they are in/out parameters that the
  // function may modify.
  static void setupSplit(mmseqs_output* out, DBReader<unsigned int> &dbr, const int alphabetSize,
                         const unsigned int querySeqType, const int threads,
                         const bool templateDBIsIndex, const size_t memoryLimit,
                         const size_t qDbSize, size_t &maxResListLen,
                         int &kmerSize, int &split, int &splitMode);

  // Returns the k-mer threshold for the given sensitivity, profile flag,
  // k-mer score and k-mer size.
  static int getKmerThreshold(mmseqs_output* out, const float sensitivity, const bool isProfile,
                              const int kmerScore, const int kmerSize);

  // Merges target-split result files into outDB / outDBIndex using the given
  // number of threads. Each entry of fileNames is a pair of file names.
  static void mergeTargetSplits(
      mmseqs_output* out,
      const std::string &outDB, const std::string &outDBIndex,
      const std::vector<std::pair<std::string, std::string>> &fileNames,
      unsigned int threads);

 private:
  // NOTE: data members are initialized in declaration order; do not reorder
  // them without checking the constructor's initializer list.

  // Database paths handed to the constructor.
  const std::string queryDB;
  const std::string queryDBIndex;
  const std::string targetDB;
  const std::string targetDBIndex;

  // Database readers (presumably query, target and target-index readers).
  DBReader<unsigned int> *qdbr;
  DBReader<unsigned int> *tdbr;
  DBReader<unsigned int> *tidxdbr;
  // Presumably true when query and target are the same database; see
  // isSameQTDB().
  bool sameQTDB;

  // Substitution / score matrices and index structures.
  BaseMatrix *kmerSubMat;
  BaseMatrix *ungappedSubMat;
  ScoreMatrix _2merSubMatrix;
  ScoreMatrix _3merSubMatrix;
  IndexTable *indexTable;
  SequenceLookup *sequenceLookup;

  // Run parameters.
  int splits;
  int kmerSize;
  std::string spacedKmerPattern;
  std::string localTmp;
  bool spacedKmer;
  int alphabetSize;
  bool templateDBIsIndex;
  int maskMode;
  int maskLowerCaseMode;
  int splitMode;
  int kmerThr;
  MultiParam<char *> scoringMatrixFile;
  MultiParam<char *> seedScoringMatrixFile;
  int targetSeqType;
  bool takeOnlyBestKmer;
  size_t maxResListLen;

  const int kmerScore;
  const float sensitivity;
  size_t maxSeqLen;
  int querySeqType;
  const unsigned int diagonalScoring;
  const unsigned int minDiagScoreThr;
  bool aaBiasCorrection;
  const float covThr;
  const int covMode;
  const bool includeIdentical;
  int preloadMode;
  const unsigned int threads;
  int compressed;

  // Runs a single split. NOTE(review): the meaning of the bool return value
  // is not visible in this header.
  bool runSplit(mmseqs_output *out, const std::string &resultDB,
                const std::string &resultDBIndex, size_t split, bool merge);

  // Computes the k-mer size and split size for the index table, returned as
  // a pair of ints.
  // NOTE(review): which element is which is not visible here.
  static std::pair<int, int> optimizeSplit(mmseqs_output* out, size_t totalMemoryInByte,
                                           DBReader<unsigned int> *tdbr,
                                           int alphabetSize, int kmerSize,
                                           unsigned int querySeqType,
                                           unsigned int threads);

  // Estimates memory consumption at runtime for the given configuration.
  // NOTE(review): the unit of the result is presumably bytes -- confirm.
  static size_t estimateMemoryConsumption(
      int split, size_t dbSize, size_t resSize, size_t maxHitsPerQuery,
      int alphabetSize, int kmerSize, unsigned int querySeqType, int threads);

  // Estimates disk space consumption from the database size and the maximum
  // result list length.
  static size_t estimateHDDMemoryConsumption(size_t dbSize,
                                             size_t maxResListLen);

  // Returns a ScoreMatrix derived from `matrix` for the given k-mer size.
  ScoreMatrix getScoreMatrix(mmseqs_output* out, const BaseMatrix &matrix, const size_t kmerSize);

  // Needed for index lookup: prepares the index table for one split, given
  // the split number and the dbFrom / dbSize range.
  void getIndexTable(mmseqs_output *out, int split, size_t dbFrom,
                     size_t dbSize);

  // Reports run statistics. `reslens` is an array of resLensSize pointers to
  // result-length lists (presumably one list per thread -- confirm).
  void printStatistics(mmseqs_output* out, const statistics_t &stats, std::list<int> **reslens,
                       unsigned int resLensSize, size_t empty,
                       size_t maxResults);

  // Returns whether query and target refer to the same database.
  // NOTE(review): the exact comparison used is in the implementation.
  bool isSameQTDB(mmseqs_output* out);
};

#endif
