//------------------------------------------------------------------------------
// Copyright 2018-2022 H2O.ai
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
// IN THE SOFTWARE.
//------------------------------------------------------------------------------
#include <atomic>       // std::atomic
#include <cmath>        // std::isinf, std::sqrt
#include <limits>       // std::numeric_limits
#include <type_traits>  // std::is_floating_point
#include "column.h"
#include "column/column_impl.h"
#include "datatablemodule.h"
#include "lib/parallel_hashmap/phmap.h"
#include "ltype.h"
#include "models/murmurhash.h"
#include "parallel/api.h"
#include "parallel/shared_mutex.h"
#include "python/_all.h"
#include "python/string.h"
#include "rowindex.h"
#include "sort.h"
#include "stats.h"
#include "stype.h"
#include "utils/assert.h"
#include "utils/misc.h"


//------------------------------------------------------------------------------
// enum Stat helpers
//------------------------------------------------------------------------------

/**
 * Returns the textual name of the statistic `s`. Throws a RuntimeError if
 * `s` does not match any of the enumerators handled below.
 */
static const char* stat_name(Stat s) {
  const char* name = nullptr;
  switch (s) {
    case Stat::NaCount: name = "NaCount"; break;
    case Stat::Sum:     name = "Sum";     break;
    case Stat::Mean:    name = "Mean";    break;
    case Stat::StDev:   name = "StDev";   break;
    case Stat::Skew:    name = "Skew";    break;
    case Stat::Kurt:    name = "Kurt";    break;
    case Stat::Min:     name = "Min";     break;
    case Stat::Qt25:    name = "Qt25";    break;
    case Stat::Median:  name = "Median";  break;
    case Stat::Qt75:    name = "Qt75";    break;
    case Stat::Max:     name = "Max";     break;
    case Stat::Mode:    name = "Mode";    break;
    case Stat::NModal:  name = "NModal";  break;
    case Stat::NUnique: name = "NUnique"; break;
  }
  if (name) return name;
  throw RuntimeError() << "Unknown stat " << static_cast<int>(s);
}



//------------------------------------------------------------------------------
// main Stats class
//------------------------------------------------------------------------------

/**
 * Binds the stats object to the column implementation `col`. The pointer
 * must be non-null (checked via xassert).
 */
Stats::Stats(const dt::ColumnImpl* col)
  : column(col)
{
  xassert(col);
}

Stats::~Stats() = default;


/**
 * Forgets all stats: clears both the "computed" and the "valid" flag sets.
 */
void Stats::reset() {
  _valid.reset();
  _computed.reset();
}

/**
 * True if `stat` has been computed or explicitly set since the last reset.
 */
bool Stats::is_computed(Stat stat) const {
  const size_t bit = static_cast<size_t>(stat);
  return _computed.test(bit);
}

/**
 * True if the stored value of `stat` is flagged as valid.
 */
bool Stats::is_valid(Stat stat) const {
  const size_t bit = static_cast<size_t>(stat);
  return _valid.test(bit);
}

/**
 * Marks `stat` as computed and records `isvalid` as its validity flag.
 */
void Stats::set_valid(Stat stat, bool isvalid) {
  const size_t bit = static_cast<size_t>(stat);
  _computed.set(bit, true);
  _valid.set(bit, isvalid);
}


size_t VoidStats::memory_footprint() const noexcept {
  return sizeof(VoidStats);
}

template <typename T>
size_t NumericStats<T>::memory_footprint() const noexcept {
  return sizeof(NumericStats<T>);
}

size_t StringStats::memory_footprint() const noexcept {
  return sizeof(StringStats);
}

size_t PyObjectStats::memory_footprint() const noexcept {
  return sizeof(PyObjectStats);
}




//------------------------------------------------------------------------------
// Stats getters (generic)
//------------------------------------------------------------------------------

/**
 * Helper for "this stat is not available" answers: sets `*isvalid` to false
 * (when the pointer is given) and returns a value-initialized `T`.
 */
template <typename T>
static T _invalid(bool* isvalid) {
  if (isvalid != nullptr) {
    *isvalid = false;
  }
  return T();
}


/**
 * Returns the int64-valued statistic `stat` (Sum, Min, Max or Mode). For any
 * other stat the result is 0 and `*isvalid` (if given) is set to false.
 */
int64_t Stats::get_stat_int(Stat stat, bool* isvalid) {
  if (stat == Stat::Sum)  return sum_int(isvalid);
  if (stat == Stat::Min)  return min_int(isvalid);
  if (stat == Stat::Max)  return max_int(isvalid);
  if (stat == Stat::Mode) return mode_int(isvalid);
  return _invalid<int64_t>(isvalid);
}


/**
 * Returns the count-valued statistic `stat` (NaCount, NUnique or NModal).
 * For any other stat the result is 0 and `*isvalid` (if given) is set to
 * false.
 */
size_t Stats::get_stat_uint(Stat stat, bool* isvalid) {
  if (stat == Stat::NaCount) return nacount(isvalid);
  if (stat == Stat::NUnique) return nunique(isvalid);
  if (stat == Stat::NModal)  return nmodal(isvalid);
  return _invalid<size_t>(isvalid);
}


/**
 * Returns the double-valued statistic `stat` (Sum, Mean, StDev, Skew, Kurt,
 * Min, Max or Mode). For any other stat the result is 0.0 and `*isvalid`
 * (if given) is set to false.
 */
double Stats::get_stat_double(Stat stat, bool* isvalid) {
  if (stat == Stat::Sum)   return sum_double(isvalid);
  if (stat == Stat::Mean)  return mean(isvalid);
  if (stat == Stat::StDev) return stdev(isvalid);
  if (stat == Stat::Skew)  return skew(isvalid);
  if (stat == Stat::Kurt)  return kurt(isvalid);
  if (stat == Stat::Min)   return min_double(isvalid);
  if (stat == Stat::Max)   return max_double(isvalid);
  if (stat == Stat::Mode)  return mode_double(isvalid);
  return _invalid<double>(isvalid);
}


/**
 * Returns the string-valued statistic `stat`. Only Mode is supported; for any
 * other stat a default-constructed CString is returned and `*isvalid` (if
 * given) is set to false.
 */
dt::CString Stats::get_stat_string(Stat stat, bool* isvalid) {
  if (stat == Stat::Mode) {
    return mode_string(isvalid);
  }
  return _invalid<dt::CString>(isvalid);
}


// Typed convenience wrappers around the `get_stat_*()` functions: the value of
// the statistic is written into `*out`, and the validity flag is returned.
//
// The flag is initialized to `false` so that the return value is well-defined
// even if a getter implementation does not write to it.

bool Stats::get_stat(Stat stat, int64_t* out) {
  bool ret = false;
  *out = get_stat_int(stat, &ret);
  return ret;
}

bool Stats::get_stat(Stat stat, size_t* out) {
  bool ret = false;
  *out = get_stat_uint(stat, &ret);
  return ret;
}

bool Stats::get_stat(Stat stat, double* out) {
  bool ret = false;
  *out = get_stat_double(stat, &ret);
  return ret;
}

bool Stats::get_stat(Stat stat, dt::CString* out)  {
  bool ret = false;
  *out = get_stat_string(stat, &ret);
  return ret;
}




//------------------------------------------------------------------------------
// Stats getters (specific)
//------------------------------------------------------------------------------

/**
 * Copies the validity flag of `stat` into `*isvalid`; does nothing when the
 * pointer is null.
 */
void Stats::_fill_validity_flag(Stat stat, bool* isvalid) {
  if (!isvalid) return;
  *isvalid = is_valid(stat);
}

/**
 * Returns the stored NA count, computing it first if it has not been
 * computed yet.
 */
size_t Stats::nacount(bool* isvalid) {
  if (!is_computed(Stat::NaCount)) {
    compute_nacount();
  }
  _fill_validity_flag(Stat::NaCount, isvalid);
  return _countna;
}

/**
 * Returns the stored number of unique values, computing it first if needed.
 */
size_t Stats::nunique(bool* isvalid) {
  if (!is_computed(Stat::NUnique)) {
    compute_nunique();
  }
  _fill_validity_flag(Stat::NUnique, isvalid);
  return _nunique;
}

/**
 * Returns the stored NModal value; it is produced by `compute_sorted_stats()`
 * when not yet available.
 */
size_t Stats::nmodal(bool* isvalid) {
  if (!is_computed(Stat::NModal)) {
    compute_sorted_stats();
  }
  _fill_validity_flag(Stat::NModal, isvalid);
  return _nmodal;
}


// Base-class versions of the moment getters: they always report "invalid" and
// return 0.0.

double Stats::mean(bool* isvalid) {
  return _invalid<double>(isvalid);
}

double Stats::stdev(bool* isvalid) {
  return _invalid<double>(isvalid);
}

double Stats::skew(bool* isvalid) {
  return _invalid<double>(isvalid);
}

double Stats::kurt(bool* isvalid) {
  return _invalid<double>(isvalid);
}


/**
 * Mean of the column; produced by `compute_moments12()` on first request.
 */
template <typename T>
double NumericStats<T>::mean(bool* isvalid) {
  if (!is_computed(Stat::Mean)) {
    compute_moments12();
  }
  _fill_validity_flag(Stat::Mean, isvalid);
  return _mean;
}

/**
 * Standard deviation of the column; produced by `compute_moments12()` on
 * first request.
 */
template <typename T>
double NumericStats<T>::stdev(bool* isvalid) {
  if (!is_computed(Stat::StDev)) {
    compute_moments12();
  }
  _fill_validity_flag(Stat::StDev, isvalid);
  return _stdev;
}

/**
 * Skewness of the column; produced by `compute_moments34()` on first request.
 */
template <typename T>
double NumericStats<T>::skew(bool* isvalid) {
  if (!is_computed(Stat::Skew)) {
    compute_moments34();
  }
  _fill_validity_flag(Stat::Skew, isvalid);
  return _skew;
}

/**
 * Kurtosis of the column; produced by `compute_moments34()` on first request.
 */
template <typename T>
double NumericStats<T>::kurt(bool* isvalid) {
  if (!is_computed(Stat::Kurt)) {
    compute_moments34();
  }
  _fill_validity_flag(Stat::Kurt, isvalid);
  return _kurt;
}

int64_t Stats::min_int    (bool* isvalid) { return _invalid<int64_t>(isvalid); }
int64_t Stats::max_int    (bool* isvalid) { return _invalid<int64_t>(isvalid); }
int64_t Stats::mode_int   (bool* isvalid) { return _invalid<int64_t>(isvalid); }
int64_t Stats::sum_int    (bool* isvalid) { return _invalid<int64_t>(isvalid); }
double  Stats::min_double (bool* isvalid) { return _invalid<double>(isvalid); }
double  Stats::max_double (bool* isvalid) { return _invalid<double>(isvalid); }
double  Stats::mode_double(bool* isvalid) { return _invalid<double>(isvalid); }
double  Stats::sum_double (bool* isvalid) { return _invalid<double>(isvalid); }
dt::CString Stats::mode_string(bool* isvalid) { return _invalid<dt::CString>(isvalid); }

/**
 * Minimum of an integer-typed column. For non-integral `T` the answer is
 * always "invalid".
 */
template <typename T>
int64_t NumericStats<T>::min_int(bool* isvalid) {
  if (std::is_integral<T>::value) {
    if (!is_computed(Stat::Min)) compute_minmax();
    _fill_validity_flag(Stat::Min, isvalid);
    xassert((std::is_same<decltype(_min), int64_t>::value));
    return static_cast<int64_t>(_min);
  }
  return _invalid<int64_t>(isvalid);
}

/**
 * Maximum of an integer-typed column. For non-integral `T` the answer is
 * always "invalid".
 */
template <typename T>
int64_t NumericStats<T>::max_int(bool* isvalid) {
  if (std::is_integral<T>::value) {
    if (!is_computed(Stat::Max)) compute_minmax();
    _fill_validity_flag(Stat::Max, isvalid);
    xassert((std::is_same<decltype(_max), int64_t>::value));
    return static_cast<int64_t>(_max);
  }
  return _invalid<int64_t>(isvalid);
}

/**
 * Minimum of a floating-point column. For non-floating `T` the answer is
 * always "invalid".
 */
template <typename T>
double NumericStats<T>::min_double(bool* isvalid) {
  if (std::is_floating_point<T>::value) {
    if (!is_computed(Stat::Min)) compute_minmax();
    _fill_validity_flag(Stat::Min, isvalid);
    xassert((std::is_same<decltype(_min), double>::value));
    return static_cast<double>(_min);
  }
  return _invalid<double>(isvalid);
}

/**
 * Maximum of a floating-point column. For non-floating `T` the answer is
 * always "invalid".
 */
template <typename T>
double NumericStats<T>::max_double(bool* isvalid) {
  if (std::is_floating_point<T>::value) {
    if (!is_computed(Stat::Max)) compute_minmax();
    _fill_validity_flag(Stat::Max, isvalid);
    xassert((std::is_same<decltype(_max), double>::value));
    return static_cast<double>(_max);
  }
  return _invalid<double>(isvalid);
}

/**
 * Mode of an integer-typed column, obtained from `compute_sorted_stats()`.
 * For non-integral `T` the answer is always "invalid".
 */
template <typename T>
int64_t NumericStats<T>::mode_int(bool* isvalid) {
  if (std::is_integral<T>::value) {
    if (!is_computed(Stat::Mode)) compute_sorted_stats();
    _fill_validity_flag(Stat::Mode, isvalid);
    xassert((std::is_same<decltype(_mode), int64_t>::value));
    return static_cast<int64_t>(_mode);
  }
  return _invalid<int64_t>(isvalid);
}

/**
 * Mode of a floating-point column, obtained from `compute_sorted_stats()`.
 * For non-floating `T` the answer is always "invalid".
 */
template <typename T>
double NumericStats<T>::mode_double(bool* isvalid) {
  if (std::is_floating_point<T>::value) {
    if (!is_computed(Stat::Mode)) compute_sorted_stats();
    _fill_validity_flag(Stat::Mode, isvalid);
    xassert((std::is_same<decltype(_mode), double>::value));
    return static_cast<double>(_mode);
  }
  return _invalid<double>(isvalid);
}


/**
 * Sum of the column's values converted to int64; the sum itself is produced
 * by `compute_moments12()` on first request.
 *
 * NOTE(review): unlike min_int()/max_int()/mode_int(), there is no
 * `std::is_integral<T>` guard here, so for a floating-point column the stored
 * sum is cast to int64_t -- confirm whether this is intended.
 */
template <typename T>
int64_t NumericStats<T>::sum_int(bool* isvalid) {
  if (!is_computed(Stat::Sum)) {
    compute_moments12();
  }
  _fill_validity_flag(Stat::Sum, isvalid);
  return static_cast<int64_t>(_sum);
}


/**
 * Sum of the column's values converted to double; the sum itself is produced
 * by `compute_moments12()` on first request.
 *
 * NOTE(review): unlike min_double()/max_double()/mode_double(), there is no
 * `std::is_floating_point<T>` guard here -- confirm whether this is intended.
 */
template <typename T>
double NumericStats<T>::sum_double(bool* isvalid) {
  if (!is_computed(Stat::Sum)) {
    compute_moments12();
  }
  _fill_validity_flag(Stat::Sum, isvalid);
  return static_cast<double>(_sum);
}


/**
 * Mode of a string column, returned as a CString constructed from the stored
 * `mode_` member. The value is produced by `compute_sorted_stats()` on first
 * request.
 */
dt::CString StringStats::mode_string(bool* isvalid) {
  if (!is_computed(Stat::Mode)) {
    compute_sorted_stats();
  }
  _fill_validity_flag(Stat::Mode, isvalid);
  return dt::CString(mode_);
}




//------------------------------------------------------------------------------
// Stats setters (generic)
//------------------------------------------------------------------------------

// Generic setters: each overload forwards `value` to the specific setter that
// corresponds to `stat`, and throws a RuntimeError when `stat` cannot hold a
// value of the given type.

void Stats::set_stat(Stat stat, int64_t value, bool isvalid) {
  switch (stat) {
    case Stat::Min:  set_min(value, isvalid);  break;
    case Stat::Max:  set_max(value, isvalid);  break;
    case Stat::Mode: set_mode(value, isvalid); break;
    case Stat::Sum:  set_sum(value, isvalid);  break;
    default:
      throw RuntimeError() << "Incorrect stat " << stat_name(stat);
  }
}

void Stats::set_stat(Stat stat, size_t value, bool isvalid) {
  switch (stat) {
    case Stat::NaCount: set_nacount(value, isvalid); break;
    case Stat::NUnique: set_nunique(value, isvalid); break;
    case Stat::NModal:  set_nmodal(value, isvalid);  break;
    default:
      throw RuntimeError() << "Incorrect stat " << stat_name(stat);
  }
}

void Stats::set_stat(Stat stat, double value, bool isvalid) {
  switch (stat) {
    case Stat::Sum:   set_sum(value, isvalid);   break;
    case Stat::Mean:  set_mean(value, isvalid);  break;
    case Stat::StDev: set_stdev(value, isvalid); break;
    case Stat::Skew:  set_skew(value, isvalid);  break;
    case Stat::Kurt:  set_kurt(value, isvalid);  break;
    case Stat::Min:   set_min(value, isvalid);   break;
    case Stat::Max:   set_max(value, isvalid);   break;
    case Stat::Mode:  set_mode(value, isvalid);  break;
    default:
      throw RuntimeError() << "Incorrect stat " << stat_name(stat);
  }
}

void Stats::set_stat(Stat stat, const dt::CString& value, bool isvalid) {
  if (stat != Stat::Mode) {
    throw RuntimeError() << "Incorrect stat " << stat_name(stat);
  }
  set_mode(value, isvalid);
}




//------------------------------------------------------------------------------
// Stats setters (specific)
//------------------------------------------------------------------------------

void Stats::set_nacount(size_t value, bool isvalid) {
  xassert(isvalid);
  _countna = value;
  set_valid(Stat::NaCount, isvalid);
}

void Stats::set_nunique(size_t value, bool isvalid) {
  _nunique = value;
  set_valid(Stat::NUnique, isvalid);
}

void Stats::set_nmodal(size_t value, bool isvalid) {
  _nmodal = value;
  set_valid(Stat::NModal, isvalid);
}

// Base-class versions of the typed setters: none of these statistics can be
// stored in a plain `Stats` object, so every one of them throws RuntimeError.

void Stats::set_sum(double, bool) {
  throw RuntimeError();
}

void Stats::set_sum(int64_t, bool) {
  throw RuntimeError();
}

void Stats::set_mean(double, bool) {
  throw RuntimeError();
}

void Stats::set_stdev(double, bool) {
  throw RuntimeError();
}

void Stats::set_skew(double, bool) {
  throw RuntimeError();
}

void Stats::set_kurt(double, bool) {
  throw RuntimeError();
}

void Stats::set_min(int64_t, bool) {
  throw RuntimeError();
}

void Stats::set_min(double, bool) {
  throw RuntimeError();
}

void Stats::set_max(int64_t, bool) {
  throw RuntimeError();
}

void Stats::set_max(double, bool) {
  throw RuntimeError();
}

void Stats::set_mode(int64_t, bool) {
  throw RuntimeError();
}

void Stats::set_mode(double, bool) {
  throw RuntimeError();
}

void Stats::set_mode(const dt::CString&, bool) {
  throw RuntimeError();
}


/**
 * Stores an integer sum. Asserts that the storage type `V` of this stats
 * class is int64_t.
 */
template <typename T>
void NumericStats<T>::set_sum(int64_t value, bool isvalid) {
  xassert((std::is_same<V, int64_t>::value));
  this->_sum = static_cast<V>(value);
  set_valid(Stat::Sum, isvalid);
}

/**
 * Stores a floating-point sum. Asserts that the storage type `V` of this
 * stats class is double.
 */
template <typename T>
void NumericStats<T>::set_sum(double value, bool isvalid) {
  xassert((std::is_same<V, double>::value));
  this->_sum = static_cast<V>(value);
  set_valid(Stat::Sum, isvalid);
}

/** Stores the mean together with its validity flag. */
template <typename T>
void NumericStats<T>::set_mean(double value, bool isvalid) {
  this->_mean = value;
  set_valid(Stat::Mean, isvalid);
}

/** Stores the standard deviation together with its validity flag. */
template <typename T>
void NumericStats<T>::set_stdev(double value, bool isvalid) {
  this->_stdev = value;
  set_valid(Stat::StDev, isvalid);
}

/** Stores the skewness together with its validity flag. */
template <typename T>
void NumericStats<T>::set_skew(double value, bool isvalid) {
  this->_skew = value;
  set_valid(Stat::Skew, isvalid);
}

/** Stores the kurtosis together with its validity flag. */
template <typename T>
void NumericStats<T>::set_kurt(double value, bool isvalid) {
  this->_kurt = value;
  set_valid(Stat::Kurt, isvalid);
}

/**
 * Stores an integer minimum. Asserts that the storage type `V` is int64_t.
 */
template <typename T>
void NumericStats<T>::set_min(int64_t value, bool isvalid) {
  xassert((std::is_same<V, int64_t>::value));
  this->_min = static_cast<V>(value);
  set_valid(Stat::Min, isvalid);
}

/**
 * Stores a floating-point minimum. Asserts that the storage type `V` is
 * double.
 */
template <typename T>
void NumericStats<T>::set_min(double value, bool isvalid) {
  xassert((std::is_same<V, double>::value));
  this->_min = static_cast<V>(value);
  set_valid(Stat::Min, isvalid);
}

/**
 * Stores an integer maximum. Asserts that the storage type `V` is int64_t.
 */
template <typename T>
void NumericStats<T>::set_max(int64_t value, bool isvalid) {
  xassert((std::is_same<V, int64_t>::value));
  this->_max = static_cast<V>(value);
  set_valid(Stat::Max, isvalid);
}

/**
 * Stores a floating-point maximum. Asserts that the storage type `V` is
 * double.
 */
template <typename T>
void NumericStats<T>::set_max(double value, bool isvalid) {
  xassert((std::is_same<V, double>::value));
  this->_max = static_cast<V>(value);
  set_valid(Stat::Max, isvalid);
}

/**
 * Stores an integer mode. Asserts that the storage type `V` is int64_t.
 */
template <typename T>
void NumericStats<T>::set_mode(int64_t value, bool isvalid) {
  xassert((std::is_same<V, int64_t>::value));
  this->_mode = static_cast<V>(value);
  set_valid(Stat::Mode, isvalid);
}

/**
 * Stores a floating-point mode. Asserts that the storage type `V` is double.
 */
template <typename T>
void NumericStats<T>::set_mode(double value, bool isvalid) {
  xassert((std::is_same<V, double>::value));
  this->_mode = static_cast<V>(value);
  set_valid(Stat::Mode, isvalid);
}

/**
 * Stores the mode of a string column. The value is converted with
 * `to_string()` and kept in the `mode_` member.
 */
void StringStats::set_mode(const dt::CString& value, bool isvalid) {
  this->mode_ = value.to_string();
  set_valid(Stat::Mode, isvalid);
}




//------------------------------------------------------------------------------
// Stats computation: NaCount
//------------------------------------------------------------------------------

/**
 * Counts the elements of `col` for which `get_element()` reports "not valid",
 * reading the elements as type `T`.
 *
 * Columns with at most 32 rows are scanned in a simple loop; longer columns
 * are scanned in a parallel region where every thread keeps its own counter
 * and adds it to the shared atomic total once at the end.
 */
template <typename T>
static size_t _compute_nacount(const dt::ColumnImpl* col) {
  xassert(col->type().can_be_read_as<T>());
  const size_t nrows = col->nrows();

  if (nrows <= 32) {
    size_t na_total = 0;
    T scratch;
    for (size_t row = 0; row < nrows; ++row) {
      if (!col->get_element(row, &scratch)) ++na_total;
    }
    return na_total;
  }

  std::atomic<size_t> na_total { 0 };
  dt::parallel_region(
    dt::NThreads(col->allow_parallel_access()),
    [&] {
      size_t na_local = 0;
      T scratch;
      dt::nested_for_static(nrows,
        [&](size_t row) {
          if (!col->get_element(row, &scratch)) ++na_local;
        });
      na_total += na_local;
    });
  return na_total.load();
}

void Stats::compute_nacount() { throw NotImplError(); }

template <typename T>
void NumericStats<T>::compute_nacount() {
  set_nacount(_compute_nacount<T>(column), true);
}

void BooleanStats::compute_nacount() {
  compute_all_stats();
}

void StringStats::compute_nacount() {
  set_nacount(_compute_nacount<dt::CString>(column), true);
}

void PyObjectStats::compute_nacount() {
  set_nacount(_compute_nacount<py::oobj>(column), true);
}




//------------------------------------------------------------------------------
// Stats computation: Min + Max
//------------------------------------------------------------------------------

/**
 * Largest representable "value" of type `T`: the actual infinity for types
 * that have one, and `numeric_limits<T>::max()` otherwise.
 */
template<typename T>
constexpr T infinity() {
  using limits = std::numeric_limits<T>;
  return limits::has_infinity ? limits::infinity() : limits::max();
}

void Stats::compute_minmax() {
  set_valid(Stat::Min, false);
  set_valid(Stat::Max, false);
}

void BooleanStats::compute_minmax() {
  compute_all_stats();
}

/**
 * Computes the minimum and the maximum of the column in one parallel pass.
 * Each thread tracks its own running min/max and valid-element count, and
 * merges them into the shared totals under a mutex. The NA count is stored as
 * a by-product. Min and max are flagged valid only if the column has at least
 * one valid element.
 */
template <typename T>
void NumericStats<T>::compute_minmax() {
  xassert(column->type().can_be_read_as<T>());
  const size_t n_total = column->nrows();
  size_t n_valid = 0;
  T lo = infinity<T>();
  T hi = -infinity<T>();
  std::mutex merge_mutex;
  dt::parallel_region(
    dt::NThreads(column->allow_parallel_access()),
    [&] {
      size_t local_valid = 0;
      T local_lo = infinity<T>();
      T local_hi = -infinity<T>();

      dt::nested_for_static(n_total,
        [&](size_t row) {
          T value;
          if (!column->get_element(row, &value)) return;
          ++local_valid;
          // Deliberately two independent checks (not if/else): a single
          // value may update both the running min and the running max.
          if (value < local_lo) local_lo = value;
          if (value > local_hi) local_hi = value;
        });

      // Threads that saw no valid elements have nothing to contribute
      if (local_valid == 0) return;
      std::lock_guard<std::mutex> guard(merge_mutex);
      n_valid += local_valid;
      if (local_lo < lo) lo = local_lo;
      if (local_hi > hi) hi = local_hi;
    });

  const bool any_valid = (n_valid > 0);
  set_nacount(n_total - n_valid, true);
  set_min(static_cast<V>(lo), any_valid);
  set_max(static_cast<V>(hi), any_valid);
}





//------------------------------------------------------------------------------
// Stats computation: NUnique
//------------------------------------------------------------------------------

// Base class: NUnique is not available; mark it as computed but invalid.
void Stats::compute_nunique() {
  set_valid(Stat::NUnique, false);
}


// Numeric columns obtain NUnique as part of the sorted stats.
template <typename T>
void NumericStats<T>::compute_nunique() {
  this->compute_sorted_stats();
}


// Hash functor for CString keys: murmur2 hash over the string's bytes.
struct StrHasher {
  size_t operator()(const dt::CString& s) const {
    const auto bytes = s.data();
    const auto length = s.size();
    return hash_murmur2(bytes, length);
  }
};

// Equality functor for CString keys; defers to CString's operator==.
struct StrEqual {
  bool operator()(const dt::CString& lhs, const dt::CString& rhs) const {
    const bool same = (lhs == rhs);
    return same;
  }
};

void BooleanStats::compute_nunique() {
  compute_all_stats();
}

/**
 * Counts the number of distinct non-NA strings in the column.
 *
 * The rows are processed in batches of 8 with dynamic scheduling. Every valid
 * string is first looked up in the shared hash set under a shared (read)
 * lock, and is inserted under an exclusive (write) lock only if it was not
 * found. Two threads may both decide to insert the same string, which is
 * harmless since inserting into a set is idempotent.
 *
 * The number of threads honors `column->allow_parallel_access()`, in the same
 * way as all other parallel loops in this file, so that a column which does
 * not permit concurrent reads is not accessed from several threads at once.
 */
void StringStats::compute_nunique() {
  dt::shared_bmutex rwmutex;
  phmap::parallel_flat_hash_set<dt::CString, StrHasher, StrEqual> values_seen;

  size_t batch_size = 8;
  size_t nbatches = (column->nrows() + batch_size - 1) / batch_size;
  dt::parallel_for_dynamic(
    nbatches,
    dt::NThreads(column->allow_parallel_access()),
    [&](size_t i) {
      size_t j0 = i * batch_size;
      size_t j1 = std::min(j0 + batch_size, column->nrows());
      dt::CString cstr;
      for (size_t j = j0; j < j1; ++j) {
        bool isvalid = column->get_element(j, &cstr);
        if (!isvalid) continue;
        {
          // read lock: cheap check for the already-seen values
          dt::shared_lock<dt::shared_bmutex> lock(rwmutex, false);
          if (values_seen.contains(cstr)) continue;
        }
        {
          // write lock: the value appears to be new
          dt::shared_lock<dt::shared_bmutex> lock(rwmutex, true);
          values_seen.insert(std::move(cstr));
        }
      }
    });
  set_nunique(values_seen.size());
}




//------------------------------------------------------------------------------
// Stats computation: Sum + Mean + StDev
//------------------------------------------------------------------------------

void Stats::compute_moments12() {
  set_valid(Stat::Sum, false);
  set_valid(Stat::Mean, false);
  set_valid(Stat::StDev, false);
}


/**
 * Standard deviation and mean computations are done using Welford's method.
 * In particular, if m1[n-1] is the mean of n-1 observations x[1]...x[n-1], then
 *
 *     m1[n] = 1/n * (x[1] + x[2] + ... + x[n-1] + x[n])
 *           = 1/n * ((n-1)*m1[n-1] + x[n])
 *           = m1[n-1] + (x[n] - m1[n-1])/n
 *           = m1[n-1] + delta1/n
 *
 * Similarly, for the second central moment:
 *
 *     M2[n] = (x[1] - m1[n])^2 + ... + (x[n] - m1[n])^2
 *           = x[1]^2 + ... + x[n]^2 - 2*m1[n]*(x[1]+...+x[n]) + n*m1[n]^2
 *           = (x[1]^2 + ... + x[n]^2) - n*m1[n]^2
 *           = M2[n-1] + (n-1)*m1[n-1]^2 + x[n]^2 - n*m1[n]^2
 *           = M2[n-1] + x[n]^2 - m1[n-1]^2 - n*(m1[n]^2 - m1[n-1]^2)
 *           = M2[n-1] + delta1*(x[n] + m1[n-1]) - delta1*(m1[n] + m1[n-1])
 *           = M2[n-1] + delta1*(x[n] - m1[n])
 *           = M2[n-1] + delta1*delta2
 *
 * where `delta1 = x[n] - m1[n-1]` and `delta2 = x[n] - m1[n]`.
 *
 * References:
 * [Pebay2008] P. Pébay. Formulas for Robust, One-Pass Parallel Computation of
 *             Covariances and Arbitrary-Order Statistical Moments. Sandia
 *             Report, 2008.
 *             https://prod-ng.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf
 */
// Computes Sum, Mean and StDev of the column in a single parallel pass, and
// stores the NA count as a by-product. Each thread accumulates its own count,
// sum, mean and second central moment (M2) using the incremental update
// formulas, and then merges them into the shared totals under a mutex.
//
// For floating-point columns the presence of +inf / -inf values is tracked
// separately and the results are adjusted afterwards (see below).
template <typename T>
void NumericStats<T>::compute_moments12() {
  size_t nrows = column->nrows();
  size_t count = 0;
  bool has_pos_inf = false;
  bool has_neg_inf = false;
  V sum = 0;
  double mean = 0;
  double M2 = 0;

  std::mutex mutex;
  dt::parallel_region(
    dt::NThreads(column->allow_parallel_access()),
    [&] {
      // Thread-local accumulators
      size_t t_count = 0;
      bool t_has_pos_inf = false;
      bool t_has_neg_inf = false;
      V t_sum = 0;
      double t_mean = 0.0;
      double t_M2 = 0.0;

      dt::nested_for_static(nrows,
        [&](size_t i) {
          T value;
          bool isvalid = column->get_element(i, &value);
          if (!isvalid) return;
          double x = static_cast<double>(value);
          t_count++;
          t_sum += static_cast<V>(value);
          // Incremental update of the mean and of M2
          double delta1 = x - t_mean;
          t_mean += delta1 / static_cast<double>(t_count);
          double delta2 = x - t_mean;
          t_M2 += delta1 * delta2;
          if (std::is_floating_point<T>::value) {
            t_has_pos_inf |= (value == std::numeric_limits<T>::infinity());
            t_has_neg_inf |= (value == -std::numeric_limits<T>::infinity());
          }
        });

      if (t_count) {
        // Merge this thread's partial results (n2 observations) into the
        // shared totals (n1 observations so far).
        std::lock_guard<std::mutex> lock(mutex);
        size_t n1 = count;
        size_t n2 = t_count;
        size_t n = n1 + n2;
        double delta21 = t_mean - mean;
        count = n;
        sum += t_sum;
        mean += delta21 * static_cast<double>(n2) / static_cast<double>(n);
        M2 += t_M2 + delta21 * delta21
                       * static_cast<double>(n1) / static_cast<double>(n)
                       * static_cast<double>(n2);
        has_pos_inf |= t_has_pos_inf;
        has_neg_inf |= t_has_neg_inf;
      }
    });

  size_t n = count;
  bool sum_valid = true;
  bool mean_valid = (n > 0);
  bool stdev_valid = (n > 0);
  // Sample standard deviation (divisor n - 1); 0 when there are fewer than
  // two valid observations.
  double s = n > 1 ? std::sqrt(M2 / static_cast<double>(n - 1)) : 0.0;

  // Adjustment for the case when some of the `x[i]`s were infinite.
  //   - stdev always becomes NaN and invalid;
  //   - if both +inf and -inf are present, sum and mean become NaN and
  //     invalid; otherwise they are set to the infinity that was seen;
  //   - max / min are set to +inf / -inf respectively, since they are known.
  if (std::is_floating_point<T>::value && (has_pos_inf || has_neg_inf)) {
    constexpr double nan = std::numeric_limits<double>::quiet_NaN();
    constexpr double inf = std::numeric_limits<double>::infinity();
    s = nan;
    stdev_valid = false;
    if (has_pos_inf && has_neg_inf) {
      sum = mean = s = nan;
      sum_valid = mean_valid = false;
    }
    else if (has_pos_inf) {
      sum = mean = inf;
    }
    else if (has_neg_inf) {
      sum = mean = -inf;
    }
    if (has_pos_inf) set_max(inf, true);
    if (has_neg_inf) set_min(-inf, true);
  }

  set_nacount(nrows - n, true);
  set_sum(sum, sum_valid);
  set_mean(mean, mean_valid);
  set_stdev(s, stdev_valid);
}

void BooleanStats::compute_moments12() {
  compute_all_stats();
}




//------------------------------------------------------------------------------
// Stats computation: Skew + Kurt
//------------------------------------------------------------------------------

void Stats::compute_moments34() {
  set_valid(Stat::Skew, false);
  set_valid(Stat::Kurt, false);
}


/**
 * The [Pebay 2008] paper linked above gives formulas for parallel computation
 * of third and fourth central moments too:
 *
 * delta = x[n] - m1[n-1]
 * M3[n] = M3[n-1] + (n-1)*(n-2)*delta^3/n^2 - 3*M2[n-1]*delta/n
 * M4[n] = M4[n-1] + (n-1)*(n^2 - 3n + 3)*delta^4/n^3 + 6*M2[n-1]*delta^2/n^2
 *         -4*M3[n-1]*delta/n
 */
// Computes Skew and Kurt of the column in a single parallel pass; Sum, Mean,
// StDev and the NA count are stored as by-products. Each thread accumulates
// its own count, sum, mean and central moments M2..M4 using the incremental
// update formulas, and then merges them into the shared totals under a mutex.
//
// NOTE(review): when all valid values are equal, `s` is 0 and the expressions
// for G and K divide by zero, yet both stats are flagged valid whenever
// count > 0 -- confirm that this is the intended behavior.
template <typename T>
void NumericStats<T>::compute_moments34() {
  size_t nrows = column->nrows();
  size_t count = 0;
  V sum = 0;          // x[1] + ... + x[n]
  double mean = 0.0;  // sum / n
  double M2 = 0.0;    // (x[1] - mean)^2 + ... (x[n] - mean)^2
  double M3 = 0.0;    // (x[1] - mean)^3 + ... (x[n] - mean)^3
  double M4 = 0.0;    // (x[1] - mean)^4 + ... (x[n] - mean)^4

  std::mutex mutex;
  dt::parallel_region(
    dt::NThreads(column->allow_parallel_access()),
    [&] {
      // Thread-local accumulators
      size_t t_count = 0;
      V t_sum = 0;
      double t_mean = 0.0;
      double t_M2 = 0.0;
      double t_M3 = 0.0;
      double t_M4 = 0.0;

      dt::nested_for_static(nrows,
        [&](size_t i) {
          T value;
          bool isvalid = column->get_element(i, &value);
          if (!isvalid) return;
          ++t_count;
          double x = static_cast<double>(value);
          double n = static_cast<double>(t_count);
          double delta = x - t_mean;                // δ
          double gamma = delta / n;                 // δ/n
          double beta = gamma * gamma;              // δ²/n²
          double alpha = delta * gamma * (n - 1);   // δ²(n-1)/n
          t_sum += static_cast<V>(value);
          t_mean += gamma;
          // The order of these updates matters: M4 uses the previous values
          // of M3 and M2, and M3 uses the previous value of M2.
          t_M4 += (alpha * (n*n - 3*n + 3) + 6*t_M2) * beta - 4*gamma * t_M3;
          t_M3 += (alpha * (n - 2) - 3*t_M2) * gamma;
          t_M2 += alpha;
        });

      if (t_count) {
        // Merge this thread's partial results (suffix `y`, ny observations)
        // into the shared totals (suffix `x`, nx observations so far).
        std::lock_guard<std::mutex> lock(mutex);
        const double nx = static_cast<double>(count);
        const double ny = static_cast<double>(t_count);
        const double n = nx + ny;
        const double delta = t_mean - mean;
        const double gamma = delta / n;
        const double beta  = gamma * gamma;
        const double alpha = delta * delta * nx*ny/n;
        const double M2x = M2;
        const double M2y = t_M2;
        const double M3x = M3;
        const double M3y = t_M3;
        const double M4x = M4;
        const double M4y = t_M4;

        count += t_count;
        sum += t_sum;
        mean += gamma * ny;
        M2 = M2x + M2y + alpha;
        M3 = M3x + M3y + alpha * (nx - ny) * gamma
             + 3.0 * (nx * M2y - ny * M2x) * gamma;
        M4 = M4x + M4y + alpha * beta * (nx*nx - nx*ny + ny*ny)
             + 6 * beta * (nx*nx * M2y + ny*ny * M2x)
             + 4 * gamma * (nx * M3y - ny * M3x);
      }
    });

  // s: sample standard deviation; G: sample skewness; K: sample excess
  // kurtosis. Each falls back to 0 when there are too few observations.
  double n = static_cast<double>(count);
  double s = (count > 1) ? std::sqrt(M2 / (n - 1)) : 0.0;
  double G = (count > 2) ? M3 / std::pow(s, 3) * n /(n-1) /(n-2) : 0.0;
  double K = (count > 3) ? (M4 / std::pow(s, 4) * n*(n+1)
                           - 3.0*(n-1)*(n-1)*(n-1)) /(n-1) /(n-2) /(n-3) : 0.0;

  set_nacount(nrows - count, true);
  set_sum(sum, true);
  set_mean(mean, (count > 0));
  set_stdev(s, (count > 0));
  set_skew(G, (count > 0));
  set_kurt(K, (count > 0));
}


void BooleanStats::compute_moments34() {
  compute_all_stats();
}



//------------------------------------------------------------------------------
// Stats computation: Mode, NModal
//------------------------------------------------------------------------------

void Stats::compute_sorted_stats() {
  set_valid(Stat::Mode, false);
  set_valid(Stat::NModal, false);
}


/**
 * Computes NUnique, Mode and NModal (and NaCount, if not yet known) by
 * grouping the column's values. The mode is taken from the largest non-NA
 * group, and NModal is the size of that group.
 */
template <typename T>
void NumericStats<T>::compute_sorted_stats() {
  auto grouped = group({Column(column->clone())}, {SortFlag::NONE});
  RowIndex order = std::move(grouped.first);
  Groupby grouping = std::move(grouped.second);
  if (column->nrows() == 0) {
    grouping = Groupby::single_group(0);
  }

  const int32_t* offsets = grouping.offsets_r();
  const size_t n_groups = grouping.size();
  xassert(n_groups >= 1);

  // Sorting gathers all NA elements at the top (in the first group). Thus if
  // the NA count for the column is not yet known, it can be derived now by
  // checking whether the first element in the sorted order is NA or not.
  if (!is_computed(Stat::NaCount)) {
    T first_value;
    size_t first_row;
    const bool first_is_valid =
        (order.size() == 0) ||
        (order.get_element(0, &first_row) &&
         column->get_element(first_row, &first_value));
    if (first_is_valid) {
      set_nacount(0);
    } else {
      set_nacount(static_cast<size_t>(offsets[1]));
    }
  }

  // The NA group and the placeholder group of an empty column do not count
  // as unique values.
  const bool has_nas = (_countna > 0);
  const bool grp_empty = (n_groups == 1) && (offsets[1] == 0);
  const size_t n_excluded = static_cast<size_t>(has_nas)
                            + static_cast<size_t>(grp_empty);
  set_nunique(n_groups - n_excluded, true);

  // Find the first largest group, skipping over the NA group if present
  size_t best_size = 0;
  size_t best_group = 0;
  size_t g = has_nas ? 1 : 0;
  while (g < n_groups) {
    const size_t this_size = static_cast<size_t>(offsets[g + 1] - offsets[g]);
    if (this_size > best_size) {
      best_size = this_size;
      best_group = g;
    }
    ++g;
  }

  const size_t mode_pos = static_cast<size_t>(offsets[best_group]);
  T mode_value {};
  size_t mode_row;
  bool mode_valid = false;
  if (best_size) {
    if (order.get_element(mode_pos, &mode_row)) {
      mode_valid = column->get_element(mode_row, &mode_value);
    }
  }
  set_mode(static_cast<V>(mode_value), mode_valid);
  set_nmodal(best_size, true);
}


void StringStats::compute_sorted_stats() {
  auto r = group({Column(column->clone())}, {SortFlag::NONE});
  RowIndex ri   = std::move(r.first);
  Groupby grpby = std::move(r.second);
  if (column->nrows() == 0) {
    grpby = Groupby::single_group(0);
  }

  const int32_t* groups = grpby.offsets_r();
  size_t n_groups = grpby.size();
  xassert(n_groups >= 1);

  // Sorting gathers all NA elements at the top (in the first group). Thus if
  // we did not yet compute the NA count for the column, we can do so now by
  // checking whether the elements in the first group are NA or not.
  if (!is_computed(Stat::NaCount)) {
    dt::CString x0;
    size_t ri0;
    bool isvalid = (ri.size() == 0) ||
                   (ri.get_element(0, &ri0) &&
                    column->get_element(ri0, &x0));
    set_nacount(isvalid? 0 : static_cast<size_t>(groups[1]));
  }

  bool has_nas = (_countna > 0);
  bool grp_empty = (n_groups == 1) && (groups[1] == 0);
  set_nunique(n_groups - has_nas - grp_empty, true);

  size_t max_group_size = 0;
  size_t largest_group_index = 0;
  for (size_t i = has_nas; i < n_groups; ++i) {
    size_t grpsize = static_cast<size_t>(groups[i + 1] - groups[i]);
    if (grpsize > max_group_size) {
      max_group_size = grpsize;
      largest_group_index = i;
    }
  }

  size_t ig = static_cast<size_t>(groups[largest_group_index]);
  dt::CString mode_value;
  size_t ri_ig;
  bool mode_valid = max_group_size &&
                    ri.get_element(ig, &ri_ig) &&
                    column->get_element(ri_ig, &mode_value);
  set_mode(mode_value, mode_valid);
  set_nmodal(max_group_size, true);
}


void BooleanStats::compute_sorted_stats() {
  compute_all_stats();
}




//------------------------------------------------------------------------------
// BooleanStats: compute all
//------------------------------------------------------------------------------

/**
 * For a boolean column, all statistics can be computed from just two
 * quantities: the count of 0s `n0`, and the count of 1s `n1`. Then:
 *
 *     n = n0 + n1
 *     µ = n1 / n
 *     s^2 = n0*n1 / (n*(n-1))
 *     G = (n0 - n1) / ((n-2) * s)
 *     K = (n-1)/((n-2)(n-3)) * ((n+1)*(n0^2 - n0*n1 + n1^2)/(n0*n1) - 3(n-1))
 */
void BooleanStats::compute_all_stats() {
  const size_t nrows = column->nrows();
  std::atomic<size_t> total_valid { 0 };
  std::atomic<size_t> total_ones { 0 };

  dt::parallel_region(
    dt::NThreads(column->allow_parallel_access()),
    [&] {
      // Counts are first accumulated in locals of the region body, and are
      // added to the shared atomic totals only once, after the loop.
      size_t local_valid = 0;
      size_t local_ones = 0;

      dt::nested_for_static(nrows,
        [&](size_t i) {
          int8_t value;
          if (column->get_element(i, &value)) {
            ++local_valid;
            local_ones += static_cast<size_t>(value);
          }
        });

      total_valid.fetch_add(local_valid);
      total_ones.fetch_add(local_ones);
    });

  // Every row that is not valid is an NA; every valid row that is not a 1
  // is counted as a 0.
  const size_t n_valid = total_valid.load();
  const size_t n_ones = total_ones.load();
  const size_t n_zeros = n_valid - n_ones;
  set_nacount(nrows - n_valid, true);
  set_all_stats(n_zeros, n_ones);
}


/**
 * Store every statistic of a boolean column, given the number of 0s (`n0`)
 * and the number of 1s (`n1`) among its valid elements.
 *
 * NUnique, NModal and Sum are always stored as valid; all other stats are
 * valid only if there is at least one valid element. Moments that require
 * more observations than are available are stored as 0.
 *
 * Note: when n > 2 and either `n0` or `n1` is zero, the standard deviation
 * is 0 and the skewness/kurtosis expressions divide by zero.
 */
void BooleanStats::set_all_stats(size_t n0, size_t n1) {
  const double n0d = static_cast<double>(n0);
  const double n1d = static_cast<double>(n1);
  const double n = n0d + n1d;
  const bool nonempty = (n > 0);

  double mean = 0.0;
  double stdev = 0.0;
  double skew = 0.0;
  double kurt = 0.0;
  if (nonempty) {
    mean = n1d / n;
  }
  if (n > 1) {
    stdev = std::sqrt(n0d * n1d / n / (n-1));
  }
  if (n > 2) {
    skew = (n0d - n1d) / (n-2) / stdev;
  }
  if (n > 3) {
    kurt = ((n0d*n0d - n0d*n1d + n1d*n1d) * (n+1)/n0d/n1d - 3.0*(n-1))
           * (n-1) / (n-2) / (n-3);
  }

  const size_t distinct = (n0 > 0 ? size_t(1) : size_t(0)) +
                          (n1 > 0 ? size_t(1) : size_t(0));
  set_nunique(distinct, true);
  set_nmodal(std::max(n0, n1), true);
  set_sum(static_cast<int64_t>(n1), true);
  set_mean(mean, nonempty);
  set_stdev(stdev, nonempty);
  set_skew(skew, nonempty);
  set_kurt(kurt, nonempty);
  // Min is 0 whenever any 0 is present; max is 1 whenever any 1 is present;
  // on a tie between 0s and 1s the mode is 0.
  set_min(static_cast<int64_t>(n0 ? 0 : 1), nonempty);
  set_max(static_cast<int64_t>(n1 ? 1 : 0), nonempty);
  set_mode(static_cast<int64_t>(n0 >= n1 ? 0 : 1), nonempty);
}



//------------------------------------------------------------------------------
// VoidStats: compute all
//------------------------------------------------------------------------------

// The integer sum of a void column is always a valid 0. The `isvalid`
// output flag is optional and may be null.
int64_t VoidStats::sum_int (bool* isvalid) {
  if (isvalid != nullptr) {
    *isvalid = true;
  }
  return 0;
}

// The floating-point sum of a void column is always a valid 0.0. The
// `isvalid` output flag is optional and may be null.
double VoidStats::sum_double (bool* isvalid) {
  if (isvalid != nullptr) {
    *isvalid = true;
  }
  return 0.0;
}

// The NA count of a void column is reported as the column's number of
// rows, and is always valid. The `isvalid` output flag may be null.
size_t VoidStats::nacount(bool* isvalid) {
  if (isvalid != nullptr) {
    *isvalid = true;
  }
  const size_t nrows = column->nrows();
  return nrows;
}

// The number of unique values in a void column is always a valid 0. The
// `isvalid` output flag is optional and may be null.
size_t VoidStats::nunique(bool* isvalid) {
  if (isvalid != nullptr) {
    *isvalid = true;
  }
  return 0;
}

// The modal count of a void column is always a valid 0. The `isvalid`
// output flag is optional and may be null.
size_t VoidStats::nmodal(bool* isvalid) {
  if (isvalid != nullptr) {
    *isvalid = true;
  }
  return 0;
}



//------------------------------------------------------------------------------
// Column's API
//------------------------------------------------------------------------------

/**
 * Create a new, empty Stats object of the class that corresponds to the
 * data stype of column `col`.
 *
 * Throws NotImplError if there is no Stats class for the column's type.
 */
static std::unique_ptr<Stats> _make_stats(const dt::ColumnImpl* col) {
  Stats* created = nullptr;
  switch (col->data_stype()) {
    case dt::SType::VOID:    created = new VoidStats(col); break;
    case dt::SType::BOOL:    created = new BooleanStats(col); break;
    case dt::SType::INT8:    created = new IntegerStats<int8_t>(col); break;
    case dt::SType::INT16:   created = new IntegerStats<int16_t>(col); break;
    case dt::SType::DATE32:  created = new DateStats(col); break;
    case dt::SType::INT32:   created = new IntegerStats<int32_t>(col); break;
    case dt::SType::TIME64:  created = new TimeStats(col); break;
    case dt::SType::INT64:   created = new IntegerStats<int64_t>(col); break;
    case dt::SType::FLOAT32: created = new RealStats<float>(col); break;
    case dt::SType::FLOAT64: created = new RealStats<double>(col); break;
    case dt::SType::STR32:
    case dt::SType::STR64:   created = new StringStats(col); break;
    case dt::SType::OBJ:     created = new PyObjectStats(col); break;
    default:
      throw NotImplError()
        << "Cannot create Stats object for a column with type `"
        << col->stype() << '`';
  }
  return std::unique_ptr<Stats>(created);
}

// Return the Stats object of this column, as provided by the column's
// implementation.
Stats* Column::stats() const {
  Stats* column_stats = impl_->stats();
  return column_stats;
}

// Return the Stats object attached to this column, creating it first if it
// does not exist yet.
Stats* dt::ColumnImpl::stats() const {
  if (stats_ == nullptr) {
    stats_ = _make_stats(this);
  }
  return stats_.get();
}

// Return the column's Stats object if it has already been created, or
// nullptr otherwise. Unlike `stats()`, this never creates a Stats object.
Stats* Column::get_stats_if_exist() const {
  const auto& existing = impl_->stats_;
  return existing.get();
}


void Column::reset_stats() {
  auto stats = get_stats_if_exist();
  if (stats) stats->reset();
}


// Return a clone of the column's Stats object, or an empty pointer if the
// column has no Stats object.
std::unique_ptr<Stats> Column::clone_stats() const {
  if (!impl_->stats_) {
    return std::unique_ptr<Stats>();
  }
  return impl_->stats_->clone();
}


/**
 * Replace the column's Stats object with `new_stats`, re-pointing the new
 * object at this column's implementation.
 *
 * `new_stats` may be an empty pointer (which is what `clone_stats()` returns
 * for a column that has no stats): in that case the column's existing Stats
 * object is simply dropped.
 */
void Column::replace_stats(std::unique_ptr<Stats>&& new_stats) {
  // Guard against dereferencing an empty pointer.
  if (new_stats) {
    new_stats->column = impl_;
  }
  impl_->stats_ = std::move(new_stats);
}


// Return true if the column has a Stats object and `stat` is marked as
// computed in it. No Stats object is created by this call.
bool Column::is_stat_computed(Stat stat) const {
  Stats* existing = get_stats_if_exist();
  if (existing == nullptr) {
    return false;
  }
  return existing->is_computed(stat);
}




//------------------------------------------------------------------------------
// Stats cloning
//------------------------------------------------------------------------------

/**
 * Create a copy of the Stats object `inp` of concrete class `S`: a new `S`
 * is constructed for the same column, and then the bytes of `inp` are
 * copied over it. This is only suitable for classes whose state can be
 * duplicated bytewise.
 */
template <typename S>
std::unique_ptr<Stats> Stats::_clone(const S* inp) const {
  S* copy = new S(inp->column);
  // The pointers are passed as void* so that the object is treated as raw
  // memory for the purposes of the copy.
  void* dst = static_cast<void*>(copy);
  const void* src = static_cast<const void*>(inp);
  std::memcpy(dst, src, sizeof(S));
  xassert(copy->_valid == inp->_valid);
  xassert(copy->_computed == inp->_computed);
  return std::unique_ptr<Stats>(copy);
}


std::unique_ptr<Stats> VoidStats::clone()       const { return this->_clone(this); }
template <typename T>
std::unique_ptr<Stats> RealStats<T>::clone()    const { return this->_clone(this); }
template <typename T>
std::unique_ptr<Stats> IntegerStats<T>::clone() const { return this->_clone(this); }

std::unique_ptr<Stats> DateStats::clone() const { return this->_clone(this); }
std::unique_ptr<Stats> TimeStats::clone() const { return this->_clone(this); }

std::unique_ptr<Stats> BooleanStats::clone()    const { return this->_clone(this); }
std::unique_ptr<Stats> PyObjectStats::clone()   const { return this->_clone(this); }

// StringStats contains a `std::string` object, which cannot be copied
// using memcpy.
std::unique_ptr<Stats> StringStats::clone() const {
  auto res = std::make_unique<StringStats>(column);
  res->_computed = this->_computed;
  res->_valid    = this->_valid;
  res->_countna  = this->_countna;
  res->_nunique  = this->_nunique;
  res->_nmodal   = this->_nmodal;
  res->mode_     = this->mode_;  // copy string
  return std::move(res);
}



//------------------------------------------------------------------------------
// Stats integrity checks
//------------------------------------------------------------------------------

/**
 * Absolute tolerance for comparing floating-point values `a` and `b`:
 * `tol` scaled by the larger of the two magnitudes, but never less than
 * `tol` itself.
 */
template <typename T>
inline T _tol(T a, T b, T tol) {
  return std::max(tol * std::max(std::abs(a), std::abs(b)), tol);
}

/**
 * Return true if the stat values `a` and `b` should be considered equal.
 * Non-floating-point types are compared exactly; see the specializations
 * below for `float` and `double`.
 */
template <typename T>
inline bool _equal(T a, T b) { return a == b; }

/**
 * Floating-point values are equal if they are within a relative tolerance
 * (1e-7 for float, 1e-12 for double) of each other. In addition:
 *   - two NaNs are equal to each other (and a NaN is not equal to anything
 *     else), so that a stat which is NaN both as stored and as recomputed
 *     is not reported as a mismatch;
 *   - the exact equality check ensures that inf == inf.
 */
template<>
inline bool _equal(float a, float b) {
  if (std::isnan(a) || std::isnan(b)) {
    return std::isnan(a) && std::isnan(b);
  }
  return (a == b) || (std::abs(a - b) < _tol(a, b, 1e-7f));
}

template<>
inline bool _equal(double a, double b) {
  if (std::isnan(a) || std::isnan(b)) {
    return std::isnan(a) && std::isnan(b);
  }
  return (a == b) || (std::abs(a - b) < _tol(a, b, 1e-12));
}

/**
 * Verify that statistic `stat`, if it is already computed in `curr_stats`,
 * agrees with the value obtained from `new_stats`: both the validity flags
 * and (when valid) the values, retrieved as type `T`, must match.
 *
 * Throws AssertionError on any mismatch. Does nothing if the stat is not
 * yet computed in `curr_stats`.
 */
template <typename T>
static void check_stat(Stat stat, Stats* curr_stats, Stats* new_stats) {
  if (curr_stats->is_computed(stat)) {
    T stored, recomputed;
    const bool stored_valid = curr_stats->get_stat(stat, &stored);
    const bool recomputed_valid = new_stats->get_stat(stat, &recomputed);
    if (stored_valid != recomputed_valid) {
      throw AssertionError()
        << "Stat " << stat_name(stat) << " is recorded as valid="
        << stored_valid << " in the Stats object, but was valid="
        << recomputed_valid << " upon re-checking";
    }
    // Values are only meaningful (and hence compared) when valid.
    if (stored_valid) {
      const bool same = _equal<dt::ref_t<T>>(stored, recomputed);
      if (!same) {
        throw AssertionError()
          << "Stat " << stat_name(stat) << "'s value is " << stored
          << ", but it was " << recomputed << " upon recalculation";
      }
    }
  }
}

/**
 * Check the internal consistency of this Stats object with respect to the
 * column `col` that it is attached to:
 *   - the object must refer back to `col`;
 *   - the dynamic class of the object must be the one that corresponds to
 *     the column's data stype (the same mapping as used by `_make_stats()`
 *     when the object is created);
 *   - every stat that is already computed must agree with the value
 *     obtained from a freshly created Stats object.
 *
 * Throws AssertionError if a recomputed stat disagrees, or if the column's
 * type is not recognized.
 */
void Stats::verify_integrity(const dt::ColumnImpl* col) {
  XAssert(column == col);
  // Dispatch on the data stype, consistently with `_make_stats()`.
  switch (col->data_stype()) {
    case dt::SType::VOID:    XAssert(dynamic_cast<VoidStats*>(this)); break;
    case dt::SType::BOOL:    XAssert(dynamic_cast<BooleanStats*>(this)); break;
    case dt::SType::INT8:    XAssert(dynamic_cast<IntegerStats<int8_t>*>(this)); break;
    case dt::SType::INT16:   XAssert(dynamic_cast<IntegerStats<int16_t>*>(this)); break;
    case dt::SType::DATE32:  XAssert(dynamic_cast<DateStats*>(this)); break;
    case dt::SType::INT32:   XAssert(dynamic_cast<IntegerStats<int32_t>*>(this)); break;
    case dt::SType::TIME64:  XAssert(dynamic_cast<TimeStats*>(this)); break;
    case dt::SType::INT64:   XAssert(dynamic_cast<IntegerStats<int64_t>*>(this)); break;
    case dt::SType::FLOAT32: XAssert(dynamic_cast<RealStats<float>*>(this)); break;
    case dt::SType::FLOAT64: XAssert(dynamic_cast<RealStats<double>*>(this)); break;
    case dt::SType::STR32:
    case dt::SType::STR64:   XAssert(dynamic_cast<StringStats*>(this)); break;
    case dt::SType::OBJ:     XAssert(dynamic_cast<PyObjectStats*>(this)); break;
    default: throw AssertionError() << "Unknown column type " << col->stype();
  }

  // Each stat is checked under every value type it may be requested as;
  // `check_stat()` skips the stats that have not been computed yet.
  auto new_stats = _make_stats(column);
  Stats* fresh = new_stats.get();
  check_stat<size_t>(Stat::NaCount, this, fresh);
  check_stat<size_t>(Stat::NUnique, this, fresh);
  check_stat<size_t>(Stat::NModal, this, fresh);
  check_stat<double>(Stat::Sum, this, fresh);
  check_stat<double>(Stat::Mean, this, fresh);
  check_stat<double>(Stat::StDev, this, fresh);
  check_stat<double>(Stat::Skew, this, fresh);
  check_stat<double>(Stat::Kurt, this, fresh);
  check_stat<double>(Stat::Min, this, fresh);
  check_stat<double>(Stat::Max, this, fresh);
  check_stat<double>(Stat::Mode, this, fresh);
  check_stat<int64_t>(Stat::Min, this, fresh);
  check_stat<int64_t>(Stat::Max, this, fresh);
  check_stat<int64_t>(Stat::Mode, this, fresh);
  check_stat<dt::CString>(Stat::Mode, this, fresh);
}




//------------------------------------------------------------------------------
// Stats "pyobject" getter
//------------------------------------------------------------------------------

/**
 * Retrieve statistic `stat` as a value of type `S`, and wrap it into a
 * python object. An invalid stat is returned as python None.
 */
template <typename S>
py::oobj Stats::pywrap_stat(Stat stat) {
  S stat_value;
  const bool stat_valid = get_stat(stat, &stat_value);
  if (!stat_valid) {
    return py::None();
  }
  return py::oobj::wrap(stat_value);
}


/**
 * Return the value of statistic `stat` as a python object; an invalid stat
 * is returned as None.
 *
 * The python type of the result depends on the stat and on the type of the
 * column's data:
 *   - NaCount, NUnique, NModal are retrieved as `size_t`;
 *   - Mean is a datetime for temporal columns, and a double otherwise;
 *   - StDev, Skew, Kurt are retrieved as `double`;
 *   - Sum is retrieved as `int64_t` for void/boolean/integer data, as
 *     `double` for real data, and is None otherwise;
 *   - Min, Max, Mode follow the data type: int, real, string, or
 *     date/datetime; for any other type the result is None.
 *
 * Throws NotImplError for any other stat.
 */
py::oobj Stats::get_stat_as_pyobject(Stat stat) {
  switch (stat) {
    case Stat::NaCount:
    case Stat::NUnique:
    case Stat::NModal: {
      return pywrap_stat<size_t>(stat);
    }
    case Stat::Mean: {
      if (column->type().is_temporal()) {
        double value;
        bool isvalid = get_stat(stat, &value);
        if (!isvalid) return py::None();
        return py::odatetime(static_cast<int64_t>(value));
      } else {
        return pywrap_stat<double>(stat);
      }
    }
    case Stat::StDev:
    case Stat::Skew:
    case Stat::Kurt: {
      return pywrap_stat<double>(stat);
    }
    case Stat::Sum: {
      switch (dt::stype_to_ltype(column->data_stype())) {
        case dt::LType::MU:
        case dt::LType::BOOL:
        case dt::LType::INT:  return pywrap_stat<int64_t>(stat);
        case dt::LType::REAL: return pywrap_stat<double>(stat);
        default: return py::None();
      }
    }
    case Stat::Min:
    case Stat::Max:
    case Stat::Mode: {
      const dt::SType data_stype = column->data_stype();
      switch (dt::stype_to_ltype(data_stype)) {
        case dt::LType::BOOL:
        case dt::LType::INT:  return pywrap_stat<int64_t>(stat);
        case dt::LType::REAL: return pywrap_stat<double>(stat);
        case dt::LType::STRING: return pywrap_stat<dt::CString>(stat);
        case dt::LType::DATETIME: {
          int64_t value;
          bool isvalid = get_stat(stat, &value);
          if (!isvalid) return py::None();
          // The date-vs-datetime decision must be based on the same data
          // stype that selected this branch.
          if (data_stype == dt::SType::DATE32) {
            return py::odate(static_cast<int32_t>(value));
          } else {
            return py::odatetime(value);
          }
        }
        default: return py::None();
      }
    }
    default:
      throw NotImplError() << "Cannot handle stat " << stat_name(stat);
  }
}




//------------------------------------------------------------------------------
// Stats "Column" getter
//------------------------------------------------------------------------------

// Create a single-row column of type `stype` whose only element is `value`,
// stored as a `T`.
template <typename T>
static Column _make_column(dt::SType stype, T value) {
  Buffer data = Buffer::mem(sizeof(T));
  data.set_element<T>(0, value);
  Column single = Column::new_mbuf_column(1, stype, std::move(data));
  xassert(single.nrows() == 1);
  return single;
}

// Create a single-row NA column of type `stype`.
static Column _make_nacol(dt::SType stype) {
  constexpr size_t nrows = 1;
  return Column::new_na_column(nrows, stype);
}

/**
 * Create a single-row string column containing `value`.
 *
 * Two buffers are prepared: one with two `uint32_t` entries, and one with
 * the characters of the string. The first entry is always 0; the second is
 * either the length of the string, or the NA marker if `value` is NA (in
 * which case the character buffer is left empty).
 */
static Column _make_column_str(const dt::CString& value) {
  using entry_t = uint32_t;
  Buffer entries = Buffer::mem(sizeof(entry_t) * 2);
  Buffer chars;
  entries.set_element<entry_t>(0, 0);
  if (value.isna()) {
    entries.set_element<entry_t>(1, dt::GETNA<entry_t>());
  } else {
    const size_t nbytes = value.size();
    entries.set_element<entry_t>(1, static_cast<entry_t>(nbytes));
    chars.resize(nbytes);
    // Nothing to copy for an empty string.
    if (nbytes != 0) {
      std::memcpy(chars.wptr(), value.data(), nbytes);
    }
  }
  return Column::new_string_column(1, std::move(entries), std::move(chars));
}


/**
 * Retrieve statistic `stat` as a value of type `S`, and return it as a
 * single-row column of type `stype` with element type `R`. An invalid stat
 * produces a single-row NA column of the same stype.
 */
template <typename S, typename R>
Column Stats::colwrap_stat(Stat stat, dt::SType stype) {
  S stat_value;
  const bool stat_valid = get_stat(stat, &stat_value);
  if (!stat_valid) {
    return _make_nacol(stype);
  }
  return _make_column<R>(stype, static_cast<R>(stat_value));
}

// Retrieve the string-valued statistic `stat` and return it as a single-row
// string column. An invalid stat produces a single-row NA column having the
// data stype of the source column.
Column Stats::strcolwrap_stat(Stat stat) {
  dt::CString stat_value;
  const bool stat_valid = get_stat(stat, &stat_value);
  if (!stat_valid) {
    return _make_nacol(column->data_stype());
  }
  return _make_column_str(stat_value);
}


/**
 * Return the value of statistic `stat` as a single-row Column; an invalid
 * stat is returned as a single-row NA column.
 *
 * The stype of the result depends on the stat and on the column's data
 * stype:
 *   - NaCount, NUnique, NModal: INT64;
 *   - Mean: TIME64 for temporal columns, FLOAT64 otherwise;
 *   - StDev, Skew, Kurt: FLOAT64;
 *   - Sum: INT64 for void/boolean/integer data, the data's own stype for
 *     real data, and an NA column of type FLOAT64 otherwise;
 *   - Min, Max, Mode: same stype as the data.
 *
 * Throws NotImplError for any other stat.
 */
Column Stats::get_stat_as_column(Stat stat) {
  switch (stat) {
    case Stat::NaCount:
    case Stat::NUnique:
    case Stat::NModal: return colwrap_stat<size_t, int64_t>(stat, dt::SType::INT64);
    case Stat::Mean: {
      if (column->type().is_temporal()) {
        return colwrap_stat<double, int64_t>(stat, dt::SType::TIME64);
      } else {
        return colwrap_stat<double, double>(stat, dt::SType::FLOAT64);
      }
    }
    case Stat::StDev:
    case Stat::Skew:
    case Stat::Kurt: {
      return colwrap_stat<double, double>(stat, dt::SType::FLOAT64);
    }
    case Stat::Sum: {
      switch (column->data_stype()) {
        case dt::SType::VOID:
        case dt::SType::BOOL:
        case dt::SType::INT8:
        case dt::SType::INT16:
        case dt::SType::INT32:
        case dt::SType::INT64:   return colwrap_stat<int64_t, int64_t>(stat, dt::SType::INT64);
        case dt::SType::FLOAT32: return colwrap_stat<double, float>(stat, dt::SType::FLOAT32);
        case dt::SType::FLOAT64: return colwrap_stat<double, double>(stat, dt::SType::FLOAT64);
        default:                 return _make_nacol(dt::SType::FLOAT64);
      }
    }
    case Stat::Min:
    case Stat::Max:
    case Stat::Mode: {
      switch (column->data_stype()) {
        case dt::SType::BOOL:    return colwrap_stat<int64_t, int8_t>(stat, dt::SType::BOOL);
        case dt::SType::INT8:    return colwrap_stat<int64_t, int8_t>(stat, dt::SType::INT8);
        case dt::SType::INT16:   return colwrap_stat<int64_t, int16_t>(stat, dt::SType::INT16);
        case dt::SType::INT32:   return colwrap_stat<int64_t, int32_t>(stat, dt::SType::INT32);
        case dt::SType::INT64:   return colwrap_stat<int64_t, int64_t>(stat, dt::SType::INT64);
        case dt::SType::FLOAT32: return colwrap_stat<double, float>(stat, dt::SType::FLOAT32);
        case dt::SType::FLOAT64: return colwrap_stat<double, double>(stat, dt::SType::FLOAT64);
        case dt::SType::STR32:
        case dt::SType::STR64:   return strcolwrap_stat(stat);
        case dt::SType::DATE32:  return colwrap_stat<int64_t, int32_t>(stat, dt::SType::DATE32);
        case dt::SType::TIME64:  return colwrap_stat<int64_t, int64_t>(stat, dt::SType::TIME64);
        default:                 return _make_nacol(column->data_stype());
      }
    }
    default:
      // Report which stat could not be handled, the same way as the
      // python-object getter does.
      throw NotImplError() << "Cannot handle stat " << stat_name(stat);
  }
}


//------------------------------------------------------------------------------
// Date and Time stats
//------------------------------------------------------------------------------

// For time and date columns the sum and the standard deviation are always
// reported as invalid.

int64_t TimeStats::sum_int (bool* isvalid) {
  return _invalid<int64_t>(isvalid);
}

double TimeStats::sum_double (bool* isvalid) {
  return _invalid<double>(isvalid);
}

double TimeStats::stdev (bool* isvalid) {
  return _invalid<double>(isvalid);
}


int64_t DateStats::sum_int (bool* isvalid) {
  return _invalid<int64_t>(isvalid);
}

double DateStats::sum_double (bool* isvalid) {
  return _invalid<double>(isvalid);
}

double DateStats::stdev (bool* isvalid) {
  return _invalid<double>(isvalid);
}


/**
 * Return the mean of a date column, expressed in nanoseconds rather than
 * in days, so that it can be further cast into `time64`. The validity of
 * the stat is reported via `isvalid`.
 *
 * The unit conversion is applied only once, at the moment when the mean is
 * first computed.
 */
double DateStats::mean (bool* isvalid) {
  const bool already_computed = is_computed(Stat::Mean);
  if (!already_computed) {
    constexpr int64_t NANOSECONDS_PER_DAY = 24LL * 3600LL * 1000000000LL;
    compute_moments12();
    // Convert the freshly computed mean from days into nanoseconds.
    _mean *= NANOSECONDS_PER_DAY;
  }
  _fill_validity_flag(Stat::Mean, isvalid);
  return _mean;
}


//------------------------------------------------------------------------------
// Instantiate templates
//------------------------------------------------------------------------------

// Explicit instantiations of the Stats class templates for every element
// type that `_make_stats()` creates.
template class RealStats<float>;
template class RealStats<double>;
template class IntegerStats<int8_t>;
template class IntegerStats<int16_t>;
template class IntegerStats<int32_t>;
template class IntegerStats<int64_t>;
// NOTE(review): presumably needed by classes that derive from
// NumericStats<int64_t> directly (e.g. the date/time stats) -- confirm.
template class NumericStats<int64_t>;
