/*
 * Copyright (C) 2008-2009 Sébastien Villemot <sebastien.villemot@ens.fr>
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <iostream>
#include <cmath>
#include <climits>
#include <cassert>
#include <fstream>
#include <sstream>
#include <iomanip>

#include <gsl/gsl_randist.h>
#include <gsl/gsl_blas.h>
#include <gsl/gsl_linalg.h>
#include <gsl/gsl_cdf.h>
#include <gsl/gsl_qrng.h>

#include "SolutionTester.hh"

/*
  Builds a tester bound to the model solution modsol_arg.

  int_meth_arg selects the integration method used by euler_errors(), and
  dump_arg tells the accuracy tests whether to write their results to CSV files.

  The monomial_deg5_* members are the constants of the degree 5 monomial rule
  used in euler_errors_monomial_deg5(): dimension d, radii r and s, and the
  weights A, B and D (all expressed through V = pi^(d/2)).
*/
SolutionTester::SolutionTester(ModelSolution &modsol_arg, integration_method int_meth_arg, bool dump_arg)
  : modsol(modsol_arg),
    n(modsol.modspec.n),
    // Length of the variables vector: 5 blocks of n entries, plus one
    ny(5*n+1),
    int_meth(int_meth_arg),
    dump(dump_arg),
    // Integration is done over the n+1 shocks
    monomial_deg5_d(n+1),
    // Distance to the origin of the points having one non-zero coordinate
    monomial_deg5_r(sqrt(1.0+0.5*monomial_deg5_d)),
    // Absolute value of the coordinates of the points having two non-zero coordinates
    monomial_deg5_s(sqrt(0.5+0.25*monomial_deg5_d)),
    monomial_deg5_V(pow(M_PI, monomial_deg5_d/2.0)),
    // Weight of the point at the origin
    monomial_deg5_A(2.0/(monomial_deg5_d+2.0)*monomial_deg5_V),
    // Weight of the points having one non-zero coordinate
    monomial_deg5_B((4.0-monomial_deg5_d)/(2.0*pow(monomial_deg5_d+2.0, 2.0))*monomial_deg5_V),
    // Weight of the points having two non-zero coordinates
    monomial_deg5_D(monomial_deg5_V/pow(monomial_deg5_d+2.0, 2.0))
{
}

// Values from http://mathworld.wolfram.com/Hermite-GaussQuadrature.html
/* Nodes of the one-dimensional Gauss-Hermite rule used by
   euler_errors_gauss_hermite(). The entry at a given index goes with the entry
   of hermite_weights at the same index: both arrays are read with the same
   subscript there. The nodes come in +/- pairs. */
const double SolutionTester::hermite_nodes[HERMITE_NODES_NB] = {
  sqrt((3.0-sqrt(6.0))/2.0),
  -sqrt((3.0-sqrt(6.0))/2.0),
  sqrt((3.0+sqrt(6.0))/2.0),
  -sqrt((3.0+sqrt(6.0))/2.0)
};

/* Weights of the one-dimensional Gauss-Hermite rule, listed in the same order
   as hermite_nodes (two opposite nodes share the same weight).
   euler_errors_gauss_hermite() multiplies each of them by 1/sqrt(pi) before
   use. */
const double SolutionTester::hermite_weights[HERMITE_NODES_NB] = {
  sqrt(M_PI)/(4.0*(3.0-sqrt(6.0))),
  sqrt(M_PI)/(4.0*(3.0-sqrt(6.0))),
  sqrt(M_PI)/(4.0*(3.0+sqrt(6.0))),
  sqrt(M_PI)/(4.0*(3.0+sqrt(6.0)))
};

/*
  Computes the equation errors at one point, integrating the forward part of
  the Euler equations over the n+1 next-period shocks with a product
  Gauss-Hermite rule (HERMITE_NODES_NB nodes per dimension).

  y_prev, y_curr: variables at the previous and current dates
  shocks_curr:    shocks of the current date
  errors:         output, filled by modsol.modspec.errors()

  The grid has HERMITE_NODES_NB^(n+1) points, which must fit in an int.
*/
void
SolutionTester::euler_errors_gauss_hermite(const gsl_vector *y_prev, const gsl_vector *y_curr,
                                           const gsl_vector *shocks_curr, gsl_vector *errors)
{
  const int ny = y_prev->size;

  // Compute the expectancy of the forward part of the Euler equation
  gsl_vector *fwd_mean = gsl_vector_alloc(n);
  gsl_vector_set_zero(fwd_mean);

  gsl_vector *shocks_next = gsl_vector_alloc(n+1);
  gsl_vector *y_next = gsl_vector_alloc(ny);
  gsl_vector *fwd_point = gsl_vector_alloc(n);

  // Total number of grid points, computed once instead of at every loop test
  const double nb_points = pow(HERMITE_NODES_NB, n+1);
  assert(nb_points <= ((double) INT_MAX));

  /* Iteration over the grid in n+1 dimensions is done using an integer
     which is then interpreted as n+1 digits in base HERMITE_NODES_NB */
  for(int point = 0; point < nb_points; point++)
    {
      double weight = 1;
      int idx = point;
      for(int i = 0; i < n+1; i++)
        {
          const int node = idx % HERMITE_NODES_NB;
          gsl_vector_set(shocks_next, i, hermite_nodes[node]);
          // M_2_SQRTPI/2 is 1/sqrt(pi)
          weight *= hermite_weights[node] * M_2_SQRTPI / 2.0;
          idx /= HERMITE_NODES_NB;
        }

      // Change of variable from the Gauss-Hermite nodes to the shocks
      gsl_vector_scale(shocks_next, M_SQRT2);
      modsol.policy_func(y_curr, shocks_next, y_next);
      modsol.modspec.forward_part(y_prev, y_curr, y_next, shocks_curr, fwd_point);
      gsl_vector_scale(fwd_point, weight);
      gsl_vector_add(fwd_mean, fwd_point);
    }

  // Compute Euler errors for all equations
  modsol.modspec.errors(y_prev, y_curr, fwd_mean, shocks_curr, errors);

  gsl_vector_free(fwd_point);
  gsl_vector_free(y_next);
  gsl_vector_free(shocks_next);
  gsl_vector_free(fwd_mean);
}

/*
  Computes the equation errors at one point, integrating the forward part of
  the Euler equations over the n+1 next-period shocks with the degree 5
  monomial rule whose constants are set up in the constructor.

  The rule evaluates the integrand at the origin (weight A), at the points
  having one coordinate equal to +/- r (weight B) and at the points having two
  coordinates equal to +/- s (weight D). Coordinates are multiplied by sqrt(2)
  and the sum is finally divided by sqrt(pi)^d.

  y_prev, y_curr: variables at the previous and current dates
  shocks_curr:    shocks of the current date
  errors:         output, filled by modsol.modspec.errors()
*/
void
SolutionTester::euler_errors_monomial_deg5(const gsl_vector *y_prev, const gsl_vector *y_curr,
                                           const gsl_vector *shocks_curr, gsl_vector *errors)
{
  const int ny = y_prev->size;
  const int dim = n+1;

  // Points are visited with the negative sign first, then the positive one
  const double signs[2] = { -1.0, 1.0 };
  const double axis_coord = M_SQRT2 * monomial_deg5_r;
  const double pair_coord = M_SQRT2 * monomial_deg5_s;

  gsl_vector *shocks_next = gsl_vector_alloc(dim);
  gsl_vector *y_next = gsl_vector_alloc(ny);
  gsl_vector *fwd_point = gsl_vector_alloc(n);

  // Accumulator for the expectancy of the forward part of the Euler equation
  gsl_vector *fwd_mean = gsl_vector_alloc(n);
  gsl_vector_set_zero(fwd_mean);

  // Term at the origin
  gsl_vector_set_zero(shocks_next);
  modsol.policy_func(y_curr, shocks_next, y_next);
  modsol.modspec.forward_part(y_prev, y_curr, y_next, shocks_curr, fwd_point);
  gsl_vector_scale(fwd_point, monomial_deg5_A);
  gsl_vector_add(fwd_mean, fwd_point);

  // Points with a single non-zero coordinate
  for(int i = 0; i < dim; i++)
    for(int si = 0; si < 2; si++)
      {
        gsl_vector_set_zero(shocks_next);
        gsl_vector_set(shocks_next, i, signs[si] * axis_coord);

        modsol.policy_func(y_curr, shocks_next, y_next);
        modsol.modspec.forward_part(y_prev, y_curr, y_next, shocks_curr, fwd_point);
        gsl_vector_scale(fwd_point, monomial_deg5_B);
        gsl_vector_add(fwd_mean, fwd_point);
      }

  // Points with two non-zero coordinates i < j; the sign of coordinate i varies fastest
  for(int i = 0; i < dim-1; i++)
    for(int j = i+1; j < dim; j++)
      for(int sj = 0; sj < 2; sj++)
        for(int si = 0; si < 2; si++)
          {
            gsl_vector_set_zero(shocks_next);
            gsl_vector_set(shocks_next, i, signs[si] * pair_coord);
            gsl_vector_set(shocks_next, j, signs[sj] * pair_coord);

            modsol.policy_func(y_curr, shocks_next, y_next);
            modsol.modspec.forward_part(y_prev, y_curr, y_next, shocks_curr, fwd_point);
            gsl_vector_scale(fwd_point, monomial_deg5_D);
            gsl_vector_add(fwd_mean, fwd_point);
          }

  // Rescale by 1/sqrt(PI)^d
  gsl_vector_scale(fwd_mean, pow(M_PI, -monomial_deg5_d/2.0));

  // Compute Euler errors for all equations
  modsol.modspec.errors(y_prev, y_curr, fwd_mean, shocks_curr, errors);

  gsl_vector_free(fwd_point);
  gsl_vector_free(y_next);
  gsl_vector_free(shocks_next);
  gsl_vector_free(fwd_mean);
}

/*
  Computes the equation errors at one point, approximating the expectancy of
  the forward part of the Euler equations by the average over QMC_POINTS_NB
  gaussian draws obtained from a Niederreiter quasi-random sequence.

  The program exits if there are more than 12 shocks (n+1 > 12).

  y_prev, y_curr: variables at the previous and current dates
  shocks_curr:    shocks of the current date
  errors:         output, filled by modsol.modspec.errors()
*/
void
SolutionTester::euler_errors_quasi_monte_carlo(const gsl_vector *y_prev, const gsl_vector *y_curr,
                                               const gsl_vector *shocks_curr, gsl_vector *errors)
{
  const int ny = y_prev->size;
  const int nshocks = n+1;

  // Accumulator for the forward part of the Euler equation
  gsl_vector *fwd_mean = gsl_vector_alloc(n);
  gsl_vector_set_zero(fwd_mean);

  gsl_vector *shocks_next = gsl_vector_alloc(nshocks);
  gsl_vector *y_next = gsl_vector_alloc(ny);
  gsl_vector *fwd_point = gsl_vector_alloc(n);

  // The Niederreiter generator is not used above 12 dimensions
  if (nshocks > 12)
    {
      std::cerr << "SolutionTester::euler_errors_quasi_monte_carlo: too high dimensionality for Niederreiter generator" << std::endl;
      exit(EXIT_FAILURE);
    }
  gsl_qrng *qrng = gsl_qrng_alloc(gsl_qrng_niederreiter_2, nshocks);

  // Sum the forward part over all the draws, then divide by their number
  for(int draw = 0; draw < QMC_POINTS_NB; draw++)
    {
      gaussian_draw(shocks_next, qrng);
      modsol.policy_func(y_curr, shocks_next, y_next);
      modsol.modspec.forward_part(y_prev, y_curr, y_next, shocks_curr, fwd_point);
      gsl_vector_add(fwd_mean, fwd_point);
    }
  gsl_vector_scale(fwd_mean, 1.0/((double) QMC_POINTS_NB));

  // Compute Euler errors for all equations
  modsol.modspec.errors(y_prev, y_curr, fwd_mean, shocks_curr, errors);

  gsl_qrng_free(qrng);
  gsl_vector_free(fwd_point);
  gsl_vector_free(y_next);
  gsl_vector_free(shocks_next);
  gsl_vector_free(fwd_mean);
}

/*
  Computes the equation errors at one point, dispatching to the integration
  routine selected by int_meth.

  With GaussHermiteAndMonomialDegree5, Gauss-Hermite is used as long as the
  number of shocks (n+1) does not exceed MAX_GAUSS_HERMITE_DIM, and the
  monomial rule otherwise.

  y_prev, y_curr and errors must have the same size; shocks_curr must have
  n+1 elements.

  In DEBUG builds, the difference between the monomial and the Gauss-Hermite
  results is additionally printed on standard output.
*/
void
SolutionTester::euler_errors(const gsl_vector *y_prev, const gsl_vector *y_curr,
                             const gsl_vector *shocks_curr, gsl_vector *errors)
{
  assert(y_prev->size == y_curr->size && y_prev->size == errors->size
         && (int) shocks_curr->size == n+1);

#ifdef DEBUG
  // Compare GH and Monomial methods
  gsl_vector *monomial_errs = gsl_vector_alloc(ny);
  euler_errors_gauss_hermite(y_prev, y_curr, shocks_curr, errors);
  euler_errors_monomial_deg5(y_prev, y_curr, shocks_curr, monomial_errs);
  gsl_vector_sub(monomial_errs, errors);
  std::cout << "Diff:" << std::endl;
  gsl_vector_fprintf(stdout, monomial_errs, "%.10g");
  std::cout << std::endl;
  gsl_vector_free(monomial_errs);
#endif

  switch(int_meth)
    {
    case GaussHermiteOnly:
      euler_errors_gauss_hermite(y_prev, y_curr, shocks_curr, errors);
      break;
    case MonomialDegree5Only:
      euler_errors_monomial_deg5(y_prev, y_curr, shocks_curr, errors);
      break;
    case QuasiMonteCarloOnly:
      euler_errors_quasi_monte_carlo(y_prev, y_curr, shocks_curr, errors);
      break;
    case GaussHermiteAndMonomialDegree5:
      // Gauss-Hermite in low dimension, monomial rule above the limit
      if (n+1 > MAX_GAUSS_HERMITE_DIM)
        euler_errors_monomial_deg5(y_prev, y_curr, shocks_curr, errors);
      else
        euler_errors_gauss_hermite(y_prev, y_curr, shocks_curr, errors);
      break;
    }
}

/*
  Simulates the model with the policy function of the solution.

  init:     starting point; on output it contains the state reached after the
            warmup period (it is updated in place)
  sims:     T x ny output matrix, row t containing the variables at date t
  shocks:   T x (n+1) output matrix, row t containing the shocks drawn at date t
  warmup:   number of initial periods to simulate and discard (values <= 0 mean
            no warmup)
  rng_seed: seed of the Mersenne Twister generator

  Shocks are independent standard gaussian draws.
*/
void
SolutionTester::simulate_model(gsl_vector *init, gsl_matrix *sims, gsl_matrix *shocks,
                               int warmup, unsigned long rng_seed)
{
  const int T = sims->size1;
  assert((int) shocks->size1 == T && (int) shocks->size2 == n+1 && init->size == sims->size2);

  const int ny = init->size;

  // Initialize Mersenne Twister random number generator
  gsl_rng *rng = gsl_rng_alloc(gsl_rng_mt19937);
  gsl_rng_set(rng, rng_seed);

  // Simulate the initialization period, by updating init
  gsl_vector *y_tmp = gsl_vector_alloc(ny);
  gsl_vector *shocks_tmp = gsl_vector_alloc(n+1);
  // Counting upwards makes a negative warmup a no-op instead of a runaway loop
  for(int w = 0; w < warmup; w++)
    {
      for(int j = 0; j < n+1; j++)
        gsl_vector_set(shocks_tmp, j, gsl_ran_ugaussian(rng));

      modsol.policy_func(init, shocks_tmp, y_tmp);
      gsl_vector_memcpy(init, y_tmp);
    }
  gsl_vector_free(shocks_tmp);
  gsl_vector_free(y_tmp);

  // Simulate what we're interested in
  for(int t = 0; t < T; t++)
    {
      for(int j = 0; j < n+1; j++)
        gsl_matrix_set(shocks, t, j, gsl_ran_ugaussian(rng));

      /* Views are stored in named objects: taking the address of a member of
         a temporary view is not valid C++ */
      gsl_vector_const_view y_lag = gsl_matrix_const_row(sims, t == 0 ? 0 : t-1);
      gsl_vector_const_view shocks_t = gsl_matrix_const_row(shocks, t);
      gsl_vector_view y_t = gsl_matrix_row(sims, t);

      // The first period starts from init, the following ones from the previous row
      const gsl_vector *y_prev = (t == 0) ? init : &y_lag.vector;

      modsol.policy_func(y_prev, &shocks_t.vector, &y_t.vector);
    }

  gsl_rng_free(rng);
}

void
SolutionTester::prewhiten(const gsl_matrix *h, gsl_matrix *A, gsl_matrix *resid) throw (SingularityException)
{
  const int T = h->size1;
  const int r = h->size2;

  assert((int) A->size1 == r && (int) A->size2 == r && (int) resid->size1 == T && (int) resid->size2 == r);

  // Compute VAR(1) coefficient matrix
  gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1,
                 &gsl_matrix_const_submatrix(h, 1, 0, T-1, r).matrix,
                 &gsl_matrix_const_submatrix(h, 0, 0, T-1, r).matrix,
                 0, A);

  gsl_matrix *B = gsl_matrix_alloc(r, r);
  // We fill in the lower part of B, since GSL's Cholesky uses the lower part
  gsl_blas_dsyrk(CblasLower, CblasTrans, 1,
                 &gsl_matrix_const_submatrix(h, 0, 0, T-1, r).matrix,
                 0, B);

  // This fills both the upper and the lower part of B with the computed triangle
  gsl_error_handler_t *old_handler = gsl_set_error_handler_off();
  int err = gsl_linalg_cholesky_decomp(B);
  gsl_set_error_handler(old_handler);
  switch(err)
    {
    case 0:
      break;
    case GSL_EDOM:
      gsl_matrix_free(B);
      throw SingularityException();
    default:
      std::cerr << gsl_strerror(err);
      exit(EXIT_FAILURE);
    }

  // Right-multiply A by the inverse of B
  gsl_blas_dtrsm(CblasRight, CblasUpper, CblasNoTrans, CblasNonUnit, 1, B, A);
  gsl_blas_dtrsm(CblasRight, CblasLower, CblasNoTrans, CblasNonUnit, 1, B, A);

  // Compute VAR(1) residuals
  gsl_matrix_memcpy(resid, h);

  gsl_blas_dgemm(CblasNoTrans, CblasTrans, -1,
                 &gsl_matrix_const_submatrix(h, 0, 0, T-1, r).matrix,
                 A, 1, &gsl_matrix_submatrix(resid, 1, 0, T-1, r).matrix);

  gsl_matrix_free(B);
}

/*
  Computes a data-dependent bandwidth for the Newey-West estimator.

  w:     weight vector (r elements), used to collapse each row of resid to a scalar
  resid: T x r matrix of residuals; row 0 is left out of the autocovariances

  The autocovariances of resid*w up to lag nn = floor(4*(T/100)^(2/9)) are
  combined into s0 and s1, and the returned value is
  floor(1.1447 * ((s1/s0)^2)^(1/3) * T^(1/3)).
*/
int
SolutionTester::newey_west_bandwith(const gsl_vector *w, const gsl_matrix *resid)
{
  const int T = resid->size1;
  const int r = resid->size2;

  assert((int) w->size == r);

  // Number of autocovariances entering s0 and s1
  const int nn = (int) floor(4.0*pow(((double) T) / 100.0, 2.0/9.0));

  // Scalar series obtained by weighting the columns of resid
  gsl_vector *wh = gsl_vector_alloc(T);
  gsl_blas_dgemv(CblasNoTrans, 1, resid, w, 0, wh);

  /* s0 = sigma_0 + 2*sum_{j>=1} sigma_j and s1 = 2*sum_{j>=1} j*sigma_j, where
     sigma_j is the autocovariance at lag j; each sigma_j is added as soon as
     it is known */
  double s0 = 0;
  double s1 = 0;
  for(int lag = 0; lag <= nn; lag++)
    {
      double autocov = 0;
      for(int t = lag+1; t < T; t++)
        autocov += gsl_vector_get(wh, t) * gsl_vector_get(wh, t-lag);
      autocov /= (double) (T-1);

      if (lag == 0)
        s0 = autocov;
      else
        {
          s1 += 2.0*((double) lag)*autocov;
          s0 += 2.0*autocov;
        }
    }

  gsl_vector_free(wh);

  const double gamma = 1.1447 * pow(pow(s1/s0, 2.0), 1.0/3.0);

  return ((int) floor(gamma * pow((double) T, 1.0/3.0)));
}

void
SolutionTester::inv_newey_west_estimator(const gsl_matrix *kron, gsl_matrix *inv_estim) throw (SingularityException)
{
#ifdef DEBUG
  //  std::cout << "Prewhitening..." << std::endl;
#endif
  assert(kron->size2 == inv_estim->size1 && inv_estim->size1 == inv_estim->size2);

  const int T = kron->size1;
  const int nk = kron->size2;

  // Prewithen the elements of kron
  gsl_matrix *A = gsl_matrix_alloc(nk, nk);
  gsl_matrix *resid = gsl_matrix_alloc(T, nk);
  prewhiten(kron, A, resid);

  // Compute bandwith m (for weight vector w=(1 1 ... 1)')
  gsl_vector *w = gsl_vector_alloc(nk);
  gsl_vector_set_all(w, 1);
  int m = newey_west_bandwith(w, resid);
#ifdef DEBUG
  std::cout << "bandwith=" << m;
#endif

  if (m >= T-1)
    {
#ifdef DEBUG
      std::cout << ", (too big, diminishing it)";
#endif
      m = T-2;
    }

  // Computes estimator for whitened residuals
  gsl_matrix *S = gsl_matrix_alloc(nk, nk);
  gsl_matrix_set_zero(S);
  
  gsl_matrix *omega = gsl_matrix_alloc(nk, nk);
  for(int j = 0; j <= m; j++)
    {
      gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0/(((double) T) - 1.0),
                     &gsl_matrix_submatrix(resid, j+1, 0, T-1-j, nk).matrix,
                     &gsl_matrix_submatrix(resid, 1, 0, T-1-j, nk).matrix,
                     0, omega);

      if (j == 0)
        gsl_matrix_add(S, omega);
      else
        {
          gsl_matrix_scale(omega, 1.0 - ((double) j)/(((double) m)+1.0));
          gsl_matrix_add(S, omega);
          gsl_matrix_transpose(omega);
          gsl_matrix_add(S, omega);
        }
    }

  gsl_matrix_free(resid);
  gsl_matrix_free(omega);
  gsl_vector_free(w);

#ifdef DEBUG
  //  print_matrix(S);
#endif

  // Put (I-A)' in M
  gsl_matrix *M = gsl_matrix_alloc(nk, nk);
  gsl_matrix_set_identity(M);
  gsl_matrix_sub(M, A);
  gsl_matrix_transpose(M);

  // Right-multiply M by inv(S)
  gsl_error_handler_t *old_handler = gsl_set_error_handler_off();
  int err = gsl_linalg_cholesky_decomp(S);
  gsl_set_error_handler(old_handler);
  switch(err)
    {
    case 0:
      break;
    case GSL_EDOM:
      gsl_matrix_free(M);
      gsl_matrix_free(S);
      gsl_matrix_free(A);
      throw SingularityException();
    default:
      std::cerr << gsl_strerror(err);
      exit(EXIT_FAILURE);
    }
  gsl_blas_dtrsm(CblasRight, CblasUpper, CblasNoTrans, CblasNonUnit, 1, S, M);
  gsl_blas_dtrsm(CblasRight, CblasLower, CblasNoTrans, CblasNonUnit, 1, S, M);
 
  // Put I-A in S
  gsl_matrix_set_identity(S);
  gsl_matrix_sub(S, A);

  // Compute final inv_estim = M*S
  gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1, M, S, 0, inv_estim);

  gsl_matrix_free(M);
  gsl_matrix_free(S);
  gsl_matrix_free(A);
}

/*
  Fills x with a gaussian vector built from the next point(s) of the
  quasi-random sequence qrng.

  A point of the unit hypercube is drawn and each coordinate is mapped through
  the inverse cumulative distribution function of the standard gaussian. A
  point having a coordinate exactly equal to 0 or 1 is discarded and another
  one is drawn.

  x must have as many elements as the dimension of qrng.
*/
void
SolutionTester::gaussian_draw(gsl_vector *x, gsl_qrng *qrng)
{
  assert(x->size == qrng->dimension);

  const int dim = (int) x->size;
  double *unif = new double[x->size];

  bool accepted;
  do
    {
      gsl_qrng_get(qrng, unif);

      accepted = true;
      for(int k = 0; k < dim; k++)
        {
          // A coordinate on the edge of the hypercube invalidates the whole point
          if (unif[k] == 0.0 || unif[k] == 1.0)
            {
              accepted = false;
              break;
            }
          gsl_vector_set(x, k, gsl_cdf_ugaussian_Pinv(unif[k]));
        }
    }
  while(!accepted);

  delete[] unif;
}

/*
  Draws a point on the hypersphere of the given center and radius.

  A gaussian vector is drawn with gaussian_draw() (and drawn again as long as
  its euclidean norm is zero), rescaled so that its norm equals radius, and
  finally shifted by center. The result is stored in x.
*/
void
SolutionTester::hypersphere_draw(gsl_vector *x, const gsl_vector *center, 
                                 double radius, gsl_qrng *qrng)
{
  // Euclidean norm of the current draw
  double norm;
  do
    {
      gaussian_draw(x, qrng);
      norm = gsl_blas_dnrm2(x);
    }
  while(norm == 0.0);

  // Project on the sphere of the requested radius, then move it to center
  gsl_vector_scale(x, radius / norm);
  gsl_vector_add(x, center);
}

/*
  Writes the CSV column titles of the model variables on output: for each of
  the five prefixes in turn, one title per index from 1 to n, followed by
  "lambda". Every title is followed by a comma and a space.
*/
void
SolutionTester::write_variables_head(std::ostream &output) const
{
  static const char *const prefixes[] = { "c_", "l_", "i_", "k_", "a_" };
  const int nb_prefixes = sizeof(prefixes)/sizeof(prefixes[0]);

  for(int p = 0; p < nb_prefixes; p++)
    for(int idx = 1; idx <= n; idx++)
      output << prefixes[p] << idx << ", ";

  output << "lambda, ";
}

/*
  Writes the CSV column titles of the equation residuals on output: for each
  of the five equation families in turn, one title per index from 1 to n,
  followed by "AggRscConst". Every title is followed by a comma and a space.
*/
void
SolutionTester::write_residuals_head(std::ostream &output) const
{
  static const char *const families[] = {
    "MargConsUtil_", "MargLabUtil_", "Euler_", "LawMotionCap_", "TFP_"
  };
  const int nb_families = sizeof(families)/sizeof(families[0]);

  for(int f = 0; f < nb_families; f++)
    for(int idx = 1; idx <= n; idx++)
      output << families[f] << idx << ", ";

  output << "AggRscConst, ";
}

void
SolutionTester::accuracy_test1(int nb_points, double radius, double &max_err, gsl_vector *eq_max_err)
{
  assert(eq_max_err == NULL || (int) eq_max_err->size == ny);

  std::ofstream output;
  if (dump)
    {
      std::ostringstream filename;
      filename << modsol.name() << "-A" << modsol.modspec.spec_no() << "-N" << n << "-test1-r" << radius << ".csv";
      output.open(filename.str().c_str(), std::ios::out | std::ios::binary);
      if (!output.is_open())
        {
          std::cerr << "Can't open file " << filename.str() << " for writing" << std::endl;
          exit(EXIT_FAILURE);
        }

      for(int i = 1; i <= n; i++)
        output << "k_prev_" << i << ", ";
      write_variables_head(output);
      write_residuals_head(output);
      output << std::endl;

      output << std::setprecision(15);
    }

  gsl_vector *y_ss = gsl_vector_alloc(ny);
  gsl_vector *y_prev = gsl_vector_alloc(ny);
  gsl_vector *y_curr = gsl_vector_alloc(ny);
  gsl_vector *shocks = gsl_vector_alloc(n+1);
  gsl_vector *errors = gsl_vector_alloc(ny);

  // The state space has 2*n dimensions, for the variables k_{t-1} and a_t
  gsl_vector *state = gsl_vector_alloc(2*n);

  // Extract the steady state for the k's and the a's
  modsol.modspec.steady_state(y_ss);
  gsl_vector *state_ss = &gsl_vector_subvector(y_ss, 3*n, 2*n).vector;

  // Initialize max errors to NAN
  max_err = NAN;
  if (eq_max_err != NULL)
    gsl_vector_set_all(eq_max_err, NAN);

  /* Use Sobol low-discrepancy sequences (Niederreiter sequences are only valid up
     to 12 dimensions, which is not enough for 8 or 10 countries */
  assert(2*n <= 40);

  gsl_qrng *qrng = gsl_qrng_alloc(gsl_qrng_sobol, 2*n);

  for(int i = 0; i < nb_points; i++)
    {
      hypersphere_draw(state, state_ss, radius, qrng);

      /* Infer y_prev and shocks from this state by supposing that a_t is the result
         of a_{t-1}=1 and null global shock */
      gsl_vector_set_all(y_prev, NAN);
      gsl_vector_set(shocks, n, 0);
      for(int i = 0; i < n; i++)
        {
          gsl_vector_set(y_prev, 3*n+i, gsl_vector_get(state, i));
          gsl_vector_set(y_prev, 4*n+i, gsl_vector_get(state_ss, n+i));
          gsl_vector_set(shocks, i, (log(gsl_vector_get(state, n+i)) - modsol.modspec.rho*log(gsl_vector_get(state_ss, n+i)))/modsol.modspec.sigma);
        }

      // Compute policy function and Euler error
      modsol.policy_func(y_prev, shocks, y_curr);
      euler_errors(y_prev, y_curr, shocks, errors);

      for(int j = 0; j < ny; j++)
        {
          double e = log10(fabs(gsl_vector_get(errors, j)));

          if (std::isnan(max_err) || e > max_err)
            max_err = e;
          if (eq_max_err != NULL && (std::isnan(gsl_vector_get(eq_max_err, j)) || e > gsl_vector_get(eq_max_err, j)))
            gsl_vector_set(eq_max_err, j, e);
        }

      if (dump)
        {
          // Write results
          for(int i = 0; i < n; i++)
            output << gsl_vector_get(state, i) << ", ";
          for(int i = 0; i < ny; i++)
            output << gsl_vector_get(y_curr, i) << ", ";
          for(int i = 0; i < ny; i++)
            output << gsl_vector_get(errors, i) << ", ";
          output << std::endl;
        }
    }

  gsl_vector_free(state);
  gsl_vector_free(errors);
  gsl_vector_free(shocks);
  gsl_vector_free(y_curr);
  gsl_vector_free(y_prev);
  gsl_vector_free(y_ss);
  gsl_qrng_free(qrng);

  if (dump)
    output.close();
}

void
SolutionTester::accuracy_test2(int T, int warmup, double &max_err, double &mean_err,
                               gsl_vector *eq_max_err, unsigned long rng_seed)
{
  assert(eq_max_err == NULL || (int) eq_max_err->size == ny);

  std::ofstream output;
  if (dump)
    {
      std::ostringstream filename;
      filename << modsol.name() << "-A" << modsol.modspec.spec_no() << "-N" << n << "-test2.csv";
      output.open(filename.str().c_str(), std::ios::out | std::ios::binary);
      if (!output.is_open())
        {
          std::cerr << "Can't open file " << filename.str() << " for writing" << std::endl;
          exit(EXIT_FAILURE);
        }

      write_variables_head(output);
      for(int i = 1; i <= n; i++)
        output << "e_" << i << ", ";
      output << "e_global, ";
      write_residuals_head(output);
      output << std::endl;

      output << std::setprecision(15);
    }

  gsl_vector *init = gsl_vector_alloc(ny);

  // Use steady state as starting point for simulations
  modsol.modspec.steady_state(init);

#ifdef DEBUG
  std::cout << "Computing simulations..." << std::endl;
#endif

  gsl_matrix *sims = gsl_matrix_alloc(T, ny);
  gsl_matrix *shocks = gsl_matrix_alloc(T, n+1);
  simulate_model(init, sims, shocks, warmup, rng_seed);

  if (dump)
    {
      // Write initial state
      for(int i = 0; i < ny; i++)
        output << gsl_vector_get(init, i) << ", ";
      for(int i = 0; i < ny+n+1; i++)
        output << ", ";
      output << std::endl;
    }

  max_err = NAN;
  mean_err = 0;
  if (eq_max_err != NULL)
    gsl_vector_set_all(eq_max_err, NAN);

  gsl_vector *errors = gsl_vector_alloc(ny);
  for(int t = 0; t < T; t++)
    {
      euler_errors(t == 0 ? init : &gsl_matrix_row(sims, t-1).vector,
                   &gsl_matrix_row(sims, t).vector,
                   &gsl_matrix_row(shocks, t).vector,
                   errors);

      for(int j = 0; j < ny; j++)
        {
          double ee = fabs(gsl_vector_get(errors, j));

          // Add this error to the mean, except if we are in the labor equation of model A1 or A5
          if ((modsol.modspec.spec_no() != 1 && modsol.modspec.spec_no() != 5) || j < n || j >= 2*n)
            mean_err += ee;

          double e = log10(ee);
          
          if (std::isnan(max_err) || e > max_err)
            max_err = e;

          if (eq_max_err != NULL && (std::isnan(gsl_vector_get(eq_max_err, j)) || e > gsl_vector_get(eq_max_err, j)))
            gsl_vector_set(eq_max_err, j, e);
        }

      if (dump)
        {
          // Write results
          for(int i = 0; i < ny; i++)
            output << gsl_matrix_get(sims, t, i) << ", ";
          for(int i = 0; i <= n; i++)
            output << gsl_matrix_get(shocks, t, i) << ", ";
          for(int i = 0; i < ny; i++)
            output << gsl_vector_get(errors, i) << ", ";
          output << std::endl;
        }
    }

  if (modsol.modspec.spec_no() == 1 || modsol.modspec.spec_no() == 5)
    mean_err = log10(mean_err / (T*(ny-n)));
  else
    mean_err = log10(mean_err / (T*ny));

  gsl_vector_free(errors);
  gsl_matrix_free(sims);
  gsl_matrix_free(shocks);
  gsl_vector_free(init);

  if (dump)
    output.close();
}

/*
  Builds the matrix of instruments out of a simulation.

  init:             variables at the date preceding the first row of sims
  sims:             T x ny matrix of simulated variables
  constant:         if true, include a column of ones
  order1_monomials: if true, include the 2*n state variables
  order2_monomials: if true, include the n*(2*n+1) products of two state variables

  The state variables at date t are taken from columns 4*n..5*n-1 of row t of
  sims (a_t) and from columns 3*n..4*n-1 of the previous row, or of init for
  the first date (k_{t-1}).

  Returns a newly allocated T x ni matrix, which the caller must release with
  gsl_matrix_free().
*/
gsl_matrix *
SolutionTester::generate_instruments(const gsl_vector *init, const gsl_matrix *sims, bool constant,
                                     bool order1_monomials, bool order2_monomials)
{
  assert(init->size == sims->size2);

  const int T = (int) sims->size1;

  // Compute number of instruments
  int ni = 0;
  if (constant)
    ni++;
  if (order1_monomials)
    ni += 2*n;
  if (order2_monomials)
    ni += n*(2*n+1);

  gsl_matrix *instruments = gsl_matrix_alloc(T, ni);

  /* All the views below are stored in named objects: taking the address of a
     member of a temporary view is not valid C++ */

  // Index of the next column of instruments to fill
  int ncol = 0;

  if (constant)
    {
      gsl_vector_view ones = gsl_matrix_column(instruments, ncol);
      gsl_vector_set_all(&ones.vector, 1);
      ncol++;
    }

  if (order1_monomials || order2_monomials)
    {
      gsl_matrix *state_vars = gsl_matrix_alloc(T, 2*n);

      // Copy a_t
      gsl_matrix_view a_dest = gsl_matrix_submatrix(state_vars, 0, 0, T, n);
      gsl_matrix_const_view a_src = gsl_matrix_const_submatrix(sims, 0, 4*n, T, n);
      gsl_matrix_memcpy(&a_dest.matrix, &a_src.matrix);

      // Copy k_{t-1}: first date from init...
      gsl_vector_view k_first_dest = gsl_matrix_subrow(state_vars, 0, n, n);
      gsl_vector_const_view k_first_src = gsl_vector_const_subvector(init, 3*n, n);
      gsl_vector_memcpy(&k_first_dest.vector, &k_first_src.vector);

      // ...and following dates from the previous row of sims (none if T is 1)
      if (T > 1)
        {
          gsl_matrix_view k_dest = gsl_matrix_submatrix(state_vars, 1, n, T-1, n);
          gsl_matrix_const_view k_src = gsl_matrix_const_submatrix(sims, 0, 3*n, T-1, n);
          gsl_matrix_memcpy(&k_dest.matrix, &k_src.matrix);
        }

      // Monomials of degree 1
      if (order1_monomials)
        {
          gsl_matrix_view deg1_dest = gsl_matrix_submatrix(instruments, 0, ncol, T, 2*n);
          gsl_matrix_memcpy(&deg1_dest.matrix, state_vars);
          ncol += 2*n;
        }

      // Monomials of degree 2: products of state variables i and j, for j <= i
      if (order2_monomials)
        {
          gsl_vector *col = gsl_vector_alloc(T);
          for(int i = 0; i < 2*n; i++)
            for(int j = 0; j <= i; j++)
              {
                gsl_vector_view product = gsl_matrix_column(instruments, ncol);
                gsl_matrix_get_col(&product.vector, state_vars, i);
                gsl_matrix_get_col(col, state_vars, j);
                gsl_vector_mul(&product.vector, col);
                ncol++;
              }
          gsl_vector_free(col);
        }

      gsl_matrix_free(state_vars);
    }

  return instruments;
}

void
SolutionTester::den_haan_marcet_stat(int T, int warmup, const equation_blocks_type &equation_blocks,
                                     bool constant, bool order1_monomials,
                                     bool order2_monomials, gsl_vector *pvals, unsigned long rng_seed)
{
  assert(pvals->size == equation_blocks.size());
  assert(constant || order1_monomials || order2_monomials);

  gsl_vector *init = gsl_vector_alloc(ny);

  // Use steady state as starting point for simulations
  modsol.modspec.steady_state(init);

#ifdef DEBUG
  //  std::cout << "Computing simulations..." << std::endl;
#endif

  // Simulate model for T+1 periods (so that we have T forecast errors)
  gsl_matrix *sims = gsl_matrix_alloc(T+1, ny);
  gsl_matrix *shocks = gsl_matrix_alloc(T+1, n+1);
  simulate_model(init, sims, shocks, warmup, rng_seed);

#ifdef DEBUG
  //  print_matrix(sims);
  //  std::cout << "Computing forecast errors..." << std::endl;
#endif

  // Forecast errors, for dates t=1..T
  gsl_matrix *errors = gsl_matrix_alloc(T, ny);
  gsl_vector *fwd = gsl_vector_alloc(n);
  for(int t = 0; t < T; t++)
    {
      modsol.modspec.forward_part(t == 0 ? init : &gsl_matrix_const_row(sims, t-1).vector,
                                  &gsl_matrix_const_row(sims, t).vector,
                                  &gsl_matrix_const_row(sims, t+1).vector,
                                  &gsl_matrix_const_row(shocks, t).vector,
                                  fwd);
      modsol.modspec.errors(t == 0 ? init : &gsl_matrix_const_row(sims, t-1).vector,
                            &gsl_matrix_const_row(sims, t).vector,
                            fwd,
                            &gsl_matrix_const_row(shocks, t).vector,
                            &gsl_matrix_row(errors, t).vector);
    }
  gsl_matrix_free(shocks);
  gsl_vector_free(fwd);

  gsl_matrix *instruments = generate_instruments(init, &gsl_matrix_submatrix(sims, 0, 0, T, ny).matrix,
                                                 constant, order1_monomials, order2_monomials);
  const int ni = (int) instruments->size2;

  gsl_vector_free(init);
  gsl_matrix_free(sims);
#ifdef DEBUG
  //  std::cout << "Instruments..." << std::endl;
  //  gsl_vector_fprintf(stdout, &gsl_matrix_row(instruments, T/2).vector, "%g");
  /*  FILE *fi = fopen("instrum.txt", "w");
  gsl_matrix_fprintf(fi, instruments, "%.16g");
  fclose(fi);*/
#endif

  // Vector with only ones
  gsl_vector *unit = gsl_vector_alloc(T);
  gsl_vector_set_all(unit, 1);

  for(int i = 0; i < (int) pvals->size; i++)
    {
#ifdef DEBUG
      std::cout << " ...for block " << i << ": ";
#endif
      // Select only the forecast errors corresponding to the equation block
      const int neq = equation_blocks[i].size();
      gsl_matrix *errors_sub = gsl_matrix_alloc(T, neq);
      for(int j = 0; j < neq; j++)
        gsl_matrix_get_col(&gsl_matrix_column(errors_sub, j).vector, errors, equation_blocks[i][j]);

#ifdef DEBUG
  /*  FILE *fe = fopen("errors.txt", "w");
  gsl_matrix_fprintf(fe, errors_sub, "%.16g");
  fclose(fe);*/
#endif

      /*
        Create the kronecker product of errors and instruments.
        We use dger(), since for vectors x and y, the matrix output of x*y' is equal to kron(x,y)
        if seen as a vector.
      */
      int nk = neq * ni;
      gsl_matrix *kron = gsl_matrix_alloc(T, nk);
      gsl_matrix_set_zero(kron);
      for(int t = 0; t < T; t++)
        gsl_blas_dger(1, &gsl_matrix_row(errors_sub, t).vector,
                      &gsl_matrix_row(instruments, t).vector,
                      &gsl_matrix_view_vector(&gsl_matrix_row(kron, t).vector, neq, ni).matrix);
      gsl_matrix_free(errors_sub);

#ifdef DEBUG
  /* FILE *f = fopen("kron.txt", "w");
  gsl_matrix_fprintf(f, kron, "%.16g");
  fclose(f); */
#endif

      gsl_matrix *S = gsl_matrix_alloc(nk, nk);
  
      // Compute inverse of Newey-West estimator
      try
        {
          inv_newey_west_estimator(kron, S);
        }
      catch(SingularityException &e)
        {
          gsl_vector_set(pvals, i, NAN);
          gsl_matrix_free(kron);
          gsl_matrix_free(S);
#ifdef DEBUG
          std::cout << "... stat computation failed!" << std::endl;
#endif
          continue;
        }

      gsl_vector *mean = gsl_vector_alloc(nk);
      gsl_vector *buf = gsl_vector_alloc(nk);

      // Compute sample mean
      gsl_blas_dgemv(CblasTrans, 1.0/((double) T), kron, unit, 0, mean);
  
      // Compute test statistics and pval
      double stat;
      gsl_blas_dsymv(CblasUpper, T, S, mean, 0, buf);
      gsl_blas_ddot(mean, buf, &stat);
      gsl_vector_set(pvals, i, gsl_cdf_chisq_P(stat, nk));

#ifdef DEBUG
      std::cout << ", stat=" << stat << ", pval=" << gsl_vector_get(pvals, i) << std::endl;
#endif

      gsl_matrix_free(S);
      gsl_matrix_free(kron);
      gsl_vector_free(buf);
      gsl_vector_free(mean);
    }

  gsl_vector_free(unit);
  gsl_matrix_free(instruments);
  gsl_matrix_free(errors);
}

void
SolutionTester::accuracy_test3(int nstat, int T, int warmup, const equation_blocks_type &equation_blocks,
                               bool constant, bool order1_monomials, bool order2_monomials,
                               const gsl_vector *thresholds, gsl_matrix *freqs, unsigned long rng_seed)
{
  assert(thresholds->size == freqs->size1 && freqs->size2 == equation_blocks.size());

  gsl_matrix_set_zero(freqs);

  gsl_vector *pvals = gsl_vector_alloc(freqs->size2);

  for(int i = 0; i < nstat; i++)
    {
#ifdef DEBUG
      std::cout << " DHM stat " << i << "/" << nstat << "..." << std::endl;
#endif
      den_haan_marcet_stat(T, warmup, equation_blocks, constant, order1_monomials, order2_monomials,
                           pvals, rng_seed + i);
      for(int j = 0; j < (int) freqs->size2; j++)
        {
          // If failed to compute stat for an equation block, enforce NaN result
          if (std::isnan(gsl_vector_get(pvals, j)))
            gsl_vector_set_all(&gsl_matrix_column(freqs, j).vector, NAN);
          else
            for(int k = 0; k < (int) thresholds->size; k++)
              if (gsl_vector_get(pvals, j) < gsl_vector_get(thresholds, k))
                (*gsl_matrix_ptr(freqs, k, j))++;
        }
    }
  gsl_matrix_scale(freqs, 1.0/((double) nstat));

  gsl_vector_free(pvals);
}

#ifdef DEBUG
/*
  Prints matrix A on standard output, one row per line, each element being
  followed by a space. An empty line is printed after the last row.
*/
void
SolutionTester::print_matrix(const gsl_matrix *A)
{
  const int nrows = (int) A->size1;
  const int ncols = (int) A->size2;

  int row = 0;
  while(row < nrows)
    {
      int col = 0;
      while(col < ncols)
        {
          std::cout << gsl_matrix_get(A, row, col) << " ";
          col++;
        }
      std::cout << std::endl;
      row++;
    }
  std::cout << std::endl;
}

void
SolutionTester::hypersphere_draw_test(int dim, double zmin, double zmax, int ndraw)
{
  gsl_vector *center = gsl_vector_alloc(dim);
  gsl_vector_set_zero(center);

  gsl_vector *x = gsl_vector_alloc(dim);

  gsl_qrng *qrng = gsl_qrng_alloc(gsl_qrng_sobol, dim);

  int *counts = new int[dim];
  for(int i = 0; i < dim; i++)
    counts[i] = 0;

  for(int i = 0; i < ndraw; i++)
    {
      hypersphere_draw(x, center, 1, qrng);
      for(int j = 0; j < dim; j++)
        if (gsl_vector_get(x, j) >= zmin && gsl_vector_get(x, j) <= zmax)
          counts[j]++;
    }

  std::cout << "Counts: ";
  for(int i = 0; i < dim; i++)
    std::cout << counts[i] << " ";
  std::cout << std::endl;

  delete[] counts;
  gsl_qrng_free(qrng);
  gsl_vector_free(x);
  gsl_vector_free(center);
}
#endif
