From 1ad1292720f89c93e4d8a4046d68028fdee449fe Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Thu, 26 Sep 2024 08:27:05 -0400 Subject: [PATCH 01/99] Fix EnergyAndForces tests (#277) * have them work in debug mode too --- src/DensityMatrix.cc | 7 ++++++- src/DensityMatrix.h | 13 +++++++++++-- src/ProjectedMatrices.cc | 1 + tests/WFEnergyAndForces/mgmol.cfg | 2 +- 4 files changed, 19 insertions(+), 4 deletions(-) diff --git a/src/DensityMatrix.cc b/src/DensityMatrix.cc index 6e6ea652..e2c7b473 100644 --- a/src/DensityMatrix.cc +++ b/src/DensityMatrix.cc @@ -28,7 +28,7 @@ const double factor_kernel4dot = 10.; template DensityMatrix::DensityMatrix(const int ndim) { - assert(ndim >= 0); + assert(ndim > 0); dim_ = ndim; @@ -45,6 +45,7 @@ DensityMatrix::DensityMatrix(const int ndim) kernel4dot_ = new MatrixType("K4dot", ndim, ndim); work_ = new MatrixType("work", ndim, ndim); occupation_.resize(dim_); + setDummyOcc(); } template @@ -109,6 +110,7 @@ void DensityMatrix::build( const std::vector& occ, const int new_orbitals_index) { assert(dm_ != nullptr); + assert(!occ.empty()); setOccupations(occ); @@ -149,6 +151,8 @@ template void DensityMatrix::setUniform( const double nel, const int new_orbitals_index) { + assert(!occupation_.empty()); + MGmol_MPI& mmpi = *(MGmol_MPI::instance()); const double occ = (double)((double)nel / (double)dim_); if (mmpi.instancePE0()) @@ -314,6 +318,7 @@ void DensityMatrix::computeOccupations(const MatrixType& ls) template void DensityMatrix::setOccupations(const std::vector& occ) { + assert(!occ.empty()); #ifdef PRINT_OPERATIONS MGmol_MPI& mmpi = *(MGmol_MPI::instance()); if (mmpi.instancePE0()) diff --git a/src/DensityMatrix.h b/src/DensityMatrix.h index 960ad18e..84804a86 100644 --- a/src/DensityMatrix.h +++ b/src/DensityMatrix.h @@ -84,17 +84,24 @@ class DensityMatrix *dm_ = mat; orbitals_index_ = orbitals_index; - occupation_.clear(); + setDummyOcc(); occ_uptodate_ = false; uniform_occ_ = false; stripped_ = false; } + // set 
occupations to meaningless values to catch uninitialized use + void setDummyOcc() + { + for (auto& occ : occupation_) + occ = -1.; + } + void initMatrix(const double* const val) { dm_->init(val, dim_); - occupation_.clear(); + setDummyOcc(); occ_uptodate_ = false; uniform_occ_ = false; @@ -105,8 +112,10 @@ class DensityMatrix void getOccupations(std::vector& occ) const { + assert(!occupation_.empty()); assert(occ_uptodate_); assert((int)occ.size() == dim_); + memcpy(&occ[0], &occupation_[0], dim_ * sizeof(double)); } diff --git a/src/ProjectedMatrices.cc b/src/ProjectedMatrices.cc index e0e679e9..3606aa40 100644 --- a/src/ProjectedMatrices.cc +++ b/src/ProjectedMatrices.cc @@ -509,6 +509,7 @@ template void ProjectedMatrices::setOccupations( const std::vector& occ) { + assert(!occ.empty()); #ifdef PRINT_OPERATIONS if (mmpi.instancePE0()) (*MPIdata::sout) << "ProjectedMatrices::setOccupations()" diff --git a/tests/WFEnergyAndForces/mgmol.cfg b/tests/WFEnergyAndForces/mgmol.cfg index d543b626..12002703 100644 --- a/tests/WFEnergyAndForces/mgmol.cfg +++ b/tests/WFEnergyAndForces/mgmol.cfg @@ -18,7 +18,7 @@ pseudopotential=pseudo.H type=QUENCH [Quench] max_steps=50 -atol=1.e-9 +atol=1.e-8 num_lin_iterations=2 [Orbitals] initial_type=Gaussian From 8ac6cd60cd2c06f87c8be6a5806478e316f33c53 Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Mon, 30 Sep 2024 11:53:23 -0400 Subject: [PATCH 02/99] Move factor 4pi out og linear solvers (#278) --- src/Hartree.cc | 9 ++--- src/Hartree.h | 2 -- src/Hartree_CG.cc | 1 + src/PCGSolver.cc | 59 ++++++++++++++++----------------- src/PCGSolver.h | 52 ++++++++++++++--------------- src/PCGSolver_Diel.cc | 68 +++++++++++++++++++------------------- src/PCGSolver_Diel.h | 30 +++++++++-------- src/pb/Mgm.h | 3 -- src/pb/Solver.h | 7 ++-- src/pb/SolverLap.cc | 77 ++----------------------------------------- src/pb/SolverLap.h | 2 +- src/pb/SolverPB.cc | 5 ++- src/pb/SolverPB.h | 4 +-- 13 files changed, 120 insertions(+), 199 deletions(-) 
diff --git a/src/Hartree.cc b/src/Hartree.cc index b3984b87..7d0ba4d8 100644 --- a/src/Hartree.cc +++ b/src/Hartree.cc @@ -7,10 +7,8 @@ // This file is part of MGmol. For details, see https://github.com/llnl/mgmol. // Please also read this link https://github.com/llnl/mgmol/LICENSE -// $Id$ #include #include -using namespace std; #include "Control.h" #include "Hartree.h" @@ -82,6 +80,8 @@ void Hartree::solve( // { /* solve with POTDTYPE precision */ pb::GridFunc rhs(work_rho); + // Hartree units + rhs *= (4. * M_PI); poisson_solver_->solve(*Poisson::vh_, rhs); // } // else @@ -92,10 +92,11 @@ void Hartree::solve( double final_residual = poisson_solver_->getFinalResidual(); if (onpe0) - (*MPIdata::sout) << setprecision(2) << scientific + (*MPIdata::sout) << std::setprecision(2) << std::scientific << "Hartree: residual reduction = " << residual_reduction - << ", final residual = " << final_residual << endl; + << ", final residual = " << final_residual + << std::endl; Poisson::Int_vhrho_ = vel * Poisson::vh_->gdot(rho); Poisson::Int_vhrhoc_ = vel * Poisson::vh_->gdot(rhoc); diff --git a/src/Hartree.h b/src/Hartree.h index a7050e24..b87ff01e 100644 --- a/src/Hartree.h +++ b/src/Hartree.h @@ -7,14 +7,12 @@ // This file is part of MGmol. For details, see https://github.com/llnl/mgmol. // Please also read this link https://github.com/llnl/mgmol/LICENSE -// $Id$ #ifndef included_Hartree #define included_Hartree #include "Poisson.h" #include "PoissonInterface.h" -// pb #include "SolverLap.h" template diff --git a/src/Hartree_CG.cc b/src/Hartree_CG.cc index f9bf3860..375237f8 100644 --- a/src/Hartree_CG.cc +++ b/src/Hartree_CG.cc @@ -81,6 +81,7 @@ void Hartree_CG::solve( // { /* solve with POTDTYPE precision */ pb::GridFunc rhs(work_rho); + rhs *= (4. 
* M_PI); poisson_solver_->solve(*Poisson::vh_, rhs); // } // else diff --git a/src/PCGSolver.cc b/src/PCGSolver.cc index 12103aaa..b3bd7ec3 100644 --- a/src/PCGSolver.cc +++ b/src/PCGSolver.cc @@ -11,14 +11,13 @@ #include #include -using namespace std; -template -void PCGSolver::clear() +template +void PCGSolver::clear() { - for (short i = 0; i < (short)pc_oper_.size(); i++) + for (short i = 0; i < (short)precond_oper_.size(); i++) { - delete pc_oper_[i]; + delete precond_oper_[i]; } for (short i = 0; i < (short)gf_work_.size(); i++) { @@ -35,26 +34,26 @@ void PCGSolver::clear() assert(gf_newv_[i] != nullptr); delete gf_newv_[i]; } - // delete grids after pb::GridFunc objects since those + // delete grids after pb::GridFunc objects since those // have data members references to grids for (short i = 0; i < (short)grid_.size(); i++) { delete grid_[i]; } - pc_oper_.clear(); + precond_oper_.clear(); grid_.clear(); gf_work_.clear(); gf_rcoarse_.clear(); gf_newv_.clear(); } -template -void PCGSolver::setupPrecon() +template +void PCGSolver::setupPrecon() { // check if precon is already setup // Assumes operator does not change, hence // a single setup is sufficient - if (is_pc_setup_) return; + if (is_precond_setup_) return; // fine level pb::Grid* mygrid = new pb::Grid(oper_.grid()); @@ -63,7 +62,7 @@ void PCGSolver::setupPrecon() pb::Lap* myoper = LapFactory::createLap(*grid_[0], lap_type_); - pc_oper_.push_back(myoper); + precond_oper_.push_back(myoper); pb::GridFunc* gf_work = new pb::GridFunc( @@ -92,7 +91,7 @@ void PCGSolver::setupPrecon() pb::Lap* myoper = LapFactory::createLap(*coarse_grid, 1); - pc_oper_.push_back(myoper); + precond_oper_.push_back(myoper); gf_work = new pb::GridFunc( *coarse_grid, bc_[0], bc_[1], bc_[2]); @@ -109,12 +108,13 @@ void PCGSolver::setupPrecon() mygrid = coarse_grid; } - is_pc_setup_ = true; + is_precond_setup_ = true; } // MG V-cycle with no mask -template -void PCGSolver::preconSolve(pb::GridFunc& gf_v, +template +void 
PCGSolver::preconSolve( + pb::GridFunc& gf_v, const pb::GridFunc& gf_f, const short level) { //(*MPIdata::sout)<<"Preconditioning::mg() at level "<::preconSolve(pb::GridFunc& gf_v, ncycl = 4 > (nu1_ + nu2_) ? 4 : (nu1_ + nu2_); } - pb::Lap* myoper = pc_oper_[level]; + pb::Lap* myoper = precond_oper_[level]; // SMOOTHING for (short it = 0; it < ncycl; it++) @@ -161,25 +161,23 @@ void PCGSolver::preconSolve(pb::GridFunc& gf_v, } // Left Preconditioned CG -template -bool PCGSolver::solve(pb::GridFunc& gf_phi, pb::GridFunc& gf_rhs) +template +bool PCGSolver::solve( + pb::GridFunc& gf_phi, const pb::GridFunc& gf_rhs) { bool converged = false; const pb::Grid& finegrid = gf_phi.grid(); // initial data and residual - We assume a nonzero initial guess - pb::GridFunc lhs(finegrid, bc_[0], bc_[1], bc_[2]); + pb::GridFunc lhs(finegrid, bc_[0], bc_[1], bc_[2]); // scale initial guess with epsilon oper_.inv_transform(gf_phi); // compute initial residual: r := b - Ax /* compute Ax */ oper_.apply(gf_phi, lhs); /* set r = b */ - pb::GridFunc res(gf_rhs); + pb::GridFunc res(gf_rhs); oper_.transform(res); - // Hartree units - const double hu = 4. 
* M_PI; - res *= hu; /* compute r = r - Ax */ res -= lhs; @@ -199,11 +197,11 @@ bool PCGSolver::solve(pb::GridFunc& gf_phi, pb::GridFunc& gf_rhs) /* preconditioning step */ prec_z.setValues(0.); preconSolve(prec_z, prec_res, 0); - pb::GridFunc z(prec_z); + pb::GridFunc z(prec_z); // conjugate vectors - pb::GridFunc p(prec_z); - pb::GridFunc ap(p.grid(), bc_[0], bc_[1], bc_[2]); + pb::GridFunc p(prec_z); + pb::GridFunc ap(p.grid(), bc_[0], bc_[1], bc_[2]); double rtz = res.gdot(z); @@ -251,13 +249,14 @@ bool PCGSolver::solve(pb::GridFunc& gf_phi, pb::GridFunc& gf_rhs) } // Left Preconditioned CG -template -bool PCGSolver::solve(T2* phi, T2* rhs, const char dis) +template +bool PCGSolver::solve( + ScalarType* phi, ScalarType* rhs, const char dis) { - pb::GridFunc gf_phi(oper_.grid(), bc_[0], bc_[1], bc_[2]); + pb::GridFunc gf_phi(oper_.grid(), bc_[0], bc_[1], bc_[2]); gf_phi.assign(phi, dis); - pb::GridFunc gf_work(oper_.grid(), bc_[0], bc_[1], bc_[2]); + pb::GridFunc gf_work(oper_.grid(), bc_[0], bc_[1], bc_[2]); gf_work.assign(rhs, dis); bool converged = solve(gf_phi, gf_work); diff --git a/src/PCGSolver.h b/src/PCGSolver.h index 8ba76158..04e1028f 100644 --- a/src/PCGSolver.h +++ b/src/PCGSolver.h @@ -7,8 +7,8 @@ // This file is part of MGmol. For details, see https://github.com/llnl/mgmol. 
// Please also read this link https://github.com/llnl/mgmol/LICENSE -#ifndef _PCG_SOLVER_H_ -#define _PCG_SOLVER_H_ +#ifndef MGMOL_PCG_SOLVER_H +#define MGMOL_PCG_SOLVER_H #include "Control.h" #include "Lap.h" @@ -17,32 +17,41 @@ #include -template +template class PCGSolver { - private: std::vector grid_; short lap_type_; short bc_[3]; bool fully_periodic_; - // operators + + // operator to solve for T oper_; - std::vector*> pc_oper_; + + // preconditioner operator for each MG level + std::vector*> precond_oper_; std::vector*> gf_work_; std::vector*> gf_rcoarse_; std::vector*> gf_newv_; - // solver params + + // solver parameters int maxiters_; double tol_; double final_residual_; double residual_reduction_; - // precon params + + // preconditioner parameters short nu1_; short nu2_; short max_nlevels_; short nlevels_; - bool is_pc_setup_; + bool is_precond_setup_; + + void preconSolve(pb::GridFunc& gf_v, + const pb::GridFunc& gf_f, const short level = 0); + void setupPrecon(); + void clear(); public: PCGSolver(T& oper, const short px, const short py, const short pz) @@ -61,15 +70,10 @@ class PCGSolver bc_[1] = py; bc_[2] = pz; fully_periodic_ = ((bc_[0] == 1) && (bc_[1] == 1) && (bc_[2] == 1)); - // fine grid info - // pb::Grid* mygrid=new pb::Grid(oper.grid()); - // grid_.push_back(mygrid); - // fine grid operator - Control& ct = *(Control::instance()); - lap_type_ = ct.lap_type; - is_pc_setup_ = false; - // pb::Lap* myoper = LapFactory::createLap(*grid_[0],lap_type_); - // pc_oper_.push_back(myoper); + + Control& ct = *(Control::instance()); + lap_type_ = ct.lap_type; + is_precond_setup_ = false; }; void setup(const short nu1, const short nu2, const short max_sweeps, @@ -83,16 +87,10 @@ class PCGSolver setupPrecon(); } - void clear(); - - void setupPrecon(); - - void preconSolve(pb::GridFunc& gf_v, - const pb::GridFunc& gf_f, const short level = 0); - - bool solve(pb::GridFunc& gf_phi, pb::GridFunc& gf_rhs); + bool solve(pb::GridFunc& gf_phi, + const 
pb::GridFunc& gf_rhs); - bool solve(T2* phi, T2* rhs, const char dis); + bool solve(ScalarType* phi, ScalarType* rhs, const char dis); double getFinalResidual() const { return final_residual_; } double getResidualReduction() const { return residual_reduction_; } diff --git a/src/PCGSolver_Diel.cc b/src/PCGSolver_Diel.cc index 0cdc5aed..4e7c88a9 100644 --- a/src/PCGSolver_Diel.cc +++ b/src/PCGSolver_Diel.cc @@ -9,10 +9,8 @@ #include "PCGSolver_Diel.h" -using namespace std; - -template -void PCGSolver_Diel::clear() +template +void PCGSolver_Diel::clear() { for (short i = 0; i < (short)pc_oper_.size(); i++) { @@ -33,7 +31,7 @@ void PCGSolver_Diel::clear() assert(gf_newv_[i] != nullptr); delete gf_newv_[i]; } - // delete grids after pb::GridFunc objects since those + // delete grids after pb::GridFunc objects since those // have data members references to grids for (short i = 0; i < (short)grid_.size(); i++) { @@ -46,8 +44,8 @@ void PCGSolver_Diel::clear() gf_newv_.clear(); } -template -void PCGSolver_Diel::setupPrecon() +template +void PCGSolver_Diel::setupPrecon() { // fine level pb::Grid* mygrid = new pb::Grid(oper_.grid()); @@ -57,8 +55,8 @@ void PCGSolver_Diel::setupPrecon() T* myoper = new T(oper_); pc_oper_.push_back(myoper); - pb::GridFunc* gf_work - = new pb::GridFunc(*grid_[0], bc_[0], bc_[1], bc_[2]); + pb::GridFunc* gf_work + = new pb::GridFunc(*grid_[0], bc_[0], bc_[1], bc_[2]); gf_work_.push_back(gf_work); // coarse levels @@ -86,24 +84,25 @@ void PCGSolver_Diel::setupPrecon() T* myoper = new T(pc_oper_[ln - 1]->coarseOp(*mygrid)); pc_oper_.push_back(myoper); - gf_work = new pb::GridFunc(*coarse_grid, bc_[0], bc_[1], bc_[2]); + gf_work = new pb::GridFunc( + *coarse_grid, bc_[0], bc_[1], bc_[2]); gf_work_.push_back(gf_work); - pb::GridFunc* gf_rcoarse - = new pb::GridFunc(*coarse_grid, bc_[0], bc_[1], bc_[2]); + pb::GridFunc* gf_rcoarse = new pb::GridFunc( + *coarse_grid, bc_[0], bc_[1], bc_[2]); gf_rcoarse_.push_back(gf_rcoarse); - pb::GridFunc* gf_newv - 
= new pb::GridFunc(*coarse_grid, bc_[0], bc_[1], bc_[2]); + pb::GridFunc* gf_newv = new pb::GridFunc( + *coarse_grid, bc_[0], bc_[1], bc_[2]); gf_newv_.push_back(gf_newv); mygrid = coarse_grid; } } -template +template // MG V-cycle with no mask -void PCGSolver_Diel::preconSolve( - pb::GridFunc& gf_v, const pb::GridFunc& gf_f, const short level) +void PCGSolver_Diel::preconSolve(pb::GridFunc& gf_v, + const pb::GridFunc& gf_f, const short level) { //(*MPIdata::sout)<<"Preconditioning::mg() at level "<::preconSolve( // COARSE GRID CORRECTION // restrictions - pb::GridFunc* rcoarse = gf_rcoarse_[level]; + pb::GridFunc* rcoarse = gf_rcoarse_[level]; gf_work_[level]->restrict3D(*rcoarse); // storage functions for coarse grid - pb::GridFunc* newv = gf_newv_[level]; + pb::GridFunc* newv = gf_newv_[level]; // call mgrid solver on a coarser level newv->resetData(); @@ -149,15 +148,16 @@ void PCGSolver_Diel::preconSolve( if (bc_[0] != 1 || bc_[2] != 1 || bc_[2] != 1) gf_v.trade_boundaries(); } -template +template // Left Preconditioned CG -bool PCGSolver_Diel::solve( - pb::GridFunc& gf_phi, pb::GridFunc& gf_rhs) +bool PCGSolver_Diel::solve( + pb::GridFunc& gf_phi, pb::GridFunc& gf_rhs) { if (!oper_.initialized()) { - cout << "Error in PCGSolver_Diel::solve: operator not initialized" - << endl; + std::cout + << "Error in PCGSolver_Diel::solve: operator not initialized" + << std::endl; return 0.; } @@ -165,13 +165,13 @@ bool PCGSolver_Diel::solve( const pb::Grid& finegrid = gf_phi.grid(); // initial data and residual - We assume a nonzero initial guess - pb::GridFunc lhs(finegrid, bc_[0], bc_[1], bc_[2]); - pb::GridFunc res(finegrid, bc_[0], bc_[1], bc_[2]); + pb::GridFunc lhs(finegrid, bc_[0], bc_[1], bc_[2]); + pb::GridFunc res(finegrid, bc_[0], bc_[1], bc_[2]); // scale initial guess with epsilon oper_.inv_transform(gf_phi); // compute initial residual oper_.apply(gf_phi, lhs); - pb::GridFunc rhs(gf_rhs); + pb::GridFunc rhs(gf_rhs); oper_.transform(rhs); // Hartree units rhs 
*= (4. * M_PI); @@ -180,13 +180,13 @@ bool PCGSolver_Diel::solve( double rnorm = init_rnorm; // preconditioned residual - pb::GridFunc z(finegrid, bc_[0], bc_[1], bc_[2]); + pb::GridFunc z(finegrid, bc_[0], bc_[1], bc_[2]); // preconditioning step z = 0.; preconSolve(z, res, 0); // conjugate vectors - pb::GridFunc p(z); - pb::GridFunc ap(p.grid(), bc_[0], bc_[1], bc_[2]); + pb::GridFunc p(z); + pb::GridFunc ap(p.grid(), bc_[0], bc_[1], bc_[2]); double rtz = res.gdot(z); @@ -225,11 +225,11 @@ bool PCGSolver_Diel::solve( return converged; } -template +template // Left Preconditioned CG -bool PCGSolver_Diel::solve(pb::GridFunc& gf_phi, - pb::GridFunc& gf_rhs, pb::GridFunc& gf_rhod, - pb::GridFunc& gf_vks) +bool PCGSolver_Diel::solve(pb::GridFunc& gf_phi, + pb::GridFunc& gf_rhs, pb::GridFunc& gf_rhod, + pb::GridFunc& gf_vks) { // initialize the linear system operator and the preconditioner oper_.init(gf_rhod); diff --git a/src/PCGSolver_Diel.h b/src/PCGSolver_Diel.h index 9fd97914..f2001a4c 100644 --- a/src/PCGSolver_Diel.h +++ b/src/PCGSolver_Diel.h @@ -7,8 +7,8 @@ // This file is part of MGmol. For details, see https://github.com/llnl/mgmol. 
// Please also read this link https://github.com/llnl/mgmol/LICENSE -#ifndef _PCG_SOLVER_DIEL_H_ -#define _PCG_SOLVER_DIEL_H_ +#ifndef MGMOL_PCG_SOLVER_DIEL_H_ +#define MGMOL_PCG_SOLVER_DIEL_H_ #include "Control.h" #include "PB.h" @@ -21,7 +21,7 @@ #include -template +template class PCGSolver_Diel { @@ -32,9 +32,9 @@ class PCGSolver_Diel // operators T oper_; std::vector pc_oper_; - std::vector*> gf_work_; - std::vector*> gf_rcoarse_; - std::vector*> gf_newv_; + std::vector*> gf_work_; + std::vector*> gf_rcoarse_; + std::vector*> gf_newv_; // solver params int maxiters_; double tol_; @@ -47,6 +47,11 @@ class PCGSolver_Diel short nlevels_; void setupPrecon(); + void clear(); + + void preconSolve(pb::GridFunc& gf_v, + const pb::GridFunc& gf_f, const short level = 0); + public: PCGSolver_Diel(T& oper, const short px, const short py, const short pz) : oper_(oper) @@ -78,15 +83,12 @@ class PCGSolver_Diel max_nlevels_ = max_nlevels; } - void clear(); - - void preconSolve(pb::GridFunc& gf_v, const pb::GridFunc& gf_f, - const short level = 0); - - bool solve(pb::GridFunc& gf_phi, pb::GridFunc& gf_rhs); + bool solve( + pb::GridFunc& gf_phi, pb::GridFunc& gf_rhs); - bool solve(pb::GridFunc& gf_phi, pb::GridFunc& gf_rhs, - pb::GridFunc& gf_rhod, pb::GridFunc& gf_vks); + bool solve(pb::GridFunc& gf_phi, + pb::GridFunc& gf_rhs, pb::GridFunc& gf_rhod, + pb::GridFunc& gf_vks); double getFinalResidual() const { return final_residual_; } double getResidualReduction() const { return residual_reduction_; } diff --git a/src/pb/Mgm.h b/src/pb/Mgm.h index d16f5610..4b5ad68d 100644 --- a/src/pb/Mgm.h +++ b/src/pb/Mgm.h @@ -7,7 +7,6 @@ // This file is part of MGmol. For details, see https://github.com/llnl/mgmol. 
// Please also read this link https://github.com/llnl/mgmol/LICENSE -// $Id: Mgm.h,v 1.13 2010/01/28 22:56:47 jeanluc Exp $ #ifndef PB_MGM_H #define PB_MGM_H @@ -41,8 +40,6 @@ bool Mgm(T1& A, T2& vh, const GridFunc& rho, const short cogr, A.rhs(res, rhs); // Hartree units - rhs *= (4. * M_PI); - // work GridFunc GridFunc lhs(finegrid, bcx, bcy, bcz); diff --git a/src/pb/Solver.h b/src/pb/Solver.h index 062fbaa2..f82b3b6a 100644 --- a/src/pb/Solver.h +++ b/src/pb/Solver.h @@ -7,9 +7,8 @@ // This file is part of MGmol. For details, see https://github.com/llnl/mgmol. // Please also read this link https://github.com/llnl/mgmol/LICENSE -// $Id: Solver.h,v 1.14 2010/01/28 22:56:47 jeanluc Exp $ -#ifndef SOLVE_H -#define SOLVE_H +#ifndef PB_SOLVER_H +#define PB_SOLVER_H #include "GridFunc.h" @@ -33,7 +32,7 @@ class Solver fully_periodic_ = ((bc_[0] == 1) && (bc_[1] == 1) && (bc_[2] == 1)); } - virtual bool solve(GridFunc&, GridFunc&) = 0; + virtual bool solve(GridFunc&, const GridFunc&) = 0; virtual void setup(const short nu1, const short nu2, const short max_sweeps, const double tol, const short max_nlevels, diff --git a/src/pb/SolverLap.cc b/src/pb/SolverLap.cc index 5a85fefe..062755a8 100644 --- a/src/pb/SolverLap.cc +++ b/src/pb/SolverLap.cc @@ -7,8 +7,6 @@ // This file is part of MGmol. For details, see https://github.com/llnl/mgmol. 
// Please also read this link https://github.com/llnl/mgmol/LICENSE -// $Id: SolverLap.cc,v 1.15 2010/01/28 22:56:31 jeanluc Exp $ - #include "SolverLap.h" #include "Laph2.h" #include "Laph4.h" @@ -26,9 +24,8 @@ Timer vcycle_repvcycle_tm("vcycle_repvcycle"); namespace pb { - // explicit instantiation declaration -#if 1 + // double template class SolverLap, double>; template class SolverLap, double>; @@ -45,77 +42,7 @@ template class SolverLap, float>; template class SolverLap, float>; template class SolverLap, float>; template class SolverLap, float>; -#else -// double -template bool SolverLap, double>::solve( - double*, double*, const char, const int, const int); -template bool SolverLap, double>::solve( - double*, double*, const char, const int, const int); -template bool SolverLap, double>::solve( - double*, double*, const char, const int, const int); -template bool SolverLap, double>::solve( - double*, double*, const char, const int, const int); - -template bool SolverLap, double>::solve( - GridFunc&, GridFunc&, const int); -template bool SolverLap, double>::solve( - GridFunc&, GridFunc&, const int); -template bool SolverLap, double>::solve( - GridFunc&, GridFunc&, const int); -template bool SolverLap, double>::solve( - GridFunc&, GridFunc&, const int); -// float -template bool SolverLap, float>::solve( - float*, float*, const char, const int, const int); -template bool SolverLap, float>::solve( - float*, float*, const char, const int, const int); -template bool SolverLap, float>::solve( - float*, float*, const char, const int, const int); -template bool SolverLap, float>::solve( - float*, float*, const char, const int, const int); -template bool SolverLap, float>::solve( - GridFunc&, GridFunc&, const int); -template bool SolverLap, float>::solve( - GridFunc&, GridFunc&, const int); -template bool SolverLap, float>::solve( - GridFunc&, GridFunc&, const int); -template bool SolverLap, float>::solve( - GridFunc&, GridFunc&, const int); -#endif -/* -template bool 
Mgm(Laph4MP&, GridFunc&, const GridFunc&, const -short, const short, const double, const short, const short, const bool, - double&,double&,double&,short&); -template bool Mgm(Laph4M&, GridFunc&, const GridFunc&, const -short, const short, const double, const short, const short, const bool, - double&,double&,double&,short&); -template bool Mgm(Laph2&, GridFunc&, const GridFunc&, const -short, const short, const double, const short, const short, const bool, - double&,double&,double&,short&); -template bool Mgm(Laph4&, GridFunc&, const GridFunc&, const -short, const short, const double, const short, const short, const bool, - double&,double&,double&,short&); -template bool Mgm(Laph6&, GridFunc&, const GridFunc&, const -short, const short, const double, const short, const short, const bool, - double&,double&,double&,short&); -template bool Mgm(ShiftedLaph4M&, GridFunc&, const GridFunc&, -const short, const short, const double, const short, const short, const bool, - double&,double&,double&,short&); -*/ -/* -template int Vcycle(ShiftedLaph4M&, GridFunc&, const GridFunc -&, const short, const short, const short, const bool); template int -Vcycle(Laph4MP&, GridFunc&, const GridFunc &, const short, -const short, const short, const bool); template int Vcycle(Laph4M&, -GridFunc&, const GridFunc &, const short, const short, const -short, const bool); template int Vcycle(Laph2&, GridFunc&, const -GridFunc &, const short, const short, const short, const bool); -template int Vcycle(Laph4&, GridFunc&, const GridFunc &, const -short, const short, const short, const bool); template int Vcycle(Laph6&, -GridFunc&, const GridFunc &, const short, const short, const -short, const bool); -*/ template bool SolverLap::solve(T2* phi, T2* rhs, const char dis) { @@ -139,7 +66,7 @@ bool SolverLap::solve(T2* phi, T2* rhs, const char dis) } template -bool SolverLap::solve(GridFunc& gf_phi, GridFunc& gf_rhs) +bool SolverLap::solve(GridFunc& gf_phi, const GridFunc& gf_rhs) { bool conv = Mgm(oper_, 
gf_phi, gf_rhs, max_nlevels_, max_sweeps_, tol_, nu1_, nu2_, gather_coarse_level_, final_residual_, diff --git a/src/pb/SolverLap.h b/src/pb/SolverLap.h index 939e15bc..567bed05 100644 --- a/src/pb/SolverLap.h +++ b/src/pb/SolverLap.h @@ -64,7 +64,7 @@ class SolverLap : public Solver bool solve(T2* phi, T2* rhs, const char dis); - bool solve(GridFunc& gf_phi, GridFunc& gf_rhs) override; + bool solve(GridFunc& gf_phi, const GridFunc& gf_rhs) override; short getNbSweeps() const override { return nb_sweeps_; } double getFinalResidual() const override { return final_residual_; } diff --git a/src/pb/SolverPB.cc b/src/pb/SolverPB.cc index 53252c26..756b49d8 100644 --- a/src/pb/SolverPB.cc +++ b/src/pb/SolverPB.cc @@ -7,7 +7,6 @@ // This file is part of MGmol. For details, see https://github.com/llnl/mgmol. // Please also read this link https://github.com/llnl/mgmol/LICENSE -// $Id: SolverPB.cc,v 1.13 2010/01/28 22:56:32 jeanluc Exp $ #include "SolverPB.h" #include "Mgm.h" #include "PBh2.h" @@ -85,7 +84,7 @@ bool SolverPB::solve(T2* phi, T2* rhs, T2* rhod, T2* vks, const char dis) } template -bool SolverPB::solve(GridFunc& gf_phi, GridFunc& gf_rhs, +bool SolverPB::solve(GridFunc& gf_phi, const GridFunc& gf_rhs, GridFunc& gf_rhod, GridFunc& gf_vks) { oper_.init(gf_rhod); @@ -101,7 +100,7 @@ bool SolverPB::solve(GridFunc& gf_phi, GridFunc& gf_rhs, } template -bool SolverPB::solve(GridFunc& gf_phi, GridFunc& gf_rhs) +bool SolverPB::solve(GridFunc& gf_phi, const GridFunc& gf_rhs) { if (!oper_.initialized()) { diff --git a/src/pb/SolverPB.h b/src/pb/SolverPB.h index a05e1d49..99eae0e3 100644 --- a/src/pb/SolverPB.h +++ b/src/pb/SolverPB.h @@ -64,9 +64,9 @@ class SolverPB : public Solver } bool solve(T2* phi, T2* rhs, T2* rhod, T2* vks, const char dis); - bool solve(GridFunc& gf_phi, GridFunc& gf_rhs, + bool solve(GridFunc& gf_phi, const GridFunc& gf_rhs, GridFunc& gf_rhod, GridFunc& gf_vks); - bool solve(GridFunc& gf_phi, GridFunc& gf_rhs) override; + bool solve(GridFunc& 
gf_phi, const GridFunc& gf_rhs) override; ~SolverPB() override{}; From a2ece1adc7c16334d03d785c63018259cd80208b Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Mon, 30 Sep 2024 14:08:59 -0400 Subject: [PATCH 03/99] Move some code into PoissonSolverFactory (#279) --- src/Electrostatic.cc | 200 ++------------------------------ src/Electrostatic.h | 1 + src/PoissonSolverFactory.h | 228 +++++++++++++++++++++++++++++++++++++ 3 files changed, 237 insertions(+), 192 deletions(-) create mode 100644 src/PoissonSolverFactory.h diff --git a/src/Electrostatic.cc b/src/Electrostatic.cc index 80cb754b..6ac4fef3 100644 --- a/src/Electrostatic.cc +++ b/src/Electrostatic.cc @@ -11,6 +11,7 @@ #include "Control.h" #include "ExtendedGridOrbitals.h" #include "GridFactory.h" +#include "GridFunc.h" #include "Hartree.h" #include "Hartree_CG.h" #include "Ions.h" @@ -23,15 +24,6 @@ #include "ShiftedHartree.h" #include "mputils.h" -#include "GridFunc.h" -#include "Laph2.h" -#include "Laph4.h" -#include "Laph4M.h" -#include "Laph4MP.h" -#include "Laph6.h" -#include "Laph8.h" -#include "ShiftedLaph4M.h" - Timer Electrostatic::solve_tm_("Electrostatic::solve"); Electrostatic::Electrostatic(PoissonFDtype lap_type, const short bcPoisson[3], @@ -49,109 +41,9 @@ Electrostatic::Electrostatic(PoissonFDtype lap_type, const short bcPoisson[3], Mesh* mymesh = Mesh::instance(); const pb::Grid& myGrid = mymesh->grid(); - Control& ct = *(Control::instance()); - if (ct.MGPoissonSolver()) // use MG for Poisson Solver - { - if (screening_const > 0.) 
- { - switch (lap_type) - { - case PoissonFDtype::h4M: - poisson_solver_ - = new ShiftedHartree>( - myGrid, bc_, screening_const); - break; - default: - (*MPIdata::sout) - << "Electrostatic, shifted, Undefined option: " - << static_cast(lap_type) << std::endl; - } - } - else - { - switch (lap_type) - { - case PoissonFDtype::h4M: - poisson_solver_ - = new Hartree>(myGrid, bc_); - break; - case PoissonFDtype::h2: - poisson_solver_ - = new Hartree>(myGrid, bc_); - break; - case PoissonFDtype::h4: - poisson_solver_ - = new Hartree>(myGrid, bc_); - break; - case PoissonFDtype::h6: - poisson_solver_ - = new Hartree>(myGrid, bc_); - break; - case PoissonFDtype::h8: - poisson_solver_ - = new Hartree>(myGrid, bc_); - break; - case PoissonFDtype::h4MP: - poisson_solver_ - = new Hartree>(myGrid, bc_); - break; - default: - (*MPIdata::sout) << "Electrostatic, Undefined option: " - << static_cast(lap_type) << std::endl; - } - } - } - else // use PCG for Poisson Solver - { - if (screening_const > 0.) - { - switch (lap_type) - { - case PoissonFDtype::h4M: - poisson_solver_ - = new ShiftedHartree>( - myGrid, bc_, screening_const); - break; - default: - (*MPIdata::sout) - << "PCG Electrostatic, shifted, Undefined option: " - << static_cast(lap_type) << std::endl; - } - } - else - { - switch (lap_type) - { - case PoissonFDtype::h4M: - poisson_solver_ - = new Hartree_CG>(myGrid, bc_); - break; - case PoissonFDtype::h2: - poisson_solver_ - = new Hartree_CG>(myGrid, bc_); - break; - case PoissonFDtype::h4: - poisson_solver_ - = new Hartree_CG>(myGrid, bc_); - break; - case PoissonFDtype::h6: - poisson_solver_ - = new Hartree_CG>(myGrid, bc_); - break; - case PoissonFDtype::h8: - poisson_solver_ - = new Hartree_CG>(myGrid, bc_); - break; - case PoissonFDtype::h4MP: - poisson_solver_ - = new Hartree_CG>(myGrid, bc_); - break; - default: - (*MPIdata::sout) << "PCG Electrostatic, Undefined option: " - << static_cast(lap_type) << std::endl; - } - } - } + // create Poisson solver + 
poisson_solver_ = PoissonSolverFactory::create( + myGrid, lap_type, bcPoisson, screening_const); grhoc_ = nullptr; diel_flag_ = false; @@ -244,73 +136,8 @@ void Electrostatic::setupPB( ngpts, origin, cell, static_cast(laptype_), true, myPEenv); if (poisson_solver_ != nullptr) delete poisson_solver_; - Control& ct = *(Control::instance()); - if (ct.MGPoissonSolver()) // use MG for Poisson Solver - { - switch (laptype_) - { - case PoissonFDtype::h4M: - poisson_solver_ = new PBdiel>( - *pbGrid_, bc_, e0, rho0, drho0); - break; - case PoissonFDtype::h2: - poisson_solver_ = new PBdiel>( - *pbGrid_, bc_, e0, rho0, drho0); - break; - case PoissonFDtype::h4: - poisson_solver_ = new PBdiel>( - *pbGrid_, bc_, e0, rho0, drho0); - break; - case PoissonFDtype::h6: - poisson_solver_ = new PBdiel>( - *pbGrid_, bc_, e0, rho0, drho0); - break; - case PoissonFDtype::h8: - poisson_solver_ = new PBdiel>( - *pbGrid_, bc_, e0, rho0, drho0); - break; - case PoissonFDtype::h4MP: - poisson_solver_ = new PBdiel>( - *pbGrid_, bc_, e0, rho0, drho0); - break; - default: - (*MPIdata::sout) - << "Electrostatic, Undefined option" << std::endl; - } - } - else // use PCG for Poisson Solver - { - switch (laptype_) - { - case PoissonFDtype::h4M: - poisson_solver_ = new PBdiel_CG>( - *pbGrid_, bc_, e0, rho0, drho0); - break; - case PoissonFDtype::h2: - poisson_solver_ = new PBdiel_CG>( - *pbGrid_, bc_, e0, rho0, drho0); - break; - case PoissonFDtype::h4: - poisson_solver_ = new PBdiel_CG>( - *pbGrid_, bc_, e0, rho0, drho0); - break; - case PoissonFDtype::h6: - poisson_solver_ = new PBdiel_CG>( - *pbGrid_, bc_, e0, rho0, drho0); - break; - case PoissonFDtype::h8: - poisson_solver_ = new PBdiel_CG>( - *pbGrid_, bc_, e0, rho0, drho0); - break; - case PoissonFDtype::h4MP: - poisson_solver_ = new PBdiel_CG>( - *pbGrid_, bc_, e0, rho0, drho0); - break; - default: - (*MPIdata::sout) - << "Electrostatic, Undefined option" << std::endl; - } - } + poisson_solver_ = PoissonSolverFactory::createDiel( + *pbGrid_, 
laptype_, bc_, e0, rho0, drho0); if (grhoc_ != nullptr) { @@ -330,6 +157,7 @@ void Electrostatic::setupPB( poisson_solver_->set_vh(gf_vh); } +// This function is only useful for Hartree problem with dielectric continuum void Electrostatic::fillFuncAroundIons(const Ions& ions) { assert(grhod_ != nullptr); @@ -352,7 +180,6 @@ void Electrostatic::fillFuncAroundIons(const Ions& ions) std::vector::const_iterator ion = rc_ions.begin(); while (ion != rc_ions.end()) { - double rc = (*ion)->getRC(); // Special case: silicon if ((*ion)->isMass28()) rc = 2.0; @@ -373,43 +200,32 @@ void Electrostatic::fillFuncAroundIons(const Ions& ions) #endif for (unsigned int ix = 0; ix < pbGrid_->dim(0); ix++) { - xc[1] = pbGrid_->start(1); const int ix1 = (ix + shift) * incx; for (unsigned int iy = 0; iy < pbGrid_->dim(1); iy++) { - xc[2] = pbGrid_->start(2); const int iy1 = ix1 + (iy + shift) * incy; for (unsigned int iz = 0; iz < pbGrid_->dim(2); iz++) { - const double r = (*ion)->minimage(xc, lattice, bc_); if (r < rc) { const double alpha = 0.2 * (1. + cos(r * pi_rc)); - - const int iz1 = iy1 + iz + shift; + const int iz1 = iy1 + iz + shift; vv[iz1] += alpha; } - xc[2] += pbGrid_->hgrid(2); } - xc[1] += pbGrid_->hgrid(1); - } // end for iy - xc[0] += pbGrid_->hgrid(0); - } // end for ix } - ion++; - } // end loop on list of ions return; diff --git a/src/Electrostatic.h b/src/Electrostatic.h index a3b86879..072ce0cc 100644 --- a/src/Electrostatic.h +++ b/src/Electrostatic.h @@ -13,6 +13,7 @@ #include "Control.h" #include "GridFunc.h" #include "Poisson.h" +#include "PoissonSolverFactory.h" #include "Rho.h" #include "Timer.h" diff --git a/src/PoissonSolverFactory.h b/src/PoissonSolverFactory.h new file mode 100644 index 00000000..91b99bf2 --- /dev/null +++ b/src/PoissonSolverFactory.h @@ -0,0 +1,228 @@ +// Copyright (c) 2017, Lawrence Livermore National Security, LLC and +// UT-Battelle, LLC. 
+// Produced at the Lawrence Livermore National Laboratory and the Oak Ridge +// National Laboratory. +// LLNL-CODE-743438 +// All rights reserved. +// This file is part of MGmol. For details, see https://github.com/llnl/mgmol. +// Please also read this link https://github.com/llnl/mgmol/LICENSE +#ifndef MGMOL_PoissonSolverFactory +#define MGMOL_PoissonSolverFactory + +#include "Control.h" +#include "Hartree.h" +#include "Hartree_CG.h" +#include "Mesh.h" +#include "PBdiel.h" +#include "PBdiel_CG.h" +#include "ShiftedHartree.h" +#include "mputils.h" + +#include "GridFunc.h" +#include "Laph2.h" +#include "Laph4.h" +#include "Laph4M.h" +#include "Laph4MP.h" +#include "Laph6.h" +#include "Laph8.h" +#include "ShiftedLaph4M.h" + +class PoissonSolverFactory +{ + +public: + /*! + * return specific Poisson solver needed to solve Hartree problem + */ + static Poisson* create(const pb::Grid& myGrid, PoissonFDtype lap_type, + const short bc[3], const double screening_const) + { + Poisson* poisson_solver = nullptr; + + Control& ct = *(Control::instance()); + if (ct.MGPoissonSolver()) // use MG for Poisson Solver + { + if (screening_const > 0.) 
+ { + switch (lap_type) + { + case PoissonFDtype::h4M: + poisson_solver + = new ShiftedHartree>( + myGrid, bc, screening_const); + break; + default: + (*MPIdata::sout) + << "Electrostatic, shifted, Undefined option: " + << static_cast(lap_type) << std::endl; + } + } + else + { + switch (lap_type) + { + case PoissonFDtype::h4M: + poisson_solver + = new Hartree>(myGrid, bc); + break; + case PoissonFDtype::h2: + poisson_solver + = new Hartree>(myGrid, bc); + break; + case PoissonFDtype::h4: + poisson_solver + = new Hartree>(myGrid, bc); + break; + case PoissonFDtype::h6: + poisson_solver + = new Hartree>(myGrid, bc); + break; + case PoissonFDtype::h8: + poisson_solver + = new Hartree>(myGrid, bc); + break; + case PoissonFDtype::h4MP: + poisson_solver + = new Hartree>(myGrid, bc); + break; + default: + (*MPIdata::sout) + << "Electrostatic, Undefined option: " + << static_cast(lap_type) << std::endl; + } + } + } + else // use PCG for Poisson Solver + { + if (screening_const > 0.) + { + switch (lap_type) + { + case PoissonFDtype::h4M: + poisson_solver + = new ShiftedHartree>( + myGrid, bc, screening_const); + break; + default: + (*MPIdata::sout) + << "PCG Electrostatic, shifted, Undefined option: " + << static_cast(lap_type) << std::endl; + } + } + else + { + switch (lap_type) + { + case PoissonFDtype::h4M: + poisson_solver + = new Hartree_CG>(myGrid, bc); + break; + case PoissonFDtype::h2: + poisson_solver + = new Hartree_CG>(myGrid, bc); + break; + case PoissonFDtype::h4: + poisson_solver + = new Hartree_CG>(myGrid, bc); + break; + case PoissonFDtype::h6: + poisson_solver + = new Hartree_CG>(myGrid, bc); + break; + case PoissonFDtype::h8: + poisson_solver + = new Hartree_CG>(myGrid, bc); + break; + case PoissonFDtype::h4MP: + poisson_solver + = new Hartree_CG>(myGrid, bc); + break; + default: + (*MPIdata::sout) + << "PCG Electrostatic, Undefined option: " + << static_cast(lap_type) << std::endl; + } + } + } + + return poisson_solver; + } + + static Poisson* 
createDiel(pb::Grid& pbGrid, PoissonFDtype lap_type, + const short bc[3], const double e0, const double rho0, + const double drho0) + { + Poisson* poisson_solver = nullptr; + + Control& ct = *(Control::instance()); + if (ct.MGPoissonSolver()) // use MG for Poisson Solver + { + switch (lap_type) + { + case PoissonFDtype::h4M: + poisson_solver = new PBdiel>( + pbGrid, bc, e0, rho0, drho0); + break; + case PoissonFDtype::h2: + poisson_solver = new PBdiel>( + pbGrid, bc, e0, rho0, drho0); + break; + case PoissonFDtype::h4: + poisson_solver = new PBdiel>( + pbGrid, bc, e0, rho0, drho0); + break; + case PoissonFDtype::h6: + poisson_solver = new PBdiel>( + pbGrid, bc, e0, rho0, drho0); + break; + case PoissonFDtype::h8: + poisson_solver = new PBdiel>( + pbGrid, bc, e0, rho0, drho0); + break; + case PoissonFDtype::h4MP: + poisson_solver = new PBdiel>( + pbGrid, bc, e0, rho0, drho0); + break; + default: + (*MPIdata::sout) + << "Electrostatic, Undefined option" << std::endl; + } + } + else // use PCG for Poisson Solver + { + switch (lap_type) + { + case PoissonFDtype::h4M: + poisson_solver = new PBdiel_CG>( + pbGrid, bc, e0, rho0, drho0); + break; + case PoissonFDtype::h2: + poisson_solver = new PBdiel_CG>( + pbGrid, bc, e0, rho0, drho0); + break; + case PoissonFDtype::h4: + poisson_solver = new PBdiel_CG>( + pbGrid, bc, e0, rho0, drho0); + break; + case PoissonFDtype::h6: + poisson_solver = new PBdiel_CG>( + pbGrid, bc, e0, rho0, drho0); + break; + case PoissonFDtype::h8: + poisson_solver = new PBdiel_CG>( + pbGrid, bc, e0, rho0, drho0); + break; + case PoissonFDtype::h4MP: + poisson_solver = new PBdiel_CG>( + pbGrid, bc, e0, rho0, drho0); + break; + default: + (*MPIdata::sout) + << "Electrostatic, Undefined option" << std::endl; + } + } + return poisson_solver; + } +}; + +#endif From 8b577ea75f805f6288e0c9e91fba75b960a59a39 Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Thu, 3 Oct 2024 10:03:02 -0400 Subject: [PATCH 04/99] Clean up class Potentials (#280) --- 
src/MGmol.cc | 2 +- src/Potentials.cc | 24 +++----------------- src/Potentials.h | 56 ++++++++++++++++++++++++++++++++--------------- src/tools.cc | 17 +++++++++++++- src/tools.h | 1 + 5 files changed, 59 insertions(+), 41 deletions(-) diff --git a/src/MGmol.cc b/src/MGmol.cc index be4f28aa..4aa1fa26 100644 --- a/src/MGmol.cc +++ b/src/MGmol.cc @@ -837,7 +837,7 @@ void MGmol::initNuc(Ions& ions) pot.initialize(ions); // Check compensating charges - double comp_rho = pot.getCharge(pot.rho_comp()); + double comp_rho = getCharge(pot.rho_comp()); if (onpe0 && ct.verbose > 1) { diff --git a/src/Potentials.cc b/src/Potentials.cc index ee874f10..380f05a0 100644 --- a/src/Potentials.cc +++ b/src/Potentials.cc @@ -27,6 +27,7 @@ #include using namespace std; +// unit conversion factor Ha -> Ry const double ha2ry = 2.; Potentials::~Potentials() @@ -57,13 +58,9 @@ Potentials::Potentials(const bool vh_frozen) size_ = dim_[0] * dim_[1] * dim_[2]; - mix_ = 1.; - scf_dvrho_ = 1000.; scf_dv_ = 1000.; - vh_frozen_ = vh_frozen; - vtot_.resize(size_); vtot_old_.resize(size_); @@ -101,7 +98,7 @@ void Potentials::initWithVnuc() double one = 1.; LinearAlgebraUtils::MPaxpy( size_, one, &v_ext_[0], &vtot_[0]); - // factor 2 to get total potential in [Ry] for calculations + // factor ha2ry to get total potential in [Ry] for calculations LinearAlgebraUtils::MPscal(size_, ha2ry, &vtot_[0]); } @@ -568,9 +565,8 @@ template void Potentials::setVxc(const T* const vxc, const int iterativeIndex) { assert(iterativeIndex >= 0); - // int ione=1; + itindex_vxc_ = iterativeIndex; - // Tcopy(&size_, vxc, &ione, &vxc_rho_[0], &ione); MPcpy(&vxc_rho_[0], vxc, size_); } void Potentials::setVh(const POTDTYPE* const vh, const int iterativeIndex) @@ -849,20 +845,6 @@ void Potentials::rescaleRhoComp() if (comp_rho < 0.) 
mmpi.abort(); } -double Potentials::getCharge(RHODTYPE* rho) -{ - Control& ct = *(Control::instance()); - Mesh* mymesh = Mesh::instance(); - const pb::Grid& mygrid = mymesh->grid(); - - double charge = mygrid.integralOverMesh(rho); - - if (onpe0 && ct.verbose > 0) - cout << setprecision(8) << fixed << "Charge: " << charge << endl; - - return charge; -} - void Potentials::addBackgroundToRhoComp() { if (fabs(background_charge_) > 0.) diff --git a/src/Potentials.h b/src/Potentials.h index 0cd7a8a6..205fb538 100644 --- a/src/Potentials.h +++ b/src/Potentials.h @@ -30,7 +30,6 @@ class Potentials int gdim_[3]; int dim_[3]; bool diel_; - double mix_; double scf_dvrho_; double scf_dv_; @@ -39,8 +38,9 @@ class Potentials double charge_in_cell_; double ionic_charge_; - bool vh_frozen_; - + /*! + * Total KS potential seen by electrons + */ std::vector vtot_; std::vector vtot_old_; @@ -48,16 +48,30 @@ class Potentials std::vector vh_rho_; std::vector vxc_rho_; - // nuclei local potential + /* + * Potential contribution from atomic cores (local pseudopotential) + */ std::vector v_nuc_; - // external potential (read from input) + /*! + * Optional external potential (read from input) + * Used only in special cases. + */ std::vector v_ext_; #ifdef HAVE_TRICUBIC pb::TriCubic* vext_tricubic_; #endif + /*! + * Potential associated with the sum of Gaussian charge distributions + * compensating the Coulomb potential of each atom + */ std::vector v_comp_; + + /*! 
+ * Sum of Gaussian charge distributions compensating the Coulomb potential + * of each atom + */ std::vector rho_comp_; std::vector dv_; @@ -121,14 +135,11 @@ class Potentials void turnOnDiel() { diel_ = true; } int size() const { return size_; } - bool vh_frozen() const { return vh_frozen_; } - void freeze_vh() { vh_frozen_ = true; } double scf_dvrho(void) const { return scf_dvrho_; } double scf_dv(void) const { return scf_dv_; } POTDTYPE* vtot() { return &vtot_[0]; } POTDTYPE* vh_rho() { return &vh_rho_[0]; } - POTDTYPE* vxc_rho() { return &vxc_rho_[0]; } RHODTYPE* rho_comp() { return &rho_comp_[0]; } const std::vector& vnuc() const { return v_nuc_; } @@ -136,12 +147,6 @@ class Potentials POTDTYPE* vext() { return &v_ext_[0]; } POTDTYPE* vepsilon() { return &vepsilon_[0]; } - void set_vcomp(const POTDTYPE val) - { - const int n = (int)v_comp_.size(); - for (int i = 0; i < n; i++) - v_comp_[i] = val; - } void axpVcompToVh(const double alpha); void axpVcomp(POTDTYPE* v, const double alpha); @@ -154,13 +159,29 @@ class Potentials double getChargeInCell() const { return charge_in_cell_; } + /*! + * initialize total potential as local pseudopotential + */ void initWithVnuc(); void getVofRho(std::vector& vrho) const; - double delta_v(const std::vector>&); - double update(const std::vector>&); - void update(const double); + /*! + * evaluate potential correction associated with a new rho + */ + double delta_v(const std::vector>& rho); + + /*! + * update potentials based on argument rho + */ + double update(const std::vector>& rho); + + /*! 
+ * update potentials based on potential correction delta v and mixing + * parameter + */ + void update(const double mix); + double max() const; double min() const; void readAll(std::vector& sp); @@ -172,7 +193,6 @@ class Potentials void initialize(Ions& ions); void rescaleRhoComp(); - double getCharge(RHODTYPE* rho); void initBackground(Ions& ions); void addBackgroundToRhoComp(); diff --git a/src/tools.cc b/src/tools.cc index 4e1bd314..95790c59 100644 --- a/src/tools.cc +++ b/src/tools.cc @@ -228,7 +228,7 @@ void printWithTimeStamp(const std::string& string2print, std::ostream& os) int mpierr=MPI_Reduce(&s, &r, 1, MPI_INT, MPI_SUM, 0, mmpi.commGlobal()); if( mpierr!=MPI_SUCCESS ) { - cerr << " Error in MPI!!!" << endl; + cerr << " Error in MPI!!!" << std::endl; MPI_Abort(mmpi.commGlobal(),1); } if( r!=mmpi.size()*s && onpe0 ) @@ -388,3 +388,18 @@ void getkvector(const int index, const int kmax, int kvector[3]) // std::cout << " k=(" << kvector[0] << "," << kvector[1] << "," // << kvector[2] << ")" << std::endl; } + +double getCharge(double* rho) +{ + Control& ct = *(Control::instance()); + Mesh* mymesh = Mesh::instance(); + const pb::Grid& mygrid = mymesh->grid(); + + double charge = mygrid.integralOverMesh(rho); + + if (onpe0 && ct.verbose > 0) + std::cout << std::setprecision(8) << std::fixed << "Charge: " << charge + << std::endl; + + return charge; +} diff --git a/src/tools.h b/src/tools.h index 4e8374da..be3d13e0 100644 --- a/src/tools.h +++ b/src/tools.h @@ -37,5 +37,6 @@ double minQuadPolynomial(const double e0, const double e1, const double de0, double minQuadPolynomialFrom3values(const double e0, const double e1, const double e12, const bool print_flag, std::ostream& os); void getkvector(const int index, const int kmax, int kvector[3]); +double getCharge(double* rho); #endif From 7ac691c7126e45223c5ea33d043f256de1a52519 Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Fri, 11 Oct 2024 21:44:42 -0400 Subject: [PATCH 05/99] Clean up class Ions, add 
test for it (#281) --- src/Control.cc | 1 + src/Ions.cc | 324 ++++++++++++++++----------------------- src/Ions.h | 23 ++- src/KBprojectorSparse.cc | 4 +- tests/CMakeLists.txt | 8 + tests/testIons.cc | 104 +++++++++++++ 6 files changed, 265 insertions(+), 199 deletions(-) create mode 100644 tests/testIons.cc diff --git a/src/Control.cc b/src/Control.cc index af3d8ed9..dd786678 100644 --- a/src/Control.cc +++ b/src/Control.cc @@ -70,6 +70,7 @@ Control::Control() dm_approx_ndigits = 1; dm_approx_power_maxits = 100; wf_extrapolation_ = 1; + verbose = 0; // undefined values dm_algo_ = -1; diff --git a/src/Ions.cc b/src/Ions.cc index 940e12e8..852151c7 100644 --- a/src/Ions.cc +++ b/src/Ions.cc @@ -584,11 +584,9 @@ void Ions::printPositionsLocal(std::ostream& os, const int root) const os.setf(std::ios::right, std::ios::adjustfield); os.setf(std::ios::fixed, std::ios::floatfield); - std::vector::const_iterator ion = local_ions_.begin(); - while (ion != local_ions_.end()) + for (auto& ion : local_ions_) { - (*ion)->printPosition(os); - ion++; + ion->printPosition(os); } os << std::endl; @@ -627,14 +625,12 @@ void Ions::writeAtomicNumbers(HDFrestart& h5f_file) } else { - std::vector::const_iterator ion = local_ions_.begin(); - while (ion != local_ions_.end()) + for (auto& ion : local_ions_) { - assert((*ion)->atomic_number() > 0); - assert((*ion)->atomic_number() < 200); + assert(ion->atomic_number() > 0); + assert(ion->atomic_number() < 200); - data.push_back((*ion)->atomic_number()); - ion++; + data.push_back(ion->atomic_number()); } } @@ -685,17 +681,15 @@ void Ions::writeAtomNames(HDFrestart& h5f_file) void Ions::lockAtom(const std::string& name) { - std::vector::iterator ion = local_ions_.begin(); - while (ion != local_ions_.end()) + for (auto& ion : local_ions_) { - std::string name_ion((*ion)->name()); + std::string name_ion(ion->name()); if (name.compare(name_ion) == 0) { - (*ion)->lock(); + ion->lock(); if (onpe0) (*MPIdata::sout) << "Lock atom " << name << 
std::endl; break; } - ion++; } } @@ -975,12 +969,10 @@ void Ions::readRestartPositions(HDFrestart& h5_file) std::vector data; h5_file.readAtomicPositions(data); - int i = 0; - std::vector::iterator ion = local_ions_.begin(); - while (ion != local_ions_.end()) + int i = 0; + for (auto& ion : local_ions_) { - (*ion)->setPosition(data[3 * i], data[3 * i + 1], data[3 * i + 2]); - ion++; + ion->setPosition(data[3 * i], data[3 * i + 1], data[3 * i + 2]); i++; } } @@ -1109,14 +1101,12 @@ void Ions::removeMassCenterMotion() #endif } - ion = local_ions_.begin(); - i = 0; - while (ion != local_ions_.end()) + i = 0; + for (auto& ion : local_ions_) { const int threei = 3 * i; - (*ion)->setVelocity(velocities[threei] - tmp[0], + ion->setVelocity(velocities[threei] - tmp[0], velocities[threei + 1] - tmp[1], velocities[threei + 2] - tmp[2]); - ion++; i++; } @@ -1124,16 +1114,15 @@ void Ions::removeMassCenterMotion() mv[0] = 0.; mv[1] = 0.; mv[2] = 0.; - while (ion != local_ions_.end()) + for (auto& ion : local_ions_) { - velocities[3 * i] = (*ion)->velocity(0); - velocities[3 * i + 1] = (*ion)->velocity(1); - velocities[3 * i + 2] = (*ion)->velocity(2); + velocities[3 * i] = ion->velocity(0); + velocities[3 * i + 1] = ion->velocity(1); + velocities[3 * i + 2] = ion->velocity(2); mv[0] += mass[i] * velocities[3 * i]; mv[1] += mass[i] * velocities[3 * i + 1]; mv[2] += mass[i] * velocities[3 * i + 2]; - ion++; i++; } @@ -1155,12 +1144,10 @@ void Ions::readRestartVelocities(HDFrestart& h5_file) std::vector data; h5_file.readAtomicVelocities(data); - std::vector::iterator ion = local_ions_.begin(); - int i = 0; - while (ion != local_ions_.end()) + int i = 0; + for (auto& ion : local_ions_) { - (*ion)->setVelocity(data[3 * i], data[3 * i + 1], data[3 * i + 2]); - ion++; + ion->setVelocity(data[3 * i], data[3 * i + 1], data[3 * i + 2]); i++; } } @@ -1174,12 +1161,10 @@ void Ions::readRestartRandomStates(HDFrestart& h5f_file) std::vector data; h5f_file.readRestartRandomStates(data); - 
std::vector::iterator ion = local_ions_.begin(); - int i = 0; - while (ion != local_ions_.end()) + int i = 0; + for (auto& ion : local_ions_) { - (*ion)->setRandomState(data[3 * i], data[3 * i + 1], data[3 * i + 2]); - ion++; + ion->setRandomState(data[3 * i], data[3 * i + 1], data[3 * i + 2]); i++; } } @@ -1202,17 +1187,14 @@ void Ions::writeForces(HDFrestart& h5f_file) } else { - std::vector::const_iterator ion = local_ions_.begin(); - while (ion != local_ions().end()) + for (auto& ion : local_ions_) { // get position of local ion double force[3]; - (*ion)->getForce(&force[0]); + ion->getForce(&force[0]); data.push_back(force[0]); data.push_back(force[1]); data.push_back(force[2]); - - ++ion; } } @@ -1364,64 +1346,54 @@ void Ions::printForcesLocal(std::ostream& os, const int root) const << "FX" << std::setw(10) << "FY" << std::setw(10) << "FZ" << std::endl; - std::vector::const_iterator ion = local_ions_.begin(); - while (ion != local_ions_.end()) + for (auto& ion : local_ions_) { + ion->printPositionAndForce(os); - (*ion)->printPositionAndForce(os); - - if (!(*ion)->locked()) + if (!ion->locked()) { + avg_forces[0] += fabs(ion->force(0)); + avg_forces[1] += fabs(ion->force(1)); + avg_forces[2] += fabs(ion->force(2)); - avg_forces[0] += fabs((*ion)->force(0)); - avg_forces[1] += fabs((*ion)->force(1)); - avg_forces[2] += fabs((*ion)->force(2)); - - double ff = (*ion)->norm2F(); + double ff = ion->norm2F(); maxf[0] = std::max(maxf[0], ff); - max_forces[0] = std::max(max_forces[0], fabs((*ion)->force(0))); - max_forces[1] = std::max(max_forces[1], fabs((*ion)->force(1))); - max_forces[2] = std::max(max_forces[2], fabs((*ion)->force(2))); + max_forces[0] = std::max(max_forces[0], fabs(ion->force(0))); + max_forces[1] = std::max(max_forces[1], fabs(ion->force(1))); + max_forces[2] = std::max(max_forces[2], fabs(ion->force(2))); num_movable++; } for (short ii = 0; ii < 3; ii++) - sum_forces[ii] += (*ion)->force(ii); + sum_forces[ii] += ion->force(ii); num_atoms++; - - 
ion++; } } else { - std::vector::const_iterator ion = local_ions_.begin(); - while (ion != local_ions_.end()) + for (auto& ion : local_ions_) { - - if (!(*ion)->locked()) + if (!ion->locked()) { + avg_forces[0] += fabs(ion->force(0)); + avg_forces[1] += fabs(ion->force(1)); + avg_forces[2] += fabs(ion->force(2)); - avg_forces[0] += fabs((*ion)->force(0)); - avg_forces[1] += fabs((*ion)->force(1)); - avg_forces[2] += fabs((*ion)->force(2)); - - double ff = (*ion)->norm2F(); + double ff = ion->norm2F(); maxf[0] = std::max(maxf[0], ff); - max_forces[0] = std::max(max_forces[0], fabs((*ion)->force(0))); - max_forces[1] = std::max(max_forces[1], fabs((*ion)->force(1))); - max_forces[2] = std::max(max_forces[2], fabs((*ion)->force(2))); + max_forces[0] = std::max(max_forces[0], fabs(ion->force(0))); + max_forces[1] = std::max(max_forces[1], fabs(ion->force(1))); + max_forces[2] = std::max(max_forces[2], fabs(ion->force(2))); num_movable++; } for (short ii = 0; ii < 3; ii++) - sum_forces[ii] += (*ion)->force(ii); + sum_forces[ii] += ion->force(ii); num_atoms++; - - ion++; } } // global statistics @@ -1479,12 +1451,10 @@ int Ions::countIonsHere() const { return (int)local_ions_.size(); } int Ions::countProjectorsHere() const { - int count = 0; - std::vector::const_iterator ion = local_ions_.begin(); - while (ion != local_ions_.end()) + int count = 0; + for (auto& ion : local_ions_) { - count += (*ion)->nProjectors(); - ion++; + count += ion->nProjectors(); } return count; } @@ -1497,12 +1467,10 @@ int Ions::countProjectors() const Mesh* mymesh = Mesh::instance(); const pb::PEenv& myPEenv = mymesh->peenv(); - int nproj = 0; - std::vector::const_iterator ion = local_ions_.begin(); - while (ion != local_ions_.end()) + int nproj = 0; + for (auto& ion : local_ions_) { - nproj += (*ion)->nProjectors(); - ion++; + nproj += ion->nProjectors(); } int tmp = nproj; MPI_Allreduce(&tmp, &nproj, 1, MPI_INT, MPI_SUM, myPEenv.comm()); @@ -1569,13 +1537,10 @@ void 
Ions::setLocalPositions(const std::vector& tau) { assert(tau.size() == 3 * local_ions_.size()); - std::vector::iterator ion = local_ions_.begin(); - int ia = 0; - while (ion != local_ions_.end()) + int ia = 0; + for (auto& ion : local_ions_) { - (*ion)->setPosition(tau[3 * ia + 0], tau[3 * ia + 1], tau[3 * ia + 2]); - - ion++; + ion->setPosition(tau[3 * ia + 0], tau[3 * ia + 1], tau[3 * ia + 2]); ia++; } @@ -1780,45 +1745,8 @@ int Ions::setAtoms( if (ia < 1000) aname.append("0"); if (ia < 10000) aname.append("0"); aname.append(ss.str()); - Ion* new_ion - = new Ion(species_[isp], aname, &crds[3 * ia], velocity, locked); - new_ion->bcast(mmpi.commGlobal()); - - // Populate list_ions_ list - // std::cout<<"crds: "< 2) - (*MPIdata::sout) - << "Ion " << aname << " at position " << crds[3 * ia + 0] - << "," << crds[3 * ia + 1] << "," << crds[3 * ia + 2] - << " added to the list... on PE" << mmpi.mypeGlobal() - << std::endl; - // populate local_ions_ list - if (inLocalIons( - crds[3 * ia + 0], crds[3 * ia + 1], crds[3 * ia + 2])) - { - (new_ion)->set_here(true); - local_ions_.push_back(new_ion); - if (onpe0 && ct.verbose > 2) - (*MPIdata::sout) - << "Ion " << aname << " at position " - << crds[3 * ia + 0] << "," << crds[3 * ia + 1] << "," - << crds[3 * ia + 2] - << " added to the list of local ions... on PE" - << mmpi.mypeGlobal() << std::endl; - } - else - (new_ion)->set_here(false); - } - else - { - //(*MPIdata::sout)<<"Ion "<set_here(true); + local_ions_.push_back(new_ion); + if (onpe0 && ct.verbose > 2) + (*MPIdata::sout) << "Ion " << name << " at position " << crds[0] + << "," << crds[1] << "," << crds[2] + << " added to the list of local ions... 
on PE" + << mmpi.mypeGlobal() << std::endl; + } + else + (new_ion)->set_here(false); + } + else + { + // delete Ion if not put in list + delete new_ion; + } +} + int Ions::readNatoms(const std::string& filename, const bool cell_relative) { Control& ct(*(Control::instance())); @@ -2117,9 +2085,8 @@ void Ions::setVelocities(const std::vector& tau0, assert(tau0.size() == 3 * local_ions_.size()); assert(taup.size() == 3 * local_ions_.size()); - int ia = 0; - std::vector::iterator iion = local_ions_.begin(); - while (iion != local_ions_.end()) + int ia = 0; + for (auto& ion : local_ions_) { double v[3]; for (short i = 0; i < 3; i++) @@ -2128,8 +2095,7 @@ void Ions::setVelocities(const std::vector& tau0, v[i] -= tau0[3 * ia + i]; v[i] /= dt; } - (*iion)->setVelocity(v[0], v[1], v[2]); - iion++; + ion->setVelocity(v[0], v[1], v[2]); ia++; } } @@ -2138,12 +2104,10 @@ void Ions::getLocalPositions(std::vector& tau) const { assert(tau.size() == 3 * local_ions_.size()); - int ia = 0; - std::vector::const_iterator iion = local_ions_.begin(); - while (iion != local_ions_.end()) + int ia = 0; + for (auto& ion : local_ions_) { - (*iion)->getPosition(&tau[3 * ia]); - iion++; + ion->getPosition(&tau[3 * ia]); ia++; } } @@ -2188,12 +2152,10 @@ void Ions::setTau0() { assert(tau0_.size() == 3 * local_ions_.size()); - int ia = 0; - std::vector::const_iterator iion = local_ions_.begin(); - while (iion != local_ions_.end()) + int ia = 0; + for (auto& ion : local_ions_) { - (*iion)->getPosition(&tau0_[3 * ia]); - iion++; + ion->getPosition(&tau0_[3 * ia]); ia++; } } @@ -2202,13 +2164,11 @@ void Ions::setPositionsToTau0() { assert(tau0_.size() == 3 * local_ions_.size()); - int ia = 0; - std::vector::const_iterator iion = local_ions_.begin(); - while (iion != local_ions_.end()) + int ia = 0; + for (auto& ion : local_ions_) { - (*iion)->setPosition( + ion->setPosition( tau0_[3 * ia + 0], tau0_[3 * ia + 1], tau0_[3 * ia + 2]); - iion++; ia++; } } @@ -2244,13 +2204,11 @@ void 
Ions::getLocalForces(std::vector& tau) const { assert(tau.size() == 3 * local_ions_.size()); - int ia = 0; - std::vector::const_iterator iion = local_ions_.begin(); - while (iion != local_ions_.end()) + int ia = 0; + for (auto& ion : local_ions_) { assert(3 * ia + 2 < (int)tau.size()); - (*iion)->getForce(&tau[3 * ia]); - iion++; + ion->getForce(&tau[3 * ia]); ia++; } } @@ -2351,12 +2309,10 @@ double Ions::computeMaxVlRadius() const { double radius = 0.; - std::vector::const_iterator iion = local_ions_.begin(); - while (iion != local_ions_.end()) + for (auto& ion : local_ions_) { - double r = (*iion)->computeRadiusVl(); + double r = ion->computeRadiusVl(); radius = r > radius ? r : radius; - iion++; } MGmol_MPI& mmpi(*(MGmol_MPI::instance())); @@ -2393,13 +2349,11 @@ void Ions::gatherNames(std::map& names, const int root, std::vector data; std::vector local_names; - std::vector::const_iterator ion = local_ions_.begin(); - while (ion != local_ions().end()) + for (auto& ion : local_ions_) { // get local name and index - local_names.push_back((*ion)->name()); - local_indexes.push_back((*ion)->index()); - ++ion; + local_names.push_back(ion->name()); + local_indexes.push_back(ion->index()); } // gather data to PE root @@ -2695,15 +2649,13 @@ bool Ions::hasLockedAtoms() const return (flag == 1); } -double Ions::getMaxNLradius() const +double Ions::getSpeciesMaxNLradius() const { - double radius = 0; - std::vector::const_iterator spi = species_.begin(); - while (spi != species_.end()) + double radius = 0.; + for (const auto& spi : species_) { - const double nlradius = spi->nlradius(); + const double nlradius = spi.nlradius(); radius = radius > nlradius ? 
radius : nlradius; - spi++; } MGmol_MPI& mmpi(*(MGmol_MPI::instance())); mmpi.allreduce(&radius, 1, MPI_MAX); @@ -2711,15 +2663,13 @@ double Ions::getMaxNLradius() const return radius; } -double Ions::getMaxLradius() const +double Ions::getSpeciesMaxLradius() const { - double radius = 0; - std::vector::const_iterator spi = species_.begin(); - while (spi != species_.end()) + double radius = 0.; + for (const auto& spi : species_) { - const double lradius = spi->lradius(); + const double lradius = spi.lradius(); radius = radius > lradius ? radius : lradius; - spi++; } MGmol_MPI& mmpi(*(MGmol_MPI::instance())); mmpi.allreduce(&radius, 1, MPI_MAX); @@ -2918,8 +2868,8 @@ void Ions::computeNumIons(void) double Ions::getMaxListRadius() const { // get radius of projectors - const double nlradius = getMaxNLradius(); - const double lradius = getMaxLradius(); + const double nlradius = getSpeciesMaxNLradius(); + const double lradius = getSpeciesMaxLradius(); double rmax = nlradius > lradius ? nlradius : lradius; @@ -3289,21 +3239,15 @@ void Ions::updateIons() Control& ct(*(Control::instance())); // update local_ions data - std::vector::iterator ion = local_ions_.begin(); - int ia = 0; - while (ion != local_ions_.end()) + int ia = 0; + for (auto& ion : local_ions_) { - (*ion)->setPosition( + ion->setPosition( tau0_[3 * ia + 0], tau0_[3 * ia + 1], tau0_[3 * ia + 2]); - // (*ion)->setForce(fion_[3*ia+0], - // fion_[3*ia+1], - // fion_[3*ia+2]); - - (*ion)->setRandomState(rand_states_[3 * ia + 0], - rand_states_[3 * ia + 1], rand_states_[3 * ia + 2]); + ion->setRandomState(rand_states_[3 * ia + 0], rand_states_[3 * ia + 1], + rand_states_[3 * ia + 2]); - ion++; ia++; } diff --git a/src/Ions.h b/src/Ions.h index 772c51d1..ae4f6adb 100644 --- a/src/Ions.h +++ b/src/Ions.h @@ -71,13 +71,14 @@ class Ions void setMapVL(); double computeMaxVlRadius() const; double computeMaxNLprojRadius() const; - double getMaxNLradius() const; - double getMaxLradius() const; - void updateListIons(); - 
// compute boundary for box containing all atoms to be known on local - // processor that is values for list_boundary_left_, list_boundary_right_ - void setupListIonsBoundaries(const double rmax); + /* + * Evaluate maximum pseudopotential radius among all species in class + */ + double getSpeciesMaxNLradius() const; + double getSpeciesMaxLradius() const; + + void updateListIons(); void augmentIonsData(const int nsteps, const int dir, const int disp, const int locSize, const int maxLocSize, std::vector& data, @@ -163,6 +164,8 @@ class Ions bool hasLockedAtoms() const; void clearLists(); + void rescaleVelocities(const double factor); + public: Ions(const double lat[3], const std::vector& sp); @@ -172,6 +175,10 @@ class Ions void setup(); + // compute boundary for box containing all atoms to be known on local + // processor that is values for list_boundary_left_, list_boundary_right_ + void setupListIonsBoundaries(const double rmax); + std::vector& getLocalNames() { return local_names_; } std::vector& getTau0() { return tau0_; } std::vector& getTaup() { return taup_; } @@ -334,13 +341,15 @@ class Ions void updateForcesInteractingIons(); void updateTaupInteractingIons(); - void rescaleVelocities(const double factor); /*! 
* Calculate minimum distance between local pairs */ double computeMinLocalSpacing() const; + void addIonToList(const Species& sp, const std::string& name, + const double crds[3], const double velocity[3], const bool lock); + // void checkUnicityLocalIons(); }; diff --git a/src/KBprojectorSparse.cc b/src/KBprojectorSparse.cc index afd8871c..937b95d2 100644 --- a/src/KBprojectorSparse.cc +++ b/src/KBprojectorSparse.cc @@ -103,7 +103,7 @@ void KBprojectorSparse::setNLindex( for (int i = 0; i < size_nl; i++) { - assert(i < lnumpt); + // assert(i < lnumpt); if ((pvec[i] < iloc * lnumpt) || (pvec[i] >= (iloc + 1) * lnumpt)) { (*MPIdata::sout) << " iloc=" << iloc << ", i=" << i @@ -960,7 +960,7 @@ bool KBprojectorSparse::setIndexesAndProjectors() // get "pvec" and "is_in_domain" int icount = get_index_array(pvec, iloc, index_low, index_high); assert(icount <= nl3); - assert(icount <= mymesh->npointsPatch()); + // assert(icount <= mymesh->npointsPatch()); assert(icount > 0); setNLindex(iloc, icount, pvec); diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index cc205167..6de34971 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -209,6 +209,8 @@ add_executable(testMGkernels ${CMAKE_SOURCE_DIR}/src/pb/FDkernels.cc ${CMAKE_SOURCE_DIR}/src/tools/Timer.cc ${CMAKE_SOURCE_DIR}/tests/ut_main.cc) +add_executable(testIons + ${CMAKE_SOURCE_DIR}/tests/testIons.cc) add_executable(testGramMatrix ${CMAKE_SOURCE_DIR}/tests/testGramMatrix.cc ${CMAKE_SOURCE_DIR}/src/GramMatrix.cc @@ -334,6 +336,10 @@ add_test(NAME testBatchLaph4 add_test(NAME testtMGkernels COMMAND ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 4 ${MPIEXEC_PREFLAGS} ${CMAKE_CURRENT_BINARY_DIR}/testMGkernels) +add_test(NAME testIons + COMMAND ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 4 ${MPIEXEC_PREFLAGS} + ${CMAKE_CURRENT_BINARY_DIR}/testIons + ${CMAKE_CURRENT_SOURCE_DIR}/../potentials) add_test(NAME testGramMatrix COMMAND ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 4 ${MPIEXEC_PREFLAGS} 
${CMAKE_CURRENT_BINARY_DIR}/testGramMatrix) @@ -541,6 +547,7 @@ target_include_directories(testPowerDistMatrix PRIVATE ${Boost_INCLUDE_DIRS}) target_include_directories(testDensityMatrix PRIVATE ${Boost_INCLUDE_DIRS}) target_include_directories(testGramMatrix PRIVATE ${Boost_INCLUDE_DIRS}) target_include_directories(testAndersonMix PRIVATE ${Boost_INCLUDE_DIRS}) +target_include_directories(testIons PRIVATE ${Boost_INCLUDE_DIRS} ${HDF5_INCLUDE_DIRS}) target_link_libraries(testMPI PRIVATE MPI::MPI_CXX) target_link_libraries(testBlacsContext PRIVATE ${SCALAPACK_LIBRARIES} @@ -550,6 +557,7 @@ target_link_libraries(testDirectionalReduce PRIVATE MPI::MPI_CXX) target_link_libraries(testEnergyAndForces PRIVATE mgmol_src) target_link_libraries(testWFEnergyAndForces PRIVATE mgmol_src) target_link_libraries(testDMandEnergyAndForces PRIVATE mgmol_src) +target_link_libraries(testIons PRIVATE mgmol_src) if(${MAGMA_FOUND}) target_link_libraries(testDistVector PRIVATE ${SCALAPACK_LIBRARIES} diff --git a/tests/testIons.cc b/tests/testIons.cc new file mode 100644 index 00000000..30b2ad0c --- /dev/null +++ b/tests/testIons.cc @@ -0,0 +1,104 @@ +#include "Control.h" +#include "Ions.h" +#include "MGmol_MPI.h" +#include "Mesh.h" +#include "Species.h" + +#include + +int main(int argc, char** argv) +{ + int mpirc = MPI_Init(&argc, &argv); + + MPI_Comm comm = MPI_COMM_WORLD; + + int myrank; + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + + MGmol_MPI::setup(comm, std::cout); + Control::setup(comm, false, 0.); + + // create a domain [0.10.]^3 + const double origin[3] = { 0., 0., 0. 
}; + const double ll = 10.; + const double lattice[3] = { ll, ll, ll }; + const unsigned ngpts[3] = { 32, 24, 20 }; + short lap_type = 0; + + Mesh::setup(comm, ngpts, origin, lattice, lap_type); + + const double h[3] = { ll / (double(ngpts[0])), ll / (double(ngpts[1])), + ll / (double(ngpts[2])) }; + + // random number generator + static std::random_device rd; + static std::mt19937 gen(rd()); + static std::uniform_real_distribution<> dis(0.0, 1.0); + + // create one species + Species sp(MPI_COMM_WORLD); + + // read species info from pseudopotential file + std::string file_path = argv[1]; + std::string filename(file_path + "/pseudo.C_ONCV_PBE_SG15"); + std::cout << "Potential = " << filename << std::endl; + + sp.read_1species(filename); + sp.set_dim_nl(h[0]); + sp.set_dim_l(h[0]); + sp.initPotentials('f', h[0], true); + + // put species into a vector + std::vector vsp; + vsp.push_back(sp); + + Ions ions(lattice, vsp); + ions.setupListIonsBoundaries(10000.); + + double velocity[3] = { 0., 0., 0. 
}; + + // set "na" atoms coordinates and add them to "ions" + const int na = 10; + for (int i = 0; i < na; i++) + { + double x[3] = { origin[0] + lattice[0] * dis(gen), + origin[1] + lattice[1] * dis(gen), + origin[2] + lattice[2] * dis(gen) }; + if (myrank == 0) + std::cout << "x,y,z = " << x[0] << ", " << x[1] << ", " << x[2] + << std::endl; + + // set all x to the values of PE0 + MPI_Bcast(&x[0], 3, MPI_DOUBLE, 0, comm); + + // make a name for atom based on species and order of reading in + std::string stri = std::to_string(i); + std::string aname("C" + stri); + + ions.addIonToList(sp, aname, &x[0], velocity, false); + } + + ions.setup(); + + std::vector& new_local_ions(ions.local_ions()); + + int nlocal = new_local_ions.size(); + std::cout << "PE " << myrank << ", nlocal = " << nlocal << std::endl; + + int ntotal = 0; + MPI_Allreduce(&nlocal, &ntotal, 1, MPI_INT, MPI_SUM, comm); + mpirc = MPI_Finalize(); + if (mpirc != MPI_SUCCESS) + { + std::cerr << "MPI Finalize failed!!!" << std::endl; + return 1; + } + + if (ntotal != na) + { + std::cout << "ntotal = " << ntotal << std::endl; + return 1; + } + + return 0; +} From c05ba7f4cccb1077faa226e9bb79fb7294c93130 Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Mon, 25 Nov 2024 13:35:01 -0500 Subject: [PATCH 06/99] Add test MD_MVP (#290) --- tests/CMakeLists.txt | 8 ++++ tests/MD_MVP/li2.xyz | 4 ++ tests/MD_MVP/md.cfg | 39 ++++++++++++++++++ tests/MD_MVP/quench.cfg | 34 ++++++++++++++++ tests/MD_MVP/test.py | 89 +++++++++++++++++++++++++++++++++++++++++ 5 files changed, 174 insertions(+) create mode 100644 tests/MD_MVP/li2.xyz create mode 100644 tests/MD_MVP/md.cfg create mode 100644 tests/MD_MVP/quench.cfg create mode 100755 tests/MD_MVP/test.py diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 6de34971..72378704 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -491,6 +491,14 @@ add_test(NAME testMD_D72 ${CMAKE_CURRENT_SOURCE_DIR}/MD_D72/coords.in 
${CMAKE_CURRENT_SOURCE_DIR}/MD_D72/lrs.in ${CMAKE_CURRENT_SOURCE_DIR}/../potentials) +add_test(NAME testMD_MVP + COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/MD_MVP/test.py + ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 4 ${MPIEXEC_PREFLAGS} + ${CMAKE_CURRENT_BINARY_DIR}/../src/mgmol-opt + ${CMAKE_CURRENT_SOURCE_DIR}/MD_MVP/quench.cfg + ${CMAKE_CURRENT_SOURCE_DIR}/MD_MVP/md.cfg + ${CMAKE_CURRENT_SOURCE_DIR}/MD_MVP/li2.xyz + ${CMAKE_CURRENT_SOURCE_DIR}/../potentials) add_test(NAME testLBFGS COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/LBFGS/test.py ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 4 ${MPIEXEC_PREFLAGS} diff --git a/tests/MD_MVP/li2.xyz b/tests/MD_MVP/li2.xyz new file mode 100644 index 00000000..fce3cd72 --- /dev/null +++ b/tests/MD_MVP/li2.xyz @@ -0,0 +1,4 @@ +2 + +Li 1.33 0.0 0.0 +Li -1.33 0.0 0.0 diff --git a/tests/MD_MVP/md.cfg b/tests/MD_MVP/md.cfg new file mode 100644 index 00000000..83d5aa4c --- /dev/null +++ b/tests/MD_MVP/md.cfg @@ -0,0 +1,39 @@ +verbosity=0 +xcFunctional=LDA +FDtype=4th +[Mesh] +nx=32 +ny=32 +nz=32 +[Domain] +ox=-5. +oy=-5. +oz=-5. +lx=10. +ly=10. +lz=10. +[Potentials] +pseudopotential=pseudo.Li_ONCVPSP_LDA +[Run] +type=MD +[MD] +num_steps=5 +dt=15. +[Quench] +solver=PSD +max_steps=30 +atol=1.e-8 +[Orbitals] +nempty=1 +initial_type=random +temperature=300. +[ProjectedMatrices] +solver=exact +[DensityMatrix] +solver=MVP +nb_inner_it=1 +mixing=1. +[Restart] +input_filename=wave.out +input_level=3 +output_level=3 diff --git a/tests/MD_MVP/quench.cfg b/tests/MD_MVP/quench.cfg new file mode 100644 index 00000000..135dbde7 --- /dev/null +++ b/tests/MD_MVP/quench.cfg @@ -0,0 +1,34 @@ +verbosity=0 +xcFunctional=LDA +FDtype=4th +[Mesh] +nx=32 +ny=32 +nz=32 +[Domain] +ox=-5. +oy=-5. +oz=-5. +lx=10. +ly=10. +lz=10. +[Potentials] +pseudopotential=pseudo.Li_ONCVPSP_LDA +[Run] +type=QUENCH +[Quench] +solver=PSD +max_steps=120 +atol=1.e-8 +[Orbitals] +nempty=1 +initial_type=random +temperature=300. 
+[ProjectedMatrices] +solver=exact +[DensityMatrix] +solver=MVP +nb_inner_it=1 +mixing=1. +[Restart] +output_level=3 diff --git a/tests/MD_MVP/test.py b/tests/MD_MVP/test.py new file mode 100755 index 00000000..489c9c8c --- /dev/null +++ b/tests/MD_MVP/test.py @@ -0,0 +1,89 @@ +#!/usr/bin/env python +import sys +import os +import subprocess +import string +import shutil + +print("Test MD with MVP solver...") + +nargs=len(sys.argv) + +mpicmd = sys.argv[1]+" "+sys.argv[2]+" "+sys.argv[3] +for i in range(4,nargs-5): + mpicmd = mpicmd + " "+sys.argv[i] +print("MPI run command: {}".format(mpicmd)) + +exe = sys.argv[nargs-5] +inp1 = sys.argv[nargs-4] +inp2 = sys.argv[nargs-3] +coords = sys.argv[nargs-2] +print("coordinates file: %s"%coords) + +#create links to potentials files +dst = 'pseudo.Li_ONCVPSP_LDA' +src = sys.argv[-1] + '/' + dst + +if not os.path.exists(dst): + print("Create link to %s"%dst) + os.symlink(src, dst) + +#run quench +command = "{} {} -c {} -i {}".format(mpicmd,exe,inp1,coords) +print("Run command: {}".format(command)) +output1 = subprocess.check_output(command,shell=True) +lines=output1.split(b'\n') + +#analyse output of quench +for line in lines: + num_matches = line.count(b'%%') + if num_matches: + print(line) + +#run MD +for i in range(2): + command = "ls -ld snapshot0* | awk '{ print $9 }' | tail -n1" + print(command) + restart_file = subprocess.check_output(command,shell=True) + restart_file=str(restart_file[:-1],'utf-8') + print(restart_file) + + os.rename(restart_file, 'wave.out') + + #run MGmol + command = "{} {} -c {} -i {}".format(mpicmd,exe,inp2,coords) + output2 = subprocess.check_output(command,shell=True) + + #remove used restart files + shutil.rmtree('wave.out') + + #analyse mgmol standard output + lines=output2.split(b'\n') + + print("Check energy conservation...") + tol = 1.e-4 + energy = 0. 
+ count = 0 + for line in lines: + if line.count(b'%%'): + print(line) + if line.count(b'Total') and line.count(b'Energy'): + print(line) + count=count+1 + words=line.split() + + energy=eval(words[2]) + if count==1: + first_energy=energy + + if count>1 and abs(energy-first_energy)>tol: + print("ERROR Energy = {} != {}".format(energy,first_energy)) + sys.exit(1) + +#remove last restart files +command = "ls -ld snapshot0* | awk '{ print $9 }' | tail -n1" +restart_file = subprocess.check_output(command,shell=True) +restart_file=str(restart_file[:-1],'utf-8') +shutil.rmtree(restart_file) + +sys.exit(0) From bb8051d2cce7299c4baf61f863a51488476aea1a Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Mon, 2 Dec 2024 08:17:13 -0500 Subject: [PATCH 07/99] Clean up code related to DM restart data (#292) --- src/DensityMatrix.cc | 58 +++++- src/DensityMatrix.h | 10 +- src/ExtendedGridOrbitals.cc | 28 +-- src/ExtendedGridOrbitals.h | 3 +- src/HDFrestart.cc | 8 +- src/HDFrestart.h | 32 ++-- src/LBFGS_IonicStepper.cc | 13 +- src/LocGridOrbitals.cc | 27 +-- src/LocGridOrbitals.h | 3 +- src/MGmol.h | 2 +- src/ProjectedMatrices.cc | 106 +---------- src/ProjectedMatrices.h | 11 +- src/ProjectedMatricesInterface.h | 10 +- src/hdf_tools.cc | 292 ++++++++++++++++++++----------- src/hdf_tools.h | 6 +- src/md.cc | 31 ++-- src/restart.cc | 7 +- tests/CMakeLists.txt | 5 +- 18 files changed, 338 insertions(+), 314 deletions(-) diff --git a/src/DensityMatrix.cc b/src/DensityMatrix.cc index e2c7b473..78d7008e 100644 --- a/src/DensityMatrix.cc +++ b/src/DensityMatrix.cc @@ -12,6 +12,8 @@ #include "DistMatrix.h" #include "MGmol_MPI.h" #include "ReplicatedMatrix.h" +#include "ReplicatedWorkSpace.h" +#include "hdf_tools.h" #include #include @@ -22,25 +24,28 @@ const double factor_kernel4dot = 10.; #define PROCRUSTES 0 +#define MGMOL_DENSITYMATRIX_FAIL(X) \ + { \ + std::cerr << "DensityMatrix failure:" << std::endl; \ + std::cerr << "Error Message: " << X << std::endl; \ + } + // occupations 
in [0,1] // DM eigenvalues in [0,orbital_occupation] template DensityMatrix::DensityMatrix(const int ndim) + : dim_(ndim), + orbitals_index_(-1), + occ_uptodate_(false), + uniform_occ_(false), + stripped_(false) { assert(ndim > 0); - dim_ = ndim; - - occ_uptodate_ = false; - stripped_ = false; - uniform_occ_ = false; - MGmol_MPI& mmpi = *(MGmol_MPI::instance()); orbital_occupation_ = mmpi.nspin() > 1 ? 1. : 2.; - orbitals_index_ = -1; - dm_ = new MatrixType("DM", ndim, ndim); kernel4dot_ = new MatrixType("K4dot", ndim, ndim); work_ = new MatrixType("work", ndim, ndim); @@ -438,6 +443,43 @@ void DensityMatrix::mix( orbitals_index_ = new_orbitals_index; } +template +int DensityMatrix::write(HDFrestart& h5f_file, std::string& name) +{ + ReplicatedWorkSpace& wspace( + ReplicatedWorkSpace::instance()); + + wspace.initSquareMatrix(*dm_); + + DISTMATDTYPE* work_matrix = wspace.square_matrix(); + + hid_t file_id = h5f_file.file_id(); + return mgmol_tools::write_matrix(file_id, name, work_matrix, dim_); +} + +template +int DensityMatrix::read(HDFrestart& h5f_file, std::string& name) +{ + ReplicatedWorkSpace& wspace( + ReplicatedWorkSpace::instance()); + DISTMATDTYPE* work_matrix = wspace.square_matrix(); + + MGmol_MPI& mmpi = *(MGmol_MPI::instance()); + + int ierr = 0; + if (mmpi.instancePE0()) + { + hid_t file_id = h5f_file.file_id(); + ierr = mgmol_tools::read_matrix(file_id, name, work_matrix); + } + mmpi.bcast(&ierr, 1); + + if (ierr >= 0) wspace.mpiBcastSquareMatrix(); + if (ierr >= 0) initMatrix(work_matrix); + + return ierr; +} + template class DensityMatrix>; #ifdef HAVE_MAGMA template class DensityMatrix; diff --git a/src/DensityMatrix.h b/src/DensityMatrix.h index 84804a86..1d09e79e 100644 --- a/src/DensityMatrix.h +++ b/src/DensityMatrix.h @@ -10,6 +10,7 @@ #ifndef MGMOL_DENSITYMATRIX_H #define MGMOL_DENSITYMATRIX_H +#include "HDFrestart.h" #include "MGmol_MPI.h" #include "global.h" @@ -23,7 +24,7 @@ template class DensityMatrix { - int dim_; + const int dim_; 
std::vector occupation_; MatrixType* dm_; @@ -35,6 +36,10 @@ class DensityMatrix bool occ_uptodate_; bool uniform_occ_; bool stripped_; + + /*! + * Max. occupation of an orbital: 1 with spin, 2 otherwise + */ double orbital_occupation_; DensityMatrix(); @@ -146,6 +151,9 @@ class DensityMatrix double getExpectation(const MatrixType& A); void mix( const double mix, const MatrixType& matA, const int new_orbitals_index); + + int write(HDFrestart& h5f_file, std::string& name); + int read(HDFrestart& h5f_file, std::string& name); }; #endif diff --git a/src/ExtendedGridOrbitals.cc b/src/ExtendedGridOrbitals.cc index 12e5b04c..19c0d26b 100644 --- a/src/ExtendedGridOrbitals.cc +++ b/src/ExtendedGridOrbitals.cc @@ -584,7 +584,6 @@ int ExtendedGridOrbitals::read_hdf5(HDFrestart& h5f_file) Control& ct = *(Control::instance()); - hid_t file_id = h5f_file.file_id(); std::string name = "Function"; int ierr = read_func_hdf5(h5f_file, name); if (ierr < 0) @@ -603,7 +602,7 @@ int ExtendedGridOrbitals::read_hdf5(HDFrestart& h5f_file) // Read DM if (!ct.fullyOccupied()) { - ierr = proj_matrices_->read_dm_hdf5(file_id); + ierr = proj_matrices_->readDM(h5f_file); if (ierr < 0) { (*MPIdata::serr) @@ -618,28 +617,7 @@ int ExtendedGridOrbitals::read_hdf5(HDFrestart& h5f_file) return ierr; } -int ExtendedGridOrbitals::write_hdf5( - HDFrestart& h5f_file, const std::string& name) -{ - assert(proj_matrices_ != nullptr); - Control& ct = *(Control::instance()); - - if (!ct.fullyOccupied()) - { - MGmol_MPI& mmpi(*(MGmol_MPI::instance())); - mmpi.barrier(); - - int ierr = proj_matrices_->writeDM_hdf5(h5f_file); - if (ierr < 0) return ierr; - } - - int ierr = write_func_hdf5(h5f_file, name); - - return ierr; -} - -int ExtendedGridOrbitals::write_func_hdf5( - HDFrestart& h5f_file, const std::string& name) +int ExtendedGridOrbitals::write(HDFrestart& h5f_file, const std::string& name) { if (onpe0) (*MPIdata::sout) << "ExtendedGridOrbitals::write_func_hdf5()...\n"; @@ -829,7 +807,7 @@ int 
ExtendedGridOrbitals::read_func_hdf5( const std::string datasetname(getDatasetName(name, icolor)); // check if dataset exists... - int err_id = h5f_file.dset_exists(datasetname); + int err_id = h5f_file.checkDataExists(datasetname); if (h5f_file.gatherDataX()) mmpi.bcast(&err_id, 1); if (err_id == 0) break; // dataset does not exists diff --git a/src/ExtendedGridOrbitals.h b/src/ExtendedGridOrbitals.h index fb7a4b42..c1a718d0 100644 --- a/src/ExtendedGridOrbitals.h +++ b/src/ExtendedGridOrbitals.h @@ -374,8 +374,7 @@ class ExtendedGridOrbitals : public Orbitals void multiplyByMatrix2states(const int st1, const int st2, const double* mat, ExtendedGridOrbitals& product); - int write_hdf5(HDFrestart& h5f_file, const std::string& name = "Function"); - int write_func_hdf5(HDFrestart&, const std::string& name = "Function"); + int write(HDFrestart&, const std::string& name = "Function"); int read_hdf5(HDFrestart& h5f_file); int read_func_hdf5(HDFrestart&, const std::string& name = "Function"); diff --git a/src/HDFrestart.cc b/src/HDFrestart.cc index f95292ac..08018d47 100644 --- a/src/HDFrestart.cc +++ b/src/HDFrestart.cc @@ -933,12 +933,12 @@ int HDFrestart::getLRCenters(std::multimap& centers, std::string datasetname(getDatasetName(name, color)); - int err_id = dset_exists(datasetname); + int err_id = checkDataExistsLocal(datasetname); if (err_id == 0) { // dataset does not exists // try older version datasetname = getDatasetName_old(name, color); - err_id = dset_exists(datasetname); + err_id = checkDataExistsLocal(datasetname); } if (err_id == 0) @@ -1052,12 +1052,12 @@ int HDFrestart::getLRs(std::shared_ptr lrs, std::string datasetname(getDatasetName(name, color)); - int err_id = dset_exists(datasetname); + int err_id = checkDataExistsLocal(datasetname); if (err_id == 0) { // dataset does not exists // try older version datasetname = getDatasetName_old(name, color); - err_id = dset_exists(datasetname); + err_id = checkDataExistsLocal(datasetname); } if (err_id == 0) { 
// dataset does not exists diff --git a/src/HDFrestart.h b/src/HDFrestart.h index fef634c2..7ffadff7 100644 --- a/src/HDFrestart.h +++ b/src/HDFrestart.h @@ -155,28 +155,30 @@ class HDFrestart return 0; } - hid_t dset_exists(const std::string& datasetname) const + hid_t checkDataExists(const std::string& datasetname) const { - if (active_) - { - return dset_exists(datasetname.c_str()); - } - return 0; + return checkDataExists(datasetname.c_str()); } - herr_t dset_exists(const char* const datasetname) const + herr_t checkDataExists(const char* const datasetname) const + { + herr_t err_id = checkDataExistsLocal(datasetname); + + short id = (short)err_id; + MPI_Bcast(&id, 1, MPI_SHORT, 0, comm_data_); + return (herr_t)id; + } + + hid_t checkDataExistsLocal(const std::string& datasetname) const + { + return checkDataExistsLocal(datasetname.c_str()); + } + + herr_t checkDataExistsLocal(const char* const datasetname) const { herr_t err_id = 0; if (active_) { err_id = H5LTfind_dataset(file_id_, datasetname); - - if (err_id < 0) - { - if (onpe0) - (*MPIdata::sout) - << "HDFrestart::dset_exists() failed for dataset " - << datasetname << std::endl; - } } return err_id; } diff --git a/src/LBFGS_IonicStepper.cc b/src/LBFGS_IonicStepper.cc index caea5f84..93ce03f5 100644 --- a/src/LBFGS_IonicStepper.cc +++ b/src/LBFGS_IonicStepper.cc @@ -328,11 +328,11 @@ int LBFGS_IonicStepper::read_lbfgs(HDFrestart& h5f_file) short check_data = 0; std::vector attr_d(16); + // Open an existing dataset. + std::string datasetname("/LBFGS"); + int err_id = h5f_file.checkDataExists(datasetname); if (onpe0) { - // Open an existing dataset. 
- std::string datasetname("/LBFGS"); - int err_id = h5f_file.dset_exists(datasetname); if (err_id < 0) { // dataset does not exists (*MPIdata::sout) << "Warning: no dataset " << datasetname @@ -343,10 +343,9 @@ int LBFGS_IonicStepper::read_lbfgs(HDFrestart& h5f_file) dataset_id = H5Dopen2(file_id, "/LBFGS", H5P_DEFAULT); if (dataset_id < 0) { - if (onpe0) - (*MPIdata::sout) << "Warning: H5Dopen failed for /LBFGS-> " - "no restart info for LBFGS" - << std::endl; + (*MPIdata::sout) << "Warning: H5Dopen failed for /LBFGS-> " + "no restart info for LBFGS" + << std::endl; } else { diff --git a/src/LocGridOrbitals.cc b/src/LocGridOrbitals.cc index ae8a4d1d..4a1b9442 100644 --- a/src/LocGridOrbitals.cc +++ b/src/LocGridOrbitals.cc @@ -966,7 +966,6 @@ int LocGridOrbitals::read_hdf5(HDFrestart& h5f_file) Control& ct = *(Control::instance()); - hid_t file_id = h5f_file.file_id(); std::string name = "Function"; int ierr = read_func_hdf5(h5f_file, name); if (ierr < 0) @@ -984,7 +983,7 @@ int LocGridOrbitals::read_hdf5(HDFrestart& h5f_file) // Read DM if (!ct.fullyOccupied()) { - ierr = proj_matrices_->read_dm_hdf5(file_id); + ierr = proj_matrices_->readDM(h5f_file); if (ierr < 0) { (*MPIdata::serr) @@ -997,27 +996,7 @@ int LocGridOrbitals::read_hdf5(HDFrestart& h5f_file) return ierr; } -int LocGridOrbitals::write_hdf5(HDFrestart& h5f_file, const std::string& name) -{ - assert(proj_matrices_ != nullptr); - Control& ct = *(Control::instance()); - - if (!ct.fullyOccupied()) - { - MGmol_MPI& mmpi(*(MGmol_MPI::instance())); - mmpi.barrier(); - - int ierr = proj_matrices_->writeDM_hdf5(h5f_file); - if (ierr < 0) return ierr; - } - - int ierr = write_func_hdf5(h5f_file, name); - - return ierr; -} - -int LocGridOrbitals::write_func_hdf5( - HDFrestart& h5f_file, const std::string& name) +int LocGridOrbitals::write(HDFrestart& h5f_file, const std::string& name) { Control& ct = *(Control::instance()); hid_t file_id = h5f_file.file_id(); @@ -1249,7 +1228,7 @@ int 
LocGridOrbitals::read_func_hdf5( const std::string key(itcenter->first); // checkif dataset exists... - int err_id = h5f_file.dset_exists(key); + int err_id = h5f_file.checkDataExistsLocal(key); if (h5f_file.gatherDataX()) mmpi.bcast(&err_id, 1); if (err_id == 0) break; // dataset does not exists diff --git a/src/LocGridOrbitals.h b/src/LocGridOrbitals.h index 5616c6b6..951e418f 100644 --- a/src/LocGridOrbitals.h +++ b/src/LocGridOrbitals.h @@ -410,8 +410,7 @@ class LocGridOrbitals : public Orbitals void multiplyByMatrix2states(const int st1, const int st2, const double* mat, LocGridOrbitals& product); - int write_hdf5(HDFrestart& h5f_file, const std::string& name = "Function"); - int write_func_hdf5(HDFrestart&, const std::string& name = "Function"); + int write(HDFrestart&, const std::string& name = "Function"); int read_hdf5(HDFrestart& h5f_file); int read_func_hdf5(HDFrestart&, const std::string& name = "Function"); diff --git a/src/MGmol.h b/src/MGmol.h index aee2b660..4d82dd64 100644 --- a/src/MGmol.h +++ b/src/MGmol.h @@ -130,7 +130,7 @@ class MGmol : public MGmolInterface KBPsiMatrixSparse* kbpsi, dist_matrix::DistMatrix& hij); void computeHnlPhiAndAdd2HPhi(Ions& ions, OrbitalsType& phi, OrbitalsType& hphi, const KBPsiMatrixSparse* const kbpsi); - int dumpMDrestartFile(OrbitalsType** orbitals, Ions& ions, + int dumpMDrestartFile(OrbitalsType& orbitals, Ions& ions, Rho& rho, const bool write_extrapolated_wf, const short count); diff --git a/src/ProjectedMatrices.cc b/src/ProjectedMatrices.cc index 3606aa40..e50e98bc 100644 --- a/src/ProjectedMatrices.cc +++ b/src/ProjectedMatrices.cc @@ -26,6 +26,7 @@ #include "SparseDistMatrix.h" #include "SquareSubMatrix2DistMatrix.h" #include "fermi.h" +#include "hdf_tools.h" #include #include @@ -732,114 +733,23 @@ double ProjectedMatrices::checkCond( (*MPIdata::sout) << " CONDITION NUMBER OF THE OVERLAP MATRIX EXCEEDS TOL: " << rcond << "!!!" 
<< std::endl; - Control& ct = *(Control::instance()); if (flag) mmpi.abort(); } return rcond; } -////// TEMPLATE THIS FOR FLOAT OPTION ?? template -int ProjectedMatrices::writeDM_hdf5(HDFrestart& h5f_file) +int ProjectedMatrices::writeDM(HDFrestart& h5f_file) { - hid_t file_id = h5f_file.file_id(); - - ReplicatedWorkSpace& wspace( - ReplicatedWorkSpace::instance()); - - wspace.initSquareMatrix(dm_->getMatrix()); - - if (file_id < 0) return 0; - - hsize_t dims[2] = { dim_, dim_ }; - - // filespace identifier - hid_t dataspace = H5Screate_simple(2, dims, nullptr); - - hid_t dset_id = H5Dcreate2(file_id, "/Density_Matrix", H5T_NATIVE_DOUBLE, - dataspace, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - if (dset_id < 0) - { - (*MPIdata::serr) << "ProjectedMatrices::write_dm_hdf5: " - "H5Dcreate2 failed!!!" - << std::endl; - return -1; - } - - hid_t memspace = dataspace; - hid_t filespace = dataspace; - - DISTMATDTYPE* work_matrix = wspace.square_matrix(); - herr_t status = H5Dwrite(dset_id, H5T_NATIVE_DOUBLE, memspace, filespace, - H5P_DEFAULT, work_matrix); - if (status < 0) - { - (*MPIdata::serr) << "Orbitals: H5Dwrite failed!!!" << std::endl; - return -1; - } - - status = H5Dclose(dset_id); - if (status < 0) - { - (*MPIdata::serr) << "ProjectedMatrices::write_dm_hdf5(), " - "H5Dclose failed!!!" - << std::endl; - return -1; - } - status = H5Sclose(dataspace); - if (status < 0) - { - (*MPIdata::serr) << "ProjectedMatrices::write_dm_hdf5(), " - "H5Sclose failed!!!" - << std::endl; - return -1; - } - - return 0; + std::string name("/Density_Matrix"); + return dm_->write(h5f_file, name); } -////// TEMPLATE THIS FOR FLOAT OPTION ?? 
+ template -int ProjectedMatrices::read_dm_hdf5(hid_t file_id) +int ProjectedMatrices::readDM(HDFrestart& h5f_file) { - ReplicatedWorkSpace& wspace( - ReplicatedWorkSpace::instance()); - DISTMATDTYPE* work_matrix = wspace.square_matrix(); - - int ierr = 0; - MGmol_MPI& mmpi = *(MGmol_MPI::instance()); - if (mmpi.instancePE0()) - { - hid_t dset_id = H5Dopen2(file_id, "/Density_Matrix", H5P_DEFAULT); - if (dset_id < 0) - { - (*MPIdata::serr) - << "H5Dopen failed for /Density_Matrix!!!" << std::endl; - } - else - { - ierr = 1; - herr_t status = H5Dread(dset_id, H5T_NATIVE_DOUBLE, H5S_ALL, - H5S_ALL, H5P_DEFAULT, work_matrix); - if (status < 0) - { - (*MPIdata::serr) - << "H5Dread failed for /Density_Matrix!!!" << std::endl; - return -1; - } - - status = H5Dclose(dset_id); - if (status < 0) - { - (*MPIdata::serr) << "H5Dclose failed!!!" << std::endl; - return -1; - } - } - } - mmpi.bcast(&ierr, 1); - if (ierr >= 0) wspace.mpiBcastSquareMatrix(); - if (ierr >= 0) dm_->initMatrix(work_matrix); - - return ierr; + std::string name("/Density_Matrix"); + return dm_->read(h5f_file, name); } template diff --git a/src/ProjectedMatrices.h b/src/ProjectedMatrices.h index 6725b827..9bc5e3c2 100644 --- a/src/ProjectedMatrices.h +++ b/src/ProjectedMatrices.h @@ -93,7 +93,14 @@ class ProjectedMatrices : public ProjectedMatricesInterface std::unique_ptr matHB_; std::unique_ptr matH_; + /*! + * Density Matrix + */ std::unique_ptr> dm_; + + /*! 
+ * Gram matrix of orbitals overlaps + */ std::unique_ptr> gm_; // work matrix for tmp usage @@ -311,8 +318,8 @@ class ProjectedMatrices : public ProjectedMatricesInterface double computeEntropy() override; double computeEntropyWithCheb(const double kbt); double checkCond(const double tol, const bool flag = true) override; - int writeDM_hdf5(HDFrestart& h5f_file) override; - int read_dm_hdf5(hid_t file_id) override; + int writeDM(HDFrestart& h5f_file) override; + int readDM(HDFrestart& h5f_file) override; void printEigenvalues(std::ostream& os) const; void updateDM(const int iterative_index) override; void updateDMwithEigenstates(const int iterative_index); diff --git a/src/ProjectedMatricesInterface.h b/src/ProjectedMatricesInterface.h index 653b386d..e02e0b45 100644 --- a/src/ProjectedMatricesInterface.h +++ b/src/ProjectedMatricesInterface.h @@ -265,19 +265,19 @@ class ProjectedMatricesInterface : public ChebyshevApproximationFunction exitWithErrorMessage("updateDMwithRelax"); } - virtual int read_dm_hdf5(hid_t file_id) + virtual int readDM(HDFrestart& h5f_file) { - (void)file_id; + (void)h5f_file; - exitWithErrorMessage("read_dm_hdf5"); + exitWithErrorMessage("readDM"); return 0; } - virtual int writeDM_hdf5(HDFrestart& h5f_file) + virtual int writeDM(HDFrestart& h5f_file) { (void)h5f_file; - exitWithErrorMessage("writeDM_hdf5"); + exitWithErrorMessage("writeDM"); return 0; } diff --git a/src/hdf_tools.cc b/src/hdf_tools.cc index 69a105dc..e56c0478 100644 --- a/src/hdf_tools.cc +++ b/src/hdf_tools.cc @@ -14,12 +14,16 @@ #include #include -using namespace std; +#define MGMOL_HDF5_FAIL(X) \ + { \ + std::cerr << "MGMOL_HDF5 failure:" << std::endl; \ + std::cerr << "Error Message: " << X << std::endl; \ + } namespace mgmol_tools { -void write1d( - hid_t file_id, const string& datasetname, vector& data, size_t length) +void write1d(hid_t file_id, const std::string& datasetname, + std::vector& data, size_t length) { assert(file_id >= 0); @@ -31,7 +35,7 @@ void 
write1d( hid_t dataspace_id = H5Screate_simple(1, &dim, nullptr); if (dataspace_id < 0) { - cerr << "write1d(), H5Screate_simple failed!!!" << endl; + std::cerr << "write1d(), H5Screate_simple failed!!!" << std::endl; return; } @@ -40,7 +44,7 @@ void write1d( dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); if (dataset_id < 0) { - cerr << "write1d(), H5Dcreate2 failed!!!" << endl; + std::cerr << "write1d(), H5Dcreate2 failed!!!" << std::endl; return; } H5Sclose(dataspace_id); @@ -49,24 +53,24 @@ void write1d( dataset_id, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, &data[0]); if (status < 0) { - cerr << "write1d(), H5Dwrite failed!!!" << endl; + std::cerr << "write1d(), H5Dwrite failed!!!" << std::endl; return; } status = H5Dclose(dataset_id); if (status < 0) { - cerr << "write1d(), H5Dclose failed!!!" << endl; + std::cerr << "write1d(), H5Dclose failed!!!" << std::endl; return; } } -void write2d( - hid_t file_id, const string& datasetname, vector& data, size_t* dims) +void write2d(hid_t file_id, const std::string& datasetname, + std::vector& data, size_t* dims) { assert(file_id >= 0); - // cout<<"Write "<& data, size_t* dims) +void write2d(hid_t file_id, const std::string& datasetname, + std::vector& data, size_t* dims) { assert(file_id >= 0); - // cout<<"Write "<& data, - size_t* dims) +void write2d(hid_t file_id, const std::string& datasetname, + std::vector& data, size_t* dims) { assert(file_id >= 0); - // cout<<"Write "<& data, hid_t dataspace_id = H5Screate_simple(2, dimsm, nullptr); if (dataspace_id < 0) { - cerr << "write2d(), H5Screate_simple failed!!!" << endl; + std::cerr << "write2d(), H5Screate_simple failed!!!" << std::endl; return; } @@ -175,7 +179,7 @@ void write2d(hid_t file_id, const string& datasetname, vector& data, H5T_NATIVE_DOUBLE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); if (dataset_id < 0) { - cerr << "write2d(), H5Dcreate2 failed!!!" << endl; + std::cerr << "write2d(), H5Dcreate2 failed!!!" 
<< std::endl; return; } H5Sclose(dataspace_id); @@ -184,28 +188,28 @@ void write2d(hid_t file_id, const string& datasetname, vector& data, dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, &data[0]); if (status < 0) { - cerr << "write2d(), H5Dwrite failed!!!" << endl; + std::cerr << "write2d(), H5Dwrite failed!!!" << std::endl; return; } status = H5Dclose(dataset_id); if (status < 0) { - cerr << "write2d(), H5Dclose failed!!!" << endl; + std::cerr << "write2d(), H5Dclose failed!!!" << std::endl; return; } } -void write2d(hid_t file_id, const string& datasetname, vector& data, - size_t* dims) +void write2d(hid_t file_id, const std::string& datasetname, + std::vector& data, size_t* dims) { assert(file_id >= 0); - // create type for strings of length IonData_MaxStrLength + // create type for std::strings of length IonData_MaxStrLength hid_t strtype = H5Tcopy(H5T_C_S1); H5Tset_size(strtype, IonData_MaxStrLength); - // cout<<"Write "<& data, hid_t dataspace_id = H5Screate_simple(2, dimsm, nullptr); if (dataspace_id < 0) { - cerr << "write2d(), H5Screate_simple failed!!!" << endl; + std::cerr << "write2d(), H5Screate_simple failed!!!" << std::endl; return; } @@ -224,14 +228,15 @@ void write2d(hid_t file_id, const string& datasetname, vector& data, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); if (dataset_id < 0) { - cerr << "write2d(), H5Dcreate2 failed!!!" << endl; + std::cerr << "write2d(), H5Dcreate2 failed!!!" 
<< std::endl; return; } H5Sclose(dataspace_id); // First copy the contents of the vector into a temporary container - vector tc; - for (vector::const_iterator i = data.begin(), end = data.end(); + std::vector tc; + for (std::vector::const_iterator i = data.begin(), + end = data.end(); i != end; ++i) { FixedLengthString t; @@ -239,7 +244,7 @@ void write2d(hid_t file_id, const string& datasetname, vector& data, tc.push_back(t); } - string attname("String_Length"); + std::string attname("String_Length"); hsize_t dimsA[1] = { 1 }; hid_t dataspaceA_id = H5Screate_simple(1, dimsA, nullptr); hid_t attribute_id = H5Acreate2(dataset_id, attname.c_str(), H5T_NATIVE_INT, @@ -248,15 +253,15 @@ void write2d(hid_t file_id, const string& datasetname, vector& data, = H5Awrite(attribute_id, H5T_NATIVE_USHORT, &IonData_MaxStrLength); if (status < 0) { - cerr << "write2d(), Attribute: " << attname << " --- H5Awrite failed!!!" - << endl; + std::cerr << "write2d(), Attribute: " << attname + << " --- H5Awrite failed!!!" << std::endl; } status = H5Dwrite(dataset_id, strtype, H5S_ALL, H5S_ALL, H5P_DEFAULT, &tc[0]); if (status < 0) { - cerr << "write2d(), H5Dwrite failed!!!" << endl; + std::cerr << "write2d(), H5Dwrite failed!!!" << std::endl; return; } @@ -265,25 +270,25 @@ void write2d(hid_t file_id, const string& datasetname, vector& data, status = H5Sclose(dataspaceA_id); if (status < 0) { - cerr << "write2d(), H5Sclose failed!!!" << endl; + std::cerr << "write2d(), H5Sclose failed!!!" << std::endl; } status = H5Aclose(attribute_id); if (status < 0) { - cerr << "write2d(), H5Aclose failed!!!" << endl; + std::cerr << "write2d(), H5Aclose failed!!!" << std::endl; } status = H5Dclose(dataset_id); if (status < 0) { - cerr << "write2d(), H5Dclose failed!!!" << endl; + std::cerr << "write2d(), H5Dclose failed!!!" 
<< std::endl; } } #ifdef MGMOL_USE_HDF5P -void parallelWrite2d(hid_t file_id, const string& datasetname, - vector& data, size_t* dims, MPI_Comm comm) +void parallelWrite2d(hid_t file_id, const std::string& datasetname, + std::vector& data, size_t* dims, MPI_Comm comm) { assert(file_id >= 0); assert(!data.empty()); @@ -298,15 +303,17 @@ void parallelWrite2d(hid_t file_id, const string& datasetname, hid_t filespace = H5Screate_simple(2, dimsf, nullptr); if (filespace < 0) { - cerr << "parallelWrite2d(), H5Screate_simple failed for filespace!!!" - << endl; + std::cerr + << "parallelWrite2d(), H5Screate_simple failed for filespace!!!" + << std::endl; return; } hid_t memspace = H5Screate_simple(2, dimsm, nullptr); if (memspace < 0) { - cerr << "parallelWrite2d(), H5Screate_simple failed for memspace!!!" - << endl; + std::cerr + << "parallelWrite2d(), H5Screate_simple failed for memspace!!!" + << std::endl; return; } @@ -317,8 +324,8 @@ void parallelWrite2d(hid_t file_id, const string& datasetname, filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT); if (dset_id < 0) { - cerr << "parallelWrite2d() for dataset " << datasetname - << ", H5Dcreate2() failed!!!" << endl; + std::cerr << "parallelWrite2d() for dataset " << datasetname + << ", H5Dcreate2() failed!!!" << std::endl; return; } H5Pclose(plist_id); @@ -333,7 +340,8 @@ void parallelWrite2d(hid_t file_id, const string& datasetname, filespace, H5S_SELECT_SET, offset, stride, count, block); if (status < 0) { - cerr << "parallelWrite2d(), H5Sselect_hyperslab() failed!!!" << endl; + std::cerr << "parallelWrite2d(), H5Sselect_hyperslab() failed!!!" + << std::endl; return; } @@ -345,7 +353,7 @@ void parallelWrite2d(hid_t file_id, const string& datasetname, dset_id, H5T_NATIVE_INT, memspace, filespace, plist_id, &data[0]); if (status < 0) { - cerr << "parallelWrite2d(), H5Dwrite failed!!!" << endl; + std::cerr << "parallelWrite2d(), H5Dwrite failed!!!" 
<< std::endl; return; } @@ -355,8 +363,8 @@ void parallelWrite2d(hid_t file_id, const string& datasetname, H5Sclose(memspace); } -void parallelWrite2d(hid_t file_id, const string& datasetname, - vector& data, size_t* dims, MPI_Comm comm) +void parallelWrite2d(hid_t file_id, const std::string& datasetname, + std::vector& data, size_t* dims, MPI_Comm comm) { assert(file_id >= 0); assert(!data.empty()); @@ -371,15 +379,17 @@ void parallelWrite2d(hid_t file_id, const string& datasetname, hid_t filespace = H5Screate_simple(2, dimsf, nullptr); if (filespace < 0) { - cerr << "parallelWrite2d(), H5Screate_simple failed for filespace!!!" - << endl; + std::cerr + << "parallelWrite2d(), H5Screate_simple failed for filespace!!!" + << std::endl; return; } hid_t memspace = H5Screate_simple(2, dimsm, nullptr); if (memspace < 0) { - cerr << "parallelWrite2d(), H5Screate_simple failed for memspace!!!" - << endl; + std::cerr + << "parallelWrite2d(), H5Screate_simple failed for memspace!!!" + << std::endl; return; } @@ -390,8 +400,8 @@ void parallelWrite2d(hid_t file_id, const string& datasetname, filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT); if (dset_id < 0) { - cerr << "parallelWrite2d() for dataset " << datasetname - << ", H5Dcreate2() failed!!!" << endl; + std::cerr << "parallelWrite2d() for dataset " << datasetname + << ", H5Dcreate2() failed!!!" << std::endl; return; } H5Pclose(plist_id); @@ -406,7 +416,8 @@ void parallelWrite2d(hid_t file_id, const string& datasetname, filespace, H5S_SELECT_SET, offset, stride, count, block); if (status < 0) { - cerr << "parallelWrite2d(), H5Sselect_hyperslab() failed!!!" << endl; + std::cerr << "parallelWrite2d(), H5Sselect_hyperslab() failed!!!" + << std::endl; return; } @@ -418,7 +429,7 @@ void parallelWrite2d(hid_t file_id, const string& datasetname, dset_id, H5T_NATIVE_USHORT, memspace, filespace, plist_id, &data[0]); if (status < 0) { - cerr << "parallelWrite2d(), H5Dwrite failed!!!" 
<< endl; + std::cerr << "parallelWrite2d(), H5Dwrite failed!!!" << std::endl; return; } @@ -428,8 +439,8 @@ void parallelWrite2d(hid_t file_id, const string& datasetname, H5Sclose(memspace); } -void parallelWrite2d(hid_t file_id, const string& datasetname, - vector& data, size_t* dims, MPI_Comm comm) +void parallelWrite2d(hid_t file_id, const std::string& datasetname, + std::vector& data, size_t* dims, MPI_Comm comm) { assert(file_id >= 0); assert(!data.empty()); @@ -444,15 +455,17 @@ void parallelWrite2d(hid_t file_id, const string& datasetname, hid_t filespace = H5Screate_simple(2, dimsf, nullptr); if (filespace < 0) { - cerr << "parallelWrite2d(), H5Screate_simple failed for filespace!!!" - << endl; + std::cerr + << "parallelWrite2d(), H5Screate_simple failed for filespace!!!" + << std::endl; return; } hid_t memspace = H5Screate_simple(2, dimsm, nullptr); if (memspace < 0) { - cerr << "parallelWrite2d(), H5Screate_simple failed for memspace!!!" - << endl; + std::cerr + << "parallelWrite2d(), H5Screate_simple failed for memspace!!!" + << std::endl; return; } @@ -463,8 +476,8 @@ void parallelWrite2d(hid_t file_id, const string& datasetname, filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT); if (dset_id < 0) { - cerr << "parallelWrite2d() for dataset " << datasetname - << ", H5Dcreate2() failed!!!" << endl; + std::cerr << "parallelWrite2d() for dataset " << datasetname + << ", H5Dcreate2() failed!!!" << std::endl; return; } H5Pclose(plist_id); @@ -479,7 +492,8 @@ void parallelWrite2d(hid_t file_id, const string& datasetname, filespace, H5S_SELECT_SET, offset, stride, count, block); if (status < 0) { - cerr << "parallelWrite2d(), H5Sselect_hyperslab() failed!!!" << endl; + std::cerr << "parallelWrite2d(), H5Sselect_hyperslab() failed!!!" 
+ << std::endl; return; } @@ -491,7 +505,7 @@ void parallelWrite2d(hid_t file_id, const string& datasetname, dset_id, H5T_NATIVE_DOUBLE, memspace, filespace, plist_id, &data[0]); if (status < 0) { - cerr << "parallelWrite2d(), H5Dwrite failed!!!" << endl; + std::cerr << "parallelWrite2d(), H5Dwrite failed!!!" << std::endl; return; } @@ -501,13 +515,13 @@ void parallelWrite2d(hid_t file_id, const string& datasetname, H5Sclose(memspace); } -void parallelWrite2d(hid_t file_id, const string& datasetname, - vector& data, size_t* dims, MPI_Comm comm) +void parallelWrite2d(hid_t file_id, const std::string& datasetname, + std::vector& data, size_t* dims, MPI_Comm comm) { assert(file_id >= 0); assert(!data.empty()); - // create type for strings of length IonData_MaxStrLength + // create type for std::strings of length IonData_MaxStrLength hid_t strtype = H5Tcopy(H5T_C_S1); H5Tset_size(strtype, IonData_MaxStrLength); @@ -521,15 +535,17 @@ void parallelWrite2d(hid_t file_id, const string& datasetname, hid_t filespace = H5Screate_simple(2, dimsf, nullptr); if (filespace < 0) { - cerr << "parallelWrite2d(), H5Screate_simple failed for filespace!!!" - << endl; + std::cerr + << "parallelWrite2d(), H5Screate_simple failed for filespace!!!" + << std::endl; return; } hid_t memspace = H5Screate_simple(2, dimsm, nullptr); if (memspace < 0) { - cerr << "parallelWrite2d(), H5Screate_simple failed for memspace!!!" - << endl; + std::cerr + << "parallelWrite2d(), H5Screate_simple failed for memspace!!!" + << std::endl; return; } @@ -540,8 +556,8 @@ void parallelWrite2d(hid_t file_id, const string& datasetname, H5P_DEFAULT, plist_id, H5P_DEFAULT); if (dset_id < 0) { - cerr << "parallelWrite2d() for dataset " << datasetname - << ", H5Dcreate2() failed!!!" << endl; + std::cerr << "parallelWrite2d() for dataset " << datasetname + << ", H5Dcreate2() failed!!!" 
<< std::endl; return; } H5Pclose(plist_id); @@ -556,7 +572,8 @@ void parallelWrite2d(hid_t file_id, const string& datasetname, filespace, H5S_SELECT_SET, offset, stride, count, block); if (status < 0) { - cerr << "parallelWrite2d(), H5Sselect_hyperslab() failed!!!" << endl; + std::cerr << "parallelWrite2d(), H5Sselect_hyperslab() failed!!!" + << std::endl; return; } @@ -565,8 +582,9 @@ void parallelWrite2d(hid_t file_id, const string& datasetname, H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE); // First copy the contents of the vector into a temporary container - vector tc; - for (vector::const_iterator i = data.begin(), end = data.end(); + std::vector tc; + for (std::vector::const_iterator i = data.begin(), + end = data.end(); i != end; ++i) { FixedLengthString t; @@ -576,7 +594,7 @@ void parallelWrite2d(hid_t file_id, const string& datasetname, status = H5Dwrite(dset_id, strtype, memspace, filespace, plist_id, &tc[0]); if (status < 0) { - cerr << "parallelWrite2d(), H5Dwrite failed!!!" << endl; + std::cerr << "parallelWrite2d(), H5Dwrite failed!!!" << std::endl; return; } @@ -589,7 +607,7 @@ void parallelWrite2d(hid_t file_id, const string& datasetname, #endif void addAttribute2Dataset( - hid_t dset_id, const char* attname, const vector& attr_data) + hid_t dset_id, const char* attname, const std::vector& attr_data) { assert(dset_id > -1); @@ -600,30 +618,30 @@ void addAttribute2Dataset( hid_t dataspace_id = H5Screate_simple(1, &dim, nullptr); if (dataspace_id < 0) { - cerr << "H5Screate failed!!!" << endl; + std::cerr << "H5Screate failed!!!" << std::endl; return; } hid_t attribute_id = H5Acreate2(dset_id, attname, H5T_NATIVE_DOUBLE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); if (attribute_id < 0) { - cerr << "H5Acreate failed!!!" << endl; + std::cerr << "H5Acreate failed!!!" << std::endl; return; } herr_t status = H5Sclose(dataspace_id); - if (status < 0) cerr << "H5Sclose failed!!!" << endl; + if (status < 0) std::cerr << "H5Sclose failed!!!" 
<< std::endl; //(*MPIdata::sout)<<"Write attribute "<& attr_data) + hid_t dset_id, const char* attname, const std::vector& attr_data) { assert(dset_id > -1); @@ -634,21 +652,21 @@ void addAttribute2Dataset( hid_t dataspace_id = H5Screate_simple(1, &dim, nullptr); if (dataspace_id < 0) { - cerr << "H5Screate failed!!!" << endl; + std::cerr << "H5Screate failed!!!" << std::endl; return; } hid_t attribute_id = H5Acreate2(dset_id, attname, H5T_NATIVE_INT, dataspace_id, H5P_DEFAULT, H5P_DEFAULT); if (attribute_id < 0) { - cerr << "H5Acreate failed!!!" << endl; + std::cerr << "H5Acreate failed!!!" << std::endl; return; } herr_t status = H5Sclose(dataspace_id); if (status < 0) { - cerr << "H5Sclose failed!!!" << endl; + std::cerr << "H5Sclose failed!!!" << std::endl; return; } @@ -656,14 +674,14 @@ void addAttribute2Dataset( status = H5Awrite(attribute_id, H5T_NATIVE_INT, &attr_data[0]); if (status < 0) { - cerr << "H5Awrite failed!!!" << endl; + std::cerr << "H5Awrite failed!!!" << std::endl; return; } status = H5Aclose(attribute_id); if (status < 0) { - cerr << "H5Aclose failed!!!" << endl; + std::cerr << "H5Aclose failed!!!" 
<< std::endl; } } @@ -676,7 +694,7 @@ int whatisopen(hid_t fid) if (cnt <= 0) return cnt; - if (cnt > 1) cout << "HDF5 file: " << cnt << " object(s) open\n"; + if (cnt > 1) std::cout << "HDF5 file: " << cnt << " object(s) open\n"; // objs = malloc(cnt * sizeof(hid_t)); hid_t* objs = new hid_t[cnt]; @@ -699,4 +717,80 @@ int whatisopen(hid_t fid) return howmany; } + +int write_matrix( + hid_t file_id, std::string& name, const double* matrix, const int dim) +{ + if (file_id < 0) return 0; + + hsize_t dims[2] = { (hsize_t)dim, (hsize_t)dim }; + + // filespace identifier + hid_t dataspace = H5Screate_simple(2, dims, nullptr); + + hid_t dset_id = H5Dcreate2(file_id, name.c_str(), H5T_NATIVE_DOUBLE, + dataspace, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + if (dset_id < 0) + { + MGMOL_HDF5_FAIL("H5Dcreate2 failed!!!"); + return -1; + } + + hid_t memspace = dataspace; + hid_t filespace = dataspace; + + herr_t status = H5Dwrite( + dset_id, H5T_NATIVE_DOUBLE, memspace, filespace, H5P_DEFAULT, matrix); + if (status < 0) + { + MGMOL_HDF5_FAIL("H5Dwrite failed!!!"); + return -1; + } + + status = H5Dclose(dset_id); + if (status < 0) + { + MGMOL_HDF5_FAIL("H5Dclose failed!!!"); + return -1; + } + status = H5Sclose(dataspace); + if (status < 0) + { + MGMOL_HDF5_FAIL("H5Sclose failed!!!"); + return -1; + } + + return 0; +} + +int read_matrix(hid_t file_id, std::string& name, double* matrix) +{ + int ierr = 0; + hid_t dset_id = H5Dopen2(file_id, name.c_str(), H5P_DEFAULT); + if (dset_id < 0) + { + MGMOL_HDF5_FAIL("H5Dopen failed!!"); + ierr = -1; + } + else + { + herr_t status = H5Dread( + dset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT, matrix); + if (status < 0) + { + MGMOL_HDF5_FAIL("H5Dread failed!!"); + ierr = -1; + } + + status = H5Dclose(dset_id); + if (status < 0) + { + MGMOL_HDF5_FAIL("H5Dclose failed!!!"); + ierr = -1; + } + } + + return ierr; } + +} // namespace diff --git a/src/hdf_tools.h b/src/hdf_tools.h index e1a0ed1f..7a64bdce 100644 --- a/src/hdf_tools.h +++ 
b/src/hdf_tools.h @@ -6,8 +6,8 @@ // All rights reserved. // This file is part of MGmol. For details, see https://github.com/llnl/mgmol. // Please also read this link https://github.com/llnl/mgmol/LICENSE - #include "hdf5.h" + #include #include #include @@ -39,4 +39,8 @@ void addAttribute2Dataset( void addAttribute2Dataset( hid_t dset_id, const char* attname, const std::vector& attr_data); int whatisopen(hid_t fid); + +int write_matrix( + hid_t file_id, std::string& name, const double* matrix, const int dim); +int read_matrix(hid_t file_id, std::string& name, double* matrix); } diff --git a/src/md.cc b/src/md.cc index 7c6e2aa8..b773c27c 100644 --- a/src/md.cc +++ b/src/md.cc @@ -226,7 +226,7 @@ void checkMaxForces(const std::vector& fion, } template -int MGmol::dumpMDrestartFile(OrbitalsType** orbitals, Ions& ions, +int MGmol::dumpMDrestartFile(OrbitalsType& orbitals, Ions& ions, Rho& rho, const bool write_extrapolated_wf, const short count) { MGmol_MPI& mmpi(*(MGmol_MPI::instance())); @@ -245,9 +245,11 @@ int MGmol::dumpMDrestartFile(OrbitalsType** orbitals, Ions& ions, HDFrestart h5file(filename, myPEenv, gdim, ct.out_restart_file_type); - OrbitalsType previous_orbitals("ForDumping", **orbitals, false); + OrbitalsType previous_orbitals("ForDumping", orbitals, false); if (!orbitals_extrapol_->getRestartData(previous_orbitals)) - previous_orbitals.assign(**orbitals); + previous_orbitals.assign(orbitals); + + // write all restart info in HDF5 file int ierr = write_hdf5(h5file, rho.rho_, ions, previous_orbitals, lrs_); mmpi.allreduce(&ierr, 1, MPI_MIN); @@ -259,12 +261,11 @@ int MGmol::dumpMDrestartFile(OrbitalsType** orbitals, Ions& ions, << std::endl; return ierr; } - // write_hdf5(h5file, rho.rho_, ions, *orbitals_minus1); - // stepper->write_hdf5(h5file); if (write_extrapolated_wf && ct.out_restart_info > 2) { - ierr = (*orbitals)->write_func_hdf5(h5file, "ExtrapolatedFunction"); + // write extra info needed for seamless MD restart + ierr = orbitals.write(h5file, 
"ExtrapolatedFunction"); mmpi.allreduce(&ierr, 1, MPI_MIN); if (ierr < 0) { @@ -354,16 +355,11 @@ void MGmol::md(OrbitalsType** orbitals, Ions& ions) if (ct.restart_info > 1) { - int flag_extrapolated_data = 0; - if (onpe0) - { + int flag_extrapolated_data + = h5f_file_->checkDataExists("ExtrapolatedFunction0000"); + if (flag_extrapolated_data == 0) flag_extrapolated_data - = h5f_file_->dset_exists("ExtrapolatedFunction0000"); - if (flag_extrapolated_data == 0) - flag_extrapolated_data - = h5f_file_->dset_exists("ExtrapolatedFunction0"); - } - MPI_Bcast(&flag_extrapolated_data, 1, MPI_INT, 0, comm_); + = h5f_file_->checkDataExists("ExtrapolatedFunction0"); if (ct.restart_info > 2) { @@ -377,7 +373,6 @@ void MGmol::md(OrbitalsType** orbitals, Ions& ions) // need to reset a few things as we just read new orbitals (*orbitals)->computeGramAndInvS(); - dm_strategy_->update(*current_orbitals_); } DFTsolver::setItCountLarge(); @@ -615,7 +610,7 @@ void MGmol::md(OrbitalsType** orbitals, Ions& ions) { dump_tm_.start(); ierr = dumpMDrestartFile( - orbitals, ions, *rho_, extrapolated_flag, count); + **orbitals, ions, *rho_, extrapolated_flag, count); dump_tm_.stop(); if (onpe0 && ierr < 0 && count < (DUMP_MAX_NUM_TRY - 1)) std::cout @@ -641,7 +636,7 @@ void MGmol::md(OrbitalsType** orbitals, Ions& ions) { dump_tm_.start(); ierr = dumpMDrestartFile( - orbitals, ions, *rho_, extrapolated_flag, count); + **orbitals, ions, *rho_, extrapolated_flag, count); dump_tm_.stop(); if (onpe0 && ierr < 0 && count < (DUMP_MAX_NUM_TRY - 1)) diff --git a/src/restart.cc b/src/restart.cc index 944b1d8e..c12bb258 100644 --- a/src/restart.cc +++ b/src/restart.cc @@ -163,9 +163,14 @@ int MGmol::write_hdf5(HDFrestart& h5f_file, // Write wavefunctions and old centers. 
if (ct.out_restart_info > 2) { - int ierr = orbitals.write_hdf5(h5f_file); + int ierr = orbitals.write(h5f_file); if (ierr < 0) return ierr; + if (!ct.fullyOccupied()) + { + int ierr = proj_matrices_->writeDM(h5f_file); + if (ierr < 0) return ierr; + } if (ct.isLocMode() && ct.WFExtrapolation() == WFExtrapolationType::Reversible) { diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 72378704..53793986 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -247,6 +247,8 @@ add_executable(testDensityMatrix ${CMAKE_SOURCE_DIR}/src/tools/MGmol_MPI.cc ${CMAKE_SOURCE_DIR}/src/tools/mgmol_mpi_tools.cc ${CMAKE_SOURCE_DIR}/src/tools/random.cc + ${CMAKE_SOURCE_DIR}/src/ReplicatedWorkSpace.cc + ${CMAKE_SOURCE_DIR}/src/hdf_tools.cc ${CMAKE_SOURCE_DIR}/tests/ut_magma_main.cc) add_executable(testEnergyAndForces ${CMAKE_SOURCE_DIR}/tests/EnergyAndForces/testEnergyAndForces.cc) @@ -552,7 +554,7 @@ target_include_directories(testConditionDistMatrix PRIVATE ${Boost_INCLUDE_DIRS} target_include_directories(testConditionDistMatrixPower PRIVATE ${Boost_INCLUDE_DIRS}) target_include_directories(testPower PRIVATE ${Boost_INCLUDE_DIRS}) target_include_directories(testPowerDistMatrix PRIVATE ${Boost_INCLUDE_DIRS}) -target_include_directories(testDensityMatrix PRIVATE ${Boost_INCLUDE_DIRS}) +target_include_directories(testDensityMatrix PRIVATE ${Boost_INCLUDE_DIRS} ${HDF5_INCLUDE_DIRS}) target_include_directories(testGramMatrix PRIVATE ${Boost_INCLUDE_DIRS}) target_include_directories(testAndersonMix PRIVATE ${Boost_INCLUDE_DIRS}) target_include_directories(testIons PRIVATE ${Boost_INCLUDE_DIRS} ${HDF5_INCLUDE_DIRS}) @@ -566,6 +568,7 @@ target_link_libraries(testEnergyAndForces PRIVATE mgmol_src) target_link_libraries(testWFEnergyAndForces PRIVATE mgmol_src) target_link_libraries(testDMandEnergyAndForces PRIVATE mgmol_src) target_link_libraries(testIons PRIVATE mgmol_src) +target_link_libraries(testDensityMatrix PRIVATE ${HDF5_LIBRARIES}) if(${MAGMA_FOUND}) 
target_link_libraries(testDistVector PRIVATE ${SCALAPACK_LIBRARIES} From a227c04a43d32c932d9bb8e2b3a08695e9ab084f Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Mon, 2 Dec 2024 11:53:42 -0500 Subject: [PATCH 08/99] Write dm (#291) * Update use of DM in restart --- src/ProjectedMatrices.cc | 27 +++++++++++++++++++++++++++ src/ProjectedMatrices.h | 2 ++ src/ProjectedMatricesInterface.h | 16 ++++++++++++++++ src/md.cc | 18 ++++++++++++++++++ src/restart.cc | 2 +- 5 files changed, 64 insertions(+), 1 deletion(-) diff --git a/src/ProjectedMatrices.cc b/src/ProjectedMatrices.cc index e50e98bc..74ff01ed 100644 --- a/src/ProjectedMatrices.cc +++ b/src/ProjectedMatrices.cc @@ -745,6 +745,23 @@ int ProjectedMatrices::writeDM(HDFrestart& h5f_file) return dm_->write(h5f_file, name); } +template +int ProjectedMatrices::writeSavedDM(HDFrestart& h5f_file) +{ + std::string name("/Density_Matrix_WF"); + + ReplicatedWorkSpace& wspace( + ReplicatedWorkSpace::instance()); + + const MatrixType* matrix = mat_X_old_.get(); + wspace.initSquareMatrix(*matrix); + + DISTMATDTYPE* work_matrix = wspace.square_matrix(); + + hid_t file_id = h5f_file.file_id(); + return mgmol_tools::write_matrix(file_id, name, work_matrix, dim_); +} + template int ProjectedMatrices::readDM(HDFrestart& h5f_file) { @@ -752,6 +769,16 @@ int ProjectedMatrices::readDM(HDFrestart& h5f_file) return dm_->read(h5f_file, name); } +template +int ProjectedMatrices::readWFDM(HDFrestart& h5f_file) +{ + MGmol_MPI& mmpi = *(MGmol_MPI::instance()); + mmpi.barrier(); + if (mmpi.PE0()) std::cout << "ProjectedMatrices::readWFDM..." 
<< std::endl; + std::string name("/Density_Matrix_WF"); + return dm_->read(h5f_file, name); +} + template void ProjectedMatrices::printEigenvalues(std::ostream& os) const { diff --git a/src/ProjectedMatrices.h b/src/ProjectedMatrices.h index 9bc5e3c2..f2881763 100644 --- a/src/ProjectedMatrices.h +++ b/src/ProjectedMatrices.h @@ -319,7 +319,9 @@ class ProjectedMatrices : public ProjectedMatricesInterface double computeEntropyWithCheb(const double kbt); double checkCond(const double tol, const bool flag = true) override; int writeDM(HDFrestart& h5f_file) override; + int writeSavedDM(HDFrestart& h5f_file); int readDM(HDFrestart& h5f_file) override; + int readWFDM(HDFrestart& h5f_file); void printEigenvalues(std::ostream& os) const; void updateDM(const int iterative_index) override; void updateDMwithEigenstates(const int iterative_index); diff --git a/src/ProjectedMatricesInterface.h b/src/ProjectedMatricesInterface.h index e02e0b45..c52595e1 100644 --- a/src/ProjectedMatricesInterface.h +++ b/src/ProjectedMatricesInterface.h @@ -273,6 +273,14 @@ class ProjectedMatricesInterface : public ChebyshevApproximationFunction return 0; } + virtual int readWFDM(HDFrestart& h5f_file) + { + (void)h5f_file; + + exitWithErrorMessage("readWFDM"); + + return 0; + } virtual int writeDM(HDFrestart& h5f_file) { (void)h5f_file; @@ -281,6 +289,14 @@ class ProjectedMatricesInterface : public ChebyshevApproximationFunction return 0; } + virtual int writeSavedDM(HDFrestart& h5f_file) + { + (void)h5f_file; + + exitWithErrorMessage("writeSavedDM"); + + return 0; + } virtual void updateDMwithChebApproximation(const int iterative_index) { (void)iterative_index; diff --git a/src/md.cc b/src/md.cc index b773c27c..b50618c8 100644 --- a/src/md.cc +++ b/src/md.cc @@ -275,6 +275,10 @@ int MGmol::dumpMDrestartFile(OrbitalsType& orbitals, Ions& ions, << std::endl; return ierr; } + + // write DM associated with non-extrapolated wavefunctions + // (last computed solution of KS equations) + 
proj_matrices_->writeSavedDM(h5file); } ierr = h5file.close(); @@ -579,6 +583,13 @@ void MGmol::md(OrbitalsType** orbitals, Ions& ions) lrs_->clearOldCenters(); } + // save DM for possible restart write + // note: extrapolation is going to modify it! + if ((ct.out_restart_info > 2) + && (((md_iteration_ % ct.checkpoint) == 0) + || (mdstep == ct.num_MD_steps))) + proj_matrices_->saveDM(); + preWFextrapolation(); if (ct.dt > 0. @@ -656,6 +667,7 @@ void MGmol::md(OrbitalsType** orbitals, Ions& ions) template void MGmol::loadRestartFile(const std::string filename) { + if (onpe0) std::cout << "loadRestartFile..." << std::endl; MGmol_MPI& mmpi(*(MGmol_MPI::instance())); Control& ct = *(Control::instance()); Mesh* mymesh = Mesh::instance(); @@ -678,6 +690,12 @@ void MGmol::loadRestartFile(const std::string filename) global_exit(0); } + if (!ct.fullyOccupied()) + { + // overwrite DM with restart data in dataset Density_Matrix_WF + if (h5file.checkDataExists("Density_Matrix_WF")) + ierr = proj_matrices_->readWFDM(h5file); + } ierr = h5file.close(); mmpi.allreduce(&ierr, 1, MPI_MIN); diff --git a/src/restart.cc b/src/restart.cc index c12bb258..2fa76ecb 100644 --- a/src/restart.cc +++ b/src/restart.cc @@ -168,7 +168,7 @@ int MGmol::write_hdf5(HDFrestart& h5f_file, if (!ct.fullyOccupied()) { - int ierr = proj_matrices_->writeDM(h5f_file); + ierr = proj_matrices_->writeDM(h5f_file); if (ierr < 0) return ierr; } if (ct.isLocMode() From b263532e4f585267d8eab837abb86076bb044195 Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Mon, 23 Dec 2024 15:04:37 -0500 Subject: [PATCH 09/99] Remove unused function in Control (#294) --- src/Control.cc | 72 -------------------------------------------------- src/Control.h | 1 - 2 files changed, 73 deletions(-) diff --git a/src/Control.cc b/src/Control.cc index dd786678..2272d3cb 100644 --- a/src/Control.cc +++ b/src/Control.cc @@ -1119,78 +1119,6 @@ void Control::printThermostatInfo(std::ostream& os) const } } -int 
Control::readOccupations(std::ifstream* tfile) -{ - int count = 0; - float nel = 0.; - do - { - float t1 = 0.; - int nst = 0; - if (mype_ == 0) - { -#ifdef DEBUG - (*MPIdata::sout) << " Occupations of states..." << std::endl; -#endif - (*tfile) >> nst; - if (nst <= 0) - { - (*MPIdata::sout) - << "Control::readOccupations: numst=" << numst << std::endl; - (*MPIdata::sout) << "Control::readOccupations: nst=" << nst - << ", count=" << count << std::endl; - (*MPIdata::sout) << "Control::readOccupations: Bad repeat " - "count for state occupations" - << std::endl; - return -1; - } - if ((count + nst) > numst) - { - (*MPIdata::sout) << "Control::readOccupations: Occupations " - "specified for too many states" - << std::endl; - return -1; - } - - (*tfile) >> t1; - if (t1 < 0.) - { - (*MPIdata::sout) - << "Control::readOccupations: occupation=" << t1 - << std::endl; - (*MPIdata::sout) << "Control::readOccupations: occupation " - "should be a positive number" - << std::endl; - return -1; - } - finishRead(*tfile); - } - int mpirc = MPI_Bcast(&nst, 1, MPI_INT, 0, comm_global_); - if (mpirc != MPI_SUCCESS) - { - (*MPIdata::sout) - << "MPI Bcast of occupation numbers failed!!!" << std::endl; - return -1; - } - mpirc = MPI_Bcast(&t1, 1, MPI_FLOAT, 0, comm_global_); - if (mpirc != MPI_SUCCESS) - { - (*MPIdata::sout) - << "MPI Bcast of occupation failed!!!" 
<< std::endl; - return -1; - } - nel += nst * t1; - count += nst; - - } while (count < numst); - - nel_ = (int)nel; - - nempty_ = (2 * numst - (int)nel) / 2; - - return count; -} - void Control::setLocMode(const float radius, const float lx, const float ly, const float lz, const float mind_centers) { diff --git a/src/Control.h b/src/Control.h index 5134c718..2e781f79 100644 --- a/src/Control.h +++ b/src/Control.h @@ -294,7 +294,6 @@ class Control const float dtol, const short kim, const short itmax, const short lfil, const short maxfill, const short ilutype); void setSpreadRadius(); - int readOccupations(std::ifstream* tfile); bool checkTimeout() { return timeout_.check(); } bool occupationWidthIsZero() { return occ_width < 1.e-12; } From 3eb52d68f09e997f21b93de90ef7c5a1401e731c Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Fri, 3 Jan 2025 09:07:27 -0500 Subject: [PATCH 10/99] Change symlink to restart in tests (#295) * enable tests when old link present --- tests/FIRE/test.py | 6 +++++- tests/LBFGS/test.py | 8 +++++++- tests/MD_D72/test.py | 6 +++++- tests/ShortSighted/test.py | 6 +++++- 4 files changed, 22 insertions(+), 4 deletions(-) diff --git a/tests/FIRE/test.py b/tests/FIRE/test.py index eaf380d9..6866e3ac 100755 --- a/tests/FIRE/test.py +++ b/tests/FIRE/test.py @@ -49,7 +49,11 @@ restart_file = subprocess.check_output(command,shell=True) restart_file=str(restart_file[:-1],'utf-8') print(restart_file) -os.symlink(restart_file, 'wave.out') +try: + os.symlink(restart_file, 'wave.out') +except FileExistsError: + os.remove('wave.out') + os.symlink(restart_file, 'wave.out') command = "{} {} -c {}".format(mpicmd,exe,inp2) print(command) diff --git a/tests/LBFGS/test.py b/tests/LBFGS/test.py index 2ff69c9a..324fd129 100755 --- a/tests/LBFGS/test.py +++ b/tests/LBFGS/test.py @@ -49,7 +49,12 @@ restart_file = subprocess.check_output(command,shell=True) restart_file=str(restart_file[:-1],'utf-8') print(restart_file) -os.symlink(restart_file, 'wave.out') + 
+try: + os.symlink(restart_file, 'wave.out') +except FileExistsError: + os.remove('wave.out') + os.symlink(restart_file, 'wave.out') command = "{} {} -c {} -i {} -l {}".format(mpicmd,exe,inp2,coords,lrs) print(command) @@ -75,6 +80,7 @@ os.remove('wave.out') if force>tol: + print("Force larger than tol {}".format(tol)) sys.exit(1) sys.exit(0) diff --git a/tests/MD_D72/test.py b/tests/MD_D72/test.py index a664b305..17b70de2 100755 --- a/tests/MD_D72/test.py +++ b/tests/MD_D72/test.py @@ -48,7 +48,11 @@ restart_file=str(restart_file[:-1],'utf-8') print(restart_file) -os.symlink(restart_file, 'wave.out') +try: + os.symlink(restart_file, 'wave.out') +except FileExistsError: + os.remove('wave.out') + os.symlink(restart_file, 'wave.out') command = "{} {} -c {} -i {}".format(mpicmd,exe,inp2,coords) output2 = subprocess.check_output(command,shell=True) diff --git a/tests/ShortSighted/test.py b/tests/ShortSighted/test.py index 9b731f4c..27da7f7d 100755 --- a/tests/ShortSighted/test.py +++ b/tests/ShortSighted/test.py @@ -48,7 +48,11 @@ restart_file=str(restart_file[:-1],'utf-8') print(restart_file) -os.symlink(restart_file, 'wave.out') +try: + os.symlink(restart_file, 'wave.out') +except FileExistsError: + os.remove('wave.out') + os.symlink(restart_file, 'wave.out') command = "{} {} -c {}".format(mpicmd,exe,inp2) output2 = subprocess.check_output(command,shell=True) From c1d46bc129b8b0ba532676892a08a93a7f51f5b2 Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Tue, 7 Jan 2025 13:53:37 -0500 Subject: [PATCH 11/99] Extract number empty orbitals from restart file (#296) --- src/Control.h | 3 + src/HDFrestart.cc | 15 ++ src/HDFrestart.h | 3 + src/Hamiltonian.cc | 7 +- src/MVPSolver.cc | 4 +- src/setup.cc | 17 +- tests/CMakeLists.txt | 12 + tests/RestartEnergyAndForces/h2o.xyz | 6 + tests/RestartEnergyAndForces/mgmol.cfg | 33 +++ tests/RestartEnergyAndForces/restart.cfg | 32 +++ tests/RestartEnergyAndForces/test.py | 88 ++++++++ .../testRestartEnergyAndForces.cc | 206 
++++++++++++++++++ 12 files changed, 418 insertions(+), 8 deletions(-) create mode 100644 tests/RestartEnergyAndForces/h2o.xyz create mode 100644 tests/RestartEnergyAndForces/mgmol.cfg create mode 100644 tests/RestartEnergyAndForces/restart.cfg create mode 100755 tests/RestartEnergyAndForces/test.py create mode 100644 tests/RestartEnergyAndForces/testRestartEnergyAndForces.cc diff --git a/src/Control.h b/src/Control.h index 2e781f79..b3401c1d 100644 --- a/src/Control.h +++ b/src/Control.h @@ -242,6 +242,7 @@ class Control const float total_spin, std::string run_directory = "."); void setDefaultValues(); + bool withSpin() { return with_spin_; } bool globalColoring() const { return (coloring_algo_ / 10 == 0); } @@ -263,6 +264,8 @@ class Control float getSpin() const { return total_spin_; } + void setNempty(const int nempty) { nempty_ = nempty; } + short getMGlevels() { return mg_levels_; } bool withPreconditioner() const { return (mg_levels_ >= 0); } diff --git a/src/HDFrestart.cc b/src/HDFrestart.cc index 08018d47..ef658955 100644 --- a/src/HDFrestart.cc +++ b/src/HDFrestart.cc @@ -2725,6 +2725,21 @@ void HDFrestart::gatherDataXdir(std::vector& data) } } +int HDFrestart::countFunctionObjects(std::string& name) const +{ + int count = 0; + int found = 0; + do + { + std::string datasetname(getDatasetName(name, count)); + // check if dataset exists... + found = checkDataExists(datasetname); + if (found) count++; + } while (found); // dataset exists + + return count; +} + template int HDFrestart::read_1func_hdf5(float*, const std::string&); template int HDFrestart::read_1func_hdf5(double*, const std::string&); diff --git a/src/HDFrestart.h b/src/HDFrestart.h index 7ffadff7..72c5c1f5 100644 --- a/src/HDFrestart.h +++ b/src/HDFrestart.h @@ -173,6 +173,7 @@ class HDFrestart return checkDataExistsLocal(datasetname.c_str()); } + // Returns zero (false), a positive (true) or a negative (failure) value. 
herr_t checkDataExistsLocal(const char* const datasetname) const { herr_t err_id = 0; @@ -282,6 +283,8 @@ class HDFrestart int getMDstepFromFile() const; int getFromFile(const std::string& attname) const; + int countFunctionObjects(std::string& name) const; + hid_t createPlist() { hid_t plist_id = H5P_DEFAULT; diff --git a/src/Hamiltonian.cc b/src/Hamiltonian.cc index ba0aa6c4..5a5d2a53 100644 --- a/src/Hamiltonian.cc +++ b/src/Hamiltonian.cc @@ -59,9 +59,9 @@ const T& Hamiltonian::applyLocal(T& phi, const bool force) if (onpe0) { (*MPIdata::sout) << "Hamiltonian::applyLocal(), new_index =" - << new_index << endl; + << new_index << std::endl; (*MPIdata::sout) << "Hamiltonian::applyLocal(), itindex_ =" - << itindex_ << endl; + << itindex_ << std::endl; } #endif if (force || new_index != itindex_) @@ -76,7 +76,8 @@ const T& Hamiltonian::applyLocal(T& phi, const bool force) if (onpe0) (*MPIdata::sout) << "Hamiltonian::hlphi up to date, itindex_=" << itindex_ - << endl; + << ", Potential index=" << pot_->getIterativeIndex() + << std::endl; #endif } return *hlphi_; diff --git a/src/MVPSolver.cc b/src/MVPSolver.cc index 6115f933..7ab9f665 100644 --- a/src/MVPSolver.cc +++ b/src/MVPSolver.cc @@ -143,7 +143,6 @@ void MVPSolver::buildTarget_MVP( proj_mat_work_->setHB2H(); proj_mat_work_->updateDM(orbitals_index); - target = proj_mat_work_->dm(); if (ct.verbose > 2) @@ -286,7 +285,7 @@ int MVPSolver::solve(OrbitalsType& orbitals) std::cout << "MVP --- Target energy..." 
<< std::endl; proj_mat_work_->setDM(target, orbitals.getIterativeIndex()); proj_mat_work_->computeOccupationsFromDM(); - if (ct.verbose > 2) current_proj_mat->printOccupations(os_); + if (ct.verbose > 2) proj_mat_work_->printOccupations(os_); const double nel = proj_mat_work_->getNel(); if (onpe0 && ct.verbose > 1) os_ << "MVP --- Number of electrons at beta=1 : " << nel @@ -346,7 +345,6 @@ int MVPSolver::solve(OrbitalsType& orbitals) os_ << "Number of electrons for interpolated DM = " << pnel << std::endl; } - // if( onpe0 )os_<<"Rho..."<computeRho(orbitals, *work_); } diff --git a/src/setup.cc b/src/setup.cc index f022678c..9963bfc5 100644 --- a/src/setup.cc +++ b/src/setup.cc @@ -50,6 +50,7 @@ int MGmol::setupFromInput(const std::string filename) if (ct.isLocMode()) mymesh->subdivGridx(ct.getMGlevels()); const pb::PEenv& myPEenv = mymesh->peenv(); + if (ct.restart_info > 0) h5f_file_.reset( new HDFrestart(ct.restart_file, myPEenv, ct.restart_file_type)); @@ -58,8 +59,20 @@ int MGmol::setupFromInput(const std::string filename) if (status == -1) return -1; const short myspin = mmpi.myspin(); - const int nval = ions_->getNValenceElectrons(); - ct.setNumst(myspin, nval); + const int nel = ions_->getNValenceElectrons(); + // for the case of extended wavefunctions, we can determine the number + // of empty states from the number of wavefunctions in restart file + if (ct.restart_info > 2 && !ct.short_sighted) + { + std::string name = "Function"; + int count = h5f_file_->countFunctionObjects(name); + std::cout << "found " << count << " functions in restart file..." + << std::endl; + int nempty = ct.withSpin() ? 
count - nel : count - int(0.5 * nel); + ct.setNempty(nempty); + } + ct.setNumst(myspin, nel); + ct.setTolEnergy(); ct.setSpreadRadius(); diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 53793986..b29c1aad 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -256,6 +256,8 @@ add_executable(testWFEnergyAndForces ${CMAKE_SOURCE_DIR}/tests/WFEnergyAndForces/testWFEnergyAndForces.cc) add_executable(testDMandEnergyAndForces ${CMAKE_SOURCE_DIR}/tests/DMandEnergyAndForces/testDMandEnergyAndForces.cc) +add_executable(testRestartEnergyAndForces + ${CMAKE_SOURCE_DIR}/tests/RestartEnergyAndForces/testRestartEnergyAndForces.cc) if(${MAGMA_FOUND}) add_executable(testOpenmpOffload @@ -371,6 +373,15 @@ add_test(NAME testDMandEnergyAndForces ${CMAKE_CURRENT_SOURCE_DIR}/DMandEnergyAndForces/coords.in ${CMAKE_CURRENT_SOURCE_DIR}/DMandEnergyAndForces/lrs.in ${CMAKE_CURRENT_SOURCE_DIR}/../potentials) +add_test(NAME testRestartEnergyAndForces + COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/RestartEnergyAndForces/test.py + ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 4 ${MPIEXEC_PREFLAGS} + ${CMAKE_CURRENT_BINARY_DIR}/../src/mgmol-opt + ${CMAKE_CURRENT_BINARY_DIR}/testRestartEnergyAndForces + ${CMAKE_CURRENT_SOURCE_DIR}/RestartEnergyAndForces/mgmol.cfg + ${CMAKE_CURRENT_SOURCE_DIR}/RestartEnergyAndForces/restart.cfg + ${CMAKE_CURRENT_SOURCE_DIR}/RestartEnergyAndForces/h2o.xyz + ${CMAKE_CURRENT_SOURCE_DIR}/../potentials) if(${MAGMA_FOUND}) add_test(NAME testOpenmpOffload @@ -567,6 +578,7 @@ target_link_libraries(testDirectionalReduce PRIVATE MPI::MPI_CXX) target_link_libraries(testEnergyAndForces PRIVATE mgmol_src) target_link_libraries(testWFEnergyAndForces PRIVATE mgmol_src) target_link_libraries(testDMandEnergyAndForces PRIVATE mgmol_src) +target_link_libraries(testRestartEnergyAndForces PRIVATE mgmol_src) target_link_libraries(testIons PRIVATE mgmol_src) target_link_libraries(testDensityMatrix PRIVATE ${HDF5_LIBRARIES}) diff --git 
a/tests/RestartEnergyAndForces/h2o.xyz b/tests/RestartEnergyAndForces/h2o.xyz new file mode 100644 index 00000000..cdc906f6 --- /dev/null +++ b/tests/RestartEnergyAndForces/h2o.xyz @@ -0,0 +1,6 @@ +3 +https://pubchem.ncbi.nlm.nih.gov/compound/Water +O 2.5369 -0.1550 0.0 +H 3.0739 0.1550 0.0 +H 2.0000 0.1550 0.0 + diff --git a/tests/RestartEnergyAndForces/mgmol.cfg b/tests/RestartEnergyAndForces/mgmol.cfg new file mode 100644 index 00000000..e590f810 --- /dev/null +++ b/tests/RestartEnergyAndForces/mgmol.cfg @@ -0,0 +1,33 @@ +verbosity=2 +xcFunctional=PBE +FDtype=4th +[Mesh] +nx=64 +ny=64 +nz=64 +[Domain] +ox=-3.4 +oy=-6.4 +oz=-6.4 +lx=12.8 +ly=12.8 +lz=12.8 +[Potentials] +pseudopotential=pseudo.O_ONCV_PBE_SG15 +pseudopotential=pseudo.H_ONCV_PBE_SG15 +[Run] +type=QUENCH +[Quench] +solver=PSD +max_steps=120 +atol=1.e-8 +[Orbitals] +initial_type=Random +initial_width=1.5 +nempty=2 +[Restart] +output_level=3 +output_filename=WF +[DensityMatrix] +solver=MVP +nb_inner_it=1 diff --git a/tests/RestartEnergyAndForces/restart.cfg b/tests/RestartEnergyAndForces/restart.cfg new file mode 100644 index 00000000..99bc77d8 --- /dev/null +++ b/tests/RestartEnergyAndForces/restart.cfg @@ -0,0 +1,32 @@ +verbosity=2 +xcFunctional=PBE +FDtype=4th +[Mesh] +nx=64 +ny=64 +nz=64 +[Domain] +ox=-3.4 +oy=-6.4 +oz=-6.4 +lx=12.8 +ly=12.8 +lz=12.8 +[Potentials] +pseudopotential=pseudo.O_ONCV_PBE_SG15 +pseudopotential=pseudo.H_ONCV_PBE_SG15 +[Run] +type=QUENCH +[Quench] +solver=PSD +max_steps=120 +atol=1.e-8 +[Orbitals] +initial_type=Random +initial_width=1.5 +[Restart] +input_level=3 +input_filename=WF +[DensityMatrix] +solver=MVP +nb_inner_it=20 diff --git a/tests/RestartEnergyAndForces/test.py b/tests/RestartEnergyAndForces/test.py new file mode 100755 index 00000000..349434e8 --- /dev/null +++ b/tests/RestartEnergyAndForces/test.py @@ -0,0 +1,88 @@ +#!/usr/bin/env python +import sys +import os +import subprocess +import string + +print("Test RestartEnergyAndForces...") + +nargs=len(sys.argv) 
+ +mpicmd = sys.argv[1]+" "+sys.argv[2]+" "+sys.argv[3] +for i in range(4,nargs-7): + mpicmd = mpicmd + " "+sys.argv[i] +print("MPI run command: {}".format(mpicmd)) + +mgmol_exe = sys.argv[nargs-6] +test_exe = sys.argv[nargs-5] +input1 = sys.argv[nargs-4] +input2 = sys.argv[nargs-3] +coords = sys.argv[nargs-2] +print("coordinates file: %s"%coords) + +#create links to potentials files +dst1 = 'pseudo.H_ONCV_PBE_SG15' +src1 = sys.argv[-1] + '/' + dst1 + +dst2 = 'pseudo.O_ONCV_PBE_SG15' +src2 = sys.argv[-1] + '/' + dst2 + +if not os.path.exists(dst1): + print("Create link to %s"%dst1) + os.symlink(src1, dst1) + +if not os.path.exists(dst2): + print("Create link to %s"%dst2) + os.symlink(src2, dst2) + +#run mgmol +command = "{} {} -c {} -i {}".format(mpicmd,mgmol_exe,input1,coords) +print("Run command: {}".format(command)) + +output = subprocess.check_output(command,shell=True) +lines=output.split(b'\n') + +#analyse output +ref_energy=1.e18 +for line in lines: + if line.count(b'%%'): + print(line) + words=line.split() + words=words[5].split(b',')[0] + energy = words.decode() + if line.count(b'achieved'): + ref_energy=energy + break + +#sys.exit(0) + +#run test +command = "{} {} -c {} -i {}".format(mpicmd,test_exe,input2,coords) +print("Run command: {}".format(command)) +output = subprocess.check_output(command,shell=True) +lines=output.split(b'\n') + +test_energy=1.e18 +for line in lines: + if line.count(b'%%'): + print(line) + words=line.split() + words=words[5].split(b',')[0] + energy = words.decode() + if line.count(b'Eks'): + print(line) + words=line.split() + print(words) + test_energy = words[2] + break + + +tol = 1.e-6 +diff=eval(test_energy)-eval(ref_energy) +print(diff) +if abs(diff)>tol: + print("Energies differ: {} vs {} !!!".format(ref_energy,test_energy)) + sys.exit(1) + +print("Test SUCCESSFUL!") +sys.exit(0) diff --git a/tests/RestartEnergyAndForces/testRestartEnergyAndForces.cc b/tests/RestartEnergyAndForces/testRestartEnergyAndForces.cc new file mode 
100644 index 00000000..e323afbc --- /dev/null +++ b/tests/RestartEnergyAndForces/testRestartEnergyAndForces.cc @@ -0,0 +1,206 @@ +// Copyright (c) 2017, Lawrence Livermore National Security, LLC and +// UT-Battelle, LLC. +// Produced at the Lawrence Livermore National Laboratory and the Oak Ridge +// National Laboratory. +// LLNL-CODE-743438 +// All rights reserved. +// This file is part of MGmol. For details, see https://github.com/llnl/mgmol. +// Please also read this link https://github.com/llnl/mgmol/LICENSE + +#include "Control.h" +#include "ExtendedGridOrbitals.h" +#include "LocGridOrbitals.h" +#include "MGmol.h" +#include "MGmol_MPI.h" +#include "MPIdata.h" +#include "mgmol_run.h" + +#include +#include +#include +#include + +#include +namespace po = boost::program_options; + +int main(int argc, char** argv) +{ + int mpirc = MPI_Init(&argc, &argv); + if (mpirc != MPI_SUCCESS) + { + std::cerr << "MPI Initialization failed!!!" << std::endl; + MPI_Abort(MPI_COMM_WORLD, 0); + } + + MPI_Comm comm = MPI_COMM_WORLD; + + /* + * Initialize general things, like magma, openmp, IO, ... 
+ */ + mgmol_init(comm); + + /* + * read runtime parameters + */ + std::string input_filename(""); + std::string lrs_filename; + std::string constraints_filename(""); + + float total_spin = 0.; + bool with_spin = false; + + po::variables_map vm; + + // read from PE0 only + if (MPIdata::onpe0) + { + read_config(argc, argv, vm, input_filename, lrs_filename, + constraints_filename, total_spin, with_spin); + } + + MGmol_MPI::setup(comm, std::cout, with_spin); + MGmol_MPI& mmpi = *(MGmol_MPI::instance()); + MPI_Comm global_comm = mmpi.commGlobal(); + + /* + * Setup control struct with run time parameters + */ + Control::setup(global_comm, with_spin, total_spin); + Control& ct = *(Control::instance()); + + ct.setOptions(vm); + + int ret = ct.checkOptions(); + if (ret < 0) return ret; + + mmpi.bcastGlobal(input_filename); + mmpi.bcastGlobal(lrs_filename); + + // Enter main scope + { + if (MPIdata::onpe0) + { + std::cout << "-------------------------" << std::endl; + std::cout << "Construct MGmol object..." << std::endl; + std::cout << "-------------------------" << std::endl; + } + + MGmolInterface* mgmol = new MGmol(global_comm, + *MPIdata::sout, input_filename, lrs_filename, constraints_filename); + + if (MPIdata::onpe0) + { + std::cout << "-------------------------" << std::endl; + std::cout << "MGmol setup..." << std::endl; + std::cout << "-------------------------" << std::endl; + } + mgmol->setup(); + + if (MPIdata::onpe0) + { + std::cout << "-------------------------" << std::endl; + std::cout << "Setup done..." 
<< std::endl; + std::cout << "-------------------------" << std::endl; + } + + // here we just use the atomic positions read in and used + // to initialize MGmol + std::vector positions; + mgmol->getAtomicPositions(positions); + std::vector anumbers; + mgmol->getAtomicNumbers(anumbers); + if (MPIdata::onpe0) + { + std::cout << "Positions:" << std::endl; + std::vector::iterator ita = anumbers.begin(); + for (std::vector::iterator it = positions.begin(); + it != positions.end(); it += 3) + { + std::cout << *ita; + for (int i = 0; i < 3; i++) + std::cout << " " << *(it + i); + std::cout << std::endl; + ita++; + } + } + + Mesh* mymesh = Mesh::instance(); + const pb::Grid& mygrid = mymesh->grid(); + const pb::PEenv& myPEenv = mymesh->peenv(); + + HDFrestart h5file(ct.restart_file, myPEenv, ct.restart_file_type); + std::string name = "Function"; + int count = h5file.countFunctionObjects(name); + if (count != ct.numst) + { + std::cerr << "The number of functions in the restart file, " + << count << " is not equal to ct.numst, " << ct.numst + << std::endl; + MPI_Abort(mmpi.commSameSpin(), 0); + } + + std::shared_ptr projmatrices + = mgmol->getProjectedMatrices(); + + ExtendedGridOrbitals orbitals("new_orbitals", mygrid, mymesh->subdivx(), + ct.numst, ct.bcWF, projmatrices.get(), nullptr, nullptr, nullptr, + nullptr); + + // read numst_ wavefunction + int nread = orbitals.read_func_hdf5(h5file, name); + if (nread != ct.numst) + { + std::cerr << "The number of functions read from the restart file, " + << nread << " is not equal to ct.numst, " << ct.numst + << std::endl; + MPI_Abort(mmpi.commSameSpin(), 0); + } + + // set the iterative index to 1 to differentiate it from first instance + // in MGmol initial() function. 
This is not very clean and could be + // better designed, but works for now + orbitals.setIterativeIndex(1); + + // set initial DM with uniform occupations + projmatrices->setDMuniform(ct.getNelSpin(), 0); + projmatrices->printDM(std::cout); + + // + // evaluate energy and forces with wavefunctions just read + // + std::vector forces; + double eks = mgmol->evaluateDMandEnergyAndForces( + &orbitals, positions, anumbers, forces); + + // print out results + if (MPIdata::onpe0) + { + std::cout << "Eks : " << eks << std::endl; + std::cout << "Forces :" << std::endl; + for (std::vector::iterator it = forces.begin(); + it != forces.end(); it += 3) + { + for (int i = 0; i < 3; i++) + std::cout << " " << *(it + i); + std::cout << std::endl; + } + } + + delete mgmol; + + } // close main scope + + mgmol_finalize(); + + mpirc = MPI_Finalize(); + if (mpirc != MPI_SUCCESS) + { + std::cerr << "MPI Finalize failed!!!" << std::endl; + } + + time_t tt; + time(&tt); + if (onpe0) std::cout << " Run ended at " << ctime(&tt) << std::endl; + + return 0; +} From eb539633b79a4db3386559408188469fb42163ca Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Thu, 16 Jan 2025 12:45:26 -0500 Subject: [PATCH 12/99] Clean up MD_IonicStepper restart data write (#297) --- src/Ion.h | 2 +- src/IonicStepper.cc | 83 ++++++++++++++------------- src/Ions.cc | 2 +- src/MD_IonicStepper.cc | 125 +++++------------------------------------ src/MD_IonicStepper.h | 13 +---- 5 files changed, 63 insertions(+), 162 deletions(-) diff --git a/src/Ion.h b/src/Ion.h index 336dee72..ae870820 100644 --- a/src/Ion.h +++ b/src/Ion.h @@ -176,7 +176,7 @@ class Ion } double position(const short i) const { return position_[i]; } - double old_position(const short i) const { return old_position_[i]; } + double getPreviousPosition(const short i) const { return old_position_[i]; } void setPosition(const double x, const double y, const double z) { old_position_[0] = position_[0]; diff --git a/src/IonicStepper.cc 
b/src/IonicStepper.cc index ca8d53cc..aa3a7f48 100644 --- a/src/IonicStepper.cc +++ b/src/IonicStepper.cc @@ -13,8 +13,6 @@ #include "MGmol_blas1.h" #include -using namespace std; - IonicStepper::IonicStepper(const double dt, const std::vector& atmove, std::vector& tau0, std::vector& taup) : atmove_(atmove), tau0_(tau0), taup_(taup) @@ -51,7 +49,8 @@ IonicStepper::IonicStepper(const double dt, const std::vector& atmove, } int IonicStepper::writeAtomicFields(HDFrestart& h5f_file, - const vector& data, const string& name, const bool create) const + const std::vector& data, const std::string& name, + const bool create) const { hid_t file_id = h5f_file.file_id(); if (file_id < 0) return 0; @@ -62,7 +61,6 @@ int IonicStepper::writeAtomicFields(HDFrestart& h5f_file, if (create) { - // Create the data space for new datasets hsize_t dims[2] = { (hsize_t)data.size() / 3, 3 }; dataspace_id = H5Screate_simple(2, dims, nullptr); @@ -70,7 +68,7 @@ int IonicStepper::writeAtomicFields(HDFrestart& h5f_file, { (*MPIdata::serr) << "IonicStepper::writeAtomicFields, H5Screate_simple failed!!!" - << endl; + << std::endl; return -1; } @@ -81,7 +79,7 @@ int IonicStepper::writeAtomicFields(HDFrestart& h5f_file, { (*MPIdata::serr) << "IonicStepper::writeAtomicFields, H5Dcreate2 " "failed for dataset " - << name << "!!!" << endl; + << name << "!!!" << std::endl; return -1; } } @@ -93,7 +91,7 @@ int IonicStepper::writeAtomicFields(HDFrestart& h5f_file, { (*MPIdata::serr) << "IonicStepper::writeAtomicFields, H5Dopen2 " "failed for dataset " - << name << "!!!" << endl; + << name << "!!!" << std::endl; return -1; } } @@ -103,7 +101,7 @@ int IonicStepper::writeAtomicFields(HDFrestart& h5f_file, if (status < 0) { (*MPIdata::serr) << "IonicStepper::writeAtomicFields: H5Dwrite " << name - << " failed!!!" << endl; + << " failed!!!" 
<< std::endl; return -1; } else @@ -111,14 +109,15 @@ int IonicStepper::writeAtomicFields(HDFrestart& h5f_file, if (onpe0) (*MPIdata::sout) << "IonicStepper::writeAtomicFields, Data written into file " - << h5f_file.filename() << endl; + << h5f_file.filename() << std::endl; } status = H5Dclose(dataset_id); if (status < 0) { (*MPIdata::serr) - << "IonicStepper::writeAtomicFields, H5Dclose failed!!!" << endl; + << "IonicStepper::writeAtomicFields, H5Dclose failed!!!" + << std::endl; return -1; } @@ -129,7 +128,7 @@ int IonicStepper::writeAtomicFields(HDFrestart& h5f_file, { (*MPIdata::serr) << "IonicStepper::writeAtomicFields, H5Sclose failed!!!" - << endl; + << std::endl; return -1; } } @@ -137,7 +136,8 @@ int IonicStepper::writeAtomicFields(HDFrestart& h5f_file, return 0; } -int IonicStepper::writePositions(HDFrestart& h5f_file, const string& name) const +int IonicStepper::writePositions( + HDFrestart& h5f_file, const std::string& name) const { return writeAtomicFields(h5f_file, tau0_, name, false); } @@ -155,7 +155,8 @@ int IonicStepper::writeVelocities(HDFrestart& h5f_file) const hid_t dataspace_id = H5Screate_simple(2, dims, nullptr); if (dataspace_id < 0) { - (*MPIdata::serr) << "IonicStepper: H5Screate_simple failed!!!" << endl; + (*MPIdata::serr) << "IonicStepper: H5Screate_simple failed!!!" + << std::endl; return -1; } @@ -170,7 +171,7 @@ int IonicStepper::writeVelocities(HDFrestart& h5f_file) const { std::cerr << "IonicStepper::writeVelocities, H5Dopen2 " "failed for dataset " - << name << "!!!" << endl; + << name << "!!!" << std::endl; return -1; } } @@ -182,12 +183,12 @@ int IonicStepper::writeVelocities(HDFrestart& h5f_file) const { (*MPIdata::serr) << "IonicStepper:: H5Dcreate2 /Ionic_velocities failed!!!" 
- << endl; + << std::endl; return -1; } } - vector data(taup_); + std::vector data(taup_); double minus = -1.; int n = (int)tau0_.size(), ione = 1; DAXPY(&n, &minus, &tau0_[0], &ione, &data[0], &ione); @@ -202,20 +203,20 @@ int IonicStepper::writeVelocities(HDFrestart& h5f_file) const if (status < 0) { (*MPIdata::serr) << "IonicStepper::H5Dwrite velocities failed!!!" - << endl; + << std::endl; return -1; } else { if (onpe0) (*MPIdata::sout) << "Ionic velocities written into " - << h5f_file.filename() << endl; + << h5f_file.filename() << std::endl; } status = H5Dclose(dataset_id); if (status < 0) { - (*MPIdata::serr) << "H5Dclose failed!!!" << endl; + (*MPIdata::serr) << "H5Dclose failed!!!" << std::endl; return -1; } H5Sclose(dataspace_id); @@ -224,7 +225,7 @@ int IonicStepper::writeVelocities(HDFrestart& h5f_file) const } int IonicStepper::readAtomicFields( - HDFrestart& h5f_file, vector& data, const string& name) + HDFrestart& h5f_file, std::vector& data, const std::string& name) { hid_t file_id = h5f_file.file_id(); @@ -239,7 +240,7 @@ int IonicStepper::readAtomicFields( if (dataset_id < 0) { (*MPIdata::serr) << "IonicStepper, H5Dopen2 failed for " << name - << " !!!" << endl; + << " !!!" << std::endl; return -1; } @@ -249,14 +250,15 @@ int IonicStepper::readAtomicFields( H5S_ALL, H5P_DEFAULT, &data[0]); if (status < 0) { - (*MPIdata::serr) << "IonicStepper, H5Dread failed!!!" << endl; + (*MPIdata::serr) + << "IonicStepper, H5Dread failed!!!" << std::endl; return -1; } // close dataset status = H5Dclose(dataset_id); if (status < 0) { - (*MPIdata::serr) << "H5Dclose failed!!!" << endl; + (*MPIdata::serr) << "H5Dclose failed!!!" 
<< std::endl; return -1; } } @@ -265,13 +267,14 @@ int IonicStepper::readAtomicFields( return 0; } -int IonicStepper::readPositions_hdf5(HDFrestart& h5f_file, const string& name) +int IonicStepper::readPositions_hdf5( + HDFrestart& h5f_file, const std::string& name) { return readAtomicFields(h5f_file, tau0_, name); } int IonicStepper::writeRandomStates(HDFrestart& h5f_file, - vector& data, const string& name) const + std::vector& data, const std::string& name) const { hid_t file_id = h5f_file.file_id(); bool create = false; @@ -282,7 +285,7 @@ int IonicStepper::writeRandomStates(HDFrestart& h5f_file, if (dataset_id < 0) { (*MPIdata::serr) << "IonicStepper:: H5Dopen2 " << name - << " failed!!! Creating new data file " << endl; + << " failed!!! Creating new data file " << std::endl; // Create the data space for the dataset hsize_t dims[2] = { (hsize_t)data.size() / 3, 3 }; @@ -291,7 +294,7 @@ int IonicStepper::writeRandomStates(HDFrestart& h5f_file, { (*MPIdata::serr) << "Ions::writeRandomStates: H5Screate_simple failed!!!" - << endl; + << std::endl; return -1; } // Create the dataset @@ -300,7 +303,7 @@ int IonicStepper::writeRandomStates(HDFrestart& h5f_file, if (dataset_id < 0) { (*MPIdata::serr) - << "Ions::writeRandomStates: H5Dcreate2 failed!!!" << endl; + << "Ions::writeRandomStates: H5Dcreate2 failed!!!" << std::endl; return -1; } create = true; @@ -312,7 +315,7 @@ int IonicStepper::writeRandomStates(HDFrestart& h5f_file, if (status < 0) { (*MPIdata::serr) << "IonicStepper::writeRandomStates: H5Dwrite " << name - << " failed!!!" << endl; + << " failed!!!" << std::endl; return -1; } else @@ -320,14 +323,15 @@ int IonicStepper::writeRandomStates(HDFrestart& h5f_file, if (onpe0) (*MPIdata::sout) << "IonicStepper::writeRandomStates, Data written into file " - << h5f_file.filename() << endl; + << h5f_file.filename() << std::endl; } status = H5Dclose(dataset_id); if (status < 0) { (*MPIdata::serr) - << "IonicStepper::writeRandomStates, H5Dclose failed!!!" 
<< endl; + << "IonicStepper::writeRandomStates, H5Dclose failed!!!" + << std::endl; return -1; } @@ -338,7 +342,7 @@ int IonicStepper::writeRandomStates(HDFrestart& h5f_file, { (*MPIdata::serr) << "IonicStepper::writeAtomicFields, H5Sclose failed!!!" - << endl; + << std::endl; return -1; } } @@ -346,8 +350,8 @@ int IonicStepper::writeRandomStates(HDFrestart& h5f_file, return 0; } -int IonicStepper::readRandomStates( - HDFrestart& h5f_file, vector& data, const string& name) +int IonicStepper::readRandomStates(HDFrestart& h5f_file, + std::vector& data, const std::string& name) { hid_t file_id = h5f_file.file_id(); @@ -360,10 +364,10 @@ int IonicStepper::readRandomStates( if (onpe0) { (*MPIdata::sout) - << "H5Dopen failed for /Ionic_RandomStates" << endl; + << "H5Dopen failed for /Ionic_RandomStates" << std::endl; (*MPIdata::sout) << "Set random states to default computed in Ion.cc" - << endl; + << std::endl; } } else @@ -373,19 +377,20 @@ int IonicStepper::readRandomStates( if (onpe0) (*MPIdata::sout) << "Read Ionic random states from " - << h5f_file.filename() << endl; + << h5f_file.filename() << std::endl; herr_t status = H5Dread(dataset_id, H5T_NATIVE_USHORT, H5S_ALL, H5S_ALL, H5P_DEFAULT, &data[0]); if (status < 0) { - (*MPIdata::serr) << "IonicStepper: H5Dread failed!!!" << endl; + (*MPIdata::serr) + << "IonicStepper: H5Dread failed!!!" << std::endl; return -1; } // close dataset status = H5Dclose(dataset_id); if (status < 0) { - (*MPIdata::serr) << "H5Dclose failed!!!" << endl; + (*MPIdata::serr) << "H5Dclose failed!!!" 
<< std::endl; return -1; } } diff --git a/src/Ions.cc b/src/Ions.cc index 852151c7..8b956706 100644 --- a/src/Ions.cc +++ b/src/Ions.cc @@ -3209,7 +3209,7 @@ void Ions::initStepperData() for (short i = 0; i < 3; i++) { - taum_.push_back((*lion)->old_position(i)); + taum_.push_back((*lion)->getPreviousPosition(i)); tau0_.push_back((*lion)->position(i)); fion_.push_back((*lion)->force(i)); velocity_.push_back((*lion)->velocity(i)); diff --git a/src/MD_IonicStepper.cc b/src/MD_IonicStepper.cc index 7e797602..a1b4acb1 100644 --- a/src/MD_IonicStepper.cc +++ b/src/MD_IonicStepper.cc @@ -114,7 +114,8 @@ int MD_IonicStepper::init(HDFrestart& /*h5f_file*/) (*MPIdata::sout) << "MD_IonicStepper::init() --- use positions " "from restart file with dt=" << dt_ << endl; - // taum_: velocities -> displacements = -dt*vel + // taum_ was initialized with velocities + // set taum_ to displacements = -dt*vel double alpha = -1. * dt_; DSCAL(&size_tau, &alpha, &taum_[0], &ione); @@ -123,117 +124,13 @@ int MD_IonicStepper::init(HDFrestart& /*h5f_file*/) taup_ = tau0_; DAXPY(&size_tau, &minus_one, &taum_[0], &ione, &taup_[0], &ione); - // taum_ -> previous positions: tau0_ - dt*vel + // Now set taum_ to previous positions: tau0_ - dt*vel DAXPY(&size_tau, &one, &tau0_[0], &ione, &taum_[0], &ione); } return 0; } -int MD_IonicStepper::writeForces(HDFrestart& h5f_file) -{ - hid_t file_id = h5f_file.file_id(); - - // Create the data space for new datasets - hsize_t dims[2] = { (hsize_t)tau0_.size() / 3, 3 }; - - hid_t dataspace_id = H5Screate_simple(2, dims, nullptr); - if (dataspace_id < 0) - { - (*MPIdata::serr) << "MD_IonicStepper: H5Screate_simple failed!!!" - << endl; - return -1; - } - - // Open dataset - hid_t dataset_id = H5Dcreate2(file_id, "/Ionic_forces", H5T_NATIVE_DOUBLE, - dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - if (dataset_id < 0) - { - (*MPIdata::serr) - << "MD_IonicStepper::H5Dcreate2 /Ionic_forces failed!!!" 
<< endl; - return -1; - } - - // Write forces - herr_t status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, - H5P_DEFAULT, &fion_[0]); - if (status < 0) - { - (*MPIdata::serr) << "MD_IonicStepper::H5Dwrite forces failed!!!" - << endl; - return -1; - } - else - { - if (onpe0) - (*MPIdata::sout) - << "Ionic forces written into " << h5f_file.filename() << endl; - } - - status = H5Dclose(dataset_id); - if (status < 0) - { - (*MPIdata::serr) << "H5Dclose failed!!!" << endl; - return -1; - } - H5Sclose(dataspace_id); - - return 0; -} - -int MD_IonicStepper::writeTaum(HDFrestart& h5f_file) -{ - hid_t file_id = h5f_file.file_id(); - - // Create the data space for new datasets - hsize_t dims[2] = { (hsize_t)taum_.size() / 3, 3 }; - - hid_t dataspace_id = H5Screate_simple(2, dims, nullptr); - if (dataspace_id < 0) - { - (*MPIdata::serr) << "MD_IonicStepper: H5Screate_simple failed!!!" - << endl; - return -1; - } - - // Open dataset - hid_t dataset_id = H5Dcreate2(file_id, "/Ionic_velocities", - H5T_NATIVE_DOUBLE, dataspace_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - if (dataset_id < 0) - { - (*MPIdata::serr) - << "MD_IonicStepper::H5Dcreate2 /Ionic_velocities failed!!!" - << endl; - return -1; - } - - // Write forces - herr_t status = H5Dwrite(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, - H5P_DEFAULT, &taum_[0]); - if (status < 0) - { - (*MPIdata::serr) << "MD_IonicStepper::H5Dwrite taum failed!!!" << endl; - return -1; - } - else - { - if (onpe0) - (*MPIdata::sout) << "Ionic velocities written into " - << h5f_file.filename() << endl; - } - - status = H5Dclose(dataset_id); - if (status < 0) - { - (*MPIdata::serr) << "H5Dclose failed!!!" << endl; - return -1; - } - H5Sclose(dataspace_id); - - return 0; -} - int MD_IonicStepper::write_hdf5(HDFrestart& h5f_file) { hid_t file_id = h5f_file.file_id(); @@ -251,11 +148,18 @@ int MD_IonicStepper::write_hdf5(HDFrestart& h5f_file) if (status < 0) return status; if (dt_ > 0.) 
+ { writeVelocities(h5f_file); + std::string datasetname("/Ionic_previous_positions"); + writeAtomicFields(h5f_file, taum_, datasetname, true); + } else - writeTaum(h5f_file); - - writeForces(h5f_file); + { + std::string datasetname("/Ionic_velocities"); + writeAtomicFields(h5f_file, taum_, datasetname, true); + } + std::string datasetname("/Ionic_forces"); + writeAtomicFields(h5f_file, fion_, datasetname, true); } // @@ -469,7 +373,8 @@ void MD_IonicStepper::updateTau() tau0_ = taup_; if (dt_ > 0.) { - // update taup to be able to compute velocity... + // update taup_ to be able to compute velocity: + // taup_ <- tau0_-taum_ int size_tau = (int)tau0_.size(); int ione = 1; double alpha = 1.; diff --git a/src/MD_IonicStepper.h b/src/MD_IonicStepper.h index c62b57a5..24705a1a 100644 --- a/src/MD_IonicStepper.h +++ b/src/MD_IonicStepper.h @@ -7,15 +7,8 @@ // This file is part of MGmol. For details, see https://github.com/llnl/mgmol. // Please also read this link https://github.com/llnl/mgmol/LICENSE -//////////////////////////////////////////////////////////////////////////////// -// -// MD_IonicStepper.h: -// -//////////////////////////////////////////////////////////////////////////////// -// $Id$ - -#ifndef MD_IONICSTEPPER_H -#define MD_IONICSTEPPER_H +#ifndef MGMOL_MD_IONICSTEPPER_H +#define MGMOL_MD_IONICSTEPPER_H #include "IonicStepper.h" #include @@ -72,8 +65,6 @@ class MD_IonicStepper : public IonicStepper void updateTau(); double etol(void) const override; int write_hdf5(HDFrestart&) override; - int writeForces(HDFrestart& h5f_file); - int writeTaum(HDFrestart& h5f_file); int init(HDFrestart&) override; void printVelocities(std::ostream& os) const; From 857673f8219c16724e39119685f5bfd9df86e9c4 Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Thu, 16 Jan 2025 21:53:54 -0500 Subject: [PATCH 13/99] Clean up code based on compiler warnings (#299) --- scripts/build_ubuntu22_openmpi.sh | 2 +- src/Control.cc | 17 ----------------- src/DistributedIonicData.cc 
| 13 ++++++------- src/HDFrestart.cc | 2 +- src/HamiltonianMVP_DMStrategy.cc | 2 +- src/IonData.cc | 17 ++++++++--------- src/Ions.cc | 6 ------ src/MGmol.cc | 2 ++ src/MVP_DMStrategy.h | 2 +- src/Potentials.cc | 3 +-- src/Potentials.h | 2 +- src/hdf_tools.cc | 30 ++++++++++++++---------------- src/mgmol_run.cc | 1 - tests/testSetGhostValues.cc | 4 ---- 14 files changed, 36 insertions(+), 67 deletions(-) diff --git a/scripts/build_ubuntu22_openmpi.sh b/scripts/build_ubuntu22_openmpi.sh index 23f510b6..474d61e5 100755 --- a/scripts/build_ubuntu22_openmpi.sh +++ b/scripts/build_ubuntu22_openmpi.sh @@ -25,7 +25,7 @@ cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_DIR} \ -DMPIEXEC_PREFLAGS="--oversubscribe" \ -DMGMOL_WITH_CLANG_FORMAT=ON \ -DCMAKE_PREFIX_PATH=${HOME}/bin \ - -D CMAKE_CXX_FLAGS="-Wall -pedantic -Wextra" \ + -D CMAKE_CXX_FLAGS="-Wall -pedantic -Wextra -Wno-cast-function-type" \ .. # call make install diff --git a/src/Control.cc b/src/Control.cc index 2272d3cb..2c501aa9 100644 --- a/src/Control.cc +++ b/src/Control.cc @@ -31,23 +31,6 @@ float Control::total_spin_ = 0.; std::string Control::run_directory_("."); bool Control::with_spin_ = false; -static void finishRead(std::ifstream& tfile) -{ - // while( tfile.get()!='\n'); - // string str; - // getline(tfile,str); - char str[256]; - tfile.getline(str, 256); - - char cc = (char)tfile.peek(); - while (cc == ('#') || (cc == '\n') || cc == ' ') - { - while (tfile.get() != '\n') - ; - cc = (char)tfile.peek(); // look at next character - } -} - Control::Control() { assert(comm_global_ != MPI_COMM_NULL); diff --git a/src/DistributedIonicData.cc b/src/DistributedIonicData.cc index 6652c41a..05399cac 100644 --- a/src/DistributedIonicData.cc +++ b/src/DistributedIonicData.cc @@ -12,7 +12,6 @@ #include "tools.h" #include -using namespace std; DistributedIonicData::DistributedIonicData( const std::vector& local_names, @@ -31,16 +30,16 @@ DistributedIonicData::DistributedIonicData( int DistributedIonicData::pack(char* cbuff, 
double* dbuff) { - vector::iterator itf = data_.begin(); - double* dptr = dbuff; + std::vector::iterator itf = data_.begin(); + double* dptr = dbuff; int idx = 0; - for (vector::iterator it = ion_names_.begin(); + for (std::vector::iterator it = ion_names_.begin(); it != ion_names_.end(); ++it) { - string s(*it); + std::string s(*it); FixedLengthString t; - strncpy(t.mystring, s.c_str(), IonData_MaxStrLength); + strncpy(t.mystring, s.c_str(), IonData_MaxStrLength - 1); memcpy(&cbuff[idx], t.mystring, IonData_MaxStrLength); idx += IonData_MaxStrLength; @@ -58,7 +57,7 @@ void DistributedIonicData::unpack(char*& cptr, double*& dptr, const short ndata) for (short i = 0; i < ndata; i++) { // get name - string name(cptr, IonData_MaxStrLength); + std::string name(cptr, IonData_MaxStrLength); stripLeadingAndTrailingBlanks(name); ion_names_.push_back(name); cptr += IonData_MaxStrLength; diff --git a/src/HDFrestart.cc b/src/HDFrestart.cc index ef658955..ac856ff2 100644 --- a/src/HDFrestart.cc +++ b/src/HDFrestart.cc @@ -428,7 +428,7 @@ void HDFrestart::addReleaseNumber2File(const char* release) // if( onpe0 ) { HDF_FixedLengthString t; - strncpy(t.mystring, release, MyHDFStrLength); + strncpy(t.mystring, release, MyHDFStrLength - 1); herr_t status = H5Awrite(attribute_id, strtype, &t); if (status < 0) { diff --git a/src/HamiltonianMVP_DMStrategy.cc b/src/HamiltonianMVP_DMStrategy.cc index 2b5f8913..be62b894 100644 --- a/src/HamiltonianMVP_DMStrategy.cc +++ b/src/HamiltonianMVP_DMStrategy.cc @@ -57,7 +57,7 @@ HamiltonianMVP_DMStrategy void HamiltonianMVP_DMStrategy::initialize(OrbitalsType& orbitals) + OrbitalsType>::initialize(OrbitalsType&) { } diff --git a/src/IonData.cc b/src/IonData.cc index 8f62ddd8..89c1bd0c 100644 --- a/src/IonData.cc +++ b/src/IonData.cc @@ -10,12 +10,11 @@ #include "IonData.h" #include "tools.h" #include -using namespace std; void IonData::unpack(char*& cptr, int*& iptr, double*& dptr) { // get name - string name(cptr, IonData_MaxStrLength); + 
std::string name(cptr, IonData_MaxStrLength); stripLeadingAndTrailingBlanks(name); ion_name = name; cptr += IonData_MaxStrLength; @@ -52,16 +51,16 @@ void IonData::unpack(char*& cptr, int*& iptr, double*& dptr) // pack Ions data for communication void IonData::packIonData( - char* cbuff, int* ibuff, double* dbuff, vector& data) + char* cbuff, int* ibuff, double* dbuff, std::vector& data) { // pack ion_names buffer int idx = 0; - for (vector::iterator idata = data.begin(); idata != data.end(); - ++idata) + for (std::vector::iterator idata = data.begin(); + idata != data.end(); ++idata) { - string s = (*idata).ion_name; + std::string s = (*idata).ion_name; FixedLengthString t; - strncpy(t.mystring, s.c_str(), IonData_MaxStrLength); + strncpy(t.mystring, s.c_str(), IonData_MaxStrLength - 1); memcpy(&cbuff[idx], t.mystring, IonData_MaxStrLength); idx += IonData_MaxStrLength; } @@ -69,8 +68,8 @@ void IonData::packIonData( // pack integer datatypes int* iptr = &ibuff[0]; // first pack local data size - *(iptr++) = data.size(); - vector::iterator idata = data.begin(); + *(iptr++) = data.size(); + std::vector::iterator idata = data.begin(); while (idata != data.end()) { // pack atomic_num diff --git a/src/Ions.cc b/src/Ions.cc index 8b956706..11c42f64 100644 --- a/src/Ions.cc +++ b/src/Ions.cc @@ -1608,7 +1608,6 @@ int Ions::readAtomsFromXYZ( const std::string& filename, const bool cell_relative) { MGmol_MPI& mmpi(*(MGmol_MPI::instance())); - Control& ct(*(Control::instance())); // set up list boundaries // get radius of projectors @@ -1698,9 +1697,6 @@ int Ions::readAtomsFromXYZ( int Ions::setAtoms( const std::vector& crds, const std::vector& spec) { - MGmol_MPI& mmpi(*(MGmol_MPI::instance())); - Control& ct(*(Control::instance())); - const int natoms = crds.size() / 3; double velocity[3] = { 0., 0., 0. 
}; @@ -1748,8 +1744,6 @@ int Ions::setAtoms( addIonToList(species_[isp], aname, &crds[3 * ia], velocity, locked); } - // std::cout<& data, std::vector& tc) +{ + tc.clear(); + for (auto& d : data) + { + FixedLengthString t; + strncpy(t.mystring, d.c_str(), IonData_MaxStrLength - 1); + tc.push_back(t); + } +} + void write1d(hid_t file_id, const std::string& datasetname, std::vector& data, size_t length) { @@ -235,14 +247,7 @@ void write2d(hid_t file_id, const std::string& datasetname, // First copy the contents of the vector into a temporary container std::vector tc; - for (std::vector::const_iterator i = data.begin(), - end = data.end(); - i != end; ++i) - { - FixedLengthString t; - strncpy(t.mystring, i->c_str(), IonData_MaxStrLength); - tc.push_back(t); - } + string2fixedlength(data, tc); std::string attname("String_Length"); hsize_t dimsA[1] = { 1 }; @@ -583,14 +588,7 @@ void parallelWrite2d(hid_t file_id, const std::string& datasetname, // First copy the contents of the vector into a temporary container std::vector tc; - for (std::vector::const_iterator i = data.begin(), - end = data.end(); - i != end; ++i) - { - FixedLengthString t; - strncpy(t.mystring, i->c_str(), IonData_MaxStrLength); - tc.push_back(t); - } + string2fixedlength(data, tc); status = H5Dwrite(dset_id, strtype, memspace, filespace, plist_id, &tc[0]); if (status < 0) { diff --git a/src/mgmol_run.cc b/src/mgmol_run.cc index e746611c..a5de1ad5 100644 --- a/src/mgmol_run.cc +++ b/src/mgmol_run.cc @@ -91,7 +91,6 @@ int mgmol_check() Control& ct = *(Control::instance()); Mesh* mymesh = Mesh::instance(); const pb::PEenv& myPEenv = mymesh->peenv(); - MGmol_MPI& mmpi = *(MGmol_MPI::instance()); if (myPEenv.color() > 0) { diff --git a/tests/testSetGhostValues.cc b/tests/testSetGhostValues.cc index f8520055..c3680087 100644 --- a/tests/testSetGhostValues.cc +++ b/tests/testSetGhostValues.cc @@ -18,8 +18,6 @@ TEST_CASE("Set ghost values", "[set ghosts") const double ll = 1.; const double lattice[3] = { ll, 
ll, ll }; - const int nfunc = 10; - MGmol_MPI::setup(MPI_COMM_WORLD, std::cout); MGmol_MPI& mmpi = *(MGmol_MPI::instance()); @@ -101,8 +99,6 @@ TEST_CASE("Set ghost values", "[set ghosts") gf.assign(inner_data.data(), 'd'); gf.set_updated_boundaries(false); - double norm_before = gf.norm2(); - // fill ghost values const bool direction[3] = { true, true, true }; gf.defaultTrade_boundaries(); From f294d3c262801ff173e198113533413b5fa0e225 Mon Sep 17 00:00:00 2001 From: "Siu Wun \"Tony\" Cheung" Date: Thu, 23 Jan 2025 05:19:12 -0800 Subject: [PATCH 14/99] Add getForces for evaluateDMandEnergyAndForces (#300) From 98a79dee2df4c5d653e088fde18e4ce691262b2b Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Fri, 24 Jan 2025 12:52:46 -0500 Subject: [PATCH 15/99] Add check for compatibility MVP/Mehrstellen (#301) --- src/Control.cc | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/Control.cc b/src/Control.cc index 2c501aa9..d266aa2e 100644 --- a/src/Control.cc +++ b/src/Control.cc @@ -1818,6 +1818,14 @@ int Control::checkOptions() return -1; } + if (DM_solver_ > 0 && lap_type == 0) + { + std::cerr << "DM_solver_ = " << DM_solver_ << std::endl; + std::cerr << "ERROR: Mehrstellen not compatible with MVP inner solvers!" 
+ << std::endl; + return -1; + } + if (it_algo_type_ == 3 && lap_type == 0) { std::cerr From 8eda83e52d9132892337daf92383522bfee3726c Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Mon, 3 Feb 2025 11:14:17 -0500 Subject: [PATCH 16/99] Add support for Br atom (#302) --- potentials/pseudo.Br_ONCV_PBE_SG15 | 2529 ++++++++++++++++++++++++++++ src/Ions.cc | 14 +- 2 files changed, 2536 insertions(+), 7 deletions(-) create mode 100644 potentials/pseudo.Br_ONCV_PBE_SG15 diff --git a/potentials/pseudo.Br_ONCV_PBE_SG15 b/potentials/pseudo.Br_ONCV_PBE_SG15 new file mode 100644 index 00000000..c135617b --- /dev/null +++ b/potentials/pseudo.Br_ONCV_PBE_SG15 @@ -0,0 +1,2529 @@ +# This pseudopotential file has been produced using the code +# ONCVPSP (Optimized Norm-Conservinng Vanderbilt PSeudopotential) +# scalar-relativistic version 2.1.1, 03/26/2014 by D. R. Hamann +# The code is available through a link at URL www.mat-simresearch.com. +# Documentation with the package provides a full discription of the +# input data below. +# +# +# While it is not required under the terms of the GNU GPL, it is +# suggested that you cite D. R. Hamann, Phys. Rev. B 88, 085117 (2013) +# in any publication using these pseudopotentials. +# +# +# Copyright 2015 The Regents of the University of California +# +# This work is licensed under the Creative Commons Attribution-ShareAlike +# 4.0 International License. To view a copy of this license, visit +# http://creativecommons.org/licenses/by-sa/4.0/ or send a letter to +# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. +# +# This pseudopotential is part of the Schlipf-Gygi norm-conserving +# pseudopotential library. Its construction parameters were tuned to +# reproduce materials of a training set with very high accuracy and +# should be suitable as a general purpose pseudopotential to treat a +# variety of different compounds. For details of the construction and +# testing of the pseudopotential please refer to: +# +# M. 
Schlipf, F. Gygi, Comp. Phys. Comm. 196, 36 (2015) +# http://dx.doi.org/10.1016/j.cpc.2015.05.011 +# +# We kindly ask that you include this reference in all publications +# associated to this pseudopotential. +# +# +# Input file for PP generation: +# +# # ATOM AND REFERENCE CONFIGURATION +# # atsym z nc nv iexc psfile +# Br 35.00 6 2 4 fpmd +# # +# # n l f energy (Ha) +# 1 0 2.00 +# 2 0 2.00 +# 2 1 6.00 +# 3 0 2.00 +# 3 1 6.00 +# 3 2 10.00 +# 4 0 2.00 +# 4 1 5.00 +# # +# # PSEUDOPOTENTIAL AND OPTIMIZATION +# # lmax +# 2 +# # +# # l, rc, ep, ncon, nbas, qcut +# 0 2.49793 -0.73993 5 8 5.10687 +# 1 2.49793 -0.28884 5 8 4.46675 +# 2 2.49793 0.09432 5 8 4.91674 +# # +# # LOCAL POTENTIAL +# # lloc, lpopt, rc(5), dvloc0 +# 4 5 2.24365 0.00000 +# # +# # VANDERBILT-KLEINMAN-BYLANDER PROJECTORs +# # l, nproj, debl +# 0 2 1.26029 +# 1 2 1.41324 +# 2 2 1.05278 +# # +# # MODEL CORE CHARGE +# # icmod, fcfact +# 0 0.00000 +# # +# # LOG DERIVATIVE ANALYSIS +# # epsh1, epsh2, depsh +# -5.00 3.00 0.02 +# # +# # OUTPUT GRID +# # rlmax, drl +# 6.00 0.01 +# # +# # TEST CONFIGURATIONS +# # ncnf +# 0 +# # nvcnf +# # n l f +# +Br_ONCV_PBE +# +color +#radii of balls and covalent bonds +-1. -1. 
+# Nlcc flag +0 +# Atomic number +35 +# Atomic mass +79.90399933 +# Number of valence electrons +7 +#Gaussian core charge parameter rc +1.0 +# Number of potentials +4 +# l-value for state which is local, then type of potential format +3 3 +# Local potential radius +3.2 +# Non-local potential radius +3.2 +# number of points in radial grid +602 +# VANDERBILT-KLEINMAN-BYLANDER PROJECTORs +# l, nproj +0 2 -0.3626325499E+01 0.1750463805E+01 +1 2 0.4803824052E+02 0.7506916045E+00 +2 2 0.1514320363E+02 0.8114306630E-01 +# l= 0 +0.0 -3.0708154786 -2.4047930787 +0.01 -3.070772567 -2.4042133705 +0.02 -3.0706536405 -2.4024662354 +0.03 -3.0704488283 -2.3995597251 +0.04 -3.0701482591 -2.3955018914 +0.05 -3.0697381761 -2.3903039657 +0.06 -3.0692009986 -2.3839803178 +0.07 -3.0685154018 -2.376548404 +0.08 -3.0676564136 -2.3680287033 +0.09 -3.0665955275 -2.3584446428 +0.1 -3.0653008322 -2.3478225123 +0.11 -3.063737157 -2.3361913688 +0.12 -3.0618662322 -2.3235829307 +0.13 -3.0596468635 -2.3100314626 +0.14 -3.0570351213 -2.2955736506 +0.15 -3.053984542 -2.2802484691 +0.16 -3.0504463422 -2.2640970395 +0.17 -3.0463696444 -2.2471624809 +0.18 -3.0417017127 -2.2294897538 +0.19 -3.0363881987 -2.2111254979 +0.2 -3.030373396 -2.1921178621 +0.21 -3.0236005018 -2.1725163322 +0.22 -3.0160118859 -2.1523715511 +0.23 -3.0075493645 -2.1317351366 +0.24 -2.9981544792 -2.1106594957 +0.25 -2.987768779 -2.0891976363 +0.26 -2.976334104 -2.0674029769 +0.27 -2.9637928718 -2.0453291551 +0.28 -2.9500883622 -2.0230298363 +0.29 -2.9351650013 -2.0005585216 +0.3 -2.9189686442 -1.9779683581 +0.31 -2.9014468534 -1.9553119497 +0.32 -2.8825491704 -1.9326411716 +0.33 -2.8622273897 -1.910006986 +0.34 -2.840435816 -1.8874592637 +0.35 -2.8171315174 -1.8650466102 +0.36 -2.7922745751 -1.8428161943 +0.37 -2.7658283185 -1.820813584 +0.38 -2.7377595404 -1.7990825945 +0.39 -2.7080387236 -1.7776651304 +0.4 -2.6766402368 -1.7566010457 +0.41 -2.643542518 -1.7359280108 +0.42 -2.6087282508 -1.7156813851 +0.43 -2.5721845223 
-1.6958940993 +0.44 -2.5339029669 -1.6765965483 +0.45 -2.4938798925 -1.6578164926 +0.46 -2.4521163913 -1.639578971 +0.47 -2.4086184283 -1.6219062255 +0.48 -2.3633969093 -1.6048176402 +0.49 -2.3164677785 -1.588329662 +0.5 -2.2678520179 -1.5724557793 +0.51 -2.2175756537 -1.5572064995 +0.52 -2.165669807 -1.5425892978 +0.53 -2.1121706776 -1.5286086079 +0.54 -2.0571194038 -1.5152658873 +0.55 -2.0005622031 -1.5025595131 +0.56 -1.9425500828 -1.4904849389 +0.57 -1.8831389418 -1.4790346138 +0.58 -1.8223892906 -1.4681981355 +0.59 -1.760366311 -1.4579621933 +0.6 -1.6971394766 -1.4483107855 +0.61 -1.6327826828 -1.439225116 +0.62 -1.5673737973 -1.4306838573 +0.63 -1.5009946281 -1.4226631485 +0.64 -1.4337307139 -1.415136705 +0.65 -1.365670995 -1.4080760077 +0.66 -1.2969076816 -1.4014503615 +0.67 -1.227536009 -1.3952270271 +0.68 -1.1576539096 -1.3893714093 +0.69 -1.087361756 -1.3838471989 +0.7 -1.0167621481 -1.3786164792 +0.71 -0.94595957813 -1.3736399185 +0.72 -0.87506012463 -1.3688769435 +0.73 -0.80417116303 -1.3642858995 +0.74 -0.7334010578 -1.3598242236 +0.75 -0.66285885089 -1.355448623 +0.76 -0.59265395182 -1.351115238 +0.77 -0.52289580245 -1.3467798445 +0.78 -0.45369356504 -1.3423980267 +0.79 -0.38515580233 -1.337925358 +0.8 -0.31739015855 -1.3333175826 +0.81 -0.25050304259 -1.3285307948 +0.82 -0.18459931451 -1.3235216181 +0.83 -0.11978197659 -1.3182473801 +0.84 -0.056151869857 -1.3126662853 +0.85 0.0061926226578 -1.3067375838 +0.86 0.067155864931 -1.3004217357 +0.87 0.12664524827 -1.2936805697 +0.88 0.18457146187 -1.2864774371 +0.89 0.2408487527 -1.2787773586 +0.9 0.29539517376 -1.2705471649 +0.91 0.34813281968 -1.2617556292 +0.92 0.39898804895 -1.2523735929 +0.93 0.44789169169 -1.2423740812 +0.94 0.49477924217 -1.2317324118 +0.95 0.53959097092 -1.2204262598 +0.96 0.58227215742 -1.2084357833 +0.97 0.6227733871 -1.1957437779 +0.98 0.66105043859 -1.1823356271 +0.99 0.6970644809 -1.1681994044 +1.0 0.73078216308 -1.1533259198 +1.01 0.76217568426 -1.1377087564 +1.02 
0.79122266757 -1.1213442058 +1.03 0.81790653051 -1.1042314518 +1.04 0.84221661359 -1.0863726296 +1.05 0.86414774871 -1.0677726042 +1.06 0.88370040536 -1.048439037 +1.07 0.90088046461 -1.0283822657 +1.08 0.91569937292 -1.0076153718 +1.09 0.92817465654 -0.98615441727 +1.1 0.93832889415 -0.96401792656 +1.11 0.9461899162 -0.94122697008 +1.12 0.9517900713 -0.91780480034 +1.13 0.95516808541 -0.8937777179 +1.14 0.95636695802 -0.86917403945 +1.15 0.95543424502 -0.84402421656 +1.16 0.95242155537 -0.81836058881 +1.17 0.94738611915 -0.79221806224 +1.18 0.94038861073 -0.76563307802 +1.19 0.93149314032 -0.73864360126 +1.2 0.9207680708 -0.71128944543 +1.21 0.90828544129 -0.68361195316 +1.22 0.89411988748 -0.65565351703 +1.23 0.87834887992 -0.62745766744 +1.24 0.86105341064 -0.5990692311 +1.25 0.84231617223 -0.57053359643 +1.26 0.82222172706 -0.54189679743 +1.27 0.80085735674 -0.51320561099 +1.28 0.77831122726 -0.48450694828 +1.29 0.75467272985 -0.45584799729 +1.3 0.73003271242 -0.42727593895 +1.31 0.70448226061 -0.39883777816 +1.32 0.6781130858 -0.37058037334 +1.33 0.65101693162 -0.34254982352 +1.34 0.62328542089 -0.31479192232 +1.35 0.59500981379 -0.28735157546 +1.36 0.56628028474 -0.26027246962 +1.37 0.53718687691 -0.23359805009 +1.38 0.50781737223 -0.20736952858 +1.39 0.47825865123 -0.18162739142 +1.4 0.44859623165 -0.15641087651 +1.41 0.41891237101 -0.13175663087 +1.42 0.38928923477 -0.10770120711 +1.43 0.35980439328 -0.084277653647 +1.44 0.33053421696 -0.061518184098 +1.45 0.30155239027 -0.03945302065 +1.46 0.27292797429 -0.0181091649 +1.47 0.24473036233 0.002486186983 +1.48 0.2170211898 0.022311130325 +1.49 0.18986298266 0.041343142397 +1.5 0.16331164708 0.059564125665 +1.51 0.13742115 0.076957214701 +1.52 0.11224181642 0.093507941835 +1.53 0.087818451049 0.10920533268 +1.54 0.064195549675 0.12403864261 +1.55 0.041408563173 0.13800275831 +1.56 0.019495861764 0.15109077964 +1.57 -0.0015158417959 0.16330294016 +1.58 -0.021596235849 0.17463728157 +1.59 -0.040725475422 
0.18509813619 +1.6 -0.058881960784 0.19468876976 +1.61 -0.076052534895 0.20341742456 +1.62 -0.092224070602 0.21129232349 +1.63 -0.10739003129 0.21832549573 +1.64 -0.12154529981 0.2245297272 +1.65 -0.13468985191 0.22992068159 +1.66 -0.14682597759 0.23451524024 +1.67 -0.15795999363 0.23833250484 +1.68 -0.16810089293 0.24139293702 +1.69 -0.17726108092 0.24371882479 +1.7 -0.18545548467 0.24533367189 +1.71 -0.19270225049 0.24626263687 +1.72 -0.19902144307 0.24653172825 +1.73 -0.20443649328 0.2461685876 +1.74 -0.20897181658 0.24520121549 +1.75 -0.21265555109 0.24365927067 +1.76 -0.21551572366 0.24157228058 +1.77 -0.21758447021 0.23897137241 +1.78 -0.21889277302 0.23588720289 +1.79 -0.21947591783 0.23235173626 +1.8 -0.21936729817 0.22839644157 +1.81 -0.21860435564 0.224053418 +1.82 -0.2172225127 0.21935470487 +1.83 -0.21526018396 0.21433189433 +1.84 -0.21275469141 0.20901710885 +1.85 -0.2097443342 0.2034411581 +1.86 -0.20626747958 0.19763549074 +1.87 -0.20236211696 0.19163011691 +1.88 -0.19806642786 0.18545493817 +1.89 -0.19341811175 0.17913927614 +1.9 -0.18845384334 0.17271059028 +1.91 -0.18321119098 0.16619768388 +1.92 -0.17772431694 0.15962506074 +1.93 -0.17202997839 0.15302021981 +1.94 -0.1661601512 0.14640550171 +1.95 -0.16014881031 0.13980526812 +1.96 -0.15402698692 0.13324102441 +1.97 -0.14782425499 0.12673280543 +1.98 -0.14157131194 0.12030174533 +1.99 -0.13529270013 0.11396333406 +2.0 -0.12901767217 0.10773726718 +2.01 -0.12276761168 0.10163636831 +2.02 -0.11656673377 0.095675877908 +2.03 -0.1104363775 0.089868634417 +2.04 -0.10439399399 0.084224202689 +2.05 -0.098461395649 0.0787557483 +2.06 -0.09265062254 0.07346847772 +2.07 -0.086979074138 0.068371912909 +2.08 -0.081458820035 0.06347135213 +2.09 -0.076100125576 0.058770653954 +2.1 -0.070916200757 0.054275981844 +2.11 -0.065911720591 0.049986904382 +2.12 -0.061096352253 0.04590680729 +2.13 -0.05647481096 0.04203534513 +2.14 -0.052050288108 0.038371018518 +2.15 -0.047828987027 0.034914562031 +2.16 
-0.043809718782 0.031661293642 +2.17 -0.039995100139 0.028609286026 +2.18 -0.036385130803 0.025754761958 +2.19 -0.032977565996 0.023092357681 +2.2 -0.029772938118 0.020618609458 +2.21 -0.02676722353 0.018327015582 +2.22 -0.023957902849 0.016212045303 +2.23 -0.021342381035 0.014268136652 +2.24 -0.018915988096 0.01248852304 +2.25 -0.016675937928 0.010867451055 +2.26 -0.014617052811 0.0093979619195 +2.27 -0.012733401274 0.0080727169653 +2.28 -0.011021551686 0.0068855480404 +2.29 -0.0094727420429 0.0058278549281 +2.3 -0.0080800306285 0.004891808646 +2.31 -0.0068367888466 0.0040696905805 +2.32 -0.0057335326398 0.0033529863328 +2.33 -0.0047622051973 0.002733387986 +2.34 -0.0039142269282 0.0022027527767 +2.35 -0.003180287976 0.001753168643 +2.36 -0.0025514070016 0.0013760738931 +2.37 -0.002018701345 0.0010641189905 +2.38 -0.0015733073204 0.00081018747083 +2.39 -0.0012057416961 0.000605822272 +2.4 -0.00090807593031 0.00044534423372 +2.41 -0.00067211676854 0.00032260120986 +2.42 -0.00048836884014 0.00022964110509 +2.43 -0.00035064527094 0.00016295096584 +2.44 -0.00025146290618 0.00011730333122 +2.45 -0.00018232534191 8.6257107481e-05 +2.46 -0.00013859768358 6.798715492e-05 +2.47 -0.00011459867708 5.9010018177e-05 +2.48 -0.00010538902877 5.5962206051e-05 +2.49 -0.0001014873045 5.4745829591e-05 +2.5 -9.0886540089e-05 4.9286426798e-05 +2.51 -5.7587161462e-05 3.1411431787e-05 +2.52 -2.3406234748e-05 1.2820803441e-05 +2.53 2.0554012075e-06 -1.1129389439e-06 +2.54 6.3698356898e-06 -3.4490775717e-06 +2.55 4.1835875739e-06 -2.265288898e-06 +2.56 0.0 0.0 +2.57 0.0 0.0 +2.58 0.0 0.0 +2.59 0.0 0.0 +2.6 0.0 0.0 +2.61 0.0 0.0 +2.62 0.0 0.0 +2.63 0.0 0.0 +2.64 0.0 0.0 +2.65 0.0 0.0 +2.66 0.0 0.0 +2.67 0.0 0.0 +2.68 0.0 0.0 +2.69 0.0 0.0 +2.7 0.0 0.0 +2.71 0.0 0.0 +2.72 0.0 0.0 +2.73 0.0 0.0 +2.74 0.0 0.0 +2.75 0.0 0.0 +2.76 0.0 0.0 +2.77 0.0 0.0 +2.78 0.0 0.0 +2.79 0.0 0.0 +2.8 0.0 0.0 +2.81 0.0 0.0 +2.82 0.0 0.0 +2.83 0.0 0.0 +2.84 0.0 0.0 +2.85 0.0 0.0 +2.86 0.0 0.0 +2.87 0.0 0.0 +2.88 
0.0 0.0 +2.89 0.0 0.0 +2.9 0.0 0.0 +2.91 0.0 0.0 +2.92 0.0 0.0 +2.93 0.0 0.0 +2.94 0.0 0.0 +2.95 0.0 0.0 +2.96 0.0 0.0 +2.97 0.0 0.0 +2.98 0.0 0.0 +2.99 0.0 0.0 +3.0 0.0 0.0 +3.01 0.0 0.0 +3.02 0.0 0.0 +3.03 0.0 0.0 +3.04 0.0 0.0 +3.05 0.0 0.0 +3.06 0.0 0.0 +3.07 0.0 0.0 +3.08 0.0 0.0 +3.09 0.0 0.0 +3.1 0.0 0.0 +3.11 0.0 0.0 +3.12 0.0 0.0 +3.13 0.0 0.0 +3.14 0.0 0.0 +3.15 0.0 0.0 +3.16 0.0 0.0 +3.17 0.0 0.0 +3.18 0.0 0.0 +3.19 0.0 0.0 +3.2 0.0 0.0 +3.21 0.0 0.0 +3.22 0.0 0.0 +3.23 0.0 0.0 +3.24 0.0 0.0 +3.25 0.0 0.0 +3.26 0.0 0.0 +3.27 0.0 0.0 +3.28 0.0 0.0 +3.29 0.0 0.0 +3.3 0.0 0.0 +3.31 0.0 0.0 +3.32 0.0 0.0 +3.33 0.0 0.0 +3.34 0.0 0.0 +3.35 0.0 0.0 +3.36 0.0 0.0 +3.37 0.0 0.0 +3.38 0.0 0.0 +3.39 0.0 0.0 +3.4 0.0 0.0 +3.41 0.0 0.0 +3.42 0.0 0.0 +3.43 0.0 0.0 +3.44 0.0 0.0 +3.45 0.0 0.0 +3.46 0.0 0.0 +3.47 0.0 0.0 +3.48 0.0 0.0 +3.49 0.0 0.0 +3.5 0.0 0.0 +3.51 0.0 0.0 +3.52 0.0 0.0 +3.53 0.0 0.0 +3.54 0.0 0.0 +3.55 0.0 0.0 +3.56 0.0 0.0 +3.57 0.0 0.0 +3.58 0.0 0.0 +3.59 0.0 0.0 +3.6 0.0 0.0 +3.61 0.0 0.0 +3.62 0.0 0.0 +3.63 0.0 0.0 +3.64 0.0 0.0 +3.65 0.0 0.0 +3.66 0.0 0.0 +3.67 0.0 0.0 +3.68 0.0 0.0 +3.69 0.0 0.0 +3.7 0.0 0.0 +3.71 0.0 0.0 +3.72 0.0 0.0 +3.73 0.0 0.0 +3.74 0.0 0.0 +3.75 0.0 0.0 +3.76 0.0 0.0 +3.77 0.0 0.0 +3.78 0.0 0.0 +3.79 0.0 0.0 +3.8 0.0 0.0 +3.81 0.0 0.0 +3.82 0.0 0.0 +3.83 0.0 0.0 +3.84 0.0 0.0 +3.85 0.0 0.0 +3.86 0.0 0.0 +3.87 0.0 0.0 +3.88 0.0 0.0 +3.89 0.0 0.0 +3.9 0.0 0.0 +3.91 0.0 0.0 +3.92 0.0 0.0 +3.93 0.0 0.0 +3.94 0.0 0.0 +3.95 0.0 0.0 +3.96 0.0 0.0 +3.97 0.0 0.0 +3.98 0.0 0.0 +3.99 0.0 0.0 +4.0 0.0 0.0 +4.01 0.0 0.0 +4.02 0.0 0.0 +4.03 0.0 0.0 +4.04 0.0 0.0 +4.05 0.0 0.0 +4.06 0.0 0.0 +4.07 0.0 0.0 +4.08 0.0 0.0 +4.09 0.0 0.0 +4.1 0.0 0.0 +4.11 0.0 0.0 +4.12 0.0 0.0 +4.13 0.0 0.0 +4.14 0.0 0.0 +4.15 0.0 0.0 +4.16 0.0 0.0 +4.17 0.0 0.0 +4.18 0.0 0.0 +4.19 0.0 0.0 +4.2 0.0 0.0 +4.21 0.0 0.0 +4.22 0.0 0.0 +4.23 0.0 0.0 +4.24 0.0 0.0 +4.25 0.0 0.0 +4.26 0.0 0.0 +4.27 0.0 0.0 +4.28 0.0 0.0 +4.29 0.0 0.0 +4.3 0.0 0.0 +4.31 0.0 0.0 
+4.32 0.0 0.0 +4.33 0.0 0.0 +4.34 0.0 0.0 +4.35 0.0 0.0 +4.36 0.0 0.0 +4.37 0.0 0.0 +4.38 0.0 0.0 +4.39 0.0 0.0 +4.4 0.0 0.0 +4.41 0.0 0.0 +4.42 0.0 0.0 +4.43 0.0 0.0 +4.44 0.0 0.0 +4.45 0.0 0.0 +4.46 0.0 0.0 +4.47 0.0 0.0 +4.48 0.0 0.0 +4.49 0.0 0.0 +4.5 0.0 0.0 +4.51 0.0 0.0 +4.52 0.0 0.0 +4.53 0.0 0.0 +4.54 0.0 0.0 +4.55 0.0 0.0 +4.56 0.0 0.0 +4.57 0.0 0.0 +4.58 0.0 0.0 +4.59 0.0 0.0 +4.6 0.0 0.0 +4.61 0.0 0.0 +4.62 0.0 0.0 +4.63 0.0 0.0 +4.64 0.0 0.0 +4.65 0.0 0.0 +4.66 0.0 0.0 +4.67 0.0 0.0 +4.68 0.0 0.0 +4.69 0.0 0.0 +4.7 0.0 0.0 +4.71 0.0 0.0 +4.72 0.0 0.0 +4.73 0.0 0.0 +4.74 0.0 0.0 +4.75 0.0 0.0 +4.76 0.0 0.0 +4.77 0.0 0.0 +4.78 0.0 0.0 +4.79 0.0 0.0 +4.8 0.0 0.0 +4.81 0.0 0.0 +4.82 0.0 0.0 +4.83 0.0 0.0 +4.84 0.0 0.0 +4.85 0.0 0.0 +4.86 0.0 0.0 +4.87 0.0 0.0 +4.88 0.0 0.0 +4.89 0.0 0.0 +4.9 0.0 0.0 +4.91 0.0 0.0 +4.92 0.0 0.0 +4.93 0.0 0.0 +4.94 0.0 0.0 +4.95 0.0 0.0 +4.96 0.0 0.0 +4.97 0.0 0.0 +4.98 0.0 0.0 +4.99 0.0 0.0 +5.0 0.0 0.0 +5.01 0.0 0.0 +5.02 0.0 0.0 +5.03 0.0 0.0 +5.04 0.0 0.0 +5.05 0.0 0.0 +5.06 0.0 0.0 +5.07 0.0 0.0 +5.08 0.0 0.0 +5.09 0.0 0.0 +5.1 0.0 0.0 +5.11 0.0 0.0 +5.12 0.0 0.0 +5.13 0.0 0.0 +5.14 0.0 0.0 +5.15 0.0 0.0 +5.16 0.0 0.0 +5.17 0.0 0.0 +5.18 0.0 0.0 +5.19 0.0 0.0 +5.2 0.0 0.0 +5.21 0.0 0.0 +5.22 0.0 0.0 +5.23 0.0 0.0 +5.24 0.0 0.0 +5.25 0.0 0.0 +5.26 0.0 0.0 +5.27 0.0 0.0 +5.28 0.0 0.0 +5.29 0.0 0.0 +5.3 0.0 0.0 +5.31 0.0 0.0 +5.32 0.0 0.0 +5.33 0.0 0.0 +5.34 0.0 0.0 +5.35 0.0 0.0 +5.36 0.0 0.0 +5.37 0.0 0.0 +5.38 0.0 0.0 +5.39 0.0 0.0 +5.4 0.0 0.0 +5.41 0.0 0.0 +5.42 0.0 0.0 +5.43 0.0 0.0 +5.44 0.0 0.0 +5.45 0.0 0.0 +5.46 0.0 0.0 +5.47 0.0 0.0 +5.48 0.0 0.0 +5.49 0.0 0.0 +5.5 0.0 0.0 +5.51 0.0 0.0 +5.52 0.0 0.0 +5.53 0.0 0.0 +5.54 0.0 0.0 +5.55 0.0 0.0 +5.56 0.0 0.0 +5.57 0.0 0.0 +5.58 0.0 0.0 +5.59 0.0 0.0 +5.6 0.0 0.0 +5.61 0.0 0.0 +5.62 0.0 0.0 +5.63 0.0 0.0 +5.64 0.0 0.0 +5.65 0.0 0.0 +5.66 0.0 0.0 +5.67 0.0 0.0 +5.68 0.0 0.0 +5.69 0.0 0.0 +5.7 0.0 0.0 +5.71 0.0 0.0 +5.72 0.0 0.0 +5.73 0.0 0.0 +5.74 0.0 0.0 +5.75 0.0 
0.0 +5.76 0.0 0.0 +5.77 0.0 0.0 +5.78 0.0 0.0 +5.79 0.0 0.0 +5.8 0.0 0.0 +5.81 0.0 0.0 +5.82 0.0 0.0 +5.83 0.0 0.0 +5.84 0.0 0.0 +5.85 0.0 0.0 +5.86 0.0 0.0 +5.87 0.0 0.0 +5.88 0.0 0.0 +5.89 0.0 0.0 +5.9 0.0 0.0 +5.91 0.0 0.0 +5.92 0.0 0.0 +5.93 0.0 0.0 +5.94 0.0 0.0 +5.95 0.0 0.0 +5.96 0.0 0.0 +5.97 0.0 0.0 +5.98 0.0 0.0 +5.99 0.0 0.0 +6.0 0.0 0.0 +6.01 0.0 0.0 +# l= 1 +0.0 0.0 0.0 +0.01 0.16768810909 -0.09197895777 +0.02 0.33490177013 -0.18377178817 +0.03 0.50116814078 -0.27519297668 +0.04 0.66601758415 -0.36605823207 +0.05 0.82898525636 -0.45618509197 +0.06 0.98961267609 -0.54539352126 +0.07 1.1474492702 -0.63350650089 +0.08 1.3020538895 -0.7203506047 +0.09 1.4529962891 -0.80575656223 +0.1 1.5998585678 -0.88955980496 +0.11 1.7422365611 -0.97160099425 +0.12 1.8797411823 -1.0517265285 +0.13 2.0119997077 -1.129789028 +0.14 2.138657 -1.2056477949 +0.15 2.2593766658 -1.2791692479 +0.16 2.3738421434 -1.3502273279 +0.17 2.4817577163 -1.4187038753 +0.18 2.5828494491 -1.4844889757 +0.19 2.6768660426 -1.5474812743 +0.2 2.7635796037 -1.6075882562 +0.21 2.8427863308 -1.6647264933 +0.22 2.9143071076 -1.7188218556 +0.23 2.9779880072 -1.7698096863 +0.24 3.0337007024 -1.8176349412 +0.25 3.0813427856 -1.862252292 +0.26 3.1208379864 -1.9036261898 +0.27 3.1521363034 -1.9417308947 +0.28 3.1752140319 -1.9765504665 +0.29 3.1900737018 -2.0080787184 +0.3 3.1967439186 -2.0363191351 +0.31 3.1952791115 -2.0612847542 +0.32 3.1857591886 -2.0829980127 +0.33 3.1682891025 -2.1014905583 +0.34 3.1429983262 -2.1168030277 +0.35 3.110040245 -2.128984793 +0.36 3.0695914617 -2.1380936747 +0.37 3.0218510224 -2.1441956261 +0.38 2.9670395749 -2.1473643925 +0.39 2.9053984349 -2.1476811361 +0.4 2.8371885957 -2.1452340407 +0.41 2.7626896752 -2.1401178945 +0.42 2.6821987951 -2.1324336474 +0.43 2.596029406 -2.1222879509 +0.44 2.5045100614 -2.1097926797 +0.45 2.4079831454 -2.0950644377 +0.46 2.3068035597 -2.0782240514 +0.47 2.2013373814 -2.0593960544 +0.48 2.0919605029 -2.0387081682 +0.49 1.979057148 
-2.0162907368 +0.5 1.8630185217 -1.9922762184 +0.51 1.7442414153 -1.9667986605 +0.52 1.6231266566 -1.9399931205 +0.53 1.500077649 -1.9119951227 +0.54 1.3754991997 -1.88294023 +0.55 1.2497955863 -1.852963328 +0.56 1.123369708 -1.8221983257 +0.57 0.99662116191 -1.7907774522 +0.58 0.86994536282 -1.7588309508 +0.59 0.74373162879 -1.7264863891 +0.6 0.61836264621 -1.6938684873 +0.61 0.49421220571 -1.6610983091 +0.62 0.37164494723 -1.628293199 +0.63 0.25101461062 -1.5955661733 +0.64 0.1326628874 -1.5630255372 +0.65 0.016918843032 -1.5307747113 +0.66 -0.095902538203 -1.4989117459 +0.67 -0.20550134835 -1.4675290072 +0.68 -0.31159326886 -1.4367130384 +0.69 -0.4139103595 -1.4065443242 +0.7 -0.51220225666 -1.377096921 +0.71 -0.60623655903 -1.3484383751 +0.72 -0.6957993 -1.3206296129 +0.73 -0.78069554004 -1.2937247949 +0.74 -0.86074977919 -1.267771236 +0.75 -0.93580620366 -1.2428093839 +0.76 -1.0057293808 -1.218872661 +0.77 -1.0704039576 -1.1959876322 +0.78 -1.1297349296 -1.1741739926 +0.79 -1.1836476649 -1.1534446391 +0.8 -1.23208785 -1.1338057713 +0.81 -1.2750213562 -1.1152570226 +0.82 -1.3124340302 -1.0977916176 +0.83 -1.34433141 -1.0813965584 +0.84 -1.3707383679 -1.0660528348 +0.85 -1.3916986843 -1.0517356601 +0.86 -1.4072745528 -1.0384147296 +0.87 -1.4175460222 -1.0260545009 +0.88 -1.422610376 -1.0146144944 +0.89 -1.4225814539 -1.0040496119 +0.9 -1.4175889193 -0.99431047237 +0.91 -1.4077774754 -0.98534376271 +0.92 -1.393306035 -0.97709260167 +0.93 -1.3743468472 -0.96949691479 +0.94 -1.3510845875 -0.96249381864 +0.95 -1.3237154565 -0.95601798287 +0.96 -1.2924462012 -0.9500020467 +0.97 -1.2574929566 -0.94437710703 +0.98 -1.2190803772 -0.93907301456 +0.99 -1.1774405576 -0.9340188113 +1.0 -1.1328119871 -0.92914312957 +1.01 -1.0854385025 -0.92437458501 +1.02 -1.0355685243 -0.91964203242 +1.03 -0.98345355948 -0.9148751682 +1.04 -0.92934684605 -0.91000502329 +1.05 -0.87350300645 -0.90496400678 +1.06 -0.816176798 -0.89968636628 +1.07 -0.75762253462 -0.89410834934 +1.08 
-0.6980927773 -0.88816867824 +1.09 -0.63783592919 -0.88180942862 +1.1 -0.57709733472 -0.87497547403 +1.11 -0.51611779897 -0.86761500666 +1.12 -0.45513452401 -0.85967911403 +1.13 -0.39437481773 -0.8511240992 +1.14 -0.33406039561 -0.84190970478 +1.15 -0.27440564306 -0.83199970915 +1.16 -0.2156182574 -0.82136164295 +1.17 -0.15789253305 -0.80996907161 +1.18 -0.10141514283 -0.79779940144 +1.19 -0.046364491037 -0.78483408344 +1.2 0.0070936861676 -0.77106002408 +1.21 0.058806100624 -0.75646931936 +1.22 0.10862841944 -0.74105803457 +1.23 0.15642703646 -0.72482675268 +1.24 0.20208507927 -0.70778222023 +1.25 0.24549451563 -0.68993480428 +1.26 0.28655709783 -0.67129881979 +1.27 0.32519346013 -0.65189485473 +1.28 0.3613324373 -0.63174663121 +1.29 0.39491300311 -0.61088161585 +1.3 0.42589280427 -0.58933283509 +1.31 0.45423664439 -0.56713591443 +1.32 0.47992110013 -0.54433022226 +1.33 0.50293883528 -0.52095932209 +1.34 0.52328858456 -0.49706895242 +1.35 0.54098326189 -0.47270841667 +1.36 0.55604726384 -0.44792957482 +1.37 0.56851053426 -0.42278632121 +1.38 0.5784190736 -0.3973351547 +1.39 0.58582409631 -0.37163397592 +1.4 0.59078619814 -0.34574254453 +1.41 0.59337646951 -0.3197213658 +1.42 0.59367015608 -0.29363258185 +1.43 0.59175335557 -0.26753813652 +1.44 0.58771624712 -0.24150074456 +1.45 0.58165595284 -0.21558336285 +1.46 0.57367486997 -0.18984748079 +1.47 0.56387989108 -0.16435632354 +1.48 0.55238206952 -0.13916913159 +1.49 0.53929613871 -0.11434699408 +1.5 0.52473924056 -0.089947318262 +1.51 0.50883077179 -0.066027001308 +1.52 0.49169241327 -0.042641156566 +1.53 0.47344482644 -0.019841140122 +1.54 0.45421261948 0.0023209806635 +1.55 0.43411462436 0.023800386214 +1.56 0.41327662113 0.044548868159 +1.57 0.39181369387 0.064528338728 +1.58 0.36984863239 0.083696839073 +1.59 0.34749294808 0.10202192594 +1.6 0.32486335081 0.11946916202 +1.61 0.30206634054 0.13601212183 +1.62 0.27921056742 0.15162430019 +1.63 0.25639594087 0.16628574014 +1.64 0.23372158101 0.17997799414 +1.65 
0.21127927552 0.19268795322 +1.66 0.18915759431 0.20440513617 +1.67 0.16743876844 0.21512355823 +1.68 0.1462002202 0.22484043033 +1.69 0.12551333682 0.23355703571 +1.7 0.10544417695 0.24127785752 +1.71 0.08605219679 0.24801143198 +1.72 0.067392201978 0.25376898991 +1.73 0.049510885271 0.25856612142 +1.74 0.032452166492 0.26242022156 +1.75 0.016249456911 0.26535352678 +1.76 0.0009363277765 0.26738900334 +1.77 -0.01346722955 0.26855489619 +1.78 -0.026937386187 0.26887918915 +1.79 -0.039467262404 0.26839525362 +1.8 -0.051042203966 0.26713561709 +1.81 -0.061667545688 0.26513764513 +1.82 -0.071337628583 0.26243806272 +1.83 -0.08006745591 0.25907689464 +1.84 -0.087863054815 0.25509471266 +1.85 -0.094744724104 0.25053311675 +1.86 -0.10073011925 0.24543531389 +1.87 -0.10584404869 0.23984427835 +1.88 -0.11011403507 0.23380415827 +1.89 -0.11356907255 0.22735934704 +1.9 -0.11624417545 0.22055306109 +1.91 -0.11817265478 0.21343130087 +1.92 -0.11939406617 0.20603495539 +1.93 -0.11994678428 0.1984103598 +1.94 -0.1198720834 0.19059676551 +1.95 -0.11921240203 0.18263758346 +1.96 -0.11801032113 0.17457234326 +1.97 -0.1163093863 0.1664389658 +1.98 -0.11415448856 0.15827800515 +1.99 -0.11158739315 0.15012088135 +2.0 -0.10865483203 0.14200718854 +2.01 -0.10539681541 0.13396470695 +2.02 -0.10185745506 0.12602623509 +2.03 -0.098078174326 0.11822061558 +2.04 -0.094096518357 0.11057099092 +2.05 -0.089956724139 0.10310784468 +2.06 -0.085689851344 0.095846617351 +2.07 -0.081335739687 0.088811521611 +2.08 -0.07692635626 0.082018660843 +2.09 -0.072491428123 0.075481468962 +2.1 -0.068066323213 0.069218277483 +2.11 -0.063671603987 0.063233949849 +2.12 -0.059337660545 0.057541412683 +2.13 -0.055085519235 0.052145715315 +2.14 -0.050933564418 0.047049428304 +2.15 -0.046906930776 0.042260020905 +2.16 -0.043014644681 0.037772793136 +2.17 -0.039274785396 0.03358922043 +2.18 -0.035699058908 0.029706278232 +2.19 -0.032293597937 0.026116974923 +2.2 -0.029072509077 0.022818973327 +2.21 -0.026036085371 
0.019801644234 +2.22 -0.023189532297 0.017056816878 +2.23 -0.020537674284 0.014575946706 +2.24 -0.018077146728 0.012346366913 +2.25 -0.015810412032 0.010357603026 +2.26 -0.01373392254 0.0085968309318 +2.27 -0.011842116593 0.0070502736984 +2.28 -0.010133845747 0.0057048888874 +2.29 -0.0085993951615 0.0045459798677 +2.3 -0.0072316775018 0.0035587606278 +2.31 -0.0060239032063 0.0027280843754 +2.32 -0.0049651302707 0.0020399454291 +2.33 -0.0040460304292 0.0014785514847 +2.34 -0.0032566099898 0.0010293505502 +2.35 -0.0025860125292 0.00067967964524 +2.36 -0.002022999236 0.00041341795836 +2.37 -0.0015572677849 0.00021897250312 +2.38 -0.0011785405003 8.5467448311e-05 +2.39 -0.00087447260941 -2.6406389151e-06 +2.4 -0.00063670211965 -5.2354589398e-05 +2.41 -0.00045599180494 -7.2310661646e-05 +2.42 -0.00031988217501 -7.6466931673e-05 +2.43 -0.00022336127653 -6.6408931605e-05 +2.44 -0.00015833856538 -4.8596801665e-05 +2.45 -0.00011438780266 -3.2809722084e-05 +2.46 -8.9293750204e-05 -1.6747173877e-05 +2.47 -7.7560959826e-05 -3.3623200012e-06 +2.48 -7.3416025735e-05 3.0092130857e-06 +2.49 -7.2127889582e-05 6.165594886e-06 +2.5 -6.5365569508e-05 6.6445226696e-06 +2.51 -4.1806181002e-05 4.9128311678e-06 +2.52 -1.7125758779e-05 2.2069704328e-06 +2.53 1.4939152703e-06 -1.4573388792e-07 +2.54 4.6480499279e-06 -4.5342490346e-07 +2.55 3.0647699647e-06 -2.98973343e-07 +2.56 0.0 0.0 +2.57 0.0 0.0 +2.58 0.0 0.0 +2.59 0.0 0.0 +2.6 0.0 0.0 +2.61 0.0 0.0 +2.62 0.0 0.0 +2.63 0.0 0.0 +2.64 0.0 0.0 +2.65 0.0 0.0 +2.66 0.0 0.0 +2.67 0.0 0.0 +2.68 0.0 0.0 +2.69 0.0 0.0 +2.7 0.0 0.0 +2.71 0.0 0.0 +2.72 0.0 0.0 +2.73 0.0 0.0 +2.74 0.0 0.0 +2.75 0.0 0.0 +2.76 0.0 0.0 +2.77 0.0 0.0 +2.78 0.0 0.0 +2.79 0.0 0.0 +2.8 0.0 0.0 +2.81 0.0 0.0 +2.82 0.0 0.0 +2.83 0.0 0.0 +2.84 0.0 0.0 +2.85 0.0 0.0 +2.86 0.0 0.0 +2.87 0.0 0.0 +2.88 0.0 0.0 +2.89 0.0 0.0 +2.9 0.0 0.0 +2.91 0.0 0.0 +2.92 0.0 0.0 +2.93 0.0 0.0 +2.94 0.0 0.0 +2.95 0.0 0.0 +2.96 0.0 0.0 +2.97 0.0 0.0 +2.98 0.0 0.0 +2.99 0.0 0.0 +3.0 0.0 0.0 +3.01 
0.0 0.0 +3.02 0.0 0.0 +3.03 0.0 0.0 +3.04 0.0 0.0 +3.05 0.0 0.0 +3.06 0.0 0.0 +3.07 0.0 0.0 +3.08 0.0 0.0 +3.09 0.0 0.0 +3.1 0.0 0.0 +3.11 0.0 0.0 +3.12 0.0 0.0 +3.13 0.0 0.0 +3.14 0.0 0.0 +3.15 0.0 0.0 +3.16 0.0 0.0 +3.17 0.0 0.0 +3.18 0.0 0.0 +3.19 0.0 0.0 +3.2 0.0 0.0 +3.21 0.0 0.0 +3.22 0.0 0.0 +3.23 0.0 0.0 +3.24 0.0 0.0 +3.25 0.0 0.0 +3.26 0.0 0.0 +3.27 0.0 0.0 +3.28 0.0 0.0 +3.29 0.0 0.0 +3.3 0.0 0.0 +3.31 0.0 0.0 +3.32 0.0 0.0 +3.33 0.0 0.0 +3.34 0.0 0.0 +3.35 0.0 0.0 +3.36 0.0 0.0 +3.37 0.0 0.0 +3.38 0.0 0.0 +3.39 0.0 0.0 +3.4 0.0 0.0 +3.41 0.0 0.0 +3.42 0.0 0.0 +3.43 0.0 0.0 +3.44 0.0 0.0 +3.45 0.0 0.0 +3.46 0.0 0.0 +3.47 0.0 0.0 +3.48 0.0 0.0 +3.49 0.0 0.0 +3.5 0.0 0.0 +3.51 0.0 0.0 +3.52 0.0 0.0 +3.53 0.0 0.0 +3.54 0.0 0.0 +3.55 0.0 0.0 +3.56 0.0 0.0 +3.57 0.0 0.0 +3.58 0.0 0.0 +3.59 0.0 0.0 +3.6 0.0 0.0 +3.61 0.0 0.0 +3.62 0.0 0.0 +3.63 0.0 0.0 +3.64 0.0 0.0 +3.65 0.0 0.0 +3.66 0.0 0.0 +3.67 0.0 0.0 +3.68 0.0 0.0 +3.69 0.0 0.0 +3.7 0.0 0.0 +3.71 0.0 0.0 +3.72 0.0 0.0 +3.73 0.0 0.0 +3.74 0.0 0.0 +3.75 0.0 0.0 +3.76 0.0 0.0 +3.77 0.0 0.0 +3.78 0.0 0.0 +3.79 0.0 0.0 +3.8 0.0 0.0 +3.81 0.0 0.0 +3.82 0.0 0.0 +3.83 0.0 0.0 +3.84 0.0 0.0 +3.85 0.0 0.0 +3.86 0.0 0.0 +3.87 0.0 0.0 +3.88 0.0 0.0 +3.89 0.0 0.0 +3.9 0.0 0.0 +3.91 0.0 0.0 +3.92 0.0 0.0 +3.93 0.0 0.0 +3.94 0.0 0.0 +3.95 0.0 0.0 +3.96 0.0 0.0 +3.97 0.0 0.0 +3.98 0.0 0.0 +3.99 0.0 0.0 +4.0 0.0 0.0 +4.01 0.0 0.0 +4.02 0.0 0.0 +4.03 0.0 0.0 +4.04 0.0 0.0 +4.05 0.0 0.0 +4.06 0.0 0.0 +4.07 0.0 0.0 +4.08 0.0 0.0 +4.09 0.0 0.0 +4.1 0.0 0.0 +4.11 0.0 0.0 +4.12 0.0 0.0 +4.13 0.0 0.0 +4.14 0.0 0.0 +4.15 0.0 0.0 +4.16 0.0 0.0 +4.17 0.0 0.0 +4.18 0.0 0.0 +4.19 0.0 0.0 +4.2 0.0 0.0 +4.21 0.0 0.0 +4.22 0.0 0.0 +4.23 0.0 0.0 +4.24 0.0 0.0 +4.25 0.0 0.0 +4.26 0.0 0.0 +4.27 0.0 0.0 +4.28 0.0 0.0 +4.29 0.0 0.0 +4.3 0.0 0.0 +4.31 0.0 0.0 +4.32 0.0 0.0 +4.33 0.0 0.0 +4.34 0.0 0.0 +4.35 0.0 0.0 +4.36 0.0 0.0 +4.37 0.0 0.0 +4.38 0.0 0.0 +4.39 0.0 0.0 +4.4 0.0 0.0 +4.41 0.0 0.0 +4.42 0.0 0.0 +4.43 0.0 0.0 +4.44 0.0 0.0 
+4.45 0.0 0.0 +4.46 0.0 0.0 +4.47 0.0 0.0 +4.48 0.0 0.0 +4.49 0.0 0.0 +4.5 0.0 0.0 +4.51 0.0 0.0 +4.52 0.0 0.0 +4.53 0.0 0.0 +4.54 0.0 0.0 +4.55 0.0 0.0 +4.56 0.0 0.0 +4.57 0.0 0.0 +4.58 0.0 0.0 +4.59 0.0 0.0 +4.6 0.0 0.0 +4.61 0.0 0.0 +4.62 0.0 0.0 +4.63 0.0 0.0 +4.64 0.0 0.0 +4.65 0.0 0.0 +4.66 0.0 0.0 +4.67 0.0 0.0 +4.68 0.0 0.0 +4.69 0.0 0.0 +4.7 0.0 0.0 +4.71 0.0 0.0 +4.72 0.0 0.0 +4.73 0.0 0.0 +4.74 0.0 0.0 +4.75 0.0 0.0 +4.76 0.0 0.0 +4.77 0.0 0.0 +4.78 0.0 0.0 +4.79 0.0 0.0 +4.8 0.0 0.0 +4.81 0.0 0.0 +4.82 0.0 0.0 +4.83 0.0 0.0 +4.84 0.0 0.0 +4.85 0.0 0.0 +4.86 0.0 0.0 +4.87 0.0 0.0 +4.88 0.0 0.0 +4.89 0.0 0.0 +4.9 0.0 0.0 +4.91 0.0 0.0 +4.92 0.0 0.0 +4.93 0.0 0.0 +4.94 0.0 0.0 +4.95 0.0 0.0 +4.96 0.0 0.0 +4.97 0.0 0.0 +4.98 0.0 0.0 +4.99 0.0 0.0 +5.0 0.0 0.0 +5.01 0.0 0.0 +5.02 0.0 0.0 +5.03 0.0 0.0 +5.04 0.0 0.0 +5.05 0.0 0.0 +5.06 0.0 0.0 +5.07 0.0 0.0 +5.08 0.0 0.0 +5.09 0.0 0.0 +5.1 0.0 0.0 +5.11 0.0 0.0 +5.12 0.0 0.0 +5.13 0.0 0.0 +5.14 0.0 0.0 +5.15 0.0 0.0 +5.16 0.0 0.0 +5.17 0.0 0.0 +5.18 0.0 0.0 +5.19 0.0 0.0 +5.2 0.0 0.0 +5.21 0.0 0.0 +5.22 0.0 0.0 +5.23 0.0 0.0 +5.24 0.0 0.0 +5.25 0.0 0.0 +5.26 0.0 0.0 +5.27 0.0 0.0 +5.28 0.0 0.0 +5.29 0.0 0.0 +5.3 0.0 0.0 +5.31 0.0 0.0 +5.32 0.0 0.0 +5.33 0.0 0.0 +5.34 0.0 0.0 +5.35 0.0 0.0 +5.36 0.0 0.0 +5.37 0.0 0.0 +5.38 0.0 0.0 +5.39 0.0 0.0 +5.4 0.0 0.0 +5.41 0.0 0.0 +5.42 0.0 0.0 +5.43 0.0 0.0 +5.44 0.0 0.0 +5.45 0.0 0.0 +5.46 0.0 0.0 +5.47 0.0 0.0 +5.48 0.0 0.0 +5.49 0.0 0.0 +5.5 0.0 0.0 +5.51 0.0 0.0 +5.52 0.0 0.0 +5.53 0.0 0.0 +5.54 0.0 0.0 +5.55 0.0 0.0 +5.56 0.0 0.0 +5.57 0.0 0.0 +5.58 0.0 0.0 +5.59 0.0 0.0 +5.6 0.0 0.0 +5.61 0.0 0.0 +5.62 0.0 0.0 +5.63 0.0 0.0 +5.64 0.0 0.0 +5.65 0.0 0.0 +5.66 0.0 0.0 +5.67 0.0 0.0 +5.68 0.0 0.0 +5.69 0.0 0.0 +5.7 0.0 0.0 +5.71 0.0 0.0 +5.72 0.0 0.0 +5.73 0.0 0.0 +5.74 0.0 0.0 +5.75 0.0 0.0 +5.76 0.0 0.0 +5.77 0.0 0.0 +5.78 0.0 0.0 +5.79 0.0 0.0 +5.8 0.0 0.0 +5.81 0.0 0.0 +5.82 0.0 0.0 +5.83 0.0 0.0 +5.84 0.0 0.0 +5.85 0.0 0.0 +5.86 0.0 0.0 +5.87 0.0 0.0 +5.88 0.0 
0.0 +5.89 0.0 0.0 +5.9 0.0 0.0 +5.91 0.0 0.0 +5.92 0.0 0.0 +5.93 0.0 0.0 +5.94 0.0 0.0 +5.95 0.0 0.0 +5.96 0.0 0.0 +5.97 0.0 0.0 +5.98 0.0 0.0 +5.99 0.0 0.0 +6.0 0.0 0.0 +6.01 0.0 0.0 +# l= 2 +0.0 0.0 0.0 +0.01 -0.0016302809637 0.00035794683369 +0.02 -0.0065163515418 0.0014296822768 +0.03 -0.014643908542 0.0032089002835 +0.04 -0.025989159095 0.005685121165 +0.05 -0.040518889195 0.0088437370407 +0.06 -0.058190559384 0.012666075205 +0.07 -0.07895242731 0.017129479149 +0.08 -0.10274369681 0.022207406906 +0.09 -0.12949469307 0.027869546304 +0.1 -0.15912706346 0.034081946665 +0.11 -0.1915540033 0.040807166393 +0.12 -0.22668050609 0.048004435845 +0.13 -0.26440363738 0.055629834819 +0.14 -0.30461283157 0.06363648392 +0.15 -0.34719021073 0.07197474901 +0.16 -0.39201092451 0.08059245791 +0.17 -0.43894351037 0.089435128454 +0.18 -0.48785027272 0.098446206939 +0.19 -0.53858768033 0.10756731604 +0.2 -0.59100678041 0.11673851104 +0.21 -0.64495362865 0.12589854357 +0.22 -0.70026973351 0.13498513144 +0.23 -0.75679251382 0.14393523369 +0.24 -0.81435576809 0.15268532967 +0.25 -0.87279015472 0.16117170107 +0.26 -0.93192368055 0.16933071533 +0.27 -0.99158219813 0.17709911015 +0.28 -1.0515899082 0.18441427678 +0.29 -1.1117698675 0.19121454188 +0.3 -1.1719445 0.19743944636 +0.31 -1.2319361091 0.20303002007 +0.32 -1.2915673908 0.20792905119 +0.33 -1.3506619462 0.21208134961 +0.34 -1.4090447891 0.21543400246 +0.35 -1.4665428517 0.21793662148 +0.36 -1.522985483 0.21954158083 +0.37 -1.5782049408 0.22020424436 +0.38 -1.632036874 0.21988318183 +0.39 -1.6843207957 0.21854037236 +0.4 -1.7349005444 0.21614139504 +0.41 -1.7836247317 0.21265560581 +0.42 -1.8303471759 0.20805629957 +0.43 -1.8749273204 0.20232085696 +0.44 -1.9172306348 0.19543087537 +0.45 -1.9571289972 0.18737228343 +0.46 -1.9945010581 0.17813543873 +0.47 -2.0292325838 0.16771520866 +0.48 -2.0612167791 0.15611103466 +0.49 -2.090354582 0.14332697147 +0.5 -2.1165549426 0.12937171885 +0.51 -2.1397350778 0.11425863393 +0.52 
-2.1598206914 0.098005713701 +0.53 -2.176746171 0.080635565211 +0.54 -2.190454786 0.062175383691 +0.55 -2.2008987911 0.04265684497 +0.56 -2.2080395975 0.022116075258 +0.57 -2.211847815 0.00059350427024 +0.58 -2.2123033659 -0.02186620296 +0.59 -2.2093954581 -0.045214335243 +0.6 -2.203122684 -0.06939820269 +0.61 -2.1934928744 -0.09436140661 +0.62 -2.1805231747 -0.12004391211 +0.63 -2.1642398919 -0.14638230327 +0.64 -2.1446783791 -0.17330999434 +0.65 -2.1218829812 -0.20075738758 +0.66 -2.0959068035 -0.22865215777 +0.67 -2.066811508 -0.25691950291 +0.68 -2.0346671658 -0.28548234681 +0.69 -1.999552031 -0.31426159213 +0.7 -1.961552178 -0.34317645587 +0.71 -1.9207612656 -0.37214470648 +0.72 -1.8772802547 -0.40108292621 +0.73 -1.8312170584 -0.42990680852 +0.74 -1.7826861934 -0.45853144401 +0.75 -1.7318084432 -0.48687159166 +0.76 -1.6787103295 -0.51484205255 +0.77 -1.6235238527 -0.542357874 +0.78 -1.5663860216 -0.56933467063 +0.79 -1.5074384166 -0.59568891442 +0.8 -1.4468267401 -0.62133822183 +0.81 -1.3847003559 -0.64620163671 +0.82 -1.3212118192 -0.6701999082 +0.83 -1.2565163983 -0.69325576249 +0.84 -1.1907715897 -0.71529416746 +0.85 -1.1241366287 -0.73624258926 +0.86 -1.0567719967 -0.75603123994 +0.87 -0.98883892656 -0.77459331519 +0.88 -0.92049890866 -0.79186522139 +0.89 -0.85191319821 -0.80778679106 +0.9 -0.78324232642 -0.82230148608 +0.91 -0.71464561672 -0.83535658783 +0.92 -0.64628070784 -0.84690337375 +0.93 -0.57830308546 -0.85689727953 +0.94 -0.51086562408 -0.86529804664 +0.95 -0.4441181986 -0.8720698411 +0.96 -0.37820717285 -0.87718139491 +0.97 -0.31327485984 -0.88060612622 +0.98 -0.24945933646 -0.88232217166 +0.99 -0.18689397376 -0.88231247569 +1.0 -0.12570707039 -0.88056484165 +1.01 -0.066021505913 -0.87707196654 +1.02 -0.0079545268368 -0.871831461 +1.03 0.048382799934 -0.86484587151 +1.04 0.10288589917 -0.85612263048 +1.05 0.15545667375 -0.84567402721 +1.06 0.20600385579 -0.83351716716 +1.07 0.25444313475 -0.81967394231 +1.08 0.30069748155 -0.80417095395 +1.09 
0.34469751688 -0.78703927055 +1.1 0.38638120612 -0.76831446513 +1.11 0.42569413044 -0.74803646205 +1.12 0.46258944515 -0.72624964296 +1.13 0.49702846809 -0.70300202094 +1.14 0.52898007647 -0.67834564529 +1.15 0.5584209087 -0.65233634601 +1.16 0.58533542208 -0.62503380899 +1.17 0.60971583811 -0.59650035482 +1.18 0.63156188005 -0.56680171908 +1.19 0.65088091394 -0.5360069337 +1.2 0.66768769801 -0.5041873814 +1.21 0.68200399062 -0.47141655935 +1.22 0.69385878215 -0.43777069314 +1.23 0.70328821897 -0.40332830942 +1.24 0.71033429279 -0.3681684753 +1.25 0.71504576268 -0.33237249494 +1.26 0.71747825821 -0.29602370728 +1.27 0.71769167806 -0.25920453432 +1.28 0.7157522691 -0.22199923787 +1.29 0.71173247372 -0.18449341844 +1.3 0.70570736789 -0.14677065047 +1.31 0.69775812741 -0.10891610311 +1.32 0.68797057633 -0.071014918121 +1.33 0.67643200525 -0.033149740959 +1.34 0.66323581364 0.0045952849381 +1.35 0.64847703464 0.042138299609 +1.36 0.63225221039 0.079399428431 +1.37 0.61466413529 0.1162974046 +1.38 0.59581231224 0.15275639672 +1.39 0.57580091001 0.18870011769 +1.4 0.55473560956 0.22405425572 +1.41 0.53271899406 0.25874913705 +1.42 0.50985999692 0.29271380992 +1.43 0.48626100505 0.32588396457 +1.44 0.462028045 0.35819554496 +1.45 0.43726595407 0.3895878196 +1.46 0.41207481124 0.42000499326 +1.47 0.38656057126 0.44939063297 +1.48 0.3608175272 0.47769672342 +1.49 0.33494686826 0.5048740466 +1.5 0.30904061714 0.53088029232 +1.51 0.28319158621 0.55567492895 +1.52 0.25748919177 0.57922141737 +1.53 0.2320165748 0.60148809531 +1.54 0.20685930393 0.62244468231 +1.55 0.18209023524 0.64206822004 +1.56 0.15778974576 0.66033516372 +1.57 0.13402102121 0.67723064065 +1.58 0.11085528354 0.69273914034 +1.59 0.088348538575 0.70685266968 +1.6 0.066561188568 0.71956401527 +1.61 0.045541575888 0.73087203775 +1.62 0.025339115934 0.74077747805 +1.63 0.0059938468033 0.74928603386 +1.64 -0.012455641987 0.75640594374 +1.65 -0.02997814 0.762149628 +1.66 -0.046545525399 0.76653231439 +1.67 
-0.062135771965 0.76957291518 +1.68 -0.07673072274 0.77129311635 +1.69 -0.090317688661 0.77171796373 +1.7 -0.10288787787 0.77087507287 +1.71 -0.11443785977 0.76879513028 +1.72 -0.12496755668 0.76551117705 +1.73 -0.13448246389 0.76105890947 +1.74 -0.14299056152 0.75547633432 +1.75 -0.15050566682 0.74880338946 +1.76 -0.15704320029 0.74108264673 +1.77 -0.162624416 0.73235738719 +1.78 -0.167271634 0.72267441308 +1.79 -0.17101238302 0.71207933872 +1.8 -0.17387544024 0.70062292022 +1.81 -0.17589320453 0.68835205878 +1.82 -0.17710054196 0.67532110015 +1.83 -0.17753340761 0.66157787454 +1.84 -0.17723165939 0.64717812643 +1.85 -0.17623431712 0.63217172116 +1.86 -0.17458444823 0.61661402113 +1.87 -0.17232411255 0.60055706063 +1.88 -0.16949748901 0.58405433927 +1.89 -0.16614984949 0.56716024762 +1.9 -0.16232429048 0.54992499374 +1.91 -0.15806950473 0.53240543237 +1.92 -0.15342617165 0.51464805908 +1.93 -0.14844426834 0.49670968764 +1.94 -0.14316373324 0.47863572096 +1.95 -0.13763122805 0.46047840766 +1.96 -0.13188874746 0.44228507248 +1.97 -0.12597657543 0.42410101919 +1.98 -0.1199394249 0.40597590216 +1.99 -0.11381039894 0.38794793686 +2.0 -0.10763397714 0.37006616432 +2.01 -0.10144010235 0.35236584341 +2.02 -0.095265583264 0.33488859255 +2.03 -0.089142973563 0.31767206088 +2.04 -0.083098195882 0.30074770623 +2.05 -0.077166630832 0.28415568454 +2.06 -0.071365978984 0.26791978875 +2.07 -0.065724731008 0.25207376486 +2.08 -0.060262269553 0.23664279998 +2.09 -0.054995055424 0.22164931787 +2.1 -0.049945501387 0.2071213062 +2.11 -0.045120802665 0.19307206246 +2.12 -0.040537413162 0.17952387706 +2.13 -0.036203128407 0.16649020909 +2.14 -0.032122968947 0.15398166035 +2.15 -0.028307414412 0.1420144928 +2.16 -0.024753587768 0.13059030089 +2.17 -0.02146523645 0.11971812052 +2.18 -0.018441211133 0.10940089432 +2.19 -0.015675932017 0.099636041806 +2.2 -0.013168594079 0.090427492502 +2.21 -0.010909475495 0.081766015473 +2.22 -0.0088911546145 0.073646095504 +2.23 -0.00710559859 
0.06606113076 +2.24 -0.0055403251186 0.058996127683 +2.25 -0.0041843627321 0.052439195792 +2.26 -0.00302493571 0.046375295857 +2.27 -0.002048258537 0.040787704768 +2.28 -0.0012396425611 0.03565823697 +2.29 -0.00058545857417 0.030970770784 +2.3 -7.0524374943e-05 0.026706982272 +2.31 0.00032128610825 0.022847262124 +2.32 0.00060267490947 0.019374509348 +2.33 0.00079004202243 0.016268357268 +2.34 0.00089784358158 0.013509078375 +2.35 0.00093745353246 0.011078294802 +2.36 0.00092553347338 0.0089539544753 +2.37 0.00087252062706 0.0071167327738 +2.38 0.00078791337712 0.0055471546788 +2.39 0.00068748885458 0.0042218314666 +2.4 0.00057611638219 0.0031221978739 +2.41 0.0004608042569 0.0022279088413 +2.42 0.00035518992628 0.0015146245011 +2.43 0.00025785924786 0.00096472501905 +2.44 0.00017329303283 0.00055694774566 +2.45 0.00010989139907 0.00026733139288 +2.46 6.1872491483e-05 7.829302357e-05 +2.47 2.953833815e-05 -3.334821152e-05 +2.48 1.4806544348e-05 -9.5806441567e-05 +2.49 9.6783992659e-06 -0.00011221658675 +2.5 8.5136479095e-06 -9.7376625643e-05 +2.51 4.2505513471e-06 -6.772287195e-05 +2.52 1.390094449e-06 -2.9382005001e-05 +2.53 -2.082365022e-07 2.20387025e-06 +2.54 -6.504514312e-07 6.8840503135e-06 +2.55 -4.3057462874e-07 4.5569849889e-06 +2.56 0.0 0.0 +2.57 0.0 0.0 +2.58 0.0 0.0 +2.59 0.0 0.0 +2.6 0.0 0.0 +2.61 0.0 0.0 +2.62 0.0 0.0 +2.63 0.0 0.0 +2.64 0.0 0.0 +2.65 0.0 0.0 +2.66 0.0 0.0 +2.67 0.0 0.0 +2.68 0.0 0.0 +2.69 0.0 0.0 +2.7 0.0 0.0 +2.71 0.0 0.0 +2.72 0.0 0.0 +2.73 0.0 0.0 +2.74 0.0 0.0 +2.75 0.0 0.0 +2.76 0.0 0.0 +2.77 0.0 0.0 +2.78 0.0 0.0 +2.79 0.0 0.0 +2.8 0.0 0.0 +2.81 0.0 0.0 +2.82 0.0 0.0 +2.83 0.0 0.0 +2.84 0.0 0.0 +2.85 0.0 0.0 +2.86 0.0 0.0 +2.87 0.0 0.0 +2.88 0.0 0.0 +2.89 0.0 0.0 +2.9 0.0 0.0 +2.91 0.0 0.0 +2.92 0.0 0.0 +2.93 0.0 0.0 +2.94 0.0 0.0 +2.95 0.0 0.0 +2.96 0.0 0.0 +2.97 0.0 0.0 +2.98 0.0 0.0 +2.99 0.0 0.0 +3.0 0.0 0.0 +3.01 0.0 0.0 +3.02 0.0 0.0 +3.03 0.0 0.0 +3.04 0.0 0.0 +3.05 0.0 0.0 +3.06 0.0 0.0 +3.07 0.0 0.0 +3.08 0.0 0.0 +3.09 
0.0 0.0 +3.1 0.0 0.0 +3.11 0.0 0.0 +3.12 0.0 0.0 +3.13 0.0 0.0 +3.14 0.0 0.0 +3.15 0.0 0.0 +3.16 0.0 0.0 +3.17 0.0 0.0 +3.18 0.0 0.0 +3.19 0.0 0.0 +3.2 0.0 0.0 +3.21 0.0 0.0 +3.22 0.0 0.0 +3.23 0.0 0.0 +3.24 0.0 0.0 +3.25 0.0 0.0 +3.26 0.0 0.0 +3.27 0.0 0.0 +3.28 0.0 0.0 +3.29 0.0 0.0 +3.3 0.0 0.0 +3.31 0.0 0.0 +3.32 0.0 0.0 +3.33 0.0 0.0 +3.34 0.0 0.0 +3.35 0.0 0.0 +3.36 0.0 0.0 +3.37 0.0 0.0 +3.38 0.0 0.0 +3.39 0.0 0.0 +3.4 0.0 0.0 +3.41 0.0 0.0 +3.42 0.0 0.0 +3.43 0.0 0.0 +3.44 0.0 0.0 +3.45 0.0 0.0 +3.46 0.0 0.0 +3.47 0.0 0.0 +3.48 0.0 0.0 +3.49 0.0 0.0 +3.5 0.0 0.0 +3.51 0.0 0.0 +3.52 0.0 0.0 +3.53 0.0 0.0 +3.54 0.0 0.0 +3.55 0.0 0.0 +3.56 0.0 0.0 +3.57 0.0 0.0 +3.58 0.0 0.0 +3.59 0.0 0.0 +3.6 0.0 0.0 +3.61 0.0 0.0 +3.62 0.0 0.0 +3.63 0.0 0.0 +3.64 0.0 0.0 +3.65 0.0 0.0 +3.66 0.0 0.0 +3.67 0.0 0.0 +3.68 0.0 0.0 +3.69 0.0 0.0 +3.7 0.0 0.0 +3.71 0.0 0.0 +3.72 0.0 0.0 +3.73 0.0 0.0 +3.74 0.0 0.0 +3.75 0.0 0.0 +3.76 0.0 0.0 +3.77 0.0 0.0 +3.78 0.0 0.0 +3.79 0.0 0.0 +3.8 0.0 0.0 +3.81 0.0 0.0 +3.82 0.0 0.0 +3.83 0.0 0.0 +3.84 0.0 0.0 +3.85 0.0 0.0 +3.86 0.0 0.0 +3.87 0.0 0.0 +3.88 0.0 0.0 +3.89 0.0 0.0 +3.9 0.0 0.0 +3.91 0.0 0.0 +3.92 0.0 0.0 +3.93 0.0 0.0 +3.94 0.0 0.0 +3.95 0.0 0.0 +3.96 0.0 0.0 +3.97 0.0 0.0 +3.98 0.0 0.0 +3.99 0.0 0.0 +4.0 0.0 0.0 +4.01 0.0 0.0 +4.02 0.0 0.0 +4.03 0.0 0.0 +4.04 0.0 0.0 +4.05 0.0 0.0 +4.06 0.0 0.0 +4.07 0.0 0.0 +4.08 0.0 0.0 +4.09 0.0 0.0 +4.1 0.0 0.0 +4.11 0.0 0.0 +4.12 0.0 0.0 +4.13 0.0 0.0 +4.14 0.0 0.0 +4.15 0.0 0.0 +4.16 0.0 0.0 +4.17 0.0 0.0 +4.18 0.0 0.0 +4.19 0.0 0.0 +4.2 0.0 0.0 +4.21 0.0 0.0 +4.22 0.0 0.0 +4.23 0.0 0.0 +4.24 0.0 0.0 +4.25 0.0 0.0 +4.26 0.0 0.0 +4.27 0.0 0.0 +4.28 0.0 0.0 +4.29 0.0 0.0 +4.3 0.0 0.0 +4.31 0.0 0.0 +4.32 0.0 0.0 +4.33 0.0 0.0 +4.34 0.0 0.0 +4.35 0.0 0.0 +4.36 0.0 0.0 +4.37 0.0 0.0 +4.38 0.0 0.0 +4.39 0.0 0.0 +4.4 0.0 0.0 +4.41 0.0 0.0 +4.42 0.0 0.0 +4.43 0.0 0.0 +4.44 0.0 0.0 +4.45 0.0 0.0 +4.46 0.0 0.0 +4.47 0.0 0.0 +4.48 0.0 0.0 +4.49 0.0 0.0 +4.5 0.0 0.0 +4.51 0.0 0.0 +4.52 0.0 0.0 
+4.53 0.0 0.0 +4.54 0.0 0.0 +4.55 0.0 0.0 +4.56 0.0 0.0 +4.57 0.0 0.0 +4.58 0.0 0.0 +4.59 0.0 0.0 +4.6 0.0 0.0 +4.61 0.0 0.0 +4.62 0.0 0.0 +4.63 0.0 0.0 +4.64 0.0 0.0 +4.65 0.0 0.0 +4.66 0.0 0.0 +4.67 0.0 0.0 +4.68 0.0 0.0 +4.69 0.0 0.0 +4.7 0.0 0.0 +4.71 0.0 0.0 +4.72 0.0 0.0 +4.73 0.0 0.0 +4.74 0.0 0.0 +4.75 0.0 0.0 +4.76 0.0 0.0 +4.77 0.0 0.0 +4.78 0.0 0.0 +4.79 0.0 0.0 +4.8 0.0 0.0 +4.81 0.0 0.0 +4.82 0.0 0.0 +4.83 0.0 0.0 +4.84 0.0 0.0 +4.85 0.0 0.0 +4.86 0.0 0.0 +4.87 0.0 0.0 +4.88 0.0 0.0 +4.89 0.0 0.0 +4.9 0.0 0.0 +4.91 0.0 0.0 +4.92 0.0 0.0 +4.93 0.0 0.0 +4.94 0.0 0.0 +4.95 0.0 0.0 +4.96 0.0 0.0 +4.97 0.0 0.0 +4.98 0.0 0.0 +4.99 0.0 0.0 +5.0 0.0 0.0 +5.01 0.0 0.0 +5.02 0.0 0.0 +5.03 0.0 0.0 +5.04 0.0 0.0 +5.05 0.0 0.0 +5.06 0.0 0.0 +5.07 0.0 0.0 +5.08 0.0 0.0 +5.09 0.0 0.0 +5.1 0.0 0.0 +5.11 0.0 0.0 +5.12 0.0 0.0 +5.13 0.0 0.0 +5.14 0.0 0.0 +5.15 0.0 0.0 +5.16 0.0 0.0 +5.17 0.0 0.0 +5.18 0.0 0.0 +5.19 0.0 0.0 +5.2 0.0 0.0 +5.21 0.0 0.0 +5.22 0.0 0.0 +5.23 0.0 0.0 +5.24 0.0 0.0 +5.25 0.0 0.0 +5.26 0.0 0.0 +5.27 0.0 0.0 +5.28 0.0 0.0 +5.29 0.0 0.0 +5.3 0.0 0.0 +5.31 0.0 0.0 +5.32 0.0 0.0 +5.33 0.0 0.0 +5.34 0.0 0.0 +5.35 0.0 0.0 +5.36 0.0 0.0 +5.37 0.0 0.0 +5.38 0.0 0.0 +5.39 0.0 0.0 +5.4 0.0 0.0 +5.41 0.0 0.0 +5.42 0.0 0.0 +5.43 0.0 0.0 +5.44 0.0 0.0 +5.45 0.0 0.0 +5.46 0.0 0.0 +5.47 0.0 0.0 +5.48 0.0 0.0 +5.49 0.0 0.0 +5.5 0.0 0.0 +5.51 0.0 0.0 +5.52 0.0 0.0 +5.53 0.0 0.0 +5.54 0.0 0.0 +5.55 0.0 0.0 +5.56 0.0 0.0 +5.57 0.0 0.0 +5.58 0.0 0.0 +5.59 0.0 0.0 +5.6 0.0 0.0 +5.61 0.0 0.0 +5.62 0.0 0.0 +5.63 0.0 0.0 +5.64 0.0 0.0 +5.65 0.0 0.0 +5.66 0.0 0.0 +5.67 0.0 0.0 +5.68 0.0 0.0 +5.69 0.0 0.0 +5.7 0.0 0.0 +5.71 0.0 0.0 +5.72 0.0 0.0 +5.73 0.0 0.0 +5.74 0.0 0.0 +5.75 0.0 0.0 +5.76 0.0 0.0 +5.77 0.0 0.0 +5.78 0.0 0.0 +5.79 0.0 0.0 +5.8 0.0 0.0 +5.81 0.0 0.0 +5.82 0.0 0.0 +5.83 0.0 0.0 +5.84 0.0 0.0 +5.85 0.0 0.0 +5.86 0.0 0.0 +5.87 0.0 0.0 +5.88 0.0 0.0 +5.89 0.0 0.0 +5.9 0.0 0.0 +5.91 0.0 0.0 +5.92 0.0 0.0 +5.93 0.0 0.0 +5.94 0.0 0.0 +5.95 0.0 0.0 +5.96 0.0 
0.0 +5.97 0.0 0.0 +5.98 0.0 0.0 +5.99 0.0 0.0 +6.0 0.0 0.0 +6.01 0.0 0.0 +# local +0.0 -7.0231214206E+00 +0.01 -7.0453717502E+00 +0.02 -7.1143468216E+00 +0.03 -7.2090176842E+00 +0.04 -7.3083553876E+00 +0.05 -7.3952429292E+00 +0.06 -7.4597953536E+00 +0.07 -7.4991297585E+00 +0.08 -7.5152902525E+00 +0.09 -7.5128744992E+00 +0.1 -7.4971693665E+00 +0.11 -7.4730064339E+00 +0.12 -7.4442385526E+00 +0.13 -7.4136363893E+00 +0.14 -7.3830123504E+00 +0.15 -7.3534314963E+00 +0.16 -7.3254268122E+00 +0.17 -7.2991803384E+00 +0.18 -7.2746597190E+00 +0.19 -7.2517133907E+00 +0.2 -7.2301330081E+00 +0.21 -7.2096920714E+00 +0.22 -7.1901684172E+00 +0.23 -7.1713562496E+00 +0.24 -7.1530717169E+00 +0.25 -7.1351547228E+00 +0.26 -7.1174685568E+00 +0.27 -7.0998985249E+00 +0.28 -7.0823500273E+00 +0.29 -7.0647465397E+00 +0.3 -7.0470276188E+00 +0.31 -7.0291470239E+00 +0.32 -7.0110709744E+00 +0.33 -6.9927765879E+00 +0.34 -6.9742504049E+00 +0.35 -6.9554870639E+00 +0.36 -6.9364880834E+00 +0.37 -6.9172607312E+00 +0.38 -6.8978169707E+00 +0.39 -6.8781725207E+00 +0.4 -6.8583459681E+00 +0.41 -6.8383579698E+00 +0.42 -6.8182305369E+00 +0.43 -6.7979863986E+00 +0.44 -6.7776484445E+00 +0.45 -6.7572392483E+00 +0.46 -6.7367806674E+00 +0.47 -6.7162935188E+00 +0.48 -6.6957973262E+00 +0.49 -6.6753101150E+00 +0.5 -6.6548482902E+00 +0.51 -6.6344265604E+00 +0.52 -6.6140578820E+00 +0.53 -6.5937534575E+00 +0.54 -6.5735227996E+00 +0.55 -6.5533737172E+00 +0.56 -6.5333124572E+00 +0.57 -6.5133437238E+00 +0.58 -6.4934708208E+00 +0.59 -6.4736956904E+00 +0.6 -6.4540190739E+00 +0.61 -6.4344405251E+00 +0.62 -6.4149585733E+00 +0.63 -6.3955707664E+00 +0.64 -6.3762737537E+00 +0.65 -6.3570633874E+00 +0.66 -6.3379347681E+00 +0.67 -6.3188823126E+00 +0.68 -6.2998998303E+00 +0.69 -6.2809805806E+00 +0.7 -6.2621173131E+00 +0.71 -6.2433023331E+00 +0.72 -6.2245275562E+00 +0.73 -6.2057845580E+00 +0.74 -6.1870646259E+00 +0.75 -6.1683588117E+00 +0.76 -6.1496579790E+00 +0.77 -6.1309528635E+00 +0.78 -6.1122341237E+00 +0.79 -6.0934923948E+00 +0.8 
-6.0747183433E+00 +0.81 -6.0559027212E+00 +0.82 -6.0370364200E+00 +0.83 -6.0181105239E+00 +0.84 -5.9991163614E+00 +0.85 -5.9800455551E+00 +0.86 -5.9608900694E+00 +0.87 -5.9416422543E+00 +0.88 -5.9222948867E+00 +0.89 -5.9028412073E+00 +0.9 -5.8832749527E+00 +0.91 -5.8635903839E+00 +0.92 -5.8437823081E+00 +0.93 -5.8238460972E+00 +0.94 -5.8037776995E+00 +0.95 -5.7835736400E+00 +0.96 -5.7632310315E+00 +0.97 -5.7427475878E+00 +0.98 -5.7221215886E+00 +0.99 -5.7013518781E+00 +1.0 -5.6804378489E+00 +1.01 -5.6593794226E+00 +1.02 -5.6381770133E+00 +1.03 -5.6168315332E+00 +1.04 -5.5953443695E+00 +1.05 -5.5737173207E+00 +1.06 -5.5519525829E+00 +1.07 -5.5300527090E+00 +1.08 -5.5080205953E+00 +1.09 -5.4858594768E+00 +1.1 -5.4635728450E+00 +1.11 -5.4411644391E+00 +1.12 -5.4186381993E+00 +1.13 -5.3959983139E+00 +1.14 -5.3732491142E+00 +1.15 -5.3503950752E+00 +1.16 -5.3274407901E+00 +1.17 -5.3043909888E+00 +1.18 -5.2812504690E+00 +1.19 -5.2580240944E+00 +1.2 -5.2347167970E+00 +1.21 -5.2113335516E+00 +1.22 -5.1878793578E+00 +1.23 -5.1643592404E+00 +1.24 -5.1407782392E+00 +1.25 -5.1171413949E+00 +1.26 -5.0934537514E+00 +1.27 -5.0697203419E+00 +1.28 -5.0459461891E+00 +1.29 -5.0221363068E+00 +1.3 -4.9982956801E+00 +1.31 -4.9744292806E+00 +1.32 -4.9505420602E+00 +1.33 -4.9266389339E+00 +1.34 -4.9027248049E+00 +1.35 -4.8788045416E+00 +1.36 -4.8548829774E+00 +1.37 -4.8309649370E+00 +1.38 -4.8070551869E+00 +1.39 -4.7831584769E+00 +1.4 -4.7592795265E+00 +1.41 -4.7354229970E+00 +1.42 -4.7115935473E+00 +1.43 -4.6877957587E+00 +1.44 -4.6640341935E+00 +1.45 -4.6403133701E+00 +1.46 -4.6166377312E+00 +1.47 -4.5930117259E+00 +1.48 -4.5694396733E+00 +1.49 -4.5459259073E+00 +1.5 -4.5224746470E+00 +1.51 -4.4990900739E+00 +1.52 -4.4757763045E+00 +1.53 -4.4525373437E+00 +1.54 -4.4293771944E+00 +1.55 -4.4062996642E+00 +1.56 -4.3833086284E+00 +1.57 -4.3604076923E+00 +1.58 -4.3376005463E+00 +1.59 -4.3148906194E+00 +1.6 -4.2922813822E+00 +1.61 -4.2697760768E+00 +1.62 -4.2473779371E+00 +1.63 
-4.2250900034E+00 +1.64 -4.2029152612E+00 +1.65 -4.1808565335E+00 +1.66 -4.1589165503E+00 +1.67 -4.1370979017E+00 +1.68 -4.1154030601E+00 +1.69 -4.0938343674E+00 +1.7 -4.0723940409E+00 +1.71 -4.0510841603E+00 +1.72 -4.0299066951E+00 +1.73 -4.0088634503E+00 +1.74 -3.9879561588E+00 +1.75 -3.9671863413E+00 +1.76 -3.9465555084E+00 +1.77 -3.9260648902E+00 +1.78 -3.9057157915E+00 +1.79 -3.8855091504E+00 +1.8 -3.8654460818E+00 +1.81 -3.8455272395E+00 +1.82 -3.8257535560E+00 +1.83 -3.8061254478E+00 +1.84 -3.7866435899E+00 +1.85 -3.7673082659E+00 +1.86 -3.7481198716E+00 +1.87 -3.7290785831E+00 +1.88 -3.7101845281E+00 +1.89 -3.6914377938E+00 +1.9 -3.6728382611E+00 +1.91 -3.6543859374E+00 +1.92 -3.6360805050E+00 +1.93 -3.6179218406E+00 +1.94 -3.5999095315E+00 +1.95 -3.5820432604E+00 +1.96 -3.5643225918E+00 +1.97 -3.5467470213E+00 +1.98 -3.5293160912E+00 +1.99 -3.5120291557E+00 +2.0 -3.4948856938E+00 +2.01 -3.4778850017E+00 +2.02 -3.4610264332E+00 +2.03 -3.4443092916E+00 +2.04 -3.4277328079E+00 +2.05 -3.4112962860E+00 +2.06 -3.3949988785E+00 +2.07 -3.3788398185E+00 +2.08 -3.3628182624E+00 +2.09 -3.3469333416E+00 +2.1 -3.3311842302E+00 +2.11 -3.3155699746E+00 +2.12 -3.3000897024E+00 +2.13 -3.2847424519E+00 +2.14 -3.2695272359E+00 +2.15 -3.2544431327E+00 +2.16 -3.2394890382E+00 +2.17 -3.2246639597E+00 +2.18 -3.2099668049E+00 +2.19 -3.1953963961E+00 +2.2 -3.1809517062E+00 +2.21 -3.1666314047E+00 +2.22 -3.1524342799E+00 +2.23 -3.1383591039E+00 +2.24 -3.1244044055E+00 +2.25 -3.1105688087E+00 +2.26 -3.0968509426E+00 +2.27 -3.0832494382E+00 +2.28 -3.0697627913E+00 +2.29 -3.0563898779E+00 +2.3 -3.0431294386E+00 +2.31 -3.0299801887E+00 +2.32 -3.0169410939E+00 +2.33 -3.0040109831E+00 +2.34 -2.9911887500E+00 +2.35 -2.9784733778E+00 +2.36 -2.9658637789E+00 +2.37 -2.9533589232E+00 +2.38 -2.9409577917E+00 +2.39 -2.9286593571E+00 +2.4 -2.9164625797E+00 +2.41 -2.9043664216E+00 +2.42 -2.8923698868E+00 +2.43 -2.8804718654E+00 +2.44 -2.8686712935E+00 +2.45 -2.8569671495E+00 +2.46 
-2.8453582499E+00 +2.47 -2.8338434784E+00 +2.48 -2.8224217322E+00 +2.49 -2.8110918278E+00 +2.5 -2.7998525918E+00 +2.51 -2.7887028310E+00 +2.52 -2.7776415409E+00 +2.53 -2.7666676118E+00 +2.54 -2.7557799085E+00 +2.55 -2.7449775402E+00 +2.56 -2.7342595084E+00 +2.57 -2.7236248015E+00 +2.58 -2.7130724883E+00 +2.59 -2.7026015965E+00 +2.6 -2.6922111078E+00 +2.61 -2.6819001800E+00 +2.62 -2.6716679021E+00 +2.63 -2.6615133021E+00 +2.64 -2.6514355718E+00 +2.65 -2.6414338420E+00 +2.66 -2.6315071900E+00 +2.67 -2.6216548251E+00 +2.68 -2.6118759290E+00 +2.69 -2.6021696307E+00 +2.7 -2.5925351489E+00 +2.71 -2.5829717156E+00 +2.72 -2.5734785124E+00 +2.73 -2.5640547609E+00 +2.74 -2.5546997426E+00 +2.75 -2.5454126928E+00 +2.76 -2.5361928300E+00 +2.77 -2.5270394846E+00 +2.78 -2.5179519390E+00 +2.79 -2.5089294248E+00 +2.8 -2.4999712991E+00 +2.81 -2.4910768846E+00 +2.82 -2.4822454645E+00 +2.83 -2.4734763795E+00 +2.84 -2.4647690035E+00 +2.85 -2.4561226740E+00 +2.86 -2.4475367100E+00 +2.87 -2.4390105350E+00 +2.88 -2.4305435276E+00 +2.89 -2.4221350247E+00 +2.9 -2.4137844549E+00 +2.91 -2.4054912392E+00 +2.92 -2.3972547682E+00 +2.93 -2.3890744373E+00 +2.94 -2.3809497189E+00 +2.95 -2.3728800481E+00 +2.96 -2.3648648185E+00 +2.97 -2.3569035173E+00 +2.98 -2.3489956165E+00 +2.99 -2.3411405637E+00 +3.0 -2.3333378034E+00 +3.01 -2.3255868594E+00 +3.02 -2.3178872183E+00 +3.03 -2.3102383329E+00 +3.04 -2.3026397232E+00 +3.05 -2.2950909160E+00 +3.06 -2.2875914196E+00 +3.07 -2.2801407030E+00 +3.08 -2.2727383444E+00 +3.09 -2.2653838770E+00 +3.1 -2.2580768156E+00 +3.11 -2.2508166903E+00 +3.12 -2.2436030858E+00 +3.13 -2.2364355573E+00 +3.14 -2.2293136286E+00 +3.15 -2.2222368833E+00 +3.16 -2.2152049132E+00 +3.17 -2.2082172941E+00 +3.18 -2.2012735614E+00 +3.19 -2.1943733441E+00 +3.2 -2.1875162423E+00 +3.21 -2.1807018469E+00 +3.22 -2.1739297205E+00 +3.23 -2.1671995163E+00 +3.24 -2.1605108486E+00 +3.25 -2.1538633207E+00 +3.26 -2.1472565296E+00 +3.27 -2.1406901400E+00 +3.28 -2.1341637841E+00 +3.29 
-2.1276770788E+00 +3.3 -2.1212296497E+00 +3.31 -2.1148211738E+00 +3.32 -2.1084513002E+00 +3.33 -2.1021196610E+00 +3.34 -2.0958259044E+00 +3.35 -2.0895697208E+00 +3.36 -2.0833507757E+00 +3.37 -2.0771687174E+00 +3.38 -2.0710232112E+00 +3.39 -2.0649139620E+00 +3.4 -2.0588406508E+00 +3.41 -2.0528029436E+00 +3.42 -2.0468005172E+00 +3.43 -2.0408330924E+00 +3.44 -2.0349003651E+00 +3.45 -2.0290020196E+00 +3.46 -2.0231377399E+00 +3.47 -2.0173072634E+00 +3.48 -2.0115103000E+00 +3.49 -2.0057465539E+00 +3.5 -2.0000157114E+00 +3.51 -1.9943175274E+00 +3.52 -1.9886517255E+00 +3.53 -1.9830180291E+00 +3.54 -1.9774161269E+00 +3.55 -1.9718457881E+00 +3.56 -1.9663067504E+00 +3.57 -1.9607987500E+00 +3.58 -1.9553214969E+00 +3.59 -1.9498747526E+00 +3.6 -1.9444582756E+00 +3.61 -1.9390718144E+00 +3.62 -1.9337151017E+00 +3.63 -1.9283878872E+00 +3.64 -1.9230899509E+00 +3.65 -1.9178210530E+00 +3.66 -1.9125809500E+00 +3.67 -1.9073693767E+00 +3.68 -1.9021861346E+00 +3.69 -1.8970309951E+00 +3.7 -1.8919037296E+00 +3.71 -1.8868040854E+00 +3.72 -1.8817318554E+00 +3.73 -1.8766868316E+00 +3.74 -1.8716687958E+00 +3.75 -1.8666775210E+00 +3.76 -1.8617127770E+00 +3.77 -1.8567743797E+00 +3.78 -1.8518621212E+00 +3.79 -1.8469757937E+00 +3.8 -1.8421151646E+00 +3.81 -1.8372800505E+00 +3.82 -1.8324702607E+00 +3.83 -1.8276855969E+00 +3.84 -1.8229258539E+00 +3.85 -1.8181908181E+00 +3.86 -1.8134803248E+00 +3.87 -1.8087941851E+00 +3.88 -1.8041322098E+00 +3.89 -1.7994941909E+00 +3.9 -1.7948799518E+00 +3.91 -1.7902893242E+00 +3.92 -1.7857221279E+00 +3.93 -1.7811781826E+00 +3.94 -1.7766572800E+00 +3.95 -1.7721592746E+00 +3.96 -1.7676839959E+00 +3.97 -1.7632312719E+00 +3.98 -1.7588009238E+00 +3.99 -1.7543927664E+00 +4.0 -1.7500066580E+00 +4.01 -1.7456424349E+00 +4.02 -1.7412999332E+00 +4.03 -1.7369789755E+00 +4.04 -1.7326793994E+00 +4.05 -1.7284010643E+00 +4.06 -1.7241438138E+00 +4.07 -1.7199074918E+00 +4.08 -1.7156919241E+00 +4.09 -1.7114969666E+00 +4.1 -1.7073224806E+00 +4.11 -1.7031683172E+00 +4.12 
-1.6990343275E+00 +4.13 -1.6949203421E+00 +4.14 -1.6908262304E+00 +4.15 -1.6867518576E+00 +4.16 -1.6826970815E+00 +4.17 -1.6786617602E+00 +4.18 -1.6746457304E+00 +4.19 -1.6706488710E+00 +4.2 -1.6666710522E+00 +4.21 -1.6627121385E+00 +4.22 -1.6587719945E+00 +4.23 -1.6548504641E+00 +4.24 -1.6509474321E+00 +4.25 -1.6470627748E+00 +4.26 -1.6431963631E+00 +4.27 -1.6393480680E+00 +4.28 -1.6355177416E+00 +4.29 -1.6317052709E+00 +4.3 -1.6279105396E+00 +4.31 -1.6241334248E+00 +4.32 -1.6203738033E+00 +4.33 -1.6166315370E+00 +4.34 -1.6129065112E+00 +4.35 -1.6091986186E+00 +4.36 -1.6055077418E+00 +4.37 -1.6018337635E+00 +4.38 -1.5981765561E+00 +4.39 -1.5945360003E+00 +4.4 -1.5909119986E+00 +4.41 -1.5873044391E+00 +4.42 -1.5837132098E+00 +4.43 -1.5801381951E+00 +4.44 -1.5765792679E+00 +4.45 -1.5730363413E+00 +4.46 -1.5695093087E+00 +4.47 -1.5659980634E+00 +4.48 -1.5625024987E+00 +4.49 -1.5590224884E+00 +4.5 -1.5555579458E+00 +4.51 -1.5521087730E+00 +4.52 -1.5486748682E+00 +4.53 -1.5452561298E+00 +4.54 -1.5418524452E+00 +4.55 -1.5384637135E+00 +4.56 -1.5350898499E+00 +4.57 -1.5317307575E+00 +4.58 -1.5283863393E+00 +4.59 -1.5250564978E+00 +4.6 -1.5217411153E+00 +4.61 -1.5184401207E+00 +4.62 -1.5151534216E+00 +4.63 -1.5118809255E+00 +4.64 -1.5086225400E+00 +4.65 -1.5053781613E+00 +4.66 -1.5021476999E+00 +4.67 -1.4989310782E+00 +4.68 -1.4957282082E+00 +4.69 -1.4925390016E+00 +4.7 -1.4893633704E+00 +4.71 -1.4862012066E+00 +4.72 -1.4830524446E+00 +4.73 -1.4799170014E+00 +4.74 -1.4767947930E+00 +4.75 -1.4736857355E+00 +4.76 -1.4705897384E+00 +4.77 -1.4675067108E+00 +4.78 -1.4644365871E+00 +4.79 -1.4613792870E+00 +4.8 -1.4583347306E+00 +4.81 -1.4553028376E+00 +4.82 -1.4522835159E+00 +4.83 -1.4492766914E+00 +4.84 -1.4462822960E+00 +4.85 -1.4433002532E+00 +4.86 -1.4403304868E+00 +4.87 -1.4373729203E+00 +4.88 -1.4344274613E+00 +4.89 -1.4314940487E+00 +4.9 -1.4285726134E+00 +4.91 -1.4256630825E+00 +4.92 -1.4227653832E+00 +4.93 -1.4198794427E+00 +4.94 -1.4170051697E+00 +4.95 
-1.4141425126E+00 +4.96 -1.4112914024E+00 +4.97 -1.4084517699E+00 +4.98 -1.4056235455E+00 +4.99 -1.4028066588E+00 +5.0 -1.4000010240E+00 +5.01 -1.3972065925E+00 +5.02 -1.3944232980E+00 +5.03 -1.3916510745E+00 +5.04 -1.3888898557E+00 +5.05 -1.3861395741E+00 +5.06 -1.3834001481E+00 +5.07 -1.3806715315E+00 +5.08 -1.3779536612E+00 +5.09 -1.3752464741E+00 +5.1 -1.3725499072E+00 +5.11 -1.3698638970E+00 +5.12 -1.3671883633E+00 +5.13 -1.3645232633E+00 +5.14 -1.3618685368E+00 +5.15 -1.3592241238E+00 +5.16 -1.3565899640E+00 +5.17 -1.3539659973E+00 +5.18 -1.3513521482E+00 +5.19 -1.3487483721E+00 +5.2 -1.3461546139E+00 +5.21 -1.3435708162E+00 +5.22 -1.3409969216E+00 +5.23 -1.3384328728E+00 +5.24 -1.3358786008E+00 +5.25 -1.3333340559E+00 +5.26 -1.3307991892E+00 +5.27 -1.3282739460E+00 +5.28 -1.3257582715E+00 +5.29 -1.3232521111E+00 +5.3 -1.3207554034E+00 +5.31 -1.3182680912E+00 +5.32 -1.3157901326E+00 +5.33 -1.3133214753E+00 +5.34 -1.3108620674E+00 +5.35 -1.3084118565E+00 +5.36 -1.3059707901E+00 +5.37 -1.3035388010E+00 +5.38 -1.3011158551E+00 +5.39 -1.2987019027E+00 +5.4 -1.2962968941E+00 +5.41 -1.2939007796E+00 +5.42 -1.2915135095E+00 +5.43 -1.2891350244E+00 +5.44 -1.2867652793E+00 +5.45 -1.2844042331E+00 +5.46 -1.2820518385E+00 +5.47 -1.2797080480E+00 +5.48 -1.2773728144E+00 +5.49 -1.2750460886E+00 +5.5 -1.2727278110E+00 +5.51 -1.2704179504E+00 +5.52 -1.2681164617E+00 +5.53 -1.2658232996E+00 +5.54 -1.2635384189E+00 +5.55 -1.2612617746E+00 +5.56 -1.2589933135E+00 +5.57 -1.2567329919E+00 +5.58 -1.2544807741E+00 +5.59 -1.2522366172E+00 +5.6 -1.2500004780E+00 +5.61 -1.2477723135E+00 +5.62 -1.2455520805E+00 +5.63 -1.2433397235E+00 +5.64 -1.2411352113E+00 +5.65 -1.2389385051E+00 +5.66 -1.2367495638E+00 +5.67 -1.2345683465E+00 +5.68 -1.2323948119E+00 +5.69 -1.2302289177E+00 +5.7 -1.2280706113E+00 +5.71 -1.2259198665E+00 +5.72 -1.2237766443E+00 +5.73 -1.2216409055E+00 +5.74 -1.2195126109E+00 +5.75 -1.2173917213E+00 +5.76 -1.2152781938E+00 +5.77 -1.2131719831E+00 +5.78 
-1.2110730624E+00 +5.79 -1.2089813942E+00 +5.8 -1.2068969412E+00 +5.81 -1.2048196662E+00 +5.82 -1.2027495316E+00 +5.83 -1.2006864953E+00 +5.84 -1.1986305166E+00 +5.85 -1.1965815689E+00 +5.86 -1.1945396164E+00 +5.87 -1.1925046237E+00 +5.88 -1.1904765552E+00 +5.89 -1.1884553752E+00 +5.9 -1.1864410430E+00 +5.91 -1.1844335204E+00 +5.92 -1.1824327817E+00 +5.93 -1.1804387931E+00 +5.94 -1.1784515207E+00 +5.95 -1.1764709306E+00 +5.96 -1.1744969887E+00 +5.97 -1.1725296572E+00 +5.98 -1.1705688974E+00 +5.99 -1.1686146860E+00 +6.0 -1.1666669908E+00 +6.01 -1.1647257794E+00 diff --git a/src/Ions.cc b/src/Ions.cc index 11c42f64..ea69a870 100644 --- a/src/Ions.cc +++ b/src/Ions.cc @@ -34,13 +34,13 @@ Timer ions_setup_tm("ions::setup"); const double ang2bohr = 1.8897269; // const double rmax = 8.0; -std::map Ions::map_species_ - = { { "H", 1 }, { "D", 1 }, { "Li", 3 }, { "Be", 4 }, { "B", 5 }, - { "C", 6 }, { "N", 7 }, { "O", 8 }, { "F", 9 }, { "Na", 11 }, - { "Mg", 12 }, { "Al", 13 }, { "Si", 14 }, { "P", 15 }, { "S", 16 }, - { "Cl", 17 }, { "K", 19 }, { "Ca", 20 }, { "Cr", 24 }, { "Mn", 25 }, - { "Fe", 26 }, { "Co", 27 }, { "Ni", 28 }, { "Cu", 29 }, { "Zn", 30 }, - { "Ga", 31 }, { "Ge", 32 }, { "La", 57 }, { "Au", 79 } }; +std::map Ions::map_species_ = { { "H", 1 }, { "D", 1 }, + { "Li", 3 }, { "Be", 4 }, { "B", 5 }, { "C", 6 }, { "N", 7 }, { "O", 8 }, + { "F", 9 }, { "Na", 11 }, { "Mg", 12 }, { "Al", 13 }, { "Si", 14 }, + { "P", 15 }, { "S", 16 }, { "Cl", 17 }, { "K", 19 }, { "Ca", 20 }, + { "Cr", 24 }, { "Mn", 25 }, { "Fe", 26 }, { "Co", 27 }, { "Ni", 28 }, + { "Cu", 29 }, { "Zn", 30 }, { "Ga", 31 }, { "Ge", 32 }, { "Br", 35 }, + { "La", 57 }, { "Au", 79 } }; int Ions::num_ions_ = -1; short Ions::max_num_proj_ = -1; From f298d43b5adbf4fcb88b5280af5e289180815034 Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Tue, 11 Feb 2025 15:23:42 -0500 Subject: [PATCH 17/99] Clean up some HDFrestart functions (#303) --- src/HDFrestart.cc | 584 ++----------------------------------- 
src/HDFrestart.h | 13 +- src/Ions.cc | 27 +- src/LocalizationRegions.cc | 3 +- 4 files changed, 51 insertions(+), 576 deletions(-) diff --git a/src/HDFrestart.cc b/src/HDFrestart.cc index ac856ff2..0322bfea 100644 --- a/src/HDFrestart.cc +++ b/src/HDFrestart.cc @@ -1874,141 +1874,32 @@ void HDFrestart::printTimers(std::ostream& os) create_file_tm_.print(os); close_file_tm_.print(os); } -/* -int HDFrestart::writeRandomState(unsigned short int rand_state[3]) -{ - if( active_ ){ - // Create the data space for new datasets - hsize_t dims[1]={3}; - hid_t dataspace_id = H5Screate_simple(1, dims, NULL); - if( dataspace_id<0 ){ - (*MPIdata::serr)<<"HDFrestart::writeRandomState(): H5Screate_simple -failed!!!"<& data) +int HDFrestart::readAtomicData(std::string datasetname, std::vector& data) { Control& ct = *(Control::instance()); if (onpe0 && ct.verbose > 0) - { - (*MPIdata::sout) << "HDFrestart::readAtomicNumbers()..." << std::endl; - } + (*MPIdata::sout) << "HDFrestart::readAtomicData()..." << std::endl; if (active_) { assert(file_id_ >= 0); - htri_t exists = H5Lexists(file_id_, "/Atomic_numbers", H5P_DEFAULT); + htri_t exists = H5Lexists(file_id_, datasetname.c_str(), H5P_DEFAULT); if (!exists) return 0; // Open the dataset - hid_t dataset_id = H5Dopen2(file_id_, "/Atomic_numbers", H5P_DEFAULT); + hid_t dataset_id = H5Dopen2(file_id_, datasetname.c_str(), H5P_DEFAULT); if (dataset_id < 0) { - if (onpe0) - (*MPIdata::sout) - << "HDFrestart::readAtomicNumbers() --- H5Dopen2 failed!!!" - << std::endl; + MGMOL_HDFRESTART_FAIL("H5Dopen2 failed for " + datasetname); return -1; } int dim = (int)(H5Dget_storage_size(dataset_id) / sizeof(int)); if (dim == 0) { - if (onpe0) - (*MPIdata::sout) - << "HDFrestart::readAtomicNumbers() --- No numbers!!!" 
- << std::endl; + MGMOL_HDFRESTART_FAIL("No " + datasetname); return -1; } data.resize(dim); @@ -2017,17 +1908,13 @@ int HDFrestart::readAtomicNumbers(std::vector& data) H5P_DEFAULT, &data[0]); if (status < 0) { - (*MPIdata::sout) - << "HDFrestart::readAtomicNumbers() --- H5Dread failed!!!" - << std::endl; + MGMOL_HDFRESTART_FAIL("H5Dread failed for " + datasetname); return -1; } status = H5Dclose(dataset_id); if (status < 0) { - (*MPIdata::sout) - << "HDFrestart::readAtomicNumbers() --- H5Dclose failed!!!" - << std::endl; + MGMOL_HDFRESTART_FAIL("H5Dclose failed for " + datasetname); return -1; } } // if active_ @@ -2037,126 +1924,8 @@ int HDFrestart::readAtomicNumbers(std::vector& data) return 0; } -// return -2 means failure -// return -1 means dataset does not exists, and could be from older MGmol -// version -int HDFrestart::readAtomicIDs(std::vector& data) -{ - Control& ct = *(Control::instance()); - if (onpe0 && ct.verbose > 0) - (*MPIdata::sout) << "HDFrestart::readAtomicIDs()..." << std::endl; - - if (active_) - { - assert(file_id_ >= 0); - htri_t exists = H5Lexists(file_id_, "/Atomic_IDs", H5P_DEFAULT); - if (!exists) return -1; - - // Open the dataset - hid_t dataset_id = H5Dopen2(file_id_, "/Atomic_IDs", H5P_DEFAULT); - if (dataset_id < 0) - { - if (onpe0) - (*MPIdata::sout) - << "HDFrestart::readAtomicIDs() --- H5Dopen2 failed!!!" - << std::endl; - return -2; - } - - int dim = (int)(H5Dget_storage_size(dataset_id) / sizeof(int)); - if (dim == 0) - { - if (onpe0) - (*MPIdata::sout) - << "HDFrestart::readAtomicIDs() --- No IDs!!!" << std::endl; - return -2; - } - data.resize(dim); - - herr_t status = H5Dread(dataset_id, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, - H5P_DEFAULT, &data[0]); - if (status < 0) - { - (*MPIdata::sout) - << "HDFrestart::readAtomicIDs() --- H5Dread failed!!!" - << std::endl; - return -2; - } - status = H5Dclose(dataset_id); - if (status < 0) - { - (*MPIdata::sout) - << "HDFrestart::readAtomicIDs() --- H5Dclose failed!!!" 
- << std::endl; - return -2; - } - } - if (gather_data_x_) gatherDataXdir(data); - - return 0; -} - -// return -2 means failure -// return -1 means dataset does not exists, and could be from older MGmol -// version -int HDFrestart::readAtomicNLprojIDs(std::vector& data) -{ - Control& ct = *(Control::instance()); - if (onpe0 && ct.verbose > 0) - (*MPIdata::sout) << "HDFrestart::readAtomicNLprojIDs()..." << std::endl; - - if (active_) - { - assert(file_id_ >= 0); - - htri_t exists = H5Lexists(file_id_, "/AtomicNLproj_IDs", H5P_DEFAULT); - if (!exists) return -1; - - hid_t dataset_id = H5Dopen2(file_id_, "/AtomicNLproj_IDs", H5P_DEFAULT); - if (dataset_id < 0) - { - if (onpe0) - (*MPIdata::sout) << "HDFrestart::readAtomicNLprojIDs() --- " - "H5Dopen2 failed!!!" - << std::endl; - return -2; - } - - int dim = (int)(H5Dget_storage_size(dataset_id) / sizeof(int)); - if (dim == 0) - { - if (onpe0) - (*MPIdata::sout) - << "HDFrestart::readAtomicNLprojIDs() --- No IDs!!!" - << std::endl; - return -2; - } - data.resize(dim); - - herr_t status = H5Dread(dataset_id, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, - H5P_DEFAULT, &data[0]); - if (status < 0) - { - (*MPIdata::sout) - << "HDFrestart::readAtomicNLprojIDs() --- H5Dread failed!!!" - << std::endl; - return -2; - } - status = H5Dclose(dataset_id); - if (status < 0) - { - (*MPIdata::sout) - << "HDFrestart::readAtomicNLprojIDs() --- H5Dclose failed!!!" 
- << std::endl; - return -2; - } - } - - if (gather_data_x_) gatherDataXdir(data); - return 0; -} - -int HDFrestart::readAtomicPositions(std::vector& data) +int HDFrestart::readAtomicData( + std::string datasetname, std::vector& data) { if (onpe0) (*MPIdata::sout) << "Read ionic positions from hdf5 file" << std::endl; @@ -2165,26 +1934,21 @@ int HDFrestart::readAtomicPositions(std::vector& data) { assert(file_id_ >= 0); - htri_t exists = H5Lexists(file_id_, "/Ionic_positions", H5P_DEFAULT); + htri_t exists = H5Lexists(file_id_, datasetname.c_str(), H5P_DEFAULT); if (!exists) return -1; // Open the dataset - hid_t dataset_id = H5Dopen2(file_id_, "/Ionic_positions", H5P_DEFAULT); + hid_t dataset_id = H5Dopen2(file_id_, datasetname.c_str(), H5P_DEFAULT); if (dataset_id < 0) { - (*MPIdata::sout) - << "HDFrestart:readAtomicPositions() --- H5Dopen2 failed!!!" - << std::endl; + MGMOL_HDFRESTART_FAIL("H5Dopen2 failed for " + datasetname); return -2; } int dim = (int)H5Dget_storage_size(dataset_id) / sizeof(double); if (dim == 0) { - if (onpe0) - (*MPIdata::sout) - << "HDFrestart:readAtomicPositions() --- No positions!!!" - << std::endl; + MGMOL_HDFRESTART_FAIL("readAtomicData() --- No " + datasetname); return -2; } data.resize(dim); @@ -2193,15 +1957,13 @@ int HDFrestart::readAtomicPositions(std::vector& data) H5P_DEFAULT, &data[0]); if (status < 0) { - (*MPIdata::sout) - << "HDFrestart:readAtomicPositions() --- H5Dread failed!!!" - << std::endl; + MGMOL_HDFRESTART_FAIL("H5Dread failed for " + datasetname); return -2; } status = H5Dclose(dataset_id); if (status < 0) { - (*MPIdata::sout) << "H5Dclose failed!!!" 
<< std::endl; + MGMOL_HDFRESTART_FAIL("H5Dclose failed for " + datasetname); return -2; } } @@ -2218,59 +1980,12 @@ int HDFrestart::readOldCenterOnMesh(std::vector& data, int i) "points from hdf5 file" << std::endl; - if (active_) - { - assert(file_id_ >= 0); + std::stringstream datasetstream; + datasetstream << "OldCenterOnMesh_" << i; - std::stringstream datasetstream; - datasetstream << "OldCenterOnMesh_" << i; + std::string datasetname = datasetstream.str(); - std::string datasetname = datasetstream.str(); - - htri_t exists = H5Lexists(file_id_, datasetname.c_str(), H5P_DEFAULT); - if (!exists) return -1; - - // Open the dataset - hid_t dataset_id = H5Dopen2(file_id_, datasetname.c_str(), H5P_DEFAULT); - if (dataset_id < 0) - { - (*MPIdata::sout) - << "HDFrestart:readOldCenterOnMesh() --- H5Dopen2 failed!!!" - << std::endl; - return -2; - } - - int dim = (int)H5Dget_storage_size(dataset_id) / sizeof(double); - if (dim == 0) - { - if (onpe0) - (*MPIdata::sout) - << "HDFrestart:readOldCenterOnMesh() --- No old centers!!!" - << std::endl; - return -2; - } - data.resize(dim); - - herr_t status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, - H5P_DEFAULT, &data[0]); - if (status < 0) - { - (*MPIdata::sout) - << "HDFrestart:readOldCenterOnMesh() --- H5Dread failed!!!" - << std::endl; - return -2; - } - status = H5Dclose(dataset_id); - if (status < 0) - { - (*MPIdata::sout) << "H5Dclose failed!!!" 
<< std::endl; - return -2; - } - } - - if (gather_data_x_) gatherDataXdir(data); - - return 0; + return readAtomicData(datasetname, data); } int HDFrestart::readOldCenter(std::vector& data, int i) @@ -2279,256 +1994,16 @@ int HDFrestart::readOldCenter(std::vector& data, int i) (*MPIdata::sout) << "Read old localization centers from hdf5 file" << std::endl; - if (active_) - { - assert(file_id_ >= 0); - - std::stringstream datasetstream; - datasetstream << "OldCenter_" << i; - - std::string datasetname = datasetstream.str(); - - htri_t exists = H5Lexists(file_id_, datasetname.c_str(), H5P_DEFAULT); - if (!exists) return -1; - - // Open the dataset - hid_t dataset_id = H5Dopen2(file_id_, datasetname.c_str(), H5P_DEFAULT); - if (dataset_id < 0) - { - (*MPIdata::sout) - << "HDFrestart:readOldCenter() --- H5Dopen2 failed!!!" - << std::endl; - return -2; - } - - int dim = (int)H5Dget_storage_size(dataset_id) / sizeof(double); - if (dim == 0) - { - if (onpe0) - (*MPIdata::sout) - << "HDFrestart:readOldCenter() --- No old centers!!!" - << std::endl; - return -2; - } - data.resize(dim); - - herr_t status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, - H5P_DEFAULT, &data[0]); - if (status < 0) - { - (*MPIdata::sout) - << "HDFrestart:readOldCenter() --- H5Dread failed!!!" - << std::endl; - return -2; - } - status = H5Dclose(dataset_id); - if (status < 0) - { - (*MPIdata::sout) << "H5Dclose failed!!!" 
<< std::endl; - return -2; - } - } - - if (gather_data_x_) gatherDataXdir(data); - - return 0; -} - -int HDFrestart::readGidsList(std::vector& data) -{ - if (onpe0) - (*MPIdata::sout) << "Read list of gids from hdf5 file" << std::endl; - - if (active_) - { - assert(file_id_ >= 0); - - std::string datasetname = "GidsList"; - - htri_t exists = H5Lexists(file_id_, datasetname.c_str(), H5P_DEFAULT); - if (!exists) return -1; - - // Open the dataset - hid_t dataset_id = H5Dopen2(file_id_, datasetname.c_str(), H5P_DEFAULT); - if (dataset_id < 0) - { - (*MPIdata::sout) - << "HDFrestart:readGidsList() --- H5Dopen2 failed!!!" - << std::endl; - return -2; - } - - int dim = (int)H5Dget_storage_size(dataset_id) / sizeof(int); - if (dim == 0) - { - if (onpe0) - (*MPIdata::sout) - << "HDFrestart:readGidsList() --- No GidsList!!!" - << std::endl; - return -2; - } - data.resize(dim); - - herr_t status = H5Dread(dataset_id, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, - H5P_DEFAULT, &data[0]); - if (status < 0) - { - (*MPIdata::sout) - << "HDFrestart:readGidsList() --- H5Dread failed!!!" - << std::endl; - return -2; - } - status = H5Dclose(dataset_id); - if (status < 0) - { - (*MPIdata::sout) << "H5Dclose failed!!!" 
<< std::endl; - return -2; - } - } - - if (gather_data_x_) gatherDataXdir(data); - - return 0; -} - -int HDFrestart::readAtomicVelocities(std::vector& data) -{ - if (onpe0) - (*MPIdata::sout) << "Read atomic velocities from hdf5 file" - << std::endl; - - if (active_) - { - assert(file_id_ >= 0); + std::stringstream datasetstream; + datasetstream << "OldCenter_" << i; - htri_t exists = H5Lexists(file_id_, "/Ionic_velocities", H5P_DEFAULT); - if (exists) - { - - // Open an existing dataset - hid_t dataset_id - = H5Dopen2(file_id_, "/Ionic_velocities", H5P_DEFAULT); - if (dataset_id < 0) - { - std::cerr << "HDFrestart::readAtomicVelocities(), " - "H5Dopen failed->no velocities read" - << std::endl; - data.clear(); - return -1; - } - int dim = (int)H5Dget_storage_size(dataset_id) / sizeof(double); - data.resize(dim); - - herr_t status = H5Dread(dataset_id, H5T_NATIVE_DOUBLE, H5S_ALL, - H5S_ALL, H5P_DEFAULT, &data[0]); - if (status < 0) - { - MGMOL_HDFRESTART_FAIL("H5Dread failed!!!"); - return -2; - } - - status = H5Dclose(dataset_id); - if (status < 0) - { - MGMOL_HDFRESTART_FAIL("H5Dclose failed!!!"); - return -2; - } - } - } + std::string datasetname = datasetstream.str(); - if (gather_data_x_) gatherDataXdir(data); - - return 0; + return readAtomicData(datasetname, data); } -int HDFrestart::readLockedAtomNames(std::vector& data) -{ - if (onpe0) - (*MPIdata::sout) << "HDFrestart::readLockedAtomNames()..." 
<< std::endl; - - std::vector buffer; - short name_length = 7; // default, value used before February 2016 - - if (active_) - { - assert(file_id_ >= 0); - - htri_t exists = H5Lexists(file_id_, "/LockedAtomsNames", H5P_DEFAULT); - if (!exists) return 0; - - hid_t dataset_id = H5Dopen2(file_id_, "/LockedAtomsNames", H5P_DEFAULT); - if (dataset_id < 0) - { - if (onpe0) - (*MPIdata::sout) << "HDFrestart::readLockedAtomNames(), " - "H5Dopen failed->no locked atoms read" - << std::endl; - return -1; - } - - std::string attname("String_Length"); - htri_t existsA = H5Aexists(dataset_id, attname.c_str()); - if (existsA) - { - hid_t attribute_id = H5Aopen_name(dataset_id, attname.c_str()); - herr_t status = H5Aread(attribute_id, H5T_NATIVE_INT, &name_length); - // check validity of data just read - if (status < 0) - { - MGMOL_HDFRESTART_FAIL("H5Aread failed!!!"); - return -1; - } - } - - int dim = (int)H5Dget_storage_size(dataset_id) / name_length; - - if (onpe0) - (*MPIdata::sout) - << "HDFrestart::readLockedAtomNames(), dataset size=" << dim - << std::endl; - - if (dim == 0) return 0; - - buffer.resize(dim * name_length); - - // create type for std::strings of length IonData_MaxStrLength - hid_t strtype = H5Tcopy(H5T_C_S1); - H5Tset_size(strtype, name_length); - herr_t status = H5Dread( - dataset_id, strtype, H5S_ALL, H5S_ALL, H5P_DEFAULT, &buffer[0]); - if (status < 0) - { - (*MPIdata::sout) - << "HDFrestart::readLockedAtomNames(), H5Dread failed!!!" - << std::endl; - return -1; - } - status = H5Dclose(dataset_id); - if (status < 0) - { - (*MPIdata::sout) << "H5Dclose failed!!!" 
<< std::endl; - return -1; - } - } - - if (gather_data_x_) gatherDataXdir(buffer); - - data.clear(); - for (unsigned short i = 0; i < buffer.size(); i += name_length) - { - std::string t(&buffer[i], name_length); - assert(t.size() > 0); - - stripLeadingAndTrailingBlanks(t); - - assert(t.size() > 0); - data.push_back(t); - } - - return 0; -} - -int HDFrestart::readAtomicNames(std::vector& data) +int HDFrestart::readAtomicData( + std::string datasetname, std::vector& data) { Control& ct = *(Control::instance()); if (onpe0 && ct.verbose > 0) @@ -2541,11 +2016,12 @@ int HDFrestart::readAtomicNames(std::vector& data) { assert(file_id_ >= 0); - htri_t exists = H5Lexists(file_id_, "/Atomic_names", H5P_DEFAULT); + htri_t exists = H5Lexists(file_id_, datasetname.c_str(), H5P_DEFAULT); if (exists) { // Open the dataset - hid_t dataset_id = H5Dopen2(file_id_, "/Atomic_names", H5P_DEFAULT); + hid_t dataset_id + = H5Dopen2(file_id_, datasetname.c_str(), H5P_DEFAULT); if (dataset_id < 0) { MGMOL_HDFRESTART_FAIL("H5Dopen2 failed!!!"); @@ -2577,7 +2053,7 @@ int HDFrestart::readAtomicNames(std::vector& data) int dim = (int)H5Dget_storage_size(dataset_id) / name_length; if (dim == 0) { - if (onpe0) MGMOL_HDFRESTART_FAIL("No names!!!"); + MGMOL_HDFRESTART_FAIL("No names!!!"); return -1; } diff --git a/src/HDFrestart.h b/src/HDFrestart.h index 72c5c1f5..1ed2d080 100644 --- a/src/HDFrestart.h +++ b/src/HDFrestart.h @@ -260,19 +260,12 @@ class HDFrestart template int readData(T* vv, hid_t memspace, hid_t dset_id, const short precision); - // int writeRandomState(unsigned short int rand_state[3]); - // int readRandomState(unsigned short* rand_state); - int readAtomicIDs(std::vector& data); - int readAtomicNLprojIDs(std::vector& data); - int readAtomicNumbers(std::vector& data); - int readAtomicNames(std::vector& data); - int readAtomicPositions(std::vector& data); - int readAtomicVelocities(std::vector& data); - int readLockedAtomNames(std::vector& data); + int readAtomicData(std::string 
datasetname, std::vector& data); + int readAtomicData(std::string datasetname, std::vector& data); + int readAtomicData(std::string datasetname, std::vector& data); int readRestartRandomStates(std::vector& data); int readOldCenter(std::vector& data, int i); int readOldCenterOnMesh(std::vector& data, int i); - int readGidsList(std::vector& data); void addDateToFilename(); void addMDTime2File(const float run_time); diff --git a/src/Ions.cc b/src/Ions.cc index ea69a870..913a0f55 100644 --- a/src/Ions.cc +++ b/src/Ions.cc @@ -700,13 +700,12 @@ void Ions::readLockedAtomNames(HDFrestart& h5f_file) if (dim == 0) return; std::vector data; - h5f_file.readLockedAtomNames(data); + std::string datasetname("/LockedAtomsNames"); + h5f_file.readAtomicData(datasetname, data); - for (std::vector::const_iterator i = data.begin(), - end = data.end(); - i != end; ++i) + for (auto& i : data) { - lockAtom(*i); + lockAtom(i); } } @@ -874,13 +873,17 @@ void Ions::initFromRestartFile(HDFrestart& h5_file) setupListIonsBoundaries(rmax); std::vector at_numbers; - h5_file.readAtomicNumbers(at_numbers); + std::string datasetname("/Atomic_numbers"); + h5_file.readAtomicData(datasetname, at_numbers); std::vector at_indexes; - int nidxs = h5_file.readAtomicIDs(at_indexes); + std::string datasetname_indexes("/Atomic_IDs"); + int nidxs = h5_file.readAtomicData(datasetname_indexes, at_indexes); std::vector at_nlprojIds; - int npids = h5_file.readAtomicNLprojIDs(at_nlprojIds); + std::string datasetname_nlprojIds("/AtomicNLproj_IDs"); + int npids = h5_file.readAtomicData(datasetname_nlprojIds, at_nlprojIds); std::vector at_names; - h5_file.readAtomicNames(at_names); + std::string datasetname_names("/Atomic_names"); + h5_file.readAtomicData(datasetname_names, at_names); if (onpe0 && ct.verbose > 2) { std::cout << "HDF file: at nb=" << at_numbers.size() << std::endl; @@ -967,7 +970,8 @@ void Ions::readRestartPositions(HDFrestart& h5_file) (*MPIdata::sout) << "Read ionic positions from hdf5 file" << 
std::endl; std::vector data; - h5_file.readAtomicPositions(data); + std::string datasetname("/Ionic_positions"); + h5_file.readAtomicData(datasetname, data); int i = 0; for (auto& ion : local_ions_) @@ -1142,7 +1146,8 @@ void Ions::readRestartVelocities(HDFrestart& h5_file) << std::endl; std::vector data; - h5_file.readAtomicVelocities(data); + std::string datasetname("/Ionic_velocities"); + h5_file.readAtomicData(datasetname, data); int i = 0; for (auto& ion : local_ions_) diff --git a/src/LocalizationRegions.cc b/src/LocalizationRegions.cc index 99cda54e..60f28301 100644 --- a/src/LocalizationRegions.cc +++ b/src/LocalizationRegions.cc @@ -1726,7 +1726,8 @@ void LocalizationRegions::writeOldCenter(HDFrestart& h5f_file, int i) void LocalizationRegions::setupOldCenters(HDFrestart& h5_file) { vector gids; - h5_file.readGidsList(gids); + std::string datasetname("GidsList"); + h5_file.readAtomicData(datasetname, gids); map gids_map; for (unsigned int i = 0; i < gids.size(); i++) From c690bc5bb8998a1202ebc22a16ebb3ffb80346a6 Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Mon, 17 Feb 2025 07:19:17 -0500 Subject: [PATCH 18/99] Fix and test restart single hdf5 file (#305) --- src/Control.h | 2 + src/HDFrestart.cc | 28 ++++++++--- src/Ions.cc | 20 ++++++-- src/restart.cc | 2 +- tests/CMakeLists.txt | 8 +++ tests/DMandEnergyAndForces/test.py | 3 ++ tests/HDF5single/h2o.xyz | 6 +++ tests/HDF5single/md.cfg | 34 +++++++++++++ tests/HDF5single/mgmol.cfg | 31 ++++++++++++ tests/HDF5single/test.py | 75 ++++++++++++++++++++++++++++ tests/RestartEnergyAndForces/test.py | 5 +- tests/WFEnergyAndForces/test.py | 3 ++ 12 files changed, 203 insertions(+), 14 deletions(-) create mode 100644 tests/HDF5single/h2o.xyz create mode 100644 tests/HDF5single/md.cfg create mode 100644 tests/HDF5single/mgmol.cfg create mode 100755 tests/HDF5single/test.py diff --git a/src/Control.h b/src/Control.h index b3401c1d..bd4a7d96 100644 --- a/src/Control.h +++ b/src/Control.h @@ -371,6 +371,8 @@ 
class Control // 10 or larger means CG, otherwise MG V-cycles bool MGPoissonSolver() { return (diel_flag_ / 10 == 0); } + bool LangevinThermostat() { return (thermostat_type == 1); } + // // data // diff --git a/src/HDFrestart.cc b/src/HDFrestart.cc index 0322bfea..0fac518e 100644 --- a/src/HDFrestart.cc +++ b/src/HDFrestart.cc @@ -457,7 +457,8 @@ HDFrestart::HDFrestart(const std::string& filename, const pb::PEenv& pes, verbosity_ = 0; closed_ = false; - //(*MPIdata::sout)<<"HDFrestart::HDFrestart(), filename="<& data) // send data to inactive PEs if (gather_data_x_) gatherDataXdir(data); + if (useHdf5p()) + { + data.erase(std::remove(data.begin(), data.end(), -1), data.end()); + } + return 0; } @@ -1928,7 +1934,7 @@ int HDFrestart::readAtomicData( std::string datasetname, std::vector& data) { if (onpe0) - (*MPIdata::sout) << "Read ionic positions from hdf5 file" << std::endl; + (*MPIdata::sout) << "Read atomic data from hdf5 file" << std::endl; if (active_) { @@ -1968,6 +1974,10 @@ int HDFrestart::readAtomicData( } } + if (useHdf5p()) + { + data.erase(std::remove(data.begin(), data.end(), 1e+32), data.end()); + } if (gather_data_x_) gatherDataXdir(data); return 0; @@ -2094,6 +2104,11 @@ int HDFrestart::readAtomicData( data.push_back(t); } + if (useHdf5p()) + { + data.erase(std::remove(data.begin(), data.end(), ""), data.end()); + } + return 0; } @@ -2128,7 +2143,6 @@ int HDFrestart::readRestartRandomStates(std::vector& data) dim = (int)H5Dget_storage_size(dataset_id) / sizeof(unsigned short); } - if (dim > 0) { data.resize(dim); @@ -2148,12 +2162,10 @@ int HDFrestart::readRestartRandomStates(std::vector& data) } if (!data.empty()) - if (data[0] != data[0]) + if (std::isnan(data[0])) { MGMOL_HDFRESTART_FAIL( - "ERROR: HDFrestart::readRestartRandomStates() " - "--- data[0]=" - << data[0]); + "readRestartRandomStates() is NaN"); return -2; } } diff --git a/src/Ions.cc b/src/Ions.cc index 913a0f55..e93d82ea 100644 --- a/src/Ions.cc +++ b/src/Ions.cc @@ -911,8 +911,10 
@@ void Ions::initFromRestartFile(HDFrestart& h5_file) assert(at_numbers.size() == at_nlprojIds.size()); num_ions_ = at_names.size(); - mmpi.allreduce(&num_ions_, 1, MPI_SUM); - + if (!h5_file.useHdf5p()) + { + mmpi.allreduce(&num_ions_, 1, MPI_SUM); + } if (onpe0 && ct.verbose > 0) { (*MPIdata::sout) << "Ions::setFromRestartFile(), read " << num_ions_ @@ -947,9 +949,21 @@ void Ions::initFromRestartFile(HDFrestart& h5_file) } readRestartPositions(h5_file); readRestartVelocities(h5_file); - readRestartRandomStates(h5_file); + if (ct.LangevinThermostat()) readRestartRandomStates(h5_file); readLockedAtomNames(h5_file); + // remove atoms from local list if not local + for (std::vector::iterator it = local_ions_.begin(); + it != local_ions_.end();) + { + double p[3]; + (*it)->getPosition(p); + if (!inLocalIons(p[0], p[1], p[2])) + it = local_ions_.erase(it); + else + ++it; + } + // rescale all velocities by factor specified in input rescaleVelocities(ct.VelocityScalingFactor()); diff --git a/src/restart.cc b/src/restart.cc index 2fa76ecb..247cca06 100644 --- a/src/restart.cc +++ b/src/restart.cc @@ -117,7 +117,7 @@ int MGmol::write_hdf5(HDFrestart& h5f_file, ions.writeAtomicIDs(h5f_file); ions.writeAtomicNLprojIDs(h5f_file); ions.writePositions(h5f_file); - ions.writeRandomStates(h5f_file); + if (ct.LangevinThermostat()) ions.writeRandomStates(h5f_file); ions.writeVelocities(h5f_file); ions.writeForces(h5f_file); diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index b29c1aad..baf1c451 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -504,6 +504,14 @@ add_test(NAME testMD_D72 ${CMAKE_CURRENT_SOURCE_DIR}/MD_D72/coords.in ${CMAKE_CURRENT_SOURCE_DIR}/MD_D72/lrs.in ${CMAKE_CURRENT_SOURCE_DIR}/../potentials) +add_test(NAME testHDF5single + COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/HDF5single/test.py + ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 4 ${MPIEXEC_PREFLAGS} + ${CMAKE_CURRENT_BINARY_DIR}/../src/mgmol-opt + 
${CMAKE_CURRENT_SOURCE_DIR}/HDF5single/mgmol.cfg + ${CMAKE_CURRENT_SOURCE_DIR}/HDF5single/md.cfg + ${CMAKE_CURRENT_SOURCE_DIR}/HDF5single/h2o.xyz + ${CMAKE_CURRENT_SOURCE_DIR}/../potentials) add_test(NAME testMD_MVP COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/MD_MVP/test.py ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 4 ${MPIEXEC_PREFLAGS} diff --git a/tests/DMandEnergyAndForces/test.py b/tests/DMandEnergyAndForces/test.py index c36f0767..b21e6e72 100755 --- a/tests/DMandEnergyAndForces/test.py +++ b/tests/DMandEnergyAndForces/test.py @@ -3,6 +3,7 @@ import os import subprocess import string +import shutil print("Test DMandEnergyAndForces...") @@ -33,6 +34,8 @@ output = subprocess.check_output(command,shell=True) lines=output.split(b'\n') +shutil.rmtree('WF') + #analyse output energies=[] for line in lines: diff --git a/tests/HDF5single/h2o.xyz b/tests/HDF5single/h2o.xyz new file mode 100644 index 00000000..d5171c8b --- /dev/null +++ b/tests/HDF5single/h2o.xyz @@ -0,0 +1,6 @@ +3 + +O 0.00 0.00 0.00 +H -0.76 0.59 0.00 +H 0.76 0.59 0.00 + diff --git a/tests/HDF5single/md.cfg b/tests/HDF5single/md.cfg new file mode 100644 index 00000000..1ff2adab --- /dev/null +++ b/tests/HDF5single/md.cfg @@ -0,0 +1,34 @@ +verbosity=3 +xcFunctional=PBE +FDtype=4th +[Mesh] +nx=48 +ny=48 +nz=48 +[Domain] +ox=-4.5 +oy=-4.5 +oz=-4.5 +lx=9. +ly=9. +lz=9. +[Potentials] +pseudopotential=pseudo.O_ONCV_PBE_SG15 +pseudopotential=pseudo.H_ONCV_PBE_SG15 +[Run] +type=MD +[MD] +num_steps=5 +dt=40. +[Quench] +max_steps=24 +atol=1.e-8 +[Restart] +input_level=4 +input_filename=WF +input_type=single_file +output_level=4 +output_filename=WF_MD +output_type=single_file +[Coloring] +scope=global diff --git a/tests/HDF5single/mgmol.cfg b/tests/HDF5single/mgmol.cfg new file mode 100644 index 00000000..4dba942a --- /dev/null +++ b/tests/HDF5single/mgmol.cfg @@ -0,0 +1,31 @@ +verbosity=2 +xcFunctional=PBE +FDtype=4th +[Mesh] +nx=48 +ny=48 +nz=48 +[Domain] +ox=-4.5 +oy=-4.5 +oz=-4.5 +lx=9. +ly=9. +lz=9. 
+[Potentials] +pseudopotential=pseudo.O_ONCV_PBE_SG15 +pseudopotential=pseudo.H_ONCV_PBE_SG15 +[Run] +type=QUENCH +[Quench] +max_steps=120 +atol=1.e-8 +[Orbitals] +initial_type=Random +initial_width=1.5 +[Restart] +output_level=4 +output_filename=WF +output_type=single_file +[Coloring] +scope=global diff --git a/tests/HDF5single/test.py b/tests/HDF5single/test.py new file mode 100755 index 00000000..080ee0ba --- /dev/null +++ b/tests/HDF5single/test.py @@ -0,0 +1,75 @@ +#!/usr/bin/env python +import sys +import os +import subprocess +import string + +print("Test test_rho_restart...") + +nargs=len(sys.argv) + +mpicmd = sys.argv[1]+" "+sys.argv[2]+" "+sys.argv[3] +for i in range(4,nargs-7): + mpicmd = mpicmd + " "+sys.argv[i] +print("MPI run command: {}".format(mpicmd)) + +mgmol_exe = sys.argv[nargs-5] +input1 = sys.argv[nargs-4] +input2 = sys.argv[nargs-3] +coords = sys.argv[nargs-2] +print("coordinates file: %s"%coords) + +#create links to potentials files +dst1 = 'pseudo.H_ONCV_PBE_SG15' +src1 = sys.argv[-1] + '/' + dst1 + +dst2 = 'pseudo.O_ONCV_PBE_SG15' +src2 = sys.argv[-1] + '/' + dst2 + +if not os.path.exists(dst1): + print("Create link to %s"%dst1) + os.symlink(src1, dst1) + +if not os.path.exists(dst2): + print("Create link to %s"%dst2) + os.symlink(src2, dst2) + +#run mgmol to generate initial ground state +command = "{} {} -c {} -i {}".format(mpicmd,mgmol_exe,input1,coords) +print("Run command: {}".format(command)) + +output = subprocess.check_output(command,shell=True) +lines=output.split(b'\n') + +#run MD +command = "{} {} -c {} -i {}".format(mpicmd,mgmol_exe,input2,coords) +print("Run command: {}".format(command)) +output = subprocess.check_output(command,shell=True) +lines=output.split(b'\n') + +os.remove('WF') + +print("Check energy conservation...") +tol = 1.e-4 +energy = 0. 
+count = 0 +for line in lines: + if line.count(b'Total') and line.count(b'Energy'): + print(line) + count=count+1 + words=line.split() + + energy=eval(words[2]) + if count==1: + first_energy=energy + + if count>1 and abs(energy-first_energy)>tol: + print("ERROR Energy = {} != {}".format(energy,first_energy)) + sys.exit(1) + +if count<4: + print("ERROR needs 4 energy values for checking conservation!") + sys.exit(1) + +print("Test SUCCESSFUL!") +sys.exit(0) diff --git a/tests/RestartEnergyAndForces/test.py b/tests/RestartEnergyAndForces/test.py index 349434e8..b62d39f8 100755 --- a/tests/RestartEnergyAndForces/test.py +++ b/tests/RestartEnergyAndForces/test.py @@ -3,6 +3,7 @@ import os import subprocess import string +import shutil print("Test RestartEnergyAndForces...") @@ -54,14 +55,14 @@ ref_energy=energy break -#sys.exit(0) - #run test command = "{} {} -c {} -i {}".format(mpicmd,test_exe,input2,coords) print("Run command: {}".format(command)) output = subprocess.check_output(command,shell=True) lines=output.split(b'\n') +shutil.rmtree('WF') + test_energy=1.e18 for line in lines: if line.count(b'%%'): diff --git a/tests/WFEnergyAndForces/test.py b/tests/WFEnergyAndForces/test.py index 45420ddf..6cac8c93 100755 --- a/tests/WFEnergyAndForces/test.py +++ b/tests/WFEnergyAndForces/test.py @@ -3,6 +3,7 @@ import os import subprocess import string +import shutil print("Test WFEnergyAndForces...") @@ -37,6 +38,8 @@ output = subprocess.check_output(command,shell=True) lines=output.split(b'\n') +shutil.rmtree('WF') + #analyse output energies=[] for line in lines: From 9f1fc20a63eb19e229a9f502b30d3271b264a3fc Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Mon, 17 Feb 2025 07:19:37 -0500 Subject: [PATCH 19/99] Save Hartree potential for write in restart file (#306) --- src/Potentials.cc | 16 ++++++++++++---- src/Potentials.h | 23 +++++++++++++++++------ src/md.cc | 3 +++ src/restart.cc | 8 ++++++++ 4 files changed, 40 insertions(+), 10 deletions(-) diff --git 
a/src/Potentials.cc b/src/Potentials.cc index bed005db..abe277f1 100644 --- a/src/Potentials.cc +++ b/src/Potentials.cc @@ -75,10 +75,13 @@ Potentials::Potentials() dv_.resize(size_); - memset(&vepsilon_[0], 0, size_ * sizeof(POTDTYPE)); - memset(&vh_rho_[0], 0, size_ * sizeof(POTDTYPE)); - memset(&vxc_rho_[0], 0, size_ * sizeof(POTDTYPE)); - memset(&v_ext_[0], 0, size_ * sizeof(POTDTYPE)); + vh_rho_backup_.resize(size_); + + memset(vepsilon_.data(), 0, size_ * sizeof(POTDTYPE)); + memset(vh_rho_.data(), 0, size_ * sizeof(POTDTYPE)); + memset(vxc_rho_.data(), 0, size_ * sizeof(POTDTYPE)); + memset(v_ext_.data(), 0, size_ * sizeof(POTDTYPE)); + memset(vh_rho_backup_.data(), 0, size_ * sizeof(POTDTYPE)); #ifdef HAVE_TRICUBIC vext_tricubic_ = NULL; @@ -596,6 +599,11 @@ void Potentials::axpVcomp(POTDTYPE* v, const double alpha) LinearAlgebraUtils::MPaxpy(size_, alpha, &v_comp_[0], v); } +void Potentials::backupVh() +{ + memcpy(vh_rho_backup_.data(), vh_rho_.data(), size_ * sizeof(POTDTYPE)); +} + void Potentials::initializeSupersampledRadialDataOnMesh( const Vector3D& position, const Species& sp) { diff --git a/src/Potentials.h b/src/Potentials.h index 970cf95d..71819a75 100644 --- a/src/Potentials.h +++ b/src/Potentials.h @@ -76,6 +76,11 @@ class Potentials std::vector dv_; + /*! 
+ * Backpup copy of Hartree potential to save previous state + */ + std::vector vh_rho_backup_; + int itindex_vxc_; int itindex_vh_; @@ -138,14 +143,15 @@ class Potentials double scf_dvrho(void) const { return scf_dvrho_; } double scf_dv(void) const { return scf_dv_; } - POTDTYPE* vtot() { return &vtot_[0]; } - POTDTYPE* vh_rho() { return &vh_rho_[0]; } - RHODTYPE* rho_comp() { return &rho_comp_[0]; } + POTDTYPE* vtot() { return vtot_.data(); } + POTDTYPE* vh_rho() { return vh_rho_.data(); } + RHODTYPE* rho_comp() { return rho_comp_.data(); } const std::vector& vnuc() const { return v_nuc_; } - POTDTYPE* vnuc() { return &v_nuc_[0]; } - POTDTYPE* vext() { return &v_ext_[0]; } - POTDTYPE* vepsilon() { return &vepsilon_[0]; } + POTDTYPE* vnuc() { return v_nuc_.data(); } + POTDTYPE* vext() { return v_ext_.data(); } + POTDTYPE* vepsilon() { return vepsilon_.data(); } + POTDTYPE* vh_rho_backup() { return vh_rho_backup_.data(); } void axpVcompToVh(const double alpha); void axpVcomp(POTDTYPE* v, const double alpha); @@ -196,6 +202,11 @@ class Potentials void initBackground(Ions& ions); void addBackgroundToRhoComp(); + /*! 
+ * Save current Hartree potential into backup array + */ + void backupVh(); + #ifdef HAVE_TRICUBIC void readExternalPot(const string filename, const char type); void setupVextTricubic(); diff --git a/src/md.cc b/src/md.cc index b50618c8..dfb4dfe6 100644 --- a/src/md.cc +++ b/src/md.cc @@ -54,6 +54,9 @@ void MGmol::moveVnuc(Ions& ions) Potentials& pot = hamiltonian_->potential(); + // save Hartree potential internally + pot.backupVh(); + // Update items that change when the ionic coordinates change pot.axpVcompToVh(1.); initNuc(ions); diff --git a/src/restart.cc b/src/restart.cc index 247cca06..30bdad73 100644 --- a/src/restart.cc +++ b/src/restart.cc @@ -141,6 +141,14 @@ int MGmol::write_hdf5(HDFrestart& h5f_file, pot.vh_rho(), "Hartree", &ll[0], &origin[0]); if (ierr < 0) return ierr; + if (ct.AtomsDynamic() == AtomsDynamicType::MD) + { + // Write hartree potential before extrapolation + ierr = h5f_file.write_1func_hdf5( + pot.vh_rho_backup(), "Preceding_Hartree", &ll[0], &origin[0]); + if (ierr < 0) return ierr; + } + // Write if (ct.diel) { From 8a1cf0530f2a2246dc62fca1db5b145919330101 Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Wed, 19 Feb 2025 17:11:48 -0500 Subject: [PATCH 20/99] Speed-up recently added tests (#309) * simply use a smaller domain/mesh --- tests/DMandEnergyAndForces/mgmol.cfg | 18 +++++++++--------- tests/EnergyAndForces/mgmol.cfg | 18 +++++++++--------- tests/RestartEnergyAndForces/h2o.xyz | 8 ++++---- tests/RestartEnergyAndForces/mgmol.cfg | 18 +++++++++--------- tests/RestartEnergyAndForces/restart.cfg | 18 +++++++++--------- 5 files changed, 40 insertions(+), 40 deletions(-) diff --git a/tests/DMandEnergyAndForces/mgmol.cfg b/tests/DMandEnergyAndForces/mgmol.cfg index 5278f40b..e7712706 100644 --- a/tests/DMandEnergyAndForces/mgmol.cfg +++ b/tests/DMandEnergyAndForces/mgmol.cfg @@ -2,16 +2,16 @@ verbosity=2 xcFunctional=LDA FDtype=4th [Mesh] -nx=64 -ny=64 -nz=64 +nx=48 +ny=48 +nz=48 [Domain] -ox=-6. -oy=-6. -oz=-6. -lx=12. 
-ly=12. -lz=12. +ox=-4.5 +oy=-4.5 +oz=-4.5 +lx=9. +ly=9. +lz=9. [Potentials] pseudopotential=pseudo.N_ONCVPSP_LDA [Run] diff --git a/tests/EnergyAndForces/mgmol.cfg b/tests/EnergyAndForces/mgmol.cfg index cf23889e..d1299db9 100644 --- a/tests/EnergyAndForces/mgmol.cfg +++ b/tests/EnergyAndForces/mgmol.cfg @@ -2,16 +2,16 @@ verbosity=1 xcFunctional=LDA FDtype=Mehrstellen [Mesh] -nx=64 -ny=64 -nz=64 +nx=48 +ny=48 +nz=48 [Domain] -ox=-6. -oy=-6. -oz=-6. -lx=12. -ly=12. -lz=12. +ox=-4.5 +oy=-4.5 +oz=-4.5 +lx=9. +ly=9. +lz=9. [Potentials] pseudopotential=pseudo.N_ONCVPSP_LDA [Run] diff --git a/tests/RestartEnergyAndForces/h2o.xyz b/tests/RestartEnergyAndForces/h2o.xyz index cdc906f6..d5171c8b 100644 --- a/tests/RestartEnergyAndForces/h2o.xyz +++ b/tests/RestartEnergyAndForces/h2o.xyz @@ -1,6 +1,6 @@ 3 -https://pubchem.ncbi.nlm.nih.gov/compound/Water -O 2.5369 -0.1550 0.0 -H 3.0739 0.1550 0.0 -H 2.0000 0.1550 0.0 + +O 0.00 0.00 0.00 +H -0.76 0.59 0.00 +H 0.76 0.59 0.00 diff --git a/tests/RestartEnergyAndForces/mgmol.cfg b/tests/RestartEnergyAndForces/mgmol.cfg index e590f810..b96f3d9c 100644 --- a/tests/RestartEnergyAndForces/mgmol.cfg +++ b/tests/RestartEnergyAndForces/mgmol.cfg @@ -2,16 +2,16 @@ verbosity=2 xcFunctional=PBE FDtype=4th [Mesh] -nx=64 -ny=64 -nz=64 +nx=48 +ny=48 +nz=48 [Domain] -ox=-3.4 -oy=-6.4 -oz=-6.4 -lx=12.8 -ly=12.8 -lz=12.8 +ox=-4.5 +oy=-4.5 +oz=-4.5 +lx=9. +ly=9. +lz=9. [Potentials] pseudopotential=pseudo.O_ONCV_PBE_SG15 pseudopotential=pseudo.H_ONCV_PBE_SG15 diff --git a/tests/RestartEnergyAndForces/restart.cfg b/tests/RestartEnergyAndForces/restart.cfg index 99bc77d8..280778e9 100644 --- a/tests/RestartEnergyAndForces/restart.cfg +++ b/tests/RestartEnergyAndForces/restart.cfg @@ -2,16 +2,16 @@ verbosity=2 xcFunctional=PBE FDtype=4th [Mesh] -nx=64 -ny=64 -nz=64 +nx=48 +ny=48 +nz=48 [Domain] -ox=-3.4 -oy=-6.4 -oz=-6.4 -lx=12.8 -ly=12.8 -lz=12.8 +ox=-4.5 +oy=-4.5 +oz=-4.5 +lx=9. +ly=9. +lz=9. 
[Potentials] pseudopotential=pseudo.O_ONCV_PBE_SG15 pseudopotential=pseudo.H_ONCV_PBE_SG15 From 8a2687965d538179129ea9aed22d93129a7ac1c6 Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Thu, 20 Feb 2025 11:23:03 -0500 Subject: [PATCH 21/99] Encapsulate some functions in Potentials (#310) * some functions were called at random places and confusing --- src/Electrostatic.cc | 4 +- src/Electrostatic.h | 2 +- src/Forces.cc | 3 +- src/MGmol.cc | 18 +--- src/Poisson.h | 5 +- src/Potentials.cc | 203 ++++++++++++++++++++++++++++++------------- src/Potentials.h | 25 +++--- src/restart.cc | 49 +---------- 8 files changed, 165 insertions(+), 144 deletions(-) diff --git a/src/Electrostatic.cc b/src/Electrostatic.cc index 6ac4fef3..aba09d07 100644 --- a/src/Electrostatic.cc +++ b/src/Electrostatic.cc @@ -65,7 +65,7 @@ Electrostatic::~Electrostatic() if (grhoc_ != nullptr) delete grhoc_; } -void Electrostatic::setupInitialVh(const POTDTYPE* const vh_init) +void Electrostatic::setupInitialVh(const std::vector& vh_init) { poisson_solver_->set_vh(vh_init); @@ -153,7 +153,7 @@ void Electrostatic::setupPB( // initialize vh with last trial solution pb::GridFunc gf_vh(*pbGrid_, bc_[0], bc_[1], bc_[2]); - gf_vh.assign(pot.vh_rho()); + gf_vh.assign((pot.vh_rho()).data()); poisson_solver_->set_vh(gf_vh); } diff --git a/src/Electrostatic.h b/src/Electrostatic.h index 072ce0cc..9beee899 100644 --- a/src/Electrostatic.h +++ b/src/Electrostatic.h @@ -58,7 +58,7 @@ class Electrostatic template void computeVh(const pb::GridFunc& vhinit, const Ions& ions, Rho& rho, Potentials& pot); - void setupInitialVh(const POTDTYPE* const); + void setupInitialVh(const std::vector&); void setupInitialVh(const pb::GridFunc&); template void computeVhRho(Rho& rho); diff --git a/src/Forces.cc b/src/Forces.cc index 9ab19f6a..04e6ffd7 100644 --- a/src/Forces.cc +++ b/src/Forces.cc @@ -272,9 +272,10 @@ void Forces::get_loc_proj(RHODTYPE* rho, const int numpt = mymesh->numpt(); Potentials& pot = 
hamiltonian_->potential(); + const std::vector& vh_rho(pot.vh_rho()); for (int idx = 0; idx < numpt; idx++) { - const double vhrho = pot.vh_rho(idx); + const double vhrho = vh_rho[idx]; for (short dir = 0; dir < 3; dir++) { double* lproj = &(loc_proj[dir * NPTS]); diff --git a/src/MGmol.cc b/src/MGmol.cc index 681efacc..649f2c39 100644 --- a/src/MGmol.cc +++ b/src/MGmol.cc @@ -319,8 +319,6 @@ int MGmol::initial() mmpi.barrier(); if (ct.verbose > 0) current_orbitals_->printChromaticNumber(os_); - pot.initBackground(*ions_); - // Random initialization of the wavefunctions if (ct.restart_info <= 2) { @@ -834,23 +832,9 @@ void MGmol::initNuc(Ions& ions) Potentials& pot = hamiltonian_->potential(); + // initialize poentials based on ionic positions and their species pot.initialize(ions); - // Check compensating charges - double comp_rho = getCharge(pot.rho_comp()); - - if (onpe0 && ct.verbose > 1) - { - os_ << std::setprecision(8) << std::fixed - << " Charge of rhoc: " << comp_rho << std::endl; - } - -#if 1 - pot.rescaleRhoComp(); -#endif - - pot.addBackgroundToRhoComp(); - electrostat_->setupRhoc(pot.rho_comp()); if (onpe0 && ct.verbose > 3) os_ << " initNuc done" << std::endl; diff --git a/src/Poisson.h b/src/Poisson.h index 87107542..ba5daa39 100644 --- a/src/Poisson.h +++ b/src/Poisson.h @@ -71,7 +71,10 @@ class Poisson : public PoissonInterface virtual void set_rhod(pb::GridFunc* /*rhod*/){}; void set_vh(const pb::GridFunc& vh) { (*vh_) = vh; }; - void set_vh(const POTDTYPE* const vh) { vh_->assign(vh, 'd'); }; + void set_vh(const std::vector& vh) + { + vh_->assign(vh.data(), 'd'); + }; void resetVh() { vh_->resetData(); } void set_vepsilon(const POTDTYPE* const vepsilon) { diff --git a/src/Potentials.cc b/src/Potentials.cc index abe277f1..a0ff3f1a 100644 --- a/src/Potentials.cc +++ b/src/Potentials.cc @@ -25,7 +25,6 @@ #include "mputils.h" #include -using namespace std; // unit conversion factor Ha -> Ry const double ha2ry = 2.; @@ -92,7 +91,7 @@ void 
Potentials::initWithVnuc() { assert(size_ > 0); if (verbosity_level_ > 2 && onpe0) - (*MPIdata::sout) << "Potentials::initWithVnuc()" << endl; + (*MPIdata::sout) << "Potentials::initWithVnuc()" << std::endl; itindex_vxc_ = 0; itindex_vh_ = 0; int ione = 1; @@ -122,7 +121,8 @@ double Potentials::min() const return vmin; } -void Potentials::evalNormDeltaVtotRho(const vector>& rho) +void Potentials::evalNormDeltaVtotRho( + const std::vector>& rho) { Mesh* mymesh = Mesh::instance(); const pb::Grid& mygrid = mymesh->grid(); @@ -141,14 +141,14 @@ void Potentials::evalNormDeltaVtotRho(const vector>& rho) mmpi.allreduce(&scf_dvrho_, 1, MPI_SUM); } -double Potentials::update(const vector>& rho) +double Potentials::update(const std::vector>& rho) { assert(itindex_vxc_ >= 0); assert(itindex_vh_ >= 0); assert(itindex_vxc_ == itindex_vh_); if (verbosity_level_ > 2 && onpe0) - (*MPIdata::sout) << "Potentials::update(rho)" << endl; + (*MPIdata::sout) << "Potentials::update(rho)" << std::endl; int ione = 1; Mesh* mymesh = Mesh::instance(); const pb::PEenv& myPEenv = mymesh->peenv(); @@ -185,7 +185,7 @@ double Potentials::update(const vector>& rho) = MPI_Allreduce(&dvdot, &sum, 1, MPI_DOUBLE, MPI_SUM, myPEenv.comm()); if (rc != MPI_SUCCESS) { - cout << "MPI_Allreduce double sum failed!!!" << endl; + std::cout << "MPI_Allreduce double sum failed!!!" 
<< std::endl; MPI_Abort(myPEenv.comm(), 2); } dvdot = sum; @@ -202,7 +202,7 @@ void Potentials::update(const double mix) assert(itindex_vxc_ == itindex_vh_); #ifdef DEBUG - if (onpe0) (*MPIdata::sout) << "Potentials::update(mix)" << endl; + if (onpe0) (*MPIdata::sout) << "Potentials::update(mix)" << std::endl; #endif // int ione=1; double potmix = mix; @@ -210,13 +210,13 @@ void Potentials::update(const double mix) size_, potmix, &dv_[0], &vtot_[0]); } -double Potentials::delta_v(const vector>& rho) +double Potentials::delta_v(const std::vector>& rho) { assert(itindex_vxc_ == itindex_vh_); assert(size_ > 0); if (verbosity_level_ > 2 && onpe0) - (*MPIdata::sout) << "Potentials::delta_v()" << endl; + (*MPIdata::sout) << "Potentials::delta_v()" << std::endl; int ione = 1; Mesh* mymesh = Mesh::instance(); @@ -253,7 +253,7 @@ double Potentials::delta_v(const vector>& rho) = MPI_Allreduce(&dvdot, &sum, 1, MPI_DOUBLE, MPI_SUM, myPEenv.comm()); if (rc != MPI_SUCCESS) { - cout << "MPI_Allreduce double sum failed!!!" << endl; + std::cout << "MPI_Allreduce double sum failed!!!" 
<< std::endl; MPI_Abort(myPEenv.comm(), 2); } dvdot = sum; @@ -266,7 +266,7 @@ double Potentials::delta_v(const vector>& rho) } // in Ry -void Potentials::getVofRho(vector& vrho) const +void Potentials::getVofRho(std::vector& vrho) const { vrho.resize(size_); int ione = 1; @@ -283,7 +283,7 @@ void Potentials::getVofRho(vector& vrho) const // type: // 2->text // 3->binary -void Potentials::readExternalPot(const string filename, const char type) +void Potentials::readExternalPot(const std::string filename, const char type) { assert(type == 2 || type == 3); @@ -304,27 +304,27 @@ void Potentials::readExternalPot(const string filename, const char type) { from->seekg(0, ios::end); const int length = from->tellg(); - (*MPIdata::sout) << "Length file = " << length << endl; + (*MPIdata::sout) << "Length file = " << length << std::endl; from->seekg(0, ios::beg); if (length <= 0) { (*MPIdata::serr) - << "ERROR Potential: file length <=0!!!!" << endl; + << "ERROR Potential: file length <=0!!!!" << std::endl; mmpi.abort(); } } } if (!from) { - (*MPIdata::serr) << " Cannot open file " << filename << endl; + (*MPIdata::serr) << " Cannot open file " << filename << std::endl; mmpi.abort(); } if (onpe0) { (*MPIdata::sout) << "Potentials::read_ExternalPot(), filename=" - << filename << endl; - if (type == 2) (*MPIdata::sout) << "text file..." << endl; - if (type == 3) (*MPIdata::sout) << "binary file..." << endl; + << filename << std::endl; + if (type == 2) (*MPIdata::sout) << "text file..." << std::endl; + if (type == 3) (*MPIdata::sout) << "binary file..." 
<< std::endl; } // read origin and end of cell (to check compatibility) @@ -348,29 +348,32 @@ void Potentials::readExternalPot(const string filename, const char type) if (onpe0) for (short d = 0; d < 3; d++) { - (*MPIdata::sout) << setprecision(8); + (*MPIdata::sout) << std::setprecision(8); if (fabs(origin[d] - mygrid.origin(d)) > 1.e-3) { (*MPIdata::serr) << "ERROR Potential: Incompatible cell origin in direction " - << d << endl; - (*MPIdata::serr) << "Potential origin=" << origin[d] << endl; - (*MPIdata::serr) << "MGmol origin=" << mygrid.origin(d) << endl; + << d << std::endl; + (*MPIdata::serr) + << "Potential origin=" << origin[d] << std::endl; + (*MPIdata::serr) + << "MGmol origin=" << mygrid.origin(d) << std::endl; (*MPIdata::serr) << "Difference=" << fabs(origin[d] - mygrid.origin(d)) - << endl; + << std::endl; mmpi.abort(); } if (fabs(ll[d] - mygrid.ll(d)) > 1.e-3) { (*MPIdata::serr) << "ERROR Potential: Incompatible cell " "dimension in direction " - << d << endl; - (*MPIdata::serr) << "Potential cell end=" << end[d] << endl; + << d << std::endl; + (*MPIdata::serr) + << "Potential cell end=" << end[d] << std::endl; (*MPIdata::serr) - << "Potential cell dimension=" << ll[d] << endl; + << "Potential cell dimension=" << ll[d] << std::endl; (*MPIdata::serr) - << "MGmol cell dimension=" << mygrid.ll(d) << endl; + << "MGmol cell dimension=" << mygrid.ll(d) << std::endl; mmpi.abort(); } } @@ -392,9 +395,9 @@ void Potentials::readExternalPot(const string filename, const char type) if (nxyz[i] != gdim_[i]) { (*MPIdata::serr) << "Potentials::read_ExternalPot(): dimension " - << i << " incompatible with Grid!!!" << endl; + << i << " incompatible with Grid!!!" 
<< std::endl; (*MPIdata::serr) - << "n=" << nxyz[i] << ", gdim_=" << gdim_[i] << endl; + << "n=" << nxyz[i] << ", gdim_=" << gdim_[i] << std::endl; mmpi.abort(); } @@ -454,7 +457,7 @@ void Potentials::readExternalPot(const string filename, const char type) if (type == 3) { - vector tmp(dim_[2]); + std::vector tmp(dim_[2]); for (int i = 0; i < dim_[0]; i++) { // advance (start-file_index) positions @@ -512,7 +515,8 @@ void Potentials::getGradVext(const double r[3], double dfdr[3]) const vext_tricubic_->getGradient(r, dfdr, comm); } -void Potentials::getValVext(const vector& r, vector& val) const +void Potentials::getValVext( + const std::vector& r, std::vector& val) const { assert(vext_tricubic_ != NULL); @@ -522,21 +526,22 @@ void Potentials::getValVext(const vector& r, vector& val) const } #endif -void Potentials::readAll(vector& sp) +void Potentials::readAll(std::vector& sp) { assert(sp.size() <= pot_filenames_.size()); if (verbosity_level_ > 2 && onpe0) (*MPIdata::sout) << "Potentials::readAll() for " << pot_types_.size() - << " potentials" << endl; + << " potentials" << std::endl; Mesh* mymesh = Mesh::instance(); const pb::Grid& mygrid = mymesh->grid(); double hmin = mygrid.hmin(); if (verbosity_level_ > 2 && onpe0) - (*MPIdata::sout) << "hmin= " << hmin << endl; + (*MPIdata::sout) << "hmin= " << hmin << std::endl; - vector::const_iterator it_filename = pot_filenames_.begin(); - int isp = 0; + std::vector::const_iterator it_filename + = pot_filenames_.begin(); + int isp = 0; while (it_filename != pot_filenames_.end()) { if (pot_types_[isp] == 'n' || pot_types_[isp] == 's' @@ -556,7 +561,7 @@ void Potentials::readAll(vector& sp) #else (*MPIdata::sout) << "ERROR: cannot read external potential " - << " -> need to compile with Tricubic library" << endl; + << " -> need to compile with Tricubic library" << std::endl; #endif } it_filename++; @@ -594,11 +599,6 @@ void Potentials::axpVcompToVh(const double alpha) size_, alpha, &v_comp_[0], &vh_rho_[0]); } -void 
Potentials::axpVcomp(POTDTYPE* v, const double alpha) -{ - LinearAlgebraUtils::MPaxpy(size_, alpha, &v_comp_[0], v); -} - void Potentials::backupVh() { memcpy(vh_rho_backup_.data(), vh_rho_.data(), size_ * sizeof(POTDTYPE)); @@ -792,9 +792,12 @@ void Potentials::initialize(Ions& ions) const pb::Grid& mygrid = mymesh->grid(); const int numpt = mygrid.size(); - memset(&v_comp_[0], 0, numpt * sizeof(POTDTYPE)); - memset(&rho_comp_[0], 0, numpt * sizeof(RHODTYPE)); - memset(&v_nuc_[0], 0, numpt * sizeof(RHODTYPE)); + memset(v_comp_.data(), 0, numpt * sizeof(POTDTYPE)); + memset(rho_comp_.data(), 0, numpt * sizeof(RHODTYPE)); + memset(v_nuc_.data(), 0, numpt * sizeof(RHODTYPE)); + + // Count up the total ionic charge + ionic_charge_ = ions.computeIonicCharge(); char flag_filter = pot_type(0); @@ -805,6 +808,7 @@ void Potentials::initialize(Ions& ions) Vector3D position(ion->position(0), ion->position(1), ion->position(2)); + // initialize rho_comp_, v_comp_, v_nuc_ if (flag_filter == 's') { const int sampleRate = 3; @@ -820,6 +824,13 @@ void Potentials::initialize(Ions& ions) initializeRadialDataOnMesh(position, sp); } } + + // rescale rho_comp_ due to finite mesh effects + rescaleRhoComp(); + + initBackground(); + + addBackgroundToRhoComp(); } void Potentials::rescaleRhoComp() @@ -831,24 +842,33 @@ void Potentials::rescaleRhoComp() const pb::Grid& mygrid = mymesh->grid(); // Check compensating charges - double comp_rho = getCharge(&rho_comp_[0]); + double comp_rho = getCharge(rho_comp_.data()); + if (onpe0 && ct.verbose > 1) + { + std::cout << std::setprecision(8) << std::fixed + << " Charge of rhoc: " << comp_rho << std::endl; + } if (onpe0 && ct.verbose > 1) { - cout << " Rescaling rhoc" << endl; + std::cout << " Rescaling rhoc" << std::endl; } + + // rescale rho_comp_ (initialized by sampling on mesh) + // so that its integral exactly matches ionic_charge_ if (ionic_charge_ > 0.) 
{ const int numpt = mygrid.size(); double t = ionic_charge_ / comp_rho; - LinearAlgebraUtils::MPscal(numpt, t, &rho_comp_[0]); + LinearAlgebraUtils::MPscal( + numpt, t, rho_comp_.data()); // Check new compensating charges - comp_rho = getCharge(&rho_comp_[0]); + comp_rho = getCharge(rho_comp_.data()); } if (onpe0 && ct.verbose > 1) - cout << " Rescaled compensating charges: " << setprecision(8) << fixed - << comp_rho << endl; + std::cout << " Rescaled compensating charges: " << std::setprecision(8) + << std::fixed << comp_rho << std::endl; if (comp_rho < 0.) mmpi.abort(); } @@ -868,26 +888,23 @@ void Potentials::addBackgroundToRhoComp() { if (onpe0) { - cout << setprecision(12) << scientific - << "Add background charge " << background << " to rhoc " - << endl; + std::cout << std::setprecision(12) << std::scientific + << "Add background charge " << background + << " to rhoc " << std::endl; } for (int i = 0; i < numpt; i++) rho_comp_[i] += background; // Check new compensating charges - getCharge(&rho_comp_[0]); + getCharge(rho_comp_.data()); } } } -void Potentials::initBackground(Ions& ions) +void Potentials::initBackground() { Control& ct = *(Control::instance()); - // Count up the total ionic charge - ionic_charge_ = ions.computeIonicCharge(); - // calculation the compensating background charge // for charged supercell calculations background_charge_ = 0.; @@ -898,14 +915,76 @@ void Potentials::initBackground(Ions& ions) } if (onpe0 && ct.verbose > 0) { - cout << "N electrons= " << ct.getNel() << endl; - cout << "ionic charge= " << ionic_charge_ << endl; - cout << "background charge=" << background_charge_ << endl; + std::cout << "N electrons= " << ct.getNel() << std::endl; + std::cout << "ionic charge= " << ionic_charge_ << std::endl; + std::cout << "background charge=" << background_charge_ << std::endl; } if (fabs(background_charge_) < 1.e-10) background_charge_ = 0.; } +int Potentials::read(HDFrestart& h5f_file) +{ + Control& ct = *(Control::instance()); + + 
// Read total potential + h5f_file.read_1func_hdf5(vtot_.data(), "Vtotal"); + + // Read the hartree potential + h5f_file.read_1func_hdf5(vh_rho_.data(), "Hartree"); + + // Read dielectric potential + if (ct.diel) + { + h5f_file.read_1func_hdf5(vepsilon_.data(), "VDielectric"); + } + + return 0; +} + +int Potentials::write(HDFrestart& h5f_file) +{ + Control& ct = *(Control::instance()); + + Mesh* mymesh = Mesh::instance(); + const pb::Grid& mygrid = mymesh->grid(); + + double ll[3] = { mygrid.ll(0), mygrid.ll(1), mygrid.ll(2) }; + double origin[3] = { mygrid.origin(0), mygrid.origin(1), mygrid.origin(2) }; + + // Write total potential + int ierr + = h5f_file.write_1func_hdf5(vtot_.data(), "Vtotal", &ll[0], &origin[0]); + if (ierr < 0) return ierr; + + // Write the hartree potential + ierr = h5f_file.write_1func_hdf5( + vh_rho_.data(), "Hartree", &ll[0], &origin[0]); + if (ierr < 0) return ierr; + + if (ct.AtomsDynamic() == AtomsDynamicType::MD) + { + // Write hartree potential before extrapolation + ierr = h5f_file.write_1func_hdf5( + vh_rho_backup_.data(), "Preceding_Hartree", &ll[0], &origin[0]); + if (ierr < 0) return ierr; + } + + // Write + if (ct.diel) + { + ierr = h5f_file.write_1func_hdf5( + vepsilon_.data(), "VDielectric", &ll[0], &origin[0]); + } + if (ierr < 0) return ierr; + + // Write external potential + ierr = h5f_file.write_1func_hdf5(v_ext_.data(), "Vext", &ll[0], &origin[0]); + if (ierr < 0) return ierr; + + return ierr; +} + template void Potentials::setVxc( const double* const vxc, const int iterativeIndex); template void Potentials::setVxc( diff --git a/src/Potentials.h b/src/Potentials.h index 71819a75..cb0f8e05 100644 --- a/src/Potentials.h +++ b/src/Potentials.h @@ -10,6 +10,7 @@ #ifndef MGMOL_POTENTIALS_H #define MGMOL_POTENTIALS_H +#include "HDFrestart.h" #include "Rho.h" #include "TriCubic.h" @@ -100,6 +101,12 @@ class Potentials void initializeSupersampledRadialDataOnMesh( const Vector3D& position, const Species& sp); + void 
rescaleRhoComp(); + + void addBackgroundToRhoComp(); + + void initBackground(); + public: Potentials(); @@ -139,27 +146,22 @@ class Potentials void turnOnDiel() { diel_ = true; } + int write(HDFrestart& h5f_file); + int read(HDFrestart& h5f_file); + int size() const { return size_; } double scf_dvrho(void) const { return scf_dvrho_; } double scf_dv(void) const { return scf_dv_; } POTDTYPE* vtot() { return vtot_.data(); } - POTDTYPE* vh_rho() { return vh_rho_.data(); } RHODTYPE* rho_comp() { return rho_comp_.data(); } const std::vector& vnuc() const { return v_nuc_; } - POTDTYPE* vnuc() { return v_nuc_.data(); } - POTDTYPE* vext() { return v_ext_.data(); } + const std::vector& vh_rho() const { return vh_rho_; } + POTDTYPE* vepsilon() { return vepsilon_.data(); } - POTDTYPE* vh_rho_backup() { return vh_rho_backup_.data(); } void axpVcompToVh(const double alpha); - void axpVcomp(POTDTYPE* v, const double alpha); - - POTDTYPE vtot(const int i) { return vtot_[i]; } - POTDTYPE vh_rho(const int i) { return vh_rho_[i]; } - POTDTYPE vxc_rho(const int i) { return vxc_rho_[i]; } - POTDTYPE vepsilon(const int i) { return vepsilon_[i]; } bool diel() const { return diel_; } @@ -198,9 +200,6 @@ class Potentials void setVh(const pb::GridFunc& vh, const int iterativeIndex); void initialize(Ions& ions); - void rescaleRhoComp(); - void initBackground(Ions& ions); - void addBackgroundToRhoComp(); /*! 
* Save current Hartree potential into backup array diff --git a/src/restart.cc b/src/restart.cc index 30bdad73..ed7f478c 100644 --- a/src/restart.cc +++ b/src/restart.cc @@ -43,27 +43,11 @@ int MGmol::read_rho_and_pot_hdf5( os_ << "Try to read density and potentials" << std::endl; Potentials& pot = hamiltonian_->potential(); + pot.read(file); - Mesh* mymesh = Mesh::instance(); - const pb::Grid& mygrid = mymesh->grid(); - POTDTYPE* tmp = new POTDTYPE[mygrid.size()]; - - // Read total potential - file.read_1func_hdf5(pot.vtot(), "Vtotal"); - - // Read the hartree potential - file.read_1func_hdf5(tmp, "Hartree"); - pot.setVh(tmp, 0); - - // Read dielectric potential - if (ct.diel) - { - file.read_1func_hdf5(pot.vepsilon(), "VDielectric"); - } // Read the Density rho.readRestart(file); - delete[] tmp; return 0; } @@ -131,41 +115,12 @@ int MGmol::write_hdf5(HDFrestart& h5f_file, if (ct.out_restart_info > 1) { - // Write total potential - int ierr = h5f_file.write_1func_hdf5( - pot.vtot(), "Vtotal", &ll[0], &origin[0]); - if (ierr < 0) return ierr; - - // Write the hartree potential - ierr = h5f_file.write_1func_hdf5( - pot.vh_rho(), "Hartree", &ll[0], &origin[0]); - if (ierr < 0) return ierr; - - if (ct.AtomsDynamic() == AtomsDynamicType::MD) - { - // Write hartree potential before extrapolation - ierr = h5f_file.write_1func_hdf5( - pot.vh_rho_backup(), "Preceding_Hartree", &ll[0], &origin[0]); - if (ierr < 0) return ierr; - } - - // Write - if (ct.diel) - { - ierr = h5f_file.write_1func_hdf5( - pot.vepsilon(), "VDielectric", &ll[0], &origin[0]); - } - if (ierr < 0) return ierr; + int ierr = pot.write(h5f_file); // Write the Density ierr = h5f_file.write_1func_hdf5( &rho[0][0], "Density", &ll[0], &origin[0]); if (ierr < 0) return ierr; - - // Write external potential - ierr - = h5f_file.write_1func_hdf5(pot.vext(), "Vext", &ll[0], &origin[0]); - if (ierr < 0) return ierr; } // Write wavefunctions and old centers. 
From 59b79e75b3f538c1f2ec6a76dc1806fa4d4df059 Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Thu, 20 Feb 2025 12:54:11 -0500 Subject: [PATCH 22/99] Remove confusing 0 in naming restart files (#308) * use added integer only in case of fail/retry --- src/MGmol.cc | 1 - src/md.cc | 9 ++++++--- tests/MD_D72/test.py | 2 +- tests/MD_MVP/md.cfg | 2 +- tests/MD_MVP/test.py | 8 ++++---- tests/ShortSighted/test.py | 2 +- 6 files changed, 13 insertions(+), 11 deletions(-) diff --git a/src/MGmol.cc b/src/MGmol.cc index 649f2c39..439f4fac 100644 --- a/src/MGmol.cc +++ b/src/MGmol.cc @@ -1096,7 +1096,6 @@ void MGmol::dumpRestart() // create restart file std::string filename(std::string(ct.out_restart_file)); - if (ct.out_restart_file_naming_strategy) filename += "0"; HDFrestart h5restartfile( filename, myPEenv, gdim, ct.out_restart_file_type); diff --git a/src/md.cc b/src/md.cc index dfb4dfe6..dc2857eb 100644 --- a/src/md.cc +++ b/src/md.cc @@ -242,9 +242,12 @@ int MGmol::dumpMDrestartFile(OrbitalsType& orbitals, Ions& ions, std::string filename(std::string(ct.out_restart_file)); // add an integer corresponding to attempt number/count // to allow several attempts at creating and writing file - std::stringstream s; - s << count; - filename += s.str(); + if (count > 0) + { + std::stringstream s; + s << count; + filename += s.str(); + } HDFrestart h5file(filename, myPEenv, gdim, ct.out_restart_file_type); diff --git a/tests/MD_D72/test.py b/tests/MD_D72/test.py index 17b70de2..3f98b2bd 100755 --- a/tests/MD_D72/test.py +++ b/tests/MD_D72/test.py @@ -42,7 +42,7 @@ print(line) #run MD -command = "ls -ld snapshot0* | awk '{ print $9 }' | tail -n1" +command = "ls -ld snapshot* | awk '{ print $9 }' | tail -n1" print(command) restart_file = subprocess.check_output(command,shell=True) restart_file=str(restart_file[:-1],'utf-8') diff --git a/tests/MD_MVP/md.cfg b/tests/MD_MVP/md.cfg index 83d5aa4c..e30a8807 100644 --- a/tests/MD_MVP/md.cfg +++ b/tests/MD_MVP/md.cfg @@ -34,6 +34,6 
@@ solver=MVP nb_inner_it=1 mixing=1. [Restart] -input_filename=wave.out +input_filename=snapshotMVP input_level=3 output_level=3 diff --git a/tests/MD_MVP/test.py b/tests/MD_MVP/test.py index 489c9c8c..47b5462d 100755 --- a/tests/MD_MVP/test.py +++ b/tests/MD_MVP/test.py @@ -42,20 +42,20 @@ #run MD for i in range(2): - command = "ls -ld snapshot0* | awk '{ print $9 }' | tail -n1" + command = "ls -ld snapshot* | awk '{ print $9 }' | tail -n1" print(command) restart_file = subprocess.check_output(command,shell=True) restart_file=str(restart_file[:-1],'utf-8') print(restart_file) - os.rename(restart_file, 'wave.out') + os.rename(restart_file, 'snapshotMVP') #run MGmol command = "{} {} -c {} -i {}".format(mpicmd,exe,inp2,coords) output2 = subprocess.check_output(command,shell=True) #remove used restart files - shutil.rmtree('wave.out') + shutil.rmtree('snapshotMVP') #analyse mgmol standard output lines=output2.split(b'\n') @@ -81,7 +81,7 @@ sys.exit(1) #remove last restart files -command = "ls -ld snapshot0* | awk '{ print $9 }' | tail -n1" +command = "ls -ld snapshot* | awk '{ print $9 }' | tail -n1" restart_file = subprocess.check_output(command,shell=True) restart_file=str(restart_file[:-1],'utf-8') shutil.rmtree(restart_file) diff --git a/tests/ShortSighted/test.py b/tests/ShortSighted/test.py index 27da7f7d..68a617aa 100755 --- a/tests/ShortSighted/test.py +++ b/tests/ShortSighted/test.py @@ -42,7 +42,7 @@ print(line) #run MD -command = "ls -ld snapshot0* | awk '{ print $9 }' | tail -n1" +command = "ls -ld snapshot* | awk '{ print $9 }' | tail -n1" print(command) restart_file = subprocess.check_output(command,shell=True) restart_file=str(restart_file[:-1],'utf-8') From 16a818a43bae7906ff9b08f1765898c6b0569ae8 Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Thu, 20 Feb 2025 18:48:12 -0500 Subject: [PATCH 23/99] Add functionalities for extra info in restart file (#312) --- src/HDFrestart.cc | 22 ++++++------ src/HDFrestart.h | 8 ++--- src/Ion.cc | 8 ++++- 
src/Ion.h | 16 +++++---- src/Ions.cc | 87 +++++++++++++++++++++++++++++++++++++++++++++-- src/Ions.h | 5 +++ src/restart.cc | 2 ++ 7 files changed, 124 insertions(+), 24 deletions(-) diff --git a/src/HDFrestart.cc b/src/HDFrestart.cc index 0fac518e..87982290 100644 --- a/src/HDFrestart.cc +++ b/src/HDFrestart.cc @@ -1440,8 +1440,8 @@ int HDFrestart::read_1func_hdf5(T* vv, const std::string& datasetname) } template -int HDFrestart::write_1func_hdf5( - T* vv, const std::string& datasetname, double* ll, double* cell_origin) +int HDFrestart::write_1func_hdf5(const T* const vv, + const std::string& datasetname, double* ll, double* cell_origin) { assert(ll != nullptr); assert(cell_origin != nullptr); @@ -1652,7 +1652,7 @@ int HDFrestart::readData( } template -int HDFrestart::writeData(T* data, hid_t space_id, hid_t memspace, +int HDFrestart::writeData(const T* const data, hid_t space_id, hid_t memspace, hid_t dset_id, const short precision) { if (precision == 1) @@ -2017,7 +2017,8 @@ int HDFrestart::readAtomicData( { Control& ct = *(Control::instance()); if (onpe0 && ct.verbose > 0) - (*MPIdata::sout) << "HDFrestart::readAtomicNames()..." 
<< std::endl; + (*MPIdata::sout) << "HDFrestart::readAtomicData(), dataset = " + << datasetname << std::endl; std::vector buffer; short name_length = 7; // default, value used before February 2016 @@ -2095,10 +2096,9 @@ int HDFrestart::readAtomicData( { std::string t(&buffer[i], name_length); assert(t.size() > 0); - // cout<<"name="< - int writeData(T* vv, hid_t filespace, hid_t memspace, hid_t dset_id, - const short precision); + int writeData(const T* const vv, hid_t filespace, hid_t memspace, + hid_t dset_id, const short precision); template int readData(T* vv, hid_t memspace, hid_t dset_id, const short precision); diff --git a/src/Ion.cc b/src/Ion.cc index 33465635..90350477 100644 --- a/src/Ion.cc +++ b/src/Ion.cc @@ -294,12 +294,18 @@ void Ion::getIonData(IonData& idata) const } } +void Ion::resetPositionsToPrevious() +{ + for (int pos = 0; pos < 3; pos++) + position_[pos] = old_position_[pos]; +} + void Ion::setFromIonData(const IonData& data) { // random state setRandomState(data.rand_state[0], data.rand_state[1], data.rand_state[2]); // previous position - setOldPosition( + setPreviousPosition( data.old_position[0], data.old_position[1], data.old_position[2]); // velocity setVelocity(data.velocity[0], data.velocity[1], data.velocity[2]); diff --git a/src/Ion.h b/src/Ion.h index ae870820..871205a3 100644 --- a/src/Ion.h +++ b/src/Ion.h @@ -74,12 +74,6 @@ class Ion position_[i] += shift; } - void setOldPosition(const double x, const double y, const double z) - { - old_position_[0] = x; - old_position_[1] = y; - old_position_[2] = z; - } public: Ion(const Species& species, const std::string& name, const double crds[3], @@ -96,6 +90,13 @@ class Ion void init(const double crds[3], const double velocity[3], const bool lock); void setup(); + void setPreviousPosition(const double x, const double y, const double z) + { + old_position_[0] = x; + old_position_[1] = y; + old_position_[2] = z; + } + std::shared_ptr kbproj() { return kbproj_; } const std::shared_ptr 
kbproj() const { return kbproj_; } @@ -189,6 +190,9 @@ class Ion kbproj_->clear(); } + + void resetPositionsToPrevious(); + void shiftPositionXLBOMDTest(Vector3D shift) { for (short dir = 0; dir < 3; dir++) diff --git a/src/Ions.cc b/src/Ions.cc index e93d82ea..09b97d2e 100644 --- a/src/Ions.cc +++ b/src/Ions.cc @@ -853,6 +853,42 @@ void Ions::writePositions(HDFrestart& h5f_file) } } +void Ions::writePreviousPositions(HDFrestart& h5f_file) +{ + Control& ct(*(Control::instance())); + + if (onpe0 && ct.verbose > 1) + { + (*MPIdata::sout) << "Ions::writePositions" << std::endl; + } + + std::vector data; + if (h5f_file.gatherDataX()) + { + Mesh* mymesh = Mesh::instance(); + const pb::PEenv& myPEenv = mymesh->peenv(); + MPI_Comm comm = myPEenv.comm_x(); + + gatherPreviousPositions(data, 0, comm); + } + else + { + for (auto& ion : local_ions_) + { + data.push_back(ion->getPreviousPosition(0)); + data.push_back(ion->getPreviousPosition(1)); + data.push_back(ion->getPreviousPosition(2)); + } + } + + hid_t file_id = h5f_file.file_id(); + if (file_id >= 0) + { + std::string datasetname("/Ionic_previous_positions"); + writeData2d(h5f_file, datasetname, data, 3, 1.e32); + } +} + void Ions::initFromRestartFile(HDFrestart& h5_file) { assert(list_ions_.empty()); @@ -995,14 +1031,39 @@ void Ions::readRestartPositions(HDFrestart& h5_file) } } +void Ions::readRestartPreviousPositions(HDFrestart& h5_file) +{ + Control& ct = *(Control::instance()); + if (onpe0 && ct.verbose > 0) + (*MPIdata::sout) << "Read ionic positions from hdf5 file" << std::endl; + + std::vector data; + std::string datasetname("/Ionic_previous_positions"); + h5_file.readAtomicData(datasetname, data); + assert(data.size() == 3 * local_ions_.size()); + + int i = 0; + for (auto& ion : local_ions_) + { + ion->setPreviousPosition(data[3 * i], data[3 * i + 1], data[3 * i + 2]); + i++; + } +} + +void Ions::resetPositionsToPrevious() +{ + for (auto& ion : local_ions_) + { + ion->resetPositionsToPrevious(); + } +} + void 
Ions::writeVelocities(HDFrestart& h5f_file) { Control& ct(*(Control::instance())); if (onpe0 && ct.verbose > 1) - { (*MPIdata::sout) << "Ions::writeVelocities" << std::endl; - } std::vector data; if (h5f_file.gatherDataX()) @@ -2535,6 +2596,28 @@ void Ions::gatherPositions( if (mype == root) positions = data; } +void Ions::gatherPreviousPositions( + std::vector& positions, const int root, const MPI_Comm comm) const +{ + std::vector local_positions; + + for (auto& ion : local_ions_) + { + local_positions.push_back(ion->getPreviousPosition(0)); + local_positions.push_back(ion->getPreviousPosition(1)); + local_positions.push_back(ion->getPreviousPosition(2)); + } + + // gather data to PE root + std::vector data; + mgmol_tools::gatherV(local_positions, data, root, comm); + + int mype = 0; + MPI_Comm_rank(comm, &mype); + positions.clear(); + if (mype == root) positions = data; +} + void Ions::gatherForces( std::vector& forces, const int root, const MPI_Comm comm) const { diff --git a/src/Ions.h b/src/Ions.h index ae4f6adb..b03972d1 100644 --- a/src/Ions.h +++ b/src/Ions.h @@ -147,6 +147,8 @@ class Ions const MPI_Comm comm) const; void gatherPositions(std::vector& positions, const int root, const MPI_Comm comm) const; + void gatherPreviousPositions(std::vector& positions, const int root, + const MPI_Comm comm) const; void gatherLockedNames(std::vector& names, const int root, const MPI_Comm comm) const; void gatherIndexes( @@ -198,6 +200,7 @@ class Ions ion++; } } + void resetPositionsToPrevious(); void removeMassCenterMotion(); bool hasNLprojectors() @@ -237,6 +240,7 @@ class Ions double kinetic_E(void) const; void writePositions(HDFrestart& h5f_file); + void writePreviousPositions(HDFrestart& h5f_file); void writeVelocities(HDFrestart& h5f_file); void writeRandomStates(HDFrestart& h5f_file); void writeForces(HDFrestart& h5f_file); @@ -350,6 +354,7 @@ class Ions void addIonToList(const Species& sp, const std::string& name, const double crds[3], const double velocity[3], 
const bool lock); + void readRestartPreviousPositions(HDFrestart& h5_file); // void checkUnicityLocalIons(); }; diff --git a/src/restart.cc b/src/restart.cc index ed7f478c..532043af 100644 --- a/src/restart.cc +++ b/src/restart.cc @@ -102,6 +102,8 @@ int MGmol::write_hdf5(HDFrestart& h5f_file, ions.writeAtomicNLprojIDs(h5f_file); ions.writePositions(h5f_file); if (ct.LangevinThermostat()) ions.writeRandomStates(h5f_file); + if (ct.AtomsDynamic() == AtomsDynamicType::MD) + ions.writePreviousPositions(h5f_file); ions.writeVelocities(h5f_file); ions.writeForces(h5f_file); From 77bd87ef256623ee5d95866a75f601417e559ca7 Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Fri, 21 Feb 2025 12:51:24 -0500 Subject: [PATCH 24/99] MPI abort (#313) * Fix error code used by MPI_Abort --- src/BlockVector.cc | 2 +- src/Control.cc | 18 ++++++++--------- src/Control.h | 2 +- src/DistMatrix/BlacsContext.cc | 10 +++++----- src/DistMatrix/DistMatrix.h | 2 +- src/DistanceConstraint.cc | 2 +- src/ExtendedGridOrbitals.cc | 2 +- src/IonicAlgorithm.cc | 2 +- src/Ions.cc | 3 +-- src/LDAFunctional.cc | 3 +-- src/LocalizationRegions.cc | 20 +++++-------------- src/LocalizationRegions.h | 6 +++--- src/MDfiles.cc | 11 +++++----- src/MGmol.cc | 15 ++++++-------- src/MGmol.h | 1 - src/NonOrthoDMStrategy.cc | 2 +- src/PBEFunctional.cc | 3 +-- src/Potentials.cc | 8 ++++---- src/ProjectedMatricesSparse.cc | 2 +- src/main.cc | 2 +- src/md.cc | 7 +++---- src/mgmol_memory.cc | 2 +- src/mgmol_run.cc | 2 +- src/pb/Lap.h | 2 +- src/pb/PEenv.cc | 12 +++++------ src/pb/PEenv.h | 6 +++--- src/radial/RadialMeshFunction.cc | 4 ++-- src/setup.cc | 4 ++-- src/sparse_linear_algebra/DataDistribution.cc | 6 +++--- src/tools.cc | 6 +++--- src/tools/MGmol_MPI.cc | 6 +++--- src/tools/MGmol_MPI.h | 2 +- src/tools/Vector3D.cc | 2 +- 33 files changed, 80 insertions(+), 97 deletions(-) diff --git a/src/BlockVector.cc b/src/BlockVector.cc index 40271750..9a411ebe 100644 --- a/src/BlockVector.cc +++ b/src/BlockVector.cc 
@@ -209,7 +209,7 @@ void BlockVector::allocate_storage() std::cerr << "ERROR BlockVector: trying to use allocation " << size_storage_ << " bigger than initialy preallocated " << allocated_size_storage_ << "!!!" << std::endl; - ct.global_exit(0); + ct.global_exit(); } storage_ = class_storage_[my_allocation_]; assert(class_storage_.size() > 0); diff --git a/src/Control.cc b/src/Control.cc index d266aa2e..33fd93c4 100644 --- a/src/Control.cc +++ b/src/Control.cc @@ -495,7 +495,7 @@ void Control::sync(void) if (mpirc != MPI_SUCCESS) { (*MPIdata::sout) << "MPI Bcast of Control failed!!!" << std::endl; - MPI_Abort(comm_global_, 2); + MPI_Abort(comm_global_, EXIT_FAILURE); } }; @@ -1152,7 +1152,7 @@ void Control::setTolEigenvalueGram(const float tol) << threshold_eigenvalue_gram_ << std::endl; } -void Control::global_exit(int i) { MPI_Abort(comm_global_, i); } +void Control::global_exit() { MPI_Abort(comm_global_, EXIT_FAILURE); } void Control::setSpecies(Potentials& pot) { @@ -1342,7 +1342,7 @@ void Control::setOptions(const boost::program_options::variables_map& vm) (*MPIdata::serr) << "ERROR in Control::setOptions: Invalid restart dump type" << std::endl; - MPI_Abort(comm_global_, 2); + MPI_Abort(comm_global_, EXIT_FAILURE); } (*MPIdata::sout) << "Output restart file: " << out_restart_file @@ -1549,14 +1549,14 @@ void Control::setOptions(const boost::program_options::variables_map& vm) else { std::cerr << "ERROR: Spread Penalty needs a type" << std::endl; - MPI_Abort(comm_global_, 0); + MPI_Abort(comm_global_, EXIT_FAILURE); } if (spread_penalty_target_ <= 0.) 
{ (*MPIdata::sout) << "Invalid value for Spread Penalty target: " << spread_penalty_target_ << std::endl; - MPI_Abort(comm_global_, 0); + MPI_Abort(comm_global_, EXIT_FAILURE); } } @@ -1599,7 +1599,7 @@ void Control::setOptions(const boost::program_options::variables_map& vm) { (*MPIdata::sout) << "Invalid value for Thermostat.type: " << thermostat_type << std::endl; - MPI_Abort(comm_global_, 0); + MPI_Abort(comm_global_, EXIT_FAILURE); } tkel = vm["Thermostat.temperature"].as(); @@ -1608,7 +1608,7 @@ void Control::setOptions(const boost::program_options::variables_map& vm) (*MPIdata::sout) << "Invalid value for Thermostat.temperature: " << tkel << std::endl; - MPI_Abort(comm_global_, 0); + MPI_Abort(comm_global_, EXIT_FAILURE); } thtime = vm["Thermostat.relax_time"].as(); if (thtime < 0.) @@ -1616,7 +1616,7 @@ void Control::setOptions(const boost::program_options::variables_map& vm) (*MPIdata::sout) << "Invalid value for Thermostat.relax_time: " << thtime << std::endl; - MPI_Abort(comm_global_, 0); + MPI_Abort(comm_global_, EXIT_FAILURE); } if (str.compare("SCALING") == 0) @@ -1627,7 +1627,7 @@ void Control::setOptions(const boost::program_options::variables_map& vm) (*MPIdata::sout) << "Invalid value for Thermostat.width: " << thwidth << std::endl; - MPI_Abort(comm_global_, 0); + MPI_Abort(comm_global_, EXIT_FAILURE); } } } diff --git a/src/Control.h b/src/Control.h index bd4a7d96..9481cc24 100644 --- a/src/Control.h +++ b/src/Control.h @@ -346,7 +346,7 @@ class Control return pair_mlwf_distance_threshold_; } - void global_exit(int i); + void global_exit(); bool Mehrstellen() const { return (lap_type == 0 || lap_type == 10); } diff --git a/src/DistMatrix/BlacsContext.cc b/src/DistMatrix/BlacsContext.cc index e87cb7dd..7610a3cb 100644 --- a/src/DistMatrix/BlacsContext.cc +++ b/src/DistMatrix/BlacsContext.cc @@ -191,7 +191,7 @@ BlacsContext::BlacsContext( { std::cerr << " BlacsContext::BlacsContext: type = " << type << " is an incorrect parameter" << std::endl; - 
MPI_Abort(comm_global, 0); + MPI_Abort(comm_global, EXIT_FAILURE); } size_ = nprow_ * npcol_; @@ -222,7 +222,7 @@ BlacsContext::BlacsContext( { std::cerr << " nprocs_=" << nprocs_ << std::endl; std::cerr << " BlacsContext nprow*npcol > nprocs_" << std::endl; - MPI_Abort(comm_global, 0); + MPI_Abort(comm_global, EXIT_FAILURE); } ictxt_ = Csys2blacs_handle(comm_global_); @@ -252,7 +252,7 @@ BlacsContext::BlacsContext( { std::cerr << " BlacsContext::BlacsContext: invalid parameters" << " in " << __FILE__ << ":" << __LINE__ << std::endl; - MPI_Abort(comm_global, 0); + MPI_Abort(comm_global, EXIT_FAILURE); } int* pmap = new int[nprow * npcol]; // build pmap @@ -296,7 +296,7 @@ BlacsContext::BlacsContext(BlacsContext& bc, const int irow, const int icol, { std::cerr << " BlacsContext::BlacsContext: invalid parameters" << std::endl; - MPI_Abort(comm_global_, 0); + MPI_Abort(comm_global_, EXIT_FAILURE); } int* pmap = new int[nprow * npcol]; // build pmap @@ -350,7 +350,7 @@ BlacsContext::BlacsContext(const BlacsContext& bc, const char type) std::cerr << " BlacsContext::BlacsContext: row/col incorrect parameter: " << type << std::endl; - MPI_Abort(comm_global_, 0); + MPI_Abort(comm_global_, EXIT_FAILURE); } ictxt_ = Csys2blacs_handle(comm_global_); diff --git a/src/DistMatrix/DistMatrix.h b/src/DistMatrix/DistMatrix.h index f56df2b4..175fbd9b 100644 --- a/src/DistMatrix/DistMatrix.h +++ b/src/DistMatrix/DistMatrix.h @@ -28,7 +28,7 @@ std::cerr << "ERROR in file " << __FILE__ << " at line " << __LINE__ \ << std::endl; \ std::cerr << "Error Message: " << X << std::endl; \ - MPI_Abort(comm_global_, 2); + MPI_Abort(comm_global_, EXIT_FAILURE); #endif diff --git a/src/DistanceConstraint.cc b/src/DistanceConstraint.cc index d5175d7e..6d046f3b 100644 --- a/src/DistanceConstraint.cc +++ b/src/DistanceConstraint.cc @@ -99,7 +99,7 @@ bool DistanceConstraint::enforce(void) { cerr << "mype=" << mype << ", tau1p_[0]=" << tau1p_[0] << endl; cerr << "mype=" << mype << ", tau2p_[0]=" << 
tau2p_[0] << endl; - MPI_Abort(MPI_COMM_WORLD, 0); + MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); } if (locally_owned_) (*MPIdata::sout) << setprecision(8) << "DistanceConstraint, d=" << d diff --git a/src/ExtendedGridOrbitals.cc b/src/ExtendedGridOrbitals.cc index 19c0d26b..07face7c 100644 --- a/src/ExtendedGridOrbitals.cc +++ b/src/ExtendedGridOrbitals.cc @@ -1239,7 +1239,7 @@ double ExtendedGridOrbitals::dotProduct( "dot product type" << std::endl; Control& ct = *(Control::instance()); - ct.global_exit(2); + ct.global_exit(); } dot_product_tm_.stop(); diff --git a/src/IonicAlgorithm.cc b/src/IonicAlgorithm.cc index 37a077e4..e804077a 100644 --- a/src/IonicAlgorithm.cc +++ b/src/IonicAlgorithm.cc @@ -57,7 +57,7 @@ void IonicAlgorithm::init(HDFrestart* h5f_file) if (ct.restart_info > 0) { int status = stepper_->init(*h5f_file); - if (status < 0) ct.global_exit(2); + if (status < 0) ct.global_exit(); // if restart data for lbfgs found if (status == 0) diff --git a/src/Ions.cc b/src/Ions.cc index 09b97d2e..9af0a64e 100644 --- a/src/Ions.cc +++ b/src/Ions.cc @@ -1961,7 +1961,6 @@ int Ions::read1atom(std::ifstream* tfile, const bool cell_relative) double velocity[3] = { 0., 0., 0. }; MGmol_MPI& mmpi(*(MGmol_MPI::instance())); - Control& ct(*(Control::instance())); short movable = 0; std::string query; @@ -2005,7 +2004,7 @@ int Ions::read1atom(std::ifstream* tfile, const bool cell_relative) { std::cerr << "ERROR: Invalid name read in input file: " << name_read << std::endl; - ct.global_exit(2); + mmpi.abort(); } short dummy; ss >> dummy; // not used anymore (was species index) diff --git a/src/LDAFunctional.cc b/src/LDAFunctional.cc index 4b60b51e..89f5ee5f 100644 --- a/src/LDAFunctional.cc +++ b/src/LDAFunctional.cc @@ -120,8 +120,7 @@ double LDAFunctional::computeRhoDotExc() const if (rc != MPI_SUCCESS) { (*MPIdata::sout) << "MPI_Allreduce double sum failed!!!" 
<< endl; - Control& ct = *(Control::instance()); - ct.global_exit(2); + mmpi.abort(); } exc = (POTDTYPE)sum; return exc; diff --git a/src/LocalizationRegions.cc b/src/LocalizationRegions.cc index 60f28301..f0f96412 100644 --- a/src/LocalizationRegions.cc +++ b/src/LocalizationRegions.cc @@ -492,7 +492,7 @@ void LocalizationRegions::bcastLRs() cerr << "ERROR!!!! LocalizationRegions::bcast(), Failure in MPI_Bcast " "of 'nglobal_'!!!" << endl; - MPI_Abort(comm, 0); + MPI_Abort(comm, EXIT_FAILURE); } if (nglobal_ == 0) return; @@ -522,7 +522,7 @@ void LocalizationRegions::bcastLRs() cerr << "ERROR!!!! LocalizationRegions::bcast(), Failure in " "MPI_Bcast of 'centers'!!!" << endl; - MPI_Abort(comm, 0); + MPI_Abort(comm, EXIT_FAILURE); } vector::iterator it = all_regions_.begin(); int i = 0; @@ -554,7 +554,7 @@ void LocalizationRegions::bcastLRs() cerr << "ERROR!!!! LocalizationRegions::bcast(), Failure in " "MPI_Bcast of 'radius'!!!" << endl; - MPI_Abort(comm, 0); + MPI_Abort(comm, EXIT_FAILURE); } it = all_regions_.begin(); i = 0; @@ -642,7 +642,7 @@ void LocalizationRegions::bcastLRs() cerr << "ERROR!!!! LocalizationRegions::bcast(), Failure in MPI_Bcast " "of 'volume'!!!" 
<< endl; - MPI_Abort(comm, 0); + MPI_Abort(comm, EXIT_FAILURE); } if (ct.verbose > 0) printWithTimeStamp("LocalizationRegions::bcast() done...", cout); @@ -915,19 +915,9 @@ void LocalizationRegions::setupLocalRegionsFromOverlapRegions() cerr << "ERROR in distribution of localization centers: count=" << count << endl; cerr << "global number of regions=" << nglobal_ << endl; - MPI_Abort(MPI_COMM_WORLD, 1); + MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); } -#if 0 - //if(onpe0) - if( count!=nglobal_ ) - { - cerr<<"ERROR in distribution of localization centers: count="<::write_header() if (ct.isLocMode() && ct.verbose > 3) lrs_->printAllRegions(os_); } -template -void MGmol::global_exit(int i) -{ - MPI_Abort(comm_, i); -} - template void MGmol::check_anisotropy() { @@ -748,7 +742,8 @@ void MGmol::check_anisotropy() << ", hmin=" << mygrid.hmin() << std::endl; (*MPIdata::serr) << "init: Anisotropy too large: " << mygrid.anisotropy() << std::endl; - global_exit(2); + MGmol_MPI& mmpi = *(MGmol_MPI::instance()); + mmpi.abort(); } } @@ -1046,7 +1041,8 @@ void MGmol::setup() total_tm_.start(); setup_tm_.start(); - Control& ct = *(Control::instance()); + Control& ct = *(Control::instance()); + MGmol_MPI& mmpi = *(MGmol_MPI::instance()); if (ct.verbose > 0) printWithTimeStamp("MGmol::setup()...", os_); @@ -1064,7 +1060,8 @@ void MGmol::setup() #else int ierr = initial(); #endif - if (ierr < 0) global_exit(0); + + if (ierr < 0) mmpi.abort(); // Write header to stdout write_header(); diff --git a/src/MGmol.h b/src/MGmol.h index 4d82dd64..bccbc7af 100644 --- a/src/MGmol.h +++ b/src/MGmol.h @@ -221,7 +221,6 @@ class MGmol : public MGmolInterface void initNuc(Ions& ions); void initKBR(); - void global_exit(int i); void printEigAndOcc(); int readCoordinates(std::ifstream* tfile, const bool cell_relative); diff --git a/src/NonOrthoDMStrategy.cc b/src/NonOrthoDMStrategy.cc index 5452592f..3835b039 100644 --- a/src/NonOrthoDMStrategy.cc +++ b/src/NonOrthoDMStrategy.cc @@ -46,7 +46,7 @@ int 
NonOrthoDMStrategy::update(OrbitalsType& orbitals) { std::cerr << "NonOrthoDMStrategy, Invalid mixing value: " << mix_ << std::endl; - MPI_Abort(mmpi.commSameSpin(), 0); + MPI_Abort(mmpi.commSameSpin(), EXIT_FAILURE); } if (mmpi.PE0() && ct.verbose > 2) diff --git a/src/PBEFunctional.cc b/src/PBEFunctional.cc index 4a878c3c..29151a14 100644 --- a/src/PBEFunctional.cc +++ b/src/PBEFunctional.cc @@ -167,8 +167,7 @@ double PBEFunctional::computeRhoDotExc() const if (rc != MPI_SUCCESS) { (*MPIdata::sout) << "MPI_Allreduce double sum failed!!!" << std::endl; - Control& ct = *(Control::instance()); - ct.global_exit(2); + mmpi.abort(); } exc = sum; return exc; diff --git a/src/Potentials.cc b/src/Potentials.cc index a0ff3f1a..ebe42320 100644 --- a/src/Potentials.cc +++ b/src/Potentials.cc @@ -185,8 +185,8 @@ double Potentials::update(const std::vector>& rho) = MPI_Allreduce(&dvdot, &sum, 1, MPI_DOUBLE, MPI_SUM, myPEenv.comm()); if (rc != MPI_SUCCESS) { - std::cout << "MPI_Allreduce double sum failed!!!" << std::endl; - MPI_Abort(myPEenv.comm(), 2); + std::cerr << "MPI_Allreduce double sum failed!!!" << std::endl; + MPI_Abort(myPEenv.comm(), EXIT_FAILURE); } dvdot = sum; @@ -253,8 +253,8 @@ double Potentials::delta_v(const std::vector>& rho) = MPI_Allreduce(&dvdot, &sum, 1, MPI_DOUBLE, MPI_SUM, myPEenv.comm()); if (rc != MPI_SUCCESS) { - std::cout << "MPI_Allreduce double sum failed!!!" << std::endl; - MPI_Abort(myPEenv.comm(), 2); + std::cerr << "MPI_Allreduce double sum failed!!!" 
<< std::endl; + MPI_Abort(myPEenv.comm(), EXIT_FAILURE); } dvdot = sum; diff --git a/src/ProjectedMatricesSparse.cc b/src/ProjectedMatricesSparse.cc index 8b328b1c..15ad6d6c 100644 --- a/src/ProjectedMatricesSparse.cc +++ b/src/ProjectedMatricesSparse.cc @@ -354,7 +354,7 @@ double ProjectedMatricesSparse::dotProductSimple( << std::endl; MGmol_MPI& mmpi = *(MGmol_MPI::instance()); - MPI_Abort(mmpi.commSameSpin(), 0); + MPI_Abort(mmpi.commSameSpin(), EXIT_FAILURE); return -1.; } diff --git a/src/main.cc b/src/main.cc index 3fcbf791..3c6830a0 100644 --- a/src/main.cc +++ b/src/main.cc @@ -45,7 +45,7 @@ int main(int argc, char** argv) if (mpirc != MPI_SUCCESS) { std::cerr << "MPI Initialization failed!!!" << std::endl; - MPI_Abort(MPI_COMM_WORLD, 0); + MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); } MPI_Comm comm = MPI_COMM_WORLD; diff --git a/src/md.cc b/src/md.cc index dc2857eb..8ec20074 100644 --- a/src/md.cc +++ b/src/md.cc @@ -690,11 +690,10 @@ void MGmol::loadRestartFile(const std::string filename) if (ierr < 0) { if (onpe0) - (*MPIdata::serr) - << "loadRestartFile: failed to read the restart file." - << std::endl; + std::cerr << "loadRestartFile: failed to read the restart file." 
+ << std::endl; - global_exit(0); + mmpi.abort(); } if (!ct.fullyOccupied()) { diff --git a/src/mgmol_memory.cc b/src/mgmol_memory.cc index a1360c99..b1be12d6 100644 --- a/src/mgmol_memory.cc +++ b/src/mgmol_memory.cc @@ -62,7 +62,7 @@ void addTrack(long addr, long asize) if (nPos > MAXNUMALLOCATIONS) { printf("ERROR: Not enough memory slots!!!"); - MPI_Abort(MPI_COMM_WORLD, 1); + MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); } // if(onpe0)printf("nPos=%d, addr=%ld, size=%ld\n",nPos,addr,asize); } diff --git a/src/mgmol_run.cc b/src/mgmol_run.cc index a5de1ad5..468d61bd 100644 --- a/src/mgmol_run.cc +++ b/src/mgmol_run.cc @@ -96,7 +96,7 @@ int mgmol_check() { std::cerr << "Code should be called with " << myPEenv.n_mpi_tasks() << " MPI tasks only" << std::endl; - ct.global_exit(2); + ct.global_exit(); } assert(ct.getMGlevels() >= -1); diff --git a/src/pb/Lap.h b/src/pb/Lap.h index 33d90706..460e36c1 100644 --- a/src/pb/Lap.h +++ b/src/pb/Lap.h @@ -35,7 +35,7 @@ class Lap : public FDoper virtual void applyWithPot(GridFunc&, const double* const, T*) { std::cerr << "ERROR: Lap::applyWithPot() not implemented" << std::endl; - MPI_Abort(MPI_COMM_WORLD, 0); + MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); } std::string name() const { return name_; } diff --git a/src/pb/PEenv.cc b/src/pb/PEenv.cc index d5ff78f3..3e2fb777 100644 --- a/src/pb/PEenv.cc +++ b/src/pb/PEenv.cc @@ -197,7 +197,7 @@ PEenv::~PEenv() if (mpirc != MPI_SUCCESS) { std::cerr << "MPI_Comm_free failed!" << std::endl; - MPI_Abort(comm_, 2); + MPI_Abort(comm_, EXIT_FAILURE); } } if (cart_comm_ != MPI_COMM_NULL) MPI_Comm_free(&cart_comm_); @@ -277,7 +277,7 @@ void PEenv::task2xyz() if (rc != MPI_SUCCESS) { std::cerr << " error in MPI_Cart_coords()!!!" 
<< std::endl; - MPI_Abort(comm_, 1); + MPI_Abort(comm_, EXIT_FAILURE); } #else mytask_dir_[2] = mytask_ % n_mpi_tasks_dir_[2]; @@ -650,7 +650,7 @@ void PEenv::split_comm(const int nx, const int ny, const int nz, const int bias) { std::cerr << "MPI_Comm_split failed!, my color_=" << color_ << std::endl; - MPI_Abort(comm_, 0); + MPI_Abort(comm_, EXIT_FAILURE); } MPI_Comm_size(comm_active_, &n_mpi_tasks_); #ifndef NDEBUG @@ -696,7 +696,7 @@ void PEenv::printPEnames(std::ostream& os) const { std::cerr << "PEenv::printPEnames, MPI_Recv() failed!!!" << std::endl; - MPI_Abort(comm_, 0); + MPI_Abort(comm_, EXIT_FAILURE); } } else if (ip == mytask_) @@ -708,7 +708,7 @@ void PEenv::printPEnames(std::ostream& os) const { std::cerr << "PEenv::printPEnames, MPI_Send() failed!!!" << std::endl; - MPI_Abort(comm_, 0); + MPI_Abort(comm_, EXIT_FAILURE); } } if (mytask_ == 0) @@ -717,7 +717,7 @@ void PEenv::printPEnames(std::ostream& os) const if (mytask_ == 0) os << std::endl; } -void PEenv::globalExit() const { MPI_Abort(comm_, 2); } +void PEenv::globalExit() const { MPI_Abort(comm_, EXIT_FAILURE); } void PEenv::bcast(int* val, const int n) const { diff --git a/src/pb/PEenv.h b/src/pb/PEenv.h index bc4751a5..9782ddb9 100644 --- a/src/pb/PEenv.h +++ b/src/pb/PEenv.h @@ -166,7 +166,7 @@ class PEenv { std::cerr << "ERROR in PEenv::maxXdir()" << std::endl; sleep(5); - MPI_Abort(comm_, 0); + MPI_Abort(comm_, EXIT_FAILURE); } delete[] sendbuf; } @@ -181,7 +181,7 @@ class PEenv { std::cerr << "ERROR in PEenv::maxYdir()" << std::endl; sleep(5); - MPI_Abort(comm_, 0); + MPI_Abort(comm_, EXIT_FAILURE); } delete[] sendbuf; } @@ -196,7 +196,7 @@ class PEenv { std::cerr << "ERROR in PEenv::maxZdir()" << std::endl; sleep(5); - MPI_Abort(comm_, 0); + MPI_Abort(comm_, EXIT_FAILURE); } delete[] sendbuf; } diff --git a/src/radial/RadialMeshFunction.cc b/src/radial/RadialMeshFunction.cc index 499e482a..79b32518 100644 --- a/src/radial/RadialMeshFunction.cc +++ b/src/radial/RadialMeshFunction.cc @@ 
-98,7 +98,7 @@ void RadialMeshFunction::bcast(MPI_Comm comm, const int root) { (*MPIdata::sout) << "RadialMeshFunction::bcast() failed!!!" << std::endl; - MPI_Abort(comm, 0); + MPI_Abort(comm, EXIT_FAILURE); } for (int i = 0; i < nn[1]; i++) @@ -110,7 +110,7 @@ void RadialMeshFunction::bcast(MPI_Comm comm, const int root) { (*MPIdata::sout) << "RadialMeshFunction::bcast() failed!!!" << std::endl; - MPI_Abort(comm, 0); + MPI_Abort(comm, EXIT_FAILURE); } } } diff --git a/src/setup.cc b/src/setup.cc index 9963bfc5..2ff6d79e 100644 --- a/src/setup.cc +++ b/src/setup.cc @@ -140,7 +140,7 @@ int MGmol::setupLRsFromInput(const std::string filename) if (!tfile->is_open()) { std::cerr << " Unable to open file " << filename << std::endl; - global_exit(0); + mmpi.abort(); } else { @@ -173,7 +173,7 @@ int MGmol::setupConstraintsFromInput(const std::string filename) if (!tfile->is_open()) { std::cerr << " Unable to open file " << filename << std::endl; - global_exit(0); + mmpi.abort(); } else { diff --git a/src/sparse_linear_algebra/DataDistribution.cc b/src/sparse_linear_algebra/DataDistribution.cc index d4b26d93..67bb1319 100644 --- a/src/sparse_linear_algebra/DataDistribution.cc +++ b/src/sparse_linear_algebra/DataDistribution.cc @@ -288,7 +288,7 @@ void DataDistribution::distributeLocalDataWithCommOvlp(const int nsteps, std::cout << "ERROR: " << name_ << ", dir=" << dir << ", remote_size=" << remote_size << ", bsiz=" << bsiz << std::endl; - MPI_Abort(cart_comm_, 0); + MPI_Abort(cart_comm_, EXIT_FAILURE); } // string stamp="DataDistribution ("+name_+"), buffer size checked..."; // printWithTimeStamp(stamp,cout); @@ -302,14 +302,14 @@ void DataDistribution::distributeLocalDataWithCommOvlp(const int nsteps, if (mpircv != MPI_SUCCESS) { std::cout << "ERROR in MPI_Irecv, code=" << mpircv << std::endl; - MPI_Abort(cart_comm_, 0); + MPI_Abort(cart_comm_, EXIT_FAILURE); } int mpisnd = MPI_Isend(packed_buffer.sendBuffer(), siz, MPI_CHAR, dest, 0, cart_comm_, &request[1]); if (mpisnd 
!= MPI_SUCCESS) { std::cout << "ERROR in MPI_Isend, code=" << mpisnd << std::endl; - MPI_Abort(cart_comm_, 0); + MPI_Abort(cart_comm_, EXIT_FAILURE); } /* wait to complete communication */ MPI_Waitall(2, request, MPI_STATUSES_IGNORE); diff --git a/src/tools.cc b/src/tools.cc index 95790c59..aff34933 100644 --- a/src/tools.cc +++ b/src/tools.cc @@ -29,7 +29,7 @@ void noMoreMemory() std::cerr << "Unable to satisfy request for memory for MPI task " << mype << std::endl; Control& ct = *(Control::instance()); - ct.global_exit(3); + ct.global_exit(); } // an atom name should start with a capital letter and end with a number @@ -229,12 +229,12 @@ void printWithTimeStamp(const std::string& string2print, std::ostream& os) if( mpierr!=MPI_SUCCESS ) { cerr << " Error in MPI!!!" << std::endl; - MPI_Abort(mmpi.commGlobal(),1); + MPI_Abort(mmpi.commGlobal(),EXIT_FAILURE); } if( r!=mmpi.size()*s && onpe0 ) { cerr << " Error in barrier: "<& vv, MPI_Comm comm) std::cerr << "ERROR!!!! bcastvv3d(), Failure in MPI_Bcast of 'radii_'!!!" 
<< std::endl; - MPI_Abort(comm, 0); + MPI_Abort(comm, EXIT_FAILURE); } for (int j = 0; j < n; j++) for (short i = 0; i < 3; i++) From 8b84de49791659a4d874c617f71a920fb08e9138 Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Mon, 24 Feb 2025 15:03:24 -0500 Subject: [PATCH 25/99] Rho and VH restart (#311) * enable restart with consistent rho and VHartree --------- Co-authored-by: Seung Whan Chung --- src/Electrostatic.h | 3 + src/MGmol.h | 4 +- src/Potentials.cc | 6 + src/Potentials.h | 2 + src/md.cc | 13 ++ tests/CMakeLists.txt | 15 ++ tests/RhoVhRestart/h2o.xyz | 6 + tests/RhoVhRestart/md.cfg | 30 ++++ tests/RhoVhRestart/mgmol.cfg | 28 ++++ tests/RhoVhRestart/restart.cfg | 25 +++ tests/RhoVhRestart/test.py | 79 +++++++++ tests/RhoVhRestart/testRhoVhRestart.cc | 220 +++++++++++++++++++++++++ 12 files changed, 430 insertions(+), 1 deletion(-) create mode 100644 tests/RhoVhRestart/h2o.xyz create mode 100644 tests/RhoVhRestart/md.cfg create mode 100644 tests/RhoVhRestart/mgmol.cfg create mode 100644 tests/RhoVhRestart/restart.cfg create mode 100755 tests/RhoVhRestart/test.py create mode 100644 tests/RhoVhRestart/testRhoVhRestart.cc diff --git a/src/Electrostatic.h b/src/Electrostatic.h index 9beee899..06a9e17c 100644 --- a/src/Electrostatic.h +++ b/src/Electrostatic.h @@ -47,6 +47,9 @@ class Electrostatic ~Electrostatic(); static Timer solve_tm() { return solve_tm_; } + pb::GridFunc* getRhoc() { return grhoc_; } + Poisson* getPoissonSolver() { return poisson_solver_; } + void setup(const short max_sweeps); void setupPB(const double e0, const double rho0, const double drho0, Potentials& pot); diff --git a/src/MGmol.h b/src/MGmol.h index bccbc7af..b18465a1 100644 --- a/src/MGmol.h +++ b/src/MGmol.h @@ -150,7 +150,6 @@ class MGmol : public MGmolInterface void initialMasks(); int setupLRsFromInput(const std::string filename); - void setup(); int setupLRs(const std::string input_file) override; int setupFromInput(const std::string input_file) override; int 
setupConstraintsFromInput(const std::string input_file) override; @@ -177,12 +176,15 @@ class MGmol : public MGmolInterface ~MGmol() override; + void setup(); + /* access functions */ OrbitalsType* getOrbitals() { return current_orbitals_; } std::shared_ptr> getHamiltonian() { return hamiltonian_; } + std::shared_ptr> getRho() { return rho_; } void run() override; diff --git a/src/Potentials.cc b/src/Potentials.cc index ebe42320..96375c6d 100644 --- a/src/Potentials.cc +++ b/src/Potentials.cc @@ -939,6 +939,12 @@ int Potentials::read(HDFrestart& h5f_file) h5f_file.read_1func_hdf5(vepsilon_.data(), "VDielectric"); } + std::string datasetname("Preceding_Hartree"); + if (h5f_file.checkDataExists(datasetname)) + { + h5f_file.read_1func_hdf5(vh_rho_backup_.data(), datasetname); + } + return 0; } diff --git a/src/Potentials.h b/src/Potentials.h index cb0f8e05..8e9b763c 100644 --- a/src/Potentials.h +++ b/src/Potentials.h @@ -206,6 +206,8 @@ class Potentials */ void backupVh(); + void resetVhRho2Backup() { vh_rho_ = vh_rho_backup_; } + #ifdef HAVE_TRICUBIC void readExternalPot(const string filename, const char type); void setupVextTricubic(); diff --git a/src/md.cc b/src/md.cc index 8ec20074..b1ffb619 100644 --- a/src/md.cc +++ b/src/md.cc @@ -702,6 +702,19 @@ void MGmol::loadRestartFile(const std::string filename) ierr = proj_matrices_->readWFDM(h5file); } + if (h5file.checkDataExists("Preceding_Hartree")) + { + ions_->readRestartPreviousPositions(h5file); + ions_->resetPositionsToPrevious(); + ions_->setup(); + + Potentials& pot = hamiltonian_->potential(); + pot.initialize(*ions_); + if (onpe0) std::cout << "Reset VhRho to backup..." 
<< std::endl; + pot.resetVhRho2Backup(); + electrostat_->setupRhoc(pot.rho_comp()); + } + ierr = h5file.close(); mmpi.allreduce(&ierr, 1, MPI_MIN); if (ierr < 0) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index baf1c451..838ce6a4 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -250,6 +250,8 @@ add_executable(testDensityMatrix ${CMAKE_SOURCE_DIR}/src/ReplicatedWorkSpace.cc ${CMAKE_SOURCE_DIR}/src/hdf_tools.cc ${CMAKE_SOURCE_DIR}/tests/ut_magma_main.cc) +add_executable(testRhoVhRestart + ${CMAKE_SOURCE_DIR}/tests/RhoVhRestart/testRhoVhRestart.cc) add_executable(testEnergyAndForces ${CMAKE_SOURCE_DIR}/tests/EnergyAndForces/testEnergyAndForces.cc) add_executable(testWFEnergyAndForces @@ -382,6 +384,17 @@ add_test(NAME testRestartEnergyAndForces ${CMAKE_CURRENT_SOURCE_DIR}/RestartEnergyAndForces/restart.cfg ${CMAKE_CURRENT_SOURCE_DIR}/RestartEnergyAndForces/h2o.xyz ${CMAKE_CURRENT_SOURCE_DIR}/../potentials) +add_test(NAME testRhoVhRestart + COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/RhoVhRestart/test.py + ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 4 ${MPIEXEC_PREFLAGS} + ${CMAKE_CURRENT_BINARY_DIR}/../src/mgmol-opt + ${CMAKE_CURRENT_BINARY_DIR}/testRhoVhRestart + ${CMAKE_CURRENT_SOURCE_DIR}/RhoVhRestart/mgmol.cfg + ${CMAKE_CURRENT_SOURCE_DIR}/RhoVhRestart/md.cfg + ${CMAKE_CURRENT_SOURCE_DIR}/RhoVhRestart/restart.cfg + ${CMAKE_CURRENT_SOURCE_DIR}/RhoVhRestart/h2o.xyz + ${CMAKE_CURRENT_SOURCE_DIR}/../potentials) + if(${MAGMA_FOUND}) add_test(NAME testOpenmpOffload @@ -577,6 +590,7 @@ target_include_directories(testDensityMatrix PRIVATE ${Boost_INCLUDE_DIRS} ${HDF target_include_directories(testGramMatrix PRIVATE ${Boost_INCLUDE_DIRS}) target_include_directories(testAndersonMix PRIVATE ${Boost_INCLUDE_DIRS}) target_include_directories(testIons PRIVATE ${Boost_INCLUDE_DIRS} ${HDF5_INCLUDE_DIRS}) +target_include_directories(testRhoVhRestart PRIVATE ${Boost_INCLUDE_DIRS}) target_link_libraries(testMPI PRIVATE MPI::MPI_CXX) 
target_link_libraries(testBlacsContext PRIVATE ${SCALAPACK_LIBRARIES} @@ -589,6 +603,7 @@ target_link_libraries(testDMandEnergyAndForces PRIVATE mgmol_src) target_link_libraries(testRestartEnergyAndForces PRIVATE mgmol_src) target_link_libraries(testIons PRIVATE mgmol_src) target_link_libraries(testDensityMatrix PRIVATE ${HDF5_LIBRARIES}) +target_link_libraries(testRhoVhRestart mgmol_src) if(${MAGMA_FOUND}) target_link_libraries(testDistVector PRIVATE ${SCALAPACK_LIBRARIES} diff --git a/tests/RhoVhRestart/h2o.xyz b/tests/RhoVhRestart/h2o.xyz new file mode 100644 index 00000000..d5171c8b --- /dev/null +++ b/tests/RhoVhRestart/h2o.xyz @@ -0,0 +1,6 @@ +3 + +O 0.00 0.00 0.00 +H -0.76 0.59 0.00 +H 0.76 0.59 0.00 + diff --git a/tests/RhoVhRestart/md.cfg b/tests/RhoVhRestart/md.cfg new file mode 100644 index 00000000..2b8a378b --- /dev/null +++ b/tests/RhoVhRestart/md.cfg @@ -0,0 +1,30 @@ +verbosity=2 +xcFunctional=PBE +FDtype=4th +[Mesh] +nx=48 +ny=48 +nz=48 +[Domain] +ox=-4.5 +oy=-4.5 +oz=-4.5 +lx=9. +ly=9. +lz=9. +[Potentials] +pseudopotential=pseudo.O_ONCV_PBE_SG15 +pseudopotential=pseudo.H_ONCV_PBE_SG15 +[Run] +type=MD +[MD] +num_steps=5 +dt=40. +[Quench] +max_steps=24 +atol=1.e-8 +[Restart] +input_level=4 +input_filename=WF +output_level=4 +output_filename=WF_MD diff --git a/tests/RhoVhRestart/mgmol.cfg b/tests/RhoVhRestart/mgmol.cfg new file mode 100644 index 00000000..eee7f11c --- /dev/null +++ b/tests/RhoVhRestart/mgmol.cfg @@ -0,0 +1,28 @@ +verbosity=2 +xcFunctional=PBE +FDtype=4th +[Mesh] +nx=48 +ny=48 +nz=48 +[Domain] +ox=-4.5 +oy=-4.5 +oz=-4.5 +lx=9. +ly=9. +lz=9. 
+[Potentials] +pseudopotential=pseudo.O_ONCV_PBE_SG15 +pseudopotential=pseudo.H_ONCV_PBE_SG15 +[Run] +type=QUENCH +[Quench] +max_steps=120 +atol=1.e-8 +[Orbitals] +initial_type=Random +initial_width=1.5 +[Restart] +output_level=4 +output_filename=WF diff --git a/tests/RhoVhRestart/restart.cfg b/tests/RhoVhRestart/restart.cfg new file mode 100644 index 00000000..20f0293a --- /dev/null +++ b/tests/RhoVhRestart/restart.cfg @@ -0,0 +1,25 @@ +verbosity=2 +xcFunctional=PBE +FDtype=4th +[Mesh] +nx=48 +ny=48 +nz=48 +[Domain] +ox=-4.5 +oy=-4.5 +oz=-4.5 +lx=9. +ly=9. +lz=9. +[Potentials] +pseudopotential=pseudo.O_ONCV_PBE_SG15 +pseudopotential=pseudo.H_ONCV_PBE_SG15 +[Run] +type=QUENCH +[Quench] +max_steps=24 +atol=1.e-8 +[Restart] +input_level=4 +input_filename=WF_MD diff --git a/tests/RhoVhRestart/test.py b/tests/RhoVhRestart/test.py new file mode 100755 index 00000000..a34b962f --- /dev/null +++ b/tests/RhoVhRestart/test.py @@ -0,0 +1,79 @@ +#!/usr/bin/env python +import sys +import os +import subprocess +import string + +print("Test test_rho_restart...") + +nargs=len(sys.argv) + +mpicmd = sys.argv[1]+" "+sys.argv[2]+" "+sys.argv[3] +for i in range(4,nargs-7): + mpicmd = mpicmd + " "+sys.argv[i] +print("MPI run command: {}".format(mpicmd)) + +mgmol_exe = sys.argv[nargs-7] +test_exe = sys.argv[nargs-6] +input1 = sys.argv[nargs-5] +input2 = sys.argv[nargs-4] +input3 = sys.argv[nargs-3] +coords = sys.argv[nargs-2] +print("coordinates file: %s"%coords) + +#create links to potentials files +dst1 = 'pseudo.H_ONCV_PBE_SG15' +src1 = sys.argv[-1] + '/' + dst1 + +dst2 = 'pseudo.O_ONCV_PBE_SG15' +src2 = sys.argv[-1] + '/' + dst2 + +if not os.path.exists(dst1): + print("Create link to %s"%dst1) + os.symlink(src1, dst1) + +if not os.path.exists(dst2): + print("Create link to %s"%dst2) + os.symlink(src2, dst2) + +#run mgmol to generate initial ground state +command = "{} {} -c {} -i {}".format(mpicmd,mgmol_exe,input1,coords) +print("Run command: {}".format(command)) + +output = 
subprocess.check_output(command,shell=True) +lines=output.split(b'\n') + +flag=0 +for line in lines: + if line.count(b'Run ended'): + flag=1 + +if flag==0: + print("Initial quench failed to complete!") + sys.exit(1) + +#run MD +command = "{} {} -c {} -i {}".format(mpicmd,mgmol_exe,input2,coords) +print("Run command: {}".format(command)) +output = subprocess.check_output(command,shell=True) +lines=output.split(b'\n') + +flag=0 +for line in lines: + if line.count(b'Run ended'): + flag=1 + +if flag==0: + print("MD failed to complete!") + sys.exit(1) + +#run test +command = "{} {} -c {} -i {}".format(mpicmd,test_exe,input3,coords) +print("Run command: {}".format(command)) +output = subprocess.check_output(command,shell=True) +lines=output.split(b'\n') +for line in lines: + print(line) + +print("Test SUCCESSFUL!") +sys.exit(0) diff --git a/tests/RhoVhRestart/testRhoVhRestart.cc b/tests/RhoVhRestart/testRhoVhRestart.cc new file mode 100644 index 00000000..09da3320 --- /dev/null +++ b/tests/RhoVhRestart/testRhoVhRestart.cc @@ -0,0 +1,220 @@ +// Copyright (c) 2017, Lawrence Livermore National Security, LLC and +// UT-Battelle, LLC. +// Produced at the Lawrence Livermore National Laboratory and the Oak Ridge +// National Laboratory. +// LLNL-CODE-743438 +// All rights reserved. +// This file is part of MGmol. For details, see https://github.com/llnl/mgmol. 
+// Please also read this link https://github.com/llnl/mgmol/LICENSE + +#include "Control.h" +#include "Electrostatic.h" +#include "ExtendedGridOrbitals.h" +#include "LocGridOrbitals.h" +#include "MGmol.h" +#include "MGmol_MPI.h" +#include "MPIdata.h" +#include "Poisson.h" +#include "Potentials.h" +#include "mgmol_run.h" + +#include +#include +#include +#include +#include + +#include +namespace po = boost::program_options; + +template +int testRhoRestart(MGmolInterface* mgmol_) +{ + MGmol_MPI& mmpi = *(MGmol_MPI::instance()); + const int rank = mmpi.mypeGlobal(); + + MGmol* mgmol = static_cast*>(mgmol_); + std::shared_ptr> rho = mgmol->getRho(); + + /* save density from the restart file to elsewhere */ + std::vector rho0(rho->rho_[0].size()); + rho0 = rho->rho_[0]; + + /* recompute rho from the orbital */ + rho->update(*mgmol->getOrbitals()); + + /* check if the recomputed density is the same */ + for (int d = 0; d < (int)rho0.size(); d++) + { + double error = abs(rho0[d] - rho->rho_[0][d]) / abs(rho0[d]); + if (error > 1e-10) + { + printf("rank %d, rho[%d]=%.15e, rho0[%d]=%.15e\n", rank, d, + rho->rho_[0][d], d, rho0[d]); + std::cerr << "Density is inconsistent!!!" << std::endl; + return -1; + } + } + if (rank == 0) std::cout << "Density is consistent..." 
<< std::endl; + + return 0; +} + +template +int testPotRestart(MGmolInterface* mgmol_) +{ + Control& ct = *(Control::instance()); + + Mesh* mymesh = Mesh::instance(); + const pb::Grid& mygrid = mymesh->grid(); + + MGmol_MPI& mmpi = *(MGmol_MPI::instance()); + const int rank = mmpi.mypeGlobal(); + + MGmol* mgmol = static_cast*>(mgmol_); + Potentials& pot = mgmol->getHamiltonian()->potential(); + Poisson* poisson = mgmol->electrostat_->getPoissonSolver(); + std::shared_ptr> rho = mgmol->getRho(); + + /* GridFunc initialization inputs */ + short bc[3]; + for (int d = 0; d < 3; d++) + bc[d] = ct.bcPoisson[d]; + + /* save potential from the restart file to elsewhere */ + pb::GridFunc vh0_gf(mygrid, bc[0], bc[1], bc[2]); + vh0_gf.assign((pot.vh_rho()).data(), 'd'); + double n = vh0_gf.norm2(); + std::cout << "Norm2 of vh = " << n << std::endl; + + std::vector vh0(pot.size()); + const std::vector& d_vhrho(pot.vh_rho()); + for (int d = 0; d < (int)vh0.size(); d++) + vh0[d] = d_vhrho[d]; + + /* recompute potential */ + pb::GridFunc grho(mygrid, bc[0], bc[1], bc[2]); + grho.assign(&rho->rho_[0][0]); + pb::GridFunc* grhoc = mgmol->electrostat_->getRhoc(); + + poisson->solve(grho, *grhoc); + const pb::GridFunc& vh(poisson->vh()); + + pb::GridFunc error_gf(vh0_gf); + error_gf -= vh; + + double rel_error = error_gf.norm2() / vh0_gf.norm2(); + if (rank == 0) + { + printf("FOM potential relative error: %.3e\n", rel_error); + } + if (rel_error > 1e-9) + { + if (rank == 0) + { + std::cerr << "Potential is inconsistent!!!" << std::endl; + } + return -1; + } + if (rank == 0) std::cout << "Potential is consistent..." << std::endl; + + return 0; +} + +int main(int argc, char** argv) +{ + int mpirc = MPI_Init(&argc, &argv); + if (mpirc != MPI_SUCCESS) + { + std::cerr << "MPI Initialization failed!!!" << std::endl; + MPI_Abort(MPI_COMM_WORLD, 0); + } + + MPI_Comm comm = MPI_COMM_WORLD; + + /* + * Initialize general things, like magma, openmp, IO, ... 
+ */ + mgmol_init(comm); + + /* + * read runtime parameters + */ + std::string input_filename(""); + std::string lrs_filename; + std::string constraints_filename(""); + + float total_spin = 0.; + bool with_spin = false; + + po::variables_map vm; + + // read from PE0 only + if (MPIdata::onpe0) + { + read_config(argc, argv, vm, input_filename, lrs_filename, + constraints_filename, total_spin, with_spin); + } + + MGmol_MPI::setup(comm, std::cout, with_spin); + MGmol_MPI& mmpi = *(MGmol_MPI::instance()); + MPI_Comm global_comm = mmpi.commGlobal(); + + /* + * Setup control struct with run time parameters + */ + Control::setup(global_comm, with_spin, total_spin); + Control& ct = *(Control::instance()); + + ct.setOptions(vm); + + int ret = ct.checkOptions(); + if (ret < 0) return ret; + + mmpi.bcastGlobal(input_filename); + mmpi.bcastGlobal(lrs_filename); + + int status = 0; + + // Enter main scope + { + MGmolInterface* mgmol = new MGmol(global_comm, + *MPIdata::sout, input_filename, lrs_filename, constraints_filename); + + mgmol->setup(); + + /* load a restart file */ + MGmol* mgmol_ext + = dynamic_cast*>(mgmol); + mgmol_ext->loadRestartFile(ct.restart_file); + + if (MPIdata::onpe0) + std::cout << "=============================" << std::endl; + if (MPIdata::onpe0) std::cout << "testRhoRestart..." << std::endl; + status = testRhoRestart(mgmol); + if (status < 0) return status; + + if (MPIdata::onpe0) + std::cout << "=============================" << std::endl; + if (MPIdata::onpe0) std::cout << "testPotRestart..." << std::endl; + status = testPotRestart(mgmol); + if (status < 0) return status; + + delete mgmol; + + } // close main scope + + mgmol_finalize(); + + mpirc = MPI_Finalize(); + if (mpirc != MPI_SUCCESS) + { + std::cerr << "MPI Finalize failed!!!" 
<< std::endl; + } + + time_t tt; + time(&tt); + if (onpe0) std::cout << " Run ended at " << ctime(&tt) << std::endl; + + return 0; +} From 8003d70f5758f3cef4d74724f2b7d3a04afe7964 Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Mon, 3 Mar 2025 21:51:29 -0500 Subject: [PATCH 26/99] Strenghten testIons (#315) --- tests/testIons.cc | 76 +++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 64 insertions(+), 12 deletions(-) diff --git a/tests/testIons.cc b/tests/testIons.cc index 30b2ad0c..6bfdd74a 100644 --- a/tests/testIons.cc +++ b/tests/testIons.cc @@ -41,7 +41,7 @@ int main(int argc, char** argv) // read species info from pseudopotential file std::string file_path = argv[1]; std::string filename(file_path + "/pseudo.C_ONCV_PBE_SG15"); - std::cout << "Potential = " << filename << std::endl; + if (myrank == 0) std::cout << "Potential = " << filename << std::endl; sp.read_1species(filename); sp.set_dim_nl(h[0]); @@ -80,13 +80,71 @@ int main(int argc, char** argv) ions.setup(); - std::vector& new_local_ions(ions.local_ions()); + // verify sum of local ions adds up to total number of ions + { + std::vector& new_local_ions(ions.local_ions()); + + int nlocal = new_local_ions.size(); + std::cout << "PE " << myrank << ", nlocal = " << nlocal << std::endl; + + int ntotal = 0; + MPI_Allreduce(&nlocal, &ntotal, 1, MPI_INT, MPI_SUM, comm); + if (ntotal != na) + { + std::cout << "ntotal = " << ntotal << std::endl; + return 1; + } + } + MPI_Barrier(MPI_COMM_WORLD); - int nlocal = new_local_ions.size(); - std::cout << "PE " << myrank << ", nlocal = " << nlocal << std::endl; + // verify some functionalities of class Ions + { + std::vector positions; + std::vector anumbers; + ions.getPositions(positions); + ions.getAtomicNumbers(anumbers); + if (myrank == 0) + { + int i = 0; + for (auto& position : positions) + { + std::cout << position; + if (i % 3 == 2) + std::cout << std::endl; + else + std::cout << " "; + i++; + } + } + MPI_Barrier(MPI_COMM_WORLD); + + // swap 
x and z + for (size_t i = 0; i + 2 < positions.size(); i += 3) /* step over one (x,y,z) triplet per ion; no unsigned underflow on short vectors */ + { + double x = positions[i]; + double z = positions[i + 2]; + positions[i] = z; + positions[i + 2] = x; + } + + ions.setPositions(positions, anumbers); + } - int ntotal = 0; - MPI_Allreduce(&nlocal, &ntotal, 1, MPI_INT, MPI_SUM, comm); + MPI_Barrier(MPI_COMM_WORLD); + { + std::vector& new_local_ions(ions.local_ions()); + + int nlocal = new_local_ions.size(); + std::cout << "PE " << myrank << ", nlocal = " << nlocal << std::endl; + + int ntotal = 0; + MPI_Allreduce(&nlocal, &ntotal, 1, MPI_INT, MPI_SUM, comm); + if (ntotal != na) + { + std::cout << "ntotal = " << ntotal << std::endl; + return 1; + } + } mpirc = MPI_Finalize(); if (mpirc != MPI_SUCCESS) { @@ -94,11 +152,5 @@ int main(int argc, char** argv) return 1; } - if (ntotal != na) - { - std::cout << "ntotal = " << ntotal << std::endl; - return 1; - } - return 0; } From 4efb34c5e2e2283043faa4f8b612e195570806bd Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Mon, 3 Mar 2025 21:52:11 -0500 Subject: [PATCH 27/99] More clean up in class Potentials (#316) * remove some unused functions * make use of class MGmol_MPI * change some function names for clarity * fix some minor bug in iterative indexes --- src/MGmol.cc | 9 +++--- src/Potentials.cc | 70 ++++++++++++++--------------------------------- src/Potentials.h | 15 +++------- 3 files changed, 29 insertions(+), 65 deletions(-) diff --git a/src/MGmol.cc b/src/MGmol.cc index 8d87f1c1..c18e58c7 100644 --- a/src/MGmol.cc +++ b/src/MGmol.cc @@ -400,7 +400,6 @@ int MGmol::initial() current_orbitals_->setDataWithGhosts(); current_orbitals_->trade_boundaries(); - // if(ct.restart_info <= 1)pot.initWithVnuc(); // initialize matrices S and invB if (ct.numst > 0) @@ -1379,14 +1378,14 @@ void MGmol::update_pot(const Ions& ions) const bool flag_mixing = (fabs(ct.mix_pot - 1.)
> 1.e-3); - // evaluate potential correction + // update total potential if (flag_mixing) { - pot.delta_v(rho_->rho_); - pot.update(ct.mix_pot); + pot.computeDeltaV(rho_->rho_); + pot.updateVtot(ct.mix_pot); } else - pot.update(rho_->rho_); + pot.updateVtot(rho_->rho_); } template diff --git a/src/Potentials.cc b/src/Potentials.cc index 96375c6d..4da9ab9a 100644 --- a/src/Potentials.cc +++ b/src/Potentials.cc @@ -87,37 +87,19 @@ Potentials::Potentials() #endif } -void Potentials::initWithVnuc() -{ - assert(size_ > 0); - if (verbosity_level_ > 2 && onpe0) - (*MPIdata::sout) << "Potentials::initWithVnuc()" << std::endl; - itindex_vxc_ = 0; - itindex_vh_ = 0; - int ione = 1; - Tcopy(&size_, &v_nuc_[0], &ione, &vtot_[0], &ione); - double one = 1.; - LinearAlgebraUtils::MPaxpy( - size_, one, &v_ext_[0], &vtot_[0]); - // factor ha2ry to get total potential in [Ry] for calculations - LinearAlgebraUtils::MPscal(size_, ha2ry, &vtot_[0]); -} - double Potentials::max() const { - Mesh* mymesh = Mesh::instance(); - const pb::PEenv& myPEenv = mymesh->peenv(); - double vmax = (*max_element(vtot_.begin(), vtot_.end())); - vmax = myPEenv.double_max_all(vmax); + MGmol_MPI& mmpi = *(MGmol_MPI::instance()); + double vmax = (*max_element(vtot_.begin(), vtot_.end())); + mmpi.allreduce(&vmax, 1, MPI_MAX); /* reduces vmax in place; the call's return value must not be assigned to it */ return vmax; } double Potentials::min() const { - Mesh* mymesh = Mesh::instance(); - const pb::PEenv& myPEenv = mymesh->peenv(); - double vmin = -(*min_element(vtot_.begin(), vtot_.end())); - vmin = -myPEenv.double_max_all(vmin); + MGmol_MPI& mmpi = *(MGmol_MPI::instance()); + double vmin = (*min_element(vtot_.begin(), vtot_.end())); + mmpi.allreduce(&vmin, 1, MPI_MIN); /* reduces vmin in place; MPI_MIN replaces the former negate-and-max trick */ return vmin; } @@ -141,7 +123,7 @@ void Potentials::evalNormDeltaVtotRho( mmpi.allreduce(&scf_dvrho_, 1, MPI_SUM); } -double Potentials::update(const std::vector>& rho) +double Potentials::updateVtot(const std::vector>& rho) { assert(itindex_vxc_ >= 0); assert(itindex_vh_ >= 0); @@ -149,9 +131,8 @@ double
Potentials::update(const std::vector>& rho) if (verbosity_level_ > 2 && onpe0) (*MPIdata::sout) << "Potentials::update(rho)" << std::endl; - int ione = 1; - Mesh* mymesh = Mesh::instance(); - const pb::PEenv& myPEenv = mymesh->peenv(); + int ione = 1; + MGmol_MPI& mmpi = *(MGmol_MPI::instance()); // save old potentials Tcopy(&size_, &vtot_[0], &ione, &vtot_old_[0], &ione); @@ -181,23 +162,22 @@ double Potentials::update(const std::vector>& rho) = LinearAlgebraUtils::MPdot(size_, &dv_[0], &dv_[0]); double sum = 0.; - int rc - = MPI_Allreduce(&dvdot, &sum, 1, MPI_DOUBLE, MPI_SUM, myPEenv.comm()); + int rc = mmpi.allreduce(&dvdot, &sum, 1, MPI_SUM); if (rc != MPI_SUCCESS) { std::cerr << "MPI_Allreduce double sum failed!!!" << std::endl; - MPI_Abort(myPEenv.comm(), EXIT_FAILURE); + mmpi.abort(); } dvdot = sum; scf_dv_ = 0.5 * sqrt(dvdot); - const double gsize = (double)size_ * (double)myPEenv.n_mpi_tasks(); + const double gsize = (double)size_ * (double)mmpi.size(); scf_dv_ /= gsize; return scf_dv_; } -void Potentials::update(const double mix) +void Potentials::updateVtot(const double mix) { assert(itindex_vxc_ == itindex_vh_); @@ -210,7 +190,7 @@ void Potentials::update(const double mix) size_, potmix, &dv_[0], &vtot_[0]); } -double Potentials::delta_v(const std::vector>& rho) +double Potentials::computeDeltaV(const std::vector>& rho) { assert(itindex_vxc_ == itindex_vh_); assert(size_ > 0); @@ -218,9 +198,8 @@ double Potentials::delta_v(const std::vector>& rho) if (verbosity_level_ > 2 && onpe0) (*MPIdata::sout) << "Potentials::delta_v()" << std::endl; - int ione = 1; - Mesh* mymesh = Mesh::instance(); - const pb::PEenv& myPEenv = mymesh->peenv(); + int ione = 1; + MGmol_MPI& mmpi = *(MGmol_MPI::instance()); // save old potentials Tcopy(&size_, &vtot_[0], &ione, &vtot_old_[0], &ione); @@ -249,17 +228,16 @@ double Potentials::delta_v(const std::vector>& rho) = LinearAlgebraUtils::MPdot(size_, &dv_[0], &dv_[0]); double sum = 0.; - int rc - = MPI_Allreduce(&dvdot, &sum, 
1, MPI_DOUBLE, MPI_SUM, myPEenv.comm()); + int rc = mmpi.allreduce(&dvdot, &sum, 1, MPI_SUM); if (rc != MPI_SUCCESS) { std::cerr << "MPI_Allreduce double sum failed!!!" << std::endl; - MPI_Abort(myPEenv.comm(), EXIT_FAILURE); + mmpi.abort(); } dvdot = sum; scf_dv_ = 0.5 * sqrt(dvdot); - const double gsize = (double)size_ * (double)myPEenv.n_mpi_tasks(); + const double gsize = (double)size_ * (double)mmpi.size(); scf_dv_ /= gsize; return scf_dv_; @@ -437,7 +415,6 @@ void Potentials::readExternalPot(const std::string filename, const char type) { assert(index < size_); (*from) >> v_ext_[index]; - //(*MPIdata::sout)<& vrho) const; /*! * evaluate potential correction associated with a new rho */ - double delta_v(const std::vector>& rho); + double computeDeltaV(const std::vector>& rho); /*! - * update potentials based on argument rho + * update total potential with updated components */ - double update(const std::vector>& rho); + double updateVtot(const std::vector>& rho); /*! * update potentials based on potential correction delta v and mixing * parameter */ - void update(const double mix); + void updateVtot(const double mix); double max() const; double min() const; @@ -196,7 +190,6 @@ class Potentials template void setVxc(const T* const vxc, const int iterativeIndex); - void setVh(const POTDTYPE* const vh, const int iterativeIndex); void setVh(const pb::GridFunc& vh, const int iterativeIndex); void initialize(Ions& ions); From 2f511b775505a127869107d8c90a30cb212ad591 Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Fri, 14 Mar 2025 17:41:39 -0400 Subject: [PATCH 28/99] Misc code fixes (#317) * exposed by porting on new environment --- src/HDFrestart.cc | 6 ++++++ src/Ions.cc | 2 ++ src/tools/SymmetricMatrix.h | 1 + 3 files changed, 9 insertions(+) diff --git a/src/HDFrestart.cc b/src/HDFrestart.cc index 87982290..e2bfa1ab 100644 --- a/src/HDFrestart.cc +++ b/src/HDFrestart.cc @@ -1922,10 +1922,12 @@ int HDFrestart::readAtomicData(std::string datasetname, 
std::vector& data) // send data to inactive PEs if (gather_data_x_) gatherDataXdir(data); +#ifdef MGMOL_USE_HDF5P if (useHdf5p()) { data.erase(std::remove(data.begin(), data.end(), -1), data.end()); } +#endif return 0; } @@ -1974,10 +1976,12 @@ int HDFrestart::readAtomicData( } } +#ifdef MGMOL_USE_HDF5P if (useHdf5p()) { data.erase(std::remove(data.begin(), data.end(), 1e+32), data.end()); } +#endif if (gather_data_x_) gatherDataXdir(data); return 0; @@ -2104,10 +2108,12 @@ int HDFrestart::readAtomicData( data.push_back(t); } +#ifdef MGMOL_USE_HDF5P if (useHdf5p()) { data.erase(std::remove(data.begin(), data.end(), ""), data.end()); } +#endif return 0; } diff --git a/src/Ions.cc b/src/Ions.cc index 9af0a64e..bc30417b 100644 --- a/src/Ions.cc +++ b/src/Ions.cc @@ -947,10 +947,12 @@ void Ions::initFromRestartFile(HDFrestart& h5_file) assert(at_numbers.size() == at_nlprojIds.size()); num_ions_ = at_names.size(); +#ifdef MGMOL_USE_HDF5P if (!h5_file.useHdf5p()) { mmpi.allreduce(&num_ions_, 1, MPI_SUM); } +#endif if (onpe0 && ct.verbose > 0) { (*MPIdata::sout) << "Ions::setFromRestartFile(), read " << num_ions_ diff --git a/src/tools/SymmetricMatrix.h b/src/tools/SymmetricMatrix.h index bd887fb3..b7aece1e 100644 --- a/src/tools/SymmetricMatrix.h +++ b/src/tools/SymmetricMatrix.h @@ -14,6 +14,7 @@ #include #include #include +#include #include // full lower triangular part of symmetric matrix with compact storage From 45a2c80cd6199da0643adf787d04831650e57d07 Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Fri, 14 Mar 2025 17:42:00 -0400 Subject: [PATCH 29/99] Use unique restart filenames in test HDF5single (#318) --- tests/HDF5single/md.cfg | 4 ++-- tests/HDF5single/mgmol.cfg | 2 +- tests/HDF5single/test.py | 4 +++- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/tests/HDF5single/md.cfg b/tests/HDF5single/md.cfg index 1ff2adab..8d6b21e2 100644 --- a/tests/HDF5single/md.cfg +++ b/tests/HDF5single/md.cfg @@ -25,10 +25,10 @@ max_steps=24 atol=1.e-8 
[Restart] input_level=4 -input_filename=WF +input_filename=wf.h5 input_type=single_file output_level=4 -output_filename=WF_MD +output_filename=wf_md.h5 output_type=single_file [Coloring] scope=global diff --git a/tests/HDF5single/mgmol.cfg b/tests/HDF5single/mgmol.cfg index 4dba942a..e8f01a29 100644 --- a/tests/HDF5single/mgmol.cfg +++ b/tests/HDF5single/mgmol.cfg @@ -25,7 +25,7 @@ initial_type=Random initial_width=1.5 [Restart] output_level=4 -output_filename=WF +output_filename=wf.h5 output_type=single_file [Coloring] scope=global diff --git a/tests/HDF5single/test.py b/tests/HDF5single/test.py index 080ee0ba..e2f0596a 100755 --- a/tests/HDF5single/test.py +++ b/tests/HDF5single/test.py @@ -47,7 +47,7 @@ output = subprocess.check_output(command,shell=True) lines=output.split(b'\n') -os.remove('WF') +os.remove('wf.h5') print("Check energy conservation...") tol = 1.e-4 @@ -71,5 +71,7 @@ print("ERROR needs 4 energy values for checking conservation!") sys.exit(1) +os.remove('wf_md.h5') + print("Test SUCCESSFUL!") sys.exit(0) From b6cf162eac41663473659d44bd04c087d56e5fdc Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Sat, 15 Mar 2025 09:12:08 -0400 Subject: [PATCH 30/99] Swap ions in existing test (#319) * strengthen testing by swapping ions in testRestartEnergyAndForces --- src/MGmol.h | 5 +++ src/MGmolInterface.h | 3 ++ tests/RestartEnergyAndForces/test.py | 6 ++++ .../testRestartEnergyAndForces.cc | 32 +++++++++++++++++++ 4 files changed, 46 insertions(+) diff --git a/src/MGmol.h b/src/MGmol.h index b18465a1..350e8511 100644 --- a/src/MGmol.h +++ b/src/MGmol.h @@ -341,6 +341,11 @@ class MGmol : public MGmolInterface { forces_->force(orbitals, ions); } + void setPositions(const std::vector& positions, + const std::vector& atnumbers) + { + ions_->setPositions(positions, atnumbers); + } /* * simply dump current state diff --git a/src/MGmolInterface.h b/src/MGmolInterface.h index 9a9bf8a6..dc54a8e7 100644 --- a/src/MGmolInterface.h +++ b/src/MGmolInterface.h 
@@ -40,6 +40,9 @@ class MGmolInterface virtual void getAtomicPositions(std::vector& tau) = 0; virtual void getAtomicNumbers(std::vector& an) = 0; + virtual void setPositions(const std::vector& positions, + const std::vector& atnumbers) + = 0; virtual std::shared_ptr getProjectedMatrices() = 0; virtual void dumpRestart() = 0; diff --git a/tests/RestartEnergyAndForces/test.py b/tests/RestartEnergyAndForces/test.py index b62d39f8..585722f0 100755 --- a/tests/RestartEnergyAndForces/test.py +++ b/tests/RestartEnergyAndForces/test.py @@ -64,7 +64,13 @@ shutil.rmtree('WF') test_energy=1.e18 +l=-1 for line in lines: + if line.count(b'Positions'): + l=0 + if l>=0 and l<4: + print(line) + l=l+1 if line.count(b'%%'): print(line) words=line.split() diff --git a/tests/RestartEnergyAndForces/testRestartEnergyAndForces.cc b/tests/RestartEnergyAndForces/testRestartEnergyAndForces.cc index e323afbc..3027ab55 100644 --- a/tests/RestartEnergyAndForces/testRestartEnergyAndForces.cc +++ b/tests/RestartEnergyAndForces/testRestartEnergyAndForces.cc @@ -124,6 +124,8 @@ int main(int argc, char** argv) } } + mgmol->setPositions(positions, anumbers); + Mesh* mymesh = Mesh::instance(); const pb::Grid& mygrid = mymesh->grid(); const pb::PEenv& myPEenv = mymesh->peenv(); @@ -165,6 +167,36 @@ int main(int argc, char** argv) projmatrices->setDMuniform(ct.getNelSpin(), 0); projmatrices->printDM(std::cout); + // swap H and O to make sure order of atoms in list does not matter + double x = positions[0]; + double y = positions[1]; + double z = positions[2]; + positions[0] = positions[3]; + positions[1] = positions[4]; + positions[2] = positions[5]; + positions[3] = x; + positions[4] = y; + positions[5] = z; + short tmp = anumbers[0]; + anumbers[0] = anumbers[1]; + anumbers[1] = tmp; + if (MPIdata::onpe0) + { + std::cout << "Positions:" << std::endl; + std::vector::iterator ita = anumbers.begin(); + for (std::vector::iterator it = positions.begin(); + it != positions.end(); it += 3) + { + std::cout << 
*ita; + for (int i = 0; i < 3; i++) + std::cout << " " << *(it + i); + std::cout << std::endl; + ita++; + } + } + + mgmol->setPositions(positions, anumbers); + // // evaluate energy and forces with wavefunctions just read // From 7fbfe133c1ebd4f0932c9464e4e512ef8228d8d3 Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Mon, 17 Mar 2025 13:44:30 -0400 Subject: [PATCH 31/99] Code clean up (#320) * use more const * use initNuc() instead of moveVnuc() * remove incorrect assert * use MGmol_MPI in more cases * setup g_kbpsi_ inside initNuc() * rename initNuc() into setupPotentials() --- src/HDFrestart.cc | 1 - src/KBPsiMatrixSparse.cc | 45 +++++++++++++++++++--------------------- src/KBPsiMatrixSparse.h | 15 ++++++-------- src/MGmol.cc | 18 ++++++++-------- src/MGmol.h | 2 +- src/md.cc | 2 +- src/quench.cc | 1 - 7 files changed, 38 insertions(+), 46 deletions(-) diff --git a/src/HDFrestart.cc b/src/HDFrestart.cc index e2bfa1ab..1b2c7fb7 100644 --- a/src/HDFrestart.cc +++ b/src/HDFrestart.cc @@ -2104,7 +2104,6 @@ int HDFrestart::readAtomicData( stripLeadingAndTrailingBlanks(t); // std::cout<<"stripped name="< 0); data.push_back(t); } diff --git a/src/KBPsiMatrixSparse.cc b/src/KBPsiMatrixSparse.cc index 604e2f8e..8f5b3d49 100644 --- a/src/KBPsiMatrixSparse.cc +++ b/src/KBPsiMatrixSparse.cc @@ -11,16 +11,15 @@ #include "Control.h" #include "ExtendedGridOrbitals.h" -#include "Ions.h" #include "LocGridOrbitals.h" #include "MGmol_MPI.h" #include "Mesh.h" #include "ProjectedMatrices.h" -#include "ProjectedMatricesSparse.h" #include "ReplicatedMatrix.h" #include "SquareSubMatrix2DistMatrix.h" #include + #define Ry2Ha 0.5; Timer KBPsiMatrixSparse::global_sum_tm_("KBPsiMatrixSparse::global_sum"); @@ -133,7 +132,7 @@ void KBPsiMatrixSparse::globalSumKBpsi() // Loop over the ions with projectors overlapping with local subdomain // and evaluate for some state. 
template -void KBPsiMatrixSparse::computeKBpsi(Ions& ions, T& orbitals, +void KBPsiMatrixSparse::computeKBpsi(const Ions& ions, T& orbitals, const int first_color, const int nb_colors, const bool flag) { assert(first_color >= 0); @@ -189,7 +188,7 @@ void KBPsiMatrixSparse::computeKBpsi(Ions& ions, T& orbitals, // Loop over the ions if (gid != -1) { - for (auto ion : ions.overlappingNL_ions()) + for (const auto& ion : ions.overlappingNL_ions()) { computeLocalElement( *ion, gid, iloc, ppsi + color * ldsize, flag); @@ -211,8 +210,8 @@ void KBPsiMatrixSparse::computeKBpsi(Ions& ions, T& orbitals, compute_kbpsi_tm_.stop(); } -void KBPsiMatrixSparse::computeKBpsi( - Ions& ions, pb::GridFunc* phi, const int istate, const bool flag) +void KBPsiMatrixSparse::computeKBpsi(const Ions& ions, + pb::GridFunc* phi, const int istate, const bool flag) { assert(lapop_ != nullptr); compute_kbpsi_tm_.start(); @@ -238,7 +237,7 @@ void KBPsiMatrixSparse::computeKBpsi( for (int iloc = 0; iloc < subdivx; iloc++) { // Loop over the ions - for (auto ion : ions.overlappingNL_ions()) + for (const auto& ion : ions.overlappingNL_ions()) { computeLocalElement(*ion, istate, iloc, ppsi, flag); } @@ -251,7 +250,7 @@ void KBPsiMatrixSparse::computeKBpsi( void KBPsiMatrixSparse::scaleWithKBcoeff(const Ions& ions) { - for (auto ion : ions.overlappingNL_ions()) + for (const auto& ion : ions.overlappingNL_ions()) { std::vector gids; ion->getGidsNLprojs(gids); @@ -266,8 +265,8 @@ void KBPsiMatrixSparse::scaleWithKBcoeff(const Ions& ions) // loop over states to multiply kbpsi_[st][gid] and kbBpsi_[st][gid] // by coeff - (*kbpsimat_).scaleRow(gid, coeff); - if (lapop_) (*kbBpsimat_).scaleRow(gid, coeff); + kbpsimat_->scaleRow(gid, coeff); + if (lapop_) kbBpsimat_->scaleRow(gid, coeff); } } } @@ -454,7 +453,7 @@ SquareSubMatrix KBPsiMatrixSparse::computeHvnlMatrix( // Loop over ions centered on current PE only // (distribution of work AND Hvnlij contributions) - for (auto ion : ions.local_ions()) + for 
(const auto& ion : ions.local_ions()) { computeHvnlMatrix((KBPsiMatrixSparse*)kbpsi2, *ion, Aij); } @@ -473,7 +472,7 @@ void KBPsiMatrixSparse::computeHvnlMatrix( // Loop over ions centered on current PE only // (distribution of work AND Hvnlij contributions) - for (auto ion : ions.local_ions()) + for (const auto& ion : ions.local_ions()) { computeHvnlMatrix((KBPsiMatrixSparse*)kbpsi2, *ion, mat); } @@ -538,14 +537,14 @@ void KBPsiMatrixSparse::getPsiKBPsiSym( { // loop over all the ions // parallelization over ions by including only those centered in subdomain - for (auto& ion : ions.local_ions()) + for (const auto& ion : ions.local_ions()) { getPsiKBPsiSym(*ion, sm); } } template -void KBPsiMatrixSparse::computeAll(Ions& ions, T& orbitals) +void KBPsiMatrixSparse::computeAll(const Ions& ions, T& orbitals) { assert(count_proj_subdomain_ == ions.countProjectorsSubdomain()); @@ -603,7 +602,7 @@ double KBPsiMatrixSparse::getEvnl( double trace = 0.0; // loop over all the ions // parallelization over ions by including only those centered in subdomain - for (auto& ion : ions.local_ions()) + for (const auto& ion : ions.local_ions()) { std::vector gids; ion->getGidsNLprojs(gids); @@ -618,10 +617,9 @@ double KBPsiMatrixSparse::getEvnl( /* gather trace result */ MGmol_MPI& mmpi = *(MGmol_MPI::instance()); - MPI_Comm comm = mmpi.commSpin(); double evnl = 0.0; - MPI_Allreduce(&trace, &evnl, 1, MPI_DOUBLE, MPI_SUM, comm); + mmpi.allreduce(&trace, &evnl, 1, MPI_SUM); return evnl * Ry2Ha; } @@ -637,7 +635,7 @@ double KBPsiMatrixSparse::getEvnl(const Ions& ions, double trace = 0.0; // loop over all the ions // parallelization over ions by including only those centered in subdomain - for (auto& ion : ions.local_ions()) + for (const auto& ion : ions.local_ions()) { std::vector gids; ion->getGidsNLprojs(gids); @@ -652,10 +650,9 @@ double KBPsiMatrixSparse::getEvnl(const Ions& ions, /* gather trace result */ MGmol_MPI& mmpi = *(MGmol_MPI::instance()); - MPI_Comm comm = 
mmpi.commSpin(); double evnl = 0.0; - MPI_Allreduce(&trace, &evnl, 1, MPI_DOUBLE, MPI_SUM, comm); + mmpi.allreduce(&trace, &evnl, 1, MPI_SUM); return evnl * Ry2Ha; } @@ -736,12 +733,12 @@ double KBPsiMatrixSparse::getTraceDM( return trace; } -template void KBPsiMatrixSparse::computeKBpsi(Ions& ions, +template void KBPsiMatrixSparse::computeKBpsi(const Ions& ions, LocGridOrbitals& orbitals, const int first_color, const int nb_colors, const bool flag); -template void KBPsiMatrixSparse::computeAll(Ions&, LocGridOrbitals&); +template void KBPsiMatrixSparse::computeAll(const Ions&, LocGridOrbitals&); -template void KBPsiMatrixSparse::computeKBpsi(Ions& ions, +template void KBPsiMatrixSparse::computeKBpsi(const Ions& ions, ExtendedGridOrbitals& orbitals, const int first_color, const int nb_colors, const bool flag); -template void KBPsiMatrixSparse::computeAll(Ions&, ExtendedGridOrbitals&); +template void KBPsiMatrixSparse::computeAll(const Ions&, ExtendedGridOrbitals&); diff --git a/src/KBPsiMatrixSparse.h b/src/KBPsiMatrixSparse.h index 91f6c583..073fbddc 100644 --- a/src/KBPsiMatrixSparse.h +++ b/src/KBPsiMatrixSparse.h @@ -12,7 +12,9 @@ #include "DataDistribution.h" #include "DensityMatrixSparse.h" +#include "Ions.h" #include "KBPsiMatrixInterface.h" +#include "ProjectedMatricesSparse.h" #include "SquareSubMatrix.h" #include "VariableSizeMatrix.h" @@ -23,11 +25,6 @@ #include #include -class Ions; -class Ion; -class ProjectedMatricesInterface; -class ProjectedMatricesSparse; - class KBPsiMatrixSparse : public KBPsiMatrixInterface { static Timer global_sum_tm_; @@ -79,8 +76,8 @@ class KBPsiMatrixSparse : public KBPsiMatrixInterface void getPsiKBPsiSym(const Ions& ions, VariableSizeMatrix& sm); void getPsiKBPsiSym(const Ion& ion, VariableSizeMatrix& sm); template - void computeKBpsi(Ions& ions, OrbitalsType& orbitals, const int first_color, - const int nb_colors, const bool flag); + void computeKBpsi(const Ions& ions, OrbitalsType& orbitals, + const int first_color, 
const int nb_colors, const bool flag); void clearData(); public: @@ -105,7 +102,7 @@ class KBPsiMatrixSparse : public KBPsiMatrixInterface double getEvnl( const Ions& ions, ProjectedMatrices* proj_matrices); void computeKBpsi( - Ions& ions, pb::GridFunc*, const int, const bool flag); + const Ions& ions, pb::GridFunc*, const int, const bool flag); double getValIonState(const int gid, const int st) const { return (*kbpsimat_).get_value(gid, st); @@ -127,7 +124,7 @@ class KBPsiMatrixSparse : public KBPsiMatrixInterface const Ions& ions, ProjectedMatricesInterface* proj_matrices) const; template - void computeAll(Ions& ions, T& orbitals); + void computeAll(const Ions& ions, T& orbitals); void setup(const Ions& ions); double getTraceDM( diff --git a/src/MGmol.cc b/src/MGmol.cc index c18e58c7..8d516b24 100644 --- a/src/MGmol.cc +++ b/src/MGmol.cc @@ -358,8 +358,8 @@ int MGmol::initial() } // Initialize the nuclear local potential and the compensating charges - if (ct.verbose > 0) printWithTimeStamp("initNuc()...", os_); - initNuc(*ions_); + if (ct.verbose > 0) printWithTimeStamp("setupPotentials()...", os_); + setupPotentials(*ions_); // initialize Rho if (ct.verbose > 0) printWithTimeStamp("Initialize Rho...", os_); @@ -415,9 +415,6 @@ int MGmol::initial() current_orbitals_->checkCond(100000., ct.AtomsMove()); } - if (ct.verbose > 0) printWithTimeStamp("Setup kbpsi...", os_); - g_kbpsi_->setup(*ions_); - if (ct.restart_info == 0) { if (ct.verbose > 0) printWithTimeStamp("update_pot...", os_); @@ -817,7 +814,7 @@ double get_trilinval(const double xc, const double yc, const double zc, #endif template -void MGmol::initNuc(Ions& ions) +void MGmol::setupPotentials(Ions& ions) { init_nuc_tm_.start(); @@ -829,9 +826,12 @@ void MGmol::initNuc(Ions& ions) // initialize poentials based on ionic positions and their species pot.initialize(ions); + if (ct.verbose > 0) printWithTimeStamp("Setup kbpsi...", os_); + g_kbpsi_->setup(*ions_); + electrostat_->setupRhoc(pot.rho_comp()); - 
if (onpe0 && ct.verbose > 3) os_ << " initNuc done" << std::endl; + if (onpe0 && ct.verbose > 3) os_ << " setupPotentials done" << std::endl; init_nuc_tm_.stop(); } @@ -1428,7 +1428,7 @@ double MGmol::evaluateEnergyAndForces(Orbitals* orbitals, ions_->setPositions(tau, atnumbers); - moveVnuc(*ions_); + setupPotentials(*ions_); double eks = 0.; OrbitalsType* dorbitals = dynamic_cast(orbitals); @@ -1450,7 +1450,7 @@ double MGmol::evaluateDMandEnergyAndForces(Orbitals* orbitals, ions_->setPositions(tau, atnumbers); - moveVnuc(*ions_); + setupPotentials(*ions_); // initialize electronic density rho_->update(*dorbitals); diff --git a/src/MGmol.h b/src/MGmol.h index 350e8511..12197dfd 100644 --- a/src/MGmol.h +++ b/src/MGmol.h @@ -220,7 +220,7 @@ class MGmol : public MGmolInterface void getAtomicNumbers(std::vector& an); - void initNuc(Ions& ions); + void setupPotentials(Ions& ions); void initKBR(); void printEigAndOcc(); diff --git a/src/md.cc b/src/md.cc index b1ffb619..edccdb17 100644 --- a/src/md.cc +++ b/src/md.cc @@ -59,7 +59,7 @@ void MGmol::moveVnuc(Ions& ions) // Update items that change when the ionic coordinates change pot.axpVcompToVh(1.); - initNuc(ions); + setupPotentials(ions); pot.axpVcompToVh(-1.); proj_matrices_->setHiterativeIndex(-1, -1); diff --git a/src/quench.cc b/src/quench.cc index 59a16d50..073798f6 100644 --- a/src/quench.cc +++ b/src/quench.cc @@ -545,7 +545,6 @@ int MGmol::quench(OrbitalsType& orbitals, Ions& ions, // get actual indexes of stored functions const std::vector>& gids(orbitals.getOverlappingGids()); - g_kbpsi_->setup(*ions_); electrostat_->setup(ct.vh_its); rho_->setup(ct.getOrthoType(), gids); From 14eb43ff48c5f0eaf6ccad0e819536dcc3e08ee6 Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Fri, 21 Mar 2025 07:54:45 -0400 Subject: [PATCH 32/99] Added functionalities to set local forces (#321) --- src/Ions.cc | 39 +++++++++++++++++++++++++++++++++++++++ src/Ions.h | 9 +++++++++ tests/testIons.cc | 45 
++++++++++++++++++++++++++++++++++++++++----- 3 files changed, 88 insertions(+), 5 deletions(-) diff --git a/src/Ions.cc b/src/Ions.cc index bc30417b..10baad10 100644 --- a/src/Ions.cc +++ b/src/Ions.cc @@ -1288,6 +1288,29 @@ void Ions::writeForces(HDFrestart& h5f_file) } } +void Ions::setLocalForces( + const std::vector& forces, const std::vector& names) +{ + assert(forces.size() == 3 * names.size()); + + // loop over global list of forces and atom names + std::vector::const_iterator s = names.begin(); + for (auto it = forces.begin(); it != forces.end(); it += 3) + { + // find possible matching ion + for (auto& ion : local_ions_) + { + if (ion->compareName(*s)) + { + ion->set_force(0, *it); + ion->set_force(1, *(it + 1)); + ion->set_force(2, *(it + 2)); + } + } + s++; + } +} + // Writes out the postions of the ions and the current forces on them by root void Ions::printForcesGlobal(std::ostream& os, const int root) const { @@ -2187,6 +2210,22 @@ void Ions::getLocalPositions(std::vector& tau) const } } +void Ions::getLocalNames(std::vector& names) const +{ + for (auto& ion : local_ions_) + { + names.push_back(ion->name()); + } +} + +void Ions::getNames(std::vector& names) const +{ + for (auto& ion : list_ions_) + { + names.push_back(ion->name()); + } +} + void Ions::getPositions(std::vector& tau) { std::vector tau_local(3 * local_ions_.size()); diff --git a/src/Ions.h b/src/Ions.h index b03972d1..33b8982b 100644 --- a/src/Ions.h +++ b/src/Ions.h @@ -285,11 +285,20 @@ class Ions const std::vector& tau, const std::vector& anumbers); void getLocalPositions(std::vector& tau) const; + void getLocalNames(std::vector& names) const; + void getNames(std::vector& names) const; void getPositions(std::vector& tau); void getAtomicNumbers(std::vector& atnumbers); void getForces(std::vector& forces); void getLocalForces(std::vector& tau) const; + + /*! 
+ * set forces for ions in local_ions_ based on names matching + */ + void setLocalForces(const std::vector& forces, + const std::vector& names); + void syncData(const std::vector& sp); // void syncNames(const int nions, std::vector& local_names, // std::vector& names); diff --git a/tests/testIons.cc b/tests/testIons.cc index 6bfdd74a..e23e0e24 100644 --- a/tests/testIons.cc +++ b/tests/testIons.cc @@ -8,6 +8,8 @@ int main(int argc, char** argv) { + int status = 0; + int mpirc = MPI_Init(&argc, &argv); MPI_Comm comm = MPI_COMM_WORLD; @@ -92,7 +94,7 @@ int main(int argc, char** argv) if (ntotal != na) { std::cout << "ntotal = " << ntotal << std::endl; - return 1; + status = 1; } } MPI_Barrier(MPI_COMM_WORLD); @@ -141,16 +143,49 @@ int main(int argc, char** argv) MPI_Allreduce(&nlocal, &ntotal, 1, MPI_INT, MPI_SUM, comm); if (ntotal != na) { - std::cout << "ntotal = " << ntotal << std::endl; - return 1; + std::cerr << "ntotal = " << ntotal << std::endl; + MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); + } + } + + // get the names of all the ions + std::vector names; + ions.getNames(names); + if (myrank == 0) + for (auto& name : names) + std::cout << "name = " << name << std::endl; + if (names.size() != na) + { + std::cerr << "Incorrect count of names..." << std::endl; + MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); + } + MPI_Barrier(MPI_COMM_WORLD); + + std::vector forces(3 * na); + // arbitrary value + const double fval = 1.12; + for (auto& f : forces) + f = fval; + ions.setLocalForces(forces, names); + + int nlocal = ions.getNumLocIons(); + std::vector lforces(3 * nlocal); + ions.getLocalForces(lforces); + for (auto& f : lforces) + { + if (std::abs(f - fval) > 1.e-14) + { + std::cerr << "f = " << f << std::endl; + MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); } } + mpirc = MPI_Finalize(); if (mpirc != MPI_SUCCESS) { std::cerr << "MPI Finalize failed!!!" 
<< std::endl; - return 1; + status = 1; } - return 0; + return status; } From c61b48d2dabcdbe291b1545eeba338c56889cc40 Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Mon, 24 Mar 2025 13:35:03 -0400 Subject: [PATCH 33/99] Update mixed precision code (#322) * enable cmake build with mixed precision * enable use of BLIS * isolate blas3 loop rewrite in separate files * fix misc issues with mixed precision code --- CMakeLists.txt | 15 +- scripts/build_ubuntu22_mixedp.sh | 32 ++ src/BlockVector.cc | 4 +- src/ExtendedGridOrbitals.cc | 29 +- src/LocGridOrbitals.cc | 14 +- src/MGmol.cc | 2 + src/Rho.cc | 2 +- src/global.h | 2 +- src/linear_algebra/CMakeLists.txt | 2 +- src/linear_algebra/MGmol_blas1.h | 14 +- src/linear_algebra/gemm_impl.cc | 251 ++++++++++++++ src/linear_algebra/gemm_impl.h | 14 + src/linear_algebra/mputils.cc | 550 ++++++++---------------------- src/linear_algebra/mputils.h | 23 +- src/linear_algebra/syrk_impl.cc | 227 ++++++++++++ src/linear_algebra/syrk_impl.h | 13 + src/mgmol_run.cc | 8 + tests/CMakeLists.txt | 30 ++ 18 files changed, 768 insertions(+), 464 deletions(-) create mode 100755 scripts/build_ubuntu22_mixedp.sh create mode 100644 src/linear_algebra/gemm_impl.cc create mode 100644 src/linear_algebra/gemm_impl.h create mode 100644 src/linear_algebra/syrk_impl.cc create mode 100644 src/linear_algebra/syrk_impl.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 753a455d..e931b5cd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -52,6 +52,12 @@ else() find_package(OpenMP) endif() +# Use mixed precision +set(MGMOL_WITH_MP FALSE CACHE BOOL "Compile with mixed precision") +if(MGMOL_WITH_MP) + add_definitions(-DMGMOL_USE_MIXEDP) +endif() + # HDF5 (required) set(MGMOL_USE_HDF5P TRUE CACHE BOOL "Use HDF5 parallel capability") if(HDF5_LIBRARIES) #user sets libraries explicitly @@ -100,9 +106,16 @@ if(NOT BLAS_LIBRARIES) message(FATAL_ERROR "Required blas library not found.") endif(NOT ${BLAS_FOUND}) else(NOT BLAS_LIBRARIES) - message(STATUS 
"BLAS_LIBARIES: ${BLAS_LIBRARIES}") + message(STATUS "BLAS_LIBARIES set to ${BLAS_LIBRARIES}") endif(NOT BLAS_LIBRARIES) +set(MGMOL_WITH_BLIS FALSE CACHE BOOL "Use BLIS library") +if(MGMOL_WITH_BLIS) + add_definitions(-DMGMOL_USE_BLIS) + message(STATUS "BLIS_LIBRARIES include: ${BLIS_INCLUDE_DIRS}") + include_directories("${BLIS_INCLUDE_DIRS}") +endif() + if(NOT LAPACK_LIBRARIES) find_package(LAPACK REQUIRED) message(STATUS "LAPACK_LIBARIES: ${LAPACK_LIBRARIES}") diff --git a/scripts/build_ubuntu22_mixedp.sh b/scripts/build_ubuntu22_mixedp.sh new file mode 100755 index 00000000..d85ab9a5 --- /dev/null +++ b/scripts/build_ubuntu22_mixedp.sh @@ -0,0 +1,32 @@ +#/bin/bash +MGMOL_ROOT=`pwd` + +INSTALL_DIR=${MGMOL_ROOT}/mgmol_install +mkdir -p ${INSTALL_DIR} + +BUILD_DIR=${MGMOL_ROOT}/build +mkdir -p ${BUILD_DIR} +cd ${BUILD_DIR} + +# call cmake +cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_DIR} \ + -DCMAKE_BUILD_TYPE="Release" \ + -DCMAKE_CXX_COMPILER=mpiCC.openmpi \ + -DMGMOL_WITH_BLIS=ON \ + -DBLAS_LIBRARIES=/home/q8j/blis/lib/libblis.so \ + -DBLIS_INCLUDE_DIRS="/home/q8j/blis//include" \ + -DLAPACK_LIBRARIES=/usr/lib/x86_64-linux-gnu/openblas-pthread/liblapack.so \ + -DCMAKE_Fortran_COMPILER=mpif77.openmpi \ + -DSCALAPACK_LIBRARY=/usr/lib/x86_64-linux-gnu/libscalapack-openmpi.so.2.1 \ + -DMPIEXEC_EXECUTABLE=/usr/bin/mpirun \ + -DMPIEXEC_NUMPROC_FLAG="-np" \ + -DMPIEXEC_PREFLAGS="--oversubscribe" \ + -DMGMOL_WITH_CLANG_FORMAT=ON \ + -DMGMOL_WITH_MP=ON \ + -DCMAKE_PREFIX_PATH=${HOME}/bin \ + -D CMAKE_CXX_FLAGS="-Wall" \ + .. 
+ +# call make install +make -j 4 +make install diff --git a/src/BlockVector.cc b/src/BlockVector.cc index 9a411ebe..671ba90d 100644 --- a/src/BlockVector.cc +++ b/src/BlockVector.cc @@ -556,7 +556,7 @@ template void BlockVector::setDataWithGhosts( pb::GridFuncVector* data_wghosts); template void BlockVector::setDataWithGhosts( pb::GridFuncVector* data_wghosts); -#ifdef USE_MP +#ifdef MGMOL_USE_MIXEDP template class BlockVector; template void BlockVector::assign( const pb::GridFuncVector& src); @@ -586,7 +586,7 @@ template void BlockVector::setDataWithGhosts( pb::GridFuncVector* data_wghosts); template void BlockVector::setDataWithGhosts( pb::GridFuncVector* data_wghosts); -#ifdef USE_MP +#ifdef MGMOL_USE_MIXEDP template class BlockVector; template void BlockVector::assign( const pb::GridFuncVector& src); diff --git a/src/ExtendedGridOrbitals.cc b/src/ExtendedGridOrbitals.cc index 07face7c..3e887dad 100644 --- a/src/ExtendedGridOrbitals.cc +++ b/src/ExtendedGridOrbitals.cc @@ -926,10 +926,10 @@ void ExtendedGridOrbitals::computeMatB( MATDTYPE* ssiloc = ss.getRawPtr(iloc); // calculate nf columns of ssiloc - MPgemmTN(numst_, nf, loc_numpt_, 1., - orbitals_psi_host_view + iloc * loc_numpt_, lda_, - work + iloc * loc_numpt_, lda_, 0., ssiloc + icolor * numst_, - numst_); + LinearAlgebraUtils::MPgemmTN(numst_, nf, + loc_numpt_, 1., orbitals_psi_host_view + iloc * loc_numpt_, + lda_, work + iloc * loc_numpt_, lda_, 0., + ssiloc + icolor * numst_, numst_); } } @@ -970,7 +970,7 @@ void ExtendedGridOrbitals::getLocalOverlap( if (numst_ != 0) { -#ifdef USE_MP +#ifdef MGMOL_USE_MIXEDP getLocalOverlap(*this, ss); #else ORBDTYPE* psi = block_vector_.vect(0); @@ -1041,21 +1041,12 @@ void ExtendedGridOrbitals::computeLocalProduct(const ORBDTYPE* const array, const int lda = transpose ? ld : lda_; const int ldb = transpose ? 
lda_ : ld; -#ifdef USE_MP - // use temporary float data for matrix ss - LocalMatrices ssf(ss.nmat(), ss.m(), ss.n()); -#else - LocalMatrices& ssf(ss); -#endif for (short iloc = 0; iloc < subdivx_; iloc++) { - LinearAlgebraUtils::MPgemm('T', 'N', numst_, numst_, + LinearAlgebraUtils::MPgemmTN(numst_, numst_, loc_numpt_, 1., a + iloc * loc_numpt_, lda, b + +iloc * loc_numpt_, - ldb, 0., ssf.getRawPtr(iloc), ssf.m()); + ldb, 0., ss.getRawPtr(iloc), ss.m()); } -#ifdef USE_MP - ss.copy(ssf); -#endif ss.scal(grid_.vel()); } @@ -1709,9 +1700,9 @@ void ExtendedGridOrbitals::addDotWithNcol2Matrix( Apsi.getPsi(0, iloc), phi_size, phi_host_view); // TODO this can be done on the GPU - MPgemmTN(numst_, numst_, loc_numpt_, vel, - block_vector_host_view + iloc * loc_numpt_, lda_, phi_host_view, - lda_, 1., work.data(), numst_); + LinearAlgebraUtils::MPgemmTN(numst_, numst_, + loc_numpt_, vel, block_vector_host_view + iloc * loc_numpt_, lda_, + phi_host_view, lda_, 1., work.data(), numst_); MemorySpace::Memory::free_host_view( phi_host_view); diff --git a/src/LocGridOrbitals.cc b/src/LocGridOrbitals.cc index 4a1b9442..3d14e051 100644 --- a/src/LocGridOrbitals.cc +++ b/src/LocGridOrbitals.cc @@ -1448,8 +1448,8 @@ void LocGridOrbitals::computeMatB( MATDTYPE* ssiloc = ss.getRawPtr(iloc); // calculate nf columns of ssiloc - MPgemmTN(chromatic_number_, nf, loc_numpt_, 1., - orbitals_psi + iloc * loc_numpt_, lda_, + LinearAlgebraUtils::MPgemmTN(chromatic_number_, + nf, loc_numpt_, 1., orbitals_psi + iloc * loc_numpt_, lda_, work + iloc * loc_numpt_, lda_, 0., ssiloc + icolor * chromatic_number_, chromatic_number_); } @@ -1490,7 +1490,7 @@ void LocGridOrbitals::getLocalOverlap( if (chromatic_number_ != 0) { -#ifdef USE_MP +#ifdef MGMOL_USE_MIXEDP getLocalOverlap(*this, ss); #else const ORBDTYPE* const psi = block_vector_.vect(0); @@ -1560,7 +1560,7 @@ void LocGridOrbitals::computeLocalProduct(const ORBDTYPE* const array, MemorySpace::Memory::copy_view_to_host( const_cast(b), b_size, 
b_host_view); -#ifdef USE_MP +#ifdef MGMOL_USE_MIXEDP // use temporary float data for matrix ss LocalMatrices ssf(ss.nmat(), ss.m(), ss.n()); #else @@ -1575,7 +1575,7 @@ void LocGridOrbitals::computeLocalProduct(const ORBDTYPE* const array, a_host_view); MemorySpace::Memory::free_host_view( b_host_view); -#ifdef USE_MP +#ifdef MGMOL_USE_MIXEDP ss.copy(ssf); #endif @@ -2495,8 +2495,8 @@ void LocGridOrbitals::addDotWithNcol2Matrix( { MATDTYPE* ssiloc = ss.getRawPtr(iloc); - // TODO - MPgemmTN(chromatic_number_, chromatic_number_, loc_numpt_, vel, + LinearAlgebraUtils::MPgemmTN(chromatic_number_, + chromatic_number_, loc_numpt_, vel, block_vector_.vect(0) + iloc * loc_numpt_, lda_, Apsi.getPsi(0, iloc), lda_, 0., ssiloc, chromatic_number_); } diff --git a/src/MGmol.cc b/src/MGmol.cc index 8d516b24..4d69387d 100644 --- a/src/MGmol.cc +++ b/src/MGmol.cc @@ -87,6 +87,7 @@ extern Timer sgemm_tm; extern Timer dgemm_tm; extern Timer mpgemm_tm; extern Timer tttgemm_tm; +extern Timer bligemm_tm; extern Timer dsyrk_tm; extern Timer ssyrk_tm; extern Timer mpsyrk_tm; @@ -864,6 +865,7 @@ void MGmol::printTimers() dgemm_tm.print(os_); mpgemm_tm.print(os_); tttgemm_tm.print(os_); + bligemm_tm.print(os_); ssyrk_tm.print(os_); dsyrk_tm.print(os_); diff --git a/src/Rho.cc b/src/Rho.cc index 45a41f3e..e80e74fe 100644 --- a/src/Rho.cc +++ b/src/Rho.cc @@ -588,7 +588,7 @@ Rho::computeRho>( ExtendedGridOrbitals&, const dist_matrix::DistMatrix&); template void Rho::computeRho>( LocGridOrbitals&, const dist_matrix::DistMatrix&); -#ifdef USE_MP +#ifdef MGMOL_USE_MIXEDP template double Rho::dotWithRho( const float* const func) const; #endif diff --git a/src/global.h b/src/global.h index de6c8559..a23e4e3b 100644 --- a/src/global.h +++ b/src/global.h @@ -15,7 +15,7 @@ //#include "mgmol_memory.h" -#ifdef USE_MP +#ifdef MGMOL_USE_MIXEDP typedef float ORBDTYPE; #else typedef double ORBDTYPE; diff --git a/src/linear_algebra/CMakeLists.txt b/src/linear_algebra/CMakeLists.txt index 
8bd5ef2f..0178f539 100644 --- a/src/linear_algebra/CMakeLists.txt +++ b/src/linear_algebra/CMakeLists.txt @@ -1,4 +1,4 @@ -set(SOURCES mputils.cc) +set(SOURCES mputils.cc gemm_impl.cc syrk_impl.cc) add_library(mgmol_linear_algebra ${SOURCES}) target_link_libraries(mgmol_linear_algebra PUBLIC MPI::MPI_CXX) if(${MGMOL_WITH_MAGMA}) diff --git a/src/linear_algebra/MGmol_blas1.h b/src/linear_algebra/MGmol_blas1.h index 41204226..32431468 100644 --- a/src/linear_algebra/MGmol_blas1.h +++ b/src/linear_algebra/MGmol_blas1.h @@ -11,6 +11,9 @@ #define MGMOL_MYBLAS1_H #include "fc_mangle.h" +#ifdef MGMOL_USE_BLIS +#include +#endif #include #include @@ -18,6 +21,8 @@ #define MY_VERSION 0 #define EPSILON 1.e-12 +#ifndef MGMOL_USE_BLIS + #ifdef __cplusplus extern "C" { @@ -42,12 +47,17 @@ extern "C" float SNRM2(const int* const, const float* const, const int* const); int IDAMAX(const int* const, const double* const, const int* const); int ISAMAX(const int* const, const float* const, const int* const); - void DROT(int*, double*, int*, double*, int*, double*, double*); - void SROT(int*, float*, int*, float*, int*, float*, float*); + void DROT(const int*, double*, const int*, double*, const int*, + const double*, const double*); + void SROT(const int*, float*, const int*, float*, const int*, const float*, + const float*); + #ifdef __cplusplus } #endif +#endif // MGMOL_USE_BLIS + inline void my_daxpy( const int n, const double alpha, const double* const a, double* b) { diff --git a/src/linear_algebra/gemm_impl.cc b/src/linear_algebra/gemm_impl.cc new file mode 100644 index 00000000..3500410c --- /dev/null +++ b/src/linear_algebra/gemm_impl.cc @@ -0,0 +1,251 @@ +// Copyright (c) 2017, Lawrence Livermore National Security, LLC and +// UT-Battelle, LLC. +// Produced at the Lawrence Livermore National Laboratory and the Oak Ridge +// National Laboratory. +// LLNL-CODE-743438 +// All rights reserved. +// This file is part of MGmol. For details, see https://github.com/llnl/mgmol. 
+// Please also read this link https://github.com/llnl/mgmol/LICENSE + +#include "gemm_impl.h" +#include "Timer.h" +#include "mputils.h" + +#include +#include + +Timer mpgemm_tm("mpgemm"); +Timer tttgemm_tm("tttgemm"); + +using LAU_H = LinearAlgebraUtils; + +template +void gemm_impl(const char transa, const char transb, const int m, const int n, + const int k, const double alpha, const T1* const a, const int lda, + const T2* const b, const int ldb, const double beta, T3* const c, + const int ldc) +{ + tttgemm_tm.start(); + // std::cout<<"template MPgemm..."< buff(m); + for (int j = 0; j < n; j++) + { + std::fill(buff.begin(), buff.end(), 0.); + for (int l = 0; l < k; l++) + { + /* pointer to beginning of column l in matrix a */ + const T1* colL = a + lda * l; + /* get multiplier */ + double mult = (double)(alpha * b[ldb * j + l]); + LAU_H::MPaxpy(m, mult, colL, buff.data()); + } + /* Update col j of of result matrix C. */ + /* Get pointer to beginning of column j in C. */ + T3* cj = c + ldc * j; + LAU_H::MPscal(m, beta, cj); + for (int i = 0; i < m; i++) + { + cj[i] += (T3)buff[i]; + } + } + } + else /* transa == 'T'/'C' */ + { + for (int j = 0; j < n; j++) + { + const T2* __restrict__ bj = b + ldb * j; + for (int i = 0; i < m; i++) + { + const int pos = ldc * j + i; + double bc = static_cast(c[pos]) * beta; + const T1* __restrict__ ai = a + lda * i; + c[pos] + = static_cast(alpha * LAU_H::MPdot(k, ai, bj) + bc); + } + } + } + } + else /* transb == 'T'/'C' */ + { + if (transa == 'N' || transa == 'n') + { + /* buffer to hold accumulation in double */ + std::vector buff(m); + for (int j = 0; j < n; j++) + { + std::fill(buff.begin(), buff.end(), 0.); + for (int l = 0; l < k; l++) + { + /* pointer to beginning of column l in matrix a */ + const T1* colL = a + lda * l; + /* get multiplier */ + double mult = (double)(alpha * b[ldb * l + j]); + LAU_H::MPaxpy(m, mult, colL, buff.data()); + } + /* Update col j of of result matrix C. 
*/ + /* Get pointer to beginning of column j in C. */ + T3* cj = c + ldc * j; + LAU_H::MPscal(m, beta, cj); + for (int i = 0; i < m; i++) + { + cj[i] += (T3)buff[i]; + } + } + } + else /* transa == 'T'/'C' */ + { + for (int j = 0; j < n; j++) + { + for (int i = 0; i < m; i++) + { + const int pos = ldc * j + i; + const T1* ai = a + lda * i; + double sum = 0.; + for (int l = 0; l < k; l++) + { + sum += alpha * ai[l] * b[ldb * l + j]; + } + sum += (double)(beta * c[pos]); + c[pos] = (T3)sum; + } + } + } + } + + tttgemm_tm.stop(); +} + +// input/output in float, computation in double +template <> +void gemm_impl(const char transa, const char transb, + const int m, const int n, const int k, const double alpha, + const float* const a, const int lda, const float* const b, const int ldb, + const double beta, float* const c, const int ldc) +{ + mpgemm_tm.start(); + + if (beta == 1. && (alpha == 0. || m == 0 || n == 0 || k == 0)) return; + + /* case transb == 'N' and transa == 'N' */ + if (transb == 'N' || transb == 'n') + { + if (transa == 'N' || transa == 'n') + { + /* buffer to hold accumulation in double */ + std::vector buff(m); + for (int j = 0; j < n; j++) + { + std::fill(buff.begin(), buff.end(), 0); + for (int l = 0; l < k; l++) + { + /* pointer to beginning of column l in matrix a */ + const float* colL = a + lda * l; + /* get multiplier */ + double mult = (double)(alpha * b[ldb * j + l]); + LAU_H::MPaxpy(m, mult, colL, buff.data()); + } + /* Update col j of of result matrix C. */ + /* Get pointer to beginning of column j in C. 
*/ + float* cj = c + ldc * j; + LAU_H::MPscal(m, beta, cj); + for (int i = 0; i < m; i++) + cj[i] += (float)buff[i]; + } + } + else /* transa == 'T'/'C' */ + { + for (int j = 0; j < n; j++) + { + const float* __restrict__ bj = b + ldb * j; + for (int i = 0; i < m; i++) + { + const int pos = ldc * j + i; + double bc = (double)c[pos] * beta; + const float* __restrict__ ai = a + lda * i; + c[pos] = (float)(alpha * LAU_H::MPdot(k, ai, bj) + bc); + } + } + } + } + else /* transb == 'T'/'C' */ + { + if (transa == 'N' || transa == 'n') + { + /* buffer to hold accumulation in double */ + std::vector buff(m); + for (int j = 0; j < n; j++) + { + std::fill(buff.begin(), buff.end(), 0); + for (int l = 0; l < k; l++) + { + /* pointer to beginning of column l in matrix a */ + const float* colL = a + lda * l; + /* get multiplier */ + double mult = (double)(alpha * b[ldb * l + j]); + LAU_H::MPaxpy(m, mult, colL, buff.data()); + } + /* Update col j of of result matrix C. */ + /* Get pointer to beginning of column j in C. 
*/ + float* cj = c + ldc * j; + LAU_H::MPscal(m, beta, cj); + for (int i = 0; i < m; i++) + cj[i] += (float)buff[i]; + } + } + else /* transa == 'T'/'C' */ + { + for (int j = 0; j < n; j++) + { + for (int i = 0; i < m; i++) + { + const int pos = ldc * j + i; + const float* ai = a + lda * i; + double sum = 0.; + for (int l = 0; l < k; l++) + { + sum += alpha * ai[l] * b[ldb * l + j]; + } + sum += (double)(beta * c[pos]); + c[pos] = (float)sum; + } + } + } + } + + mpgemm_tm.stop(); +} + +template void gemm_impl(const char transa, + const char transb, const int m, const int n, const int k, + const double alpha, const double* const a, const int lda, + const float* const b, const int ldb, const double beta, double* const c, + const int ldc); +template void gemm_impl(const char transa, + const char transb, const int m, const int n, const int k, + const double alpha, const float* const a, const int lda, + const double* const b, const int ldb, const double beta, float* const c, + const int ldc); +template void gemm_impl(const char transa, + const char transb, const int m, const int n, const int k, + const double alpha, const double* const a, const int lda, + const double* const b, const int ldb, const double beta, float* const c, + const int ldc); +template void gemm_impl(const char transa, + const char transb, const int m, const int n, const int k, + const double alpha, const float* const a, const int lda, + const float* const b, const int ldb, const double beta, double* const c, + const int ldc); +template void gemm_impl(const char transa, + const char transb, const int m, const int n, const int k, + const double alpha, const float* const a, const int lda, + const float* const b, const int ldb, const double beta, float* const c, + const int ldc); diff --git a/src/linear_algebra/gemm_impl.h b/src/linear_algebra/gemm_impl.h new file mode 100644 index 00000000..5140b143 --- /dev/null +++ b/src/linear_algebra/gemm_impl.h @@ -0,0 +1,14 @@ +// Copyright (c) 2017, Lawrence 
Livermore National Security, LLC and +// UT-Battelle, LLC. +// Produced at the Lawrence Livermore National Laboratory and the Oak Ridge +// National Laboratory. +// LLNL-CODE-743438 +// All rights reserved. +// This file is part of MGmol. For details, see https://github.com/llnl/mgmol. +// Please also read this link https://github.com/llnl/mgmol/LICENSE + +template +void gemm_impl(const char transa, const char transb, const int m, const int n, + const int k, const double alpha, const T1* const a, const int lda, + const T2* const b, const int ldb, const double beta, T3* const c, + const int ldc); diff --git a/src/linear_algebra/mputils.cc b/src/linear_algebra/mputils.cc index 77b08689..487d1367 100644 --- a/src/linear_algebra/mputils.cc +++ b/src/linear_algebra/mputils.cc @@ -16,19 +16,27 @@ #include #endif +#include "MGmol_blas1.h" +#include "gemm_impl.h" +#include "syrk_impl.h" + +#ifdef MGMOL_USE_BLIS +#include +#else +#include "blas2_c.h" +#include "blas3_c.h" +#endif + #include #include #include Timer dgemm_tm("dgemm"); Timer sgemm_tm("sgemm"); -Timer mpgemm_tm("mpgemm"); -Timer tttgemm_tm("tttgemm"); +Timer bligemm_tm("bligemm"); Timer dsyrk_tm("dsyrk"); Timer ssyrk_tm("ssyrk"); -Timer mpsyrk_tm("mpsyrk"); -Timer tttsyrk_tm("tttsyrk"); Timer mpdot_tm("mpdot"); Timer ttdot_tm("ttdot"); @@ -304,106 +312,6 @@ void LAU_H::MPsyrk(const char uplo, const char trans, const int n, const int k, dsyrk_tm.stop(); } -template <> -void LAU_H::MPsyrk(const char uplo, const char trans, const int n, const int k, - const double alpha, const float* const a, const int lda, const double beta, - float* c, const int ldc) -{ - MemorySpace::assert_is_host_ptr(a); - MemorySpace::assert_is_host_ptr(c); - - mpsyrk_tm.start(); - - if (beta == 1. && (alpha == 0. 
|| n == 0 || k == 0)) return; - - /* case Trans == 'N' */ - if (trans == 'N' || trans == 'n') - { - /* buffer to hold accumulation in double */ - std::vector buff(n); - if (uplo == 'U' || uplo == 'u') - { - for (int j = 0; j < n; j++) - { - const int len = j + 1; - std::fill(buff.begin(), buff.begin() + len, 0.); - for (int l = 0; l < k; l++) - { - /* pointer to beginning of column l in matrix a */ - const float* colL = a + lda * l; - /* get multiplier */ - double mult = static_cast( - alpha * colL[j]); // same as alpha * a[lda*l + j]; - LAU_H::MPaxpy(len, mult, colL, buff.data()); - } - /* Update col j of upper part of matrix C. */ - /* Get pointer to beginning of column j in C. */ - float* cj = c + ldc * j; - LAU_H::MPscal(len, beta, cj); - for (int i = 0; i < len; i++) - cj[i] += static_cast(buff[i]); - } - } - else /* uplo = 'L' or 'l' */ - { - for (int j = 0; j < n; j++) - { - const int len = n - (j + 1); - std::fill(buff.begin(), buff.begin() + len, 0.); - for (int l = 0; l < k; l++) - { - /* pointer to beginning of column l in matrix a */ - const float* colL = a + lda * l + j; - /* get multiplier */ - double mult = static_cast( - alpha * colL[0]); // same as alpha * a[lda*l + j]; - LAU_H::MPaxpy(len, mult, colL, buff.data()); - } - /* Update col j of upper part of matrix C. */ - /* Get pointer to beginning of column j in C. 
*/ - float* cj = c + ldc * j + j; - LAU_H::MPscal(len, beta, cj); - for (int i = 0; i < len; i++) - cj[i] += static_cast(buff[i]); - } - } - } - else /* Trans == 'T' or 'C' */ - { - if (uplo == 'U' || uplo == 'u') - { - for (int j = 0; j < n; j++) - { - const float* __restrict__ aj = a + lda * j; - for (int i = 0; i < j; i++) - { - const int pos = ldc * j + i; - const float* __restrict__ ai = a + lda * i; - double bc = static_cast(c[pos]) * beta; - c[pos] = static_cast( - alpha * LAU_H::MPdot(k, ai, aj) + bc); - } - } - } - else /* uplo = 'L' or 'l' */ - { - for (int j = 0; j < n; j++) - { - const float* __restrict__ aj = a + lda * j; - for (int i = j; i < n; i++) - { - const int pos = ldc * j + i; - const float* __restrict__ ai = a + lda * i; - double bc = static_cast(c[pos]) * beta; - c[pos] = static_cast( - alpha * LAU_H::MPdot(k, ai, aj) + bc); - } - } - } - } - mpsyrk_tm.stop(); -} - template <> template void LAU_H::MPsyrk(const char uplo, const char trans, const int n, const int k, @@ -413,97 +321,7 @@ void LAU_H::MPsyrk(const char uplo, const char trans, const int n, const int k, MemorySpace::assert_is_host_ptr(a); MemorySpace::assert_is_host_ptr(c); - tttsyrk_tm.start(); - - if (beta == 1. && (alpha == 0. || n == 0 || k == 0)) return; - - /* case Trans == 'N' */ - if (trans == 'N' || trans == 'n') - { - /* buffer to hold accumulation in double */ - std::vector buff(n); - if (uplo == 'U' || uplo == 'u') - { - for (int j = 0; j < n; j++) - { - const int len = j + 1; - std::fill(buff.begin(), buff.begin() + len, 0.); - for (int l = 0; l < k; l++) - { - /* pointer to beginning of column l in matrix a */ - const T1* colL = a + lda * l; - /* get multiplier */ - double mult = static_cast( - alpha * colL[j]); // same as alpha * a[lda*l + j]; - LAU_H::MPaxpy(len, mult, colL, buff.data()); - } - /* Update col j of upper part of matrix C. */ - /* Get pointer to beginning of column j in C. 
*/ - T2* cj = c + ldc * j; - LAU_H::MPscal(len, beta, cj); - for (int i = 0; i < len; i++) - cj[i] += (T2)buff[i]; - } - } - else /* uplo = 'L' or 'l' */ - { - for (int j = 0; j < n; j++) - { - const int len = n - (j + 1); - std::fill(buff.begin(), buff.begin() + len, 0.); - for (int l = 0; l < k; l++) - { - /* pointer to beginning of column l in matrix a */ - const T1* colL = a + lda * l + j; - /* get multiplier */ - double mult = static_cast( - alpha * colL[0]); // same as alpha * a[lda*l + j]; - LAU_H::MPaxpy(len, mult, colL, buff.data()); - } - /* Update col j of upper part of matrix C. */ - /* Get pointer to beginning of column j in C. */ - T2* cj = c + ldc * j + j; - LAU_H::MPscal(len, beta, cj); - for (int i = 0; i < len; i++) - cj[i] += (T2)buff[i]; - } - } - } - else /* Trans == 'T' or 'C' */ - { - if (uplo == 'U' || uplo == 'u') - { - for (int j = 0; j < n; j++) - { - const T1* __restrict__ aj = a + lda * j; - for (int i = 0; i < j; i++) - { - const int pos = ldc * j + i; - const T1* __restrict__ ai = a + lda * i; - double bc = static_cast(c[pos]) * beta; - c[pos] - = static_cast(alpha * LAU_H::MPdot(k, ai, aj) + bc); - } - } - } - else /* uplo = 'L' or 'l' */ - { - for (int j = 0; j < n; j++) - { - const T1* __restrict__ aj = a + lda * j; - for (int i = j; i < n; i++) - { - const int pos = ldc * j + i; - const T1* __restrict__ ai = a + lda * i; - double bc = static_cast(c[pos]) * beta; - c[pos] - = static_cast(alpha * LAU_H::MPdot(k, ai, aj) + bc); - } - } - } - } - - tttsyrk_tm.stop(); + syrk_impl(uplo, trans, n, k, alpha, a, lda, beta, c, ldc); } // MemorySpace::Device @@ -725,103 +543,7 @@ void LAU_H::MPgemm(const char transa, const char transb, const int m, MemorySpace::assert_is_host_ptr(b); MemorySpace::assert_is_host_ptr(c); - tttgemm_tm.start(); - // if(onpe0)cout<<"template MPgemm..."< buff(m); - for (int j = 0; j < n; j++) - { - std::fill(buff.begin(), buff.end(), 0.); - for (int l = 0; l < k; l++) - { - /* pointer to beginning of column l in 
matrix a */ - const T1* colL = a + lda * l; - /* get multiplier */ - double mult = (double)(alpha * b[ldb * j + l]); - LAU_H::MPaxpy(m, mult, colL, buff.data()); - } - /* Update col j of of result matrix C. */ - /* Get pointer to beginning of column j in C. */ - T3* cj = c + ldc * j; - LAU_H::MPscal(m, beta, cj); - for (int i = 0; i < m; i++) - { - cj[i] += (T3)buff[i]; - } - } - } - else /* transa == 'T'/'C' */ - { - for (int j = 0; j < n; j++) - { - const T2* __restrict__ bj = b + ldb * j; - for (int i = 0; i < m; i++) - { - const int pos = ldc * j + i; - double bc = static_cast(c[pos]) * beta; - const T1* __restrict__ ai = a + lda * i; - c[pos] - = static_cast(alpha * LAU_H::MPdot(k, ai, bj) + bc); - } - } - } - } - else /* transb == 'T'/'C' */ - { - if (transa == 'N' || transa == 'n') - { - /* buffer to hold accumulation in double */ - std::vector buff(m); - for (int j = 0; j < n; j++) - { - std::fill(buff.begin(), buff.end(), 0.); - for (int l = 0; l < k; l++) - { - /* pointer to beginning of column l in matrix a */ - const T1* colL = a + lda * l; - /* get multiplier */ - double mult = (double)(alpha * b[ldb * l + j]); - LAU_H::MPaxpy(m, mult, colL, buff.data()); - } - /* Update col j of of result matrix C. */ - /* Get pointer to beginning of column j in C. 
*/ - T3* cj = c + ldc * j; - LAU_H::MPscal(m, beta, cj); - for (int i = 0; i < m; i++) - { - cj[i] += (T3)buff[i]; - } - } - } - else /* transa == 'T'/'C' */ - { - for (int j = 0; j < n; j++) - { - for (int i = 0; i < m; i++) - { - const int pos = ldc * j + i; - const T1* ai = a + lda * i; - double sum = 0.; - for (int l = 0; l < k; l++) - { - sum += alpha * ai[l] * b[ldb * l + j]; - } - sum += (double)(beta * c[pos]); - c[pos] = (T3)sum; - } - } - } - } - - tttgemm_tm.stop(); + gemm_impl(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } // input/output in double, computation in double @@ -842,111 +564,6 @@ void LAU_H::MPgemm(const char transa, const char transb, dgemm_tm.stop(); } -// input/output in float, computation in double -template <> -template <> -void LAU_H::MPgemm(const char transa, const char transb, - const int m, const int n, const int k, const double alpha, - const float* const a, const int lda, const float* const b, const int ldb, - const double beta, float* const c, const int ldc) -{ - MemorySpace::assert_is_host_ptr(a); - MemorySpace::assert_is_host_ptr(b); - MemorySpace::assert_is_host_ptr(c); - - mpgemm_tm.start(); - - if (beta == 1. && (alpha == 0. || m == 0 || n == 0 || k == 0)) return; - - /* case transb == 'N' and transa == 'N' */ - if (transb == 'N' || transb == 'n') - { - if (transa == 'N' || transa == 'n') - { - /* buffer to hold accumulation in double */ - std::vector buff(m); - for (int j = 0; j < n; j++) - { - std::fill(buff.begin(), buff.end(), 0); - for (int l = 0; l < k; l++) - { - /* pointer to beginning of column l in matrix a */ - const float* colL = a + lda * l; - /* get multiplier */ - double mult = (double)(alpha * b[ldb * j + l]); - LAU_H::MPaxpy(m, mult, colL, buff.data()); - } - /* Update col j of of result matrix C. */ - /* Get pointer to beginning of column j in C. 
*/ - float* cj = c + ldc * j; - LAU_H::MPscal(m, beta, cj); - for (int i = 0; i < m; i++) - cj[i] += (float)buff[i]; - } - } - else /* transa == 'T'/'C' */ - { - for (int j = 0; j < n; j++) - { - const float* __restrict__ bj = b + ldb * j; - for (int i = 0; i < m; i++) - { - const int pos = ldc * j + i; - double bc = (double)c[pos] * beta; - const float* __restrict__ ai = a + lda * i; - c[pos] = (float)(alpha * MPdot(k, ai, bj) + bc); - } - } - } - } - else /* transb == 'T'/'C' */ - { - if (transa == 'N' || transa == 'n') - { - /* buffer to hold accumulation in double */ - std::vector buff(m); - for (int j = 0; j < n; j++) - { - std::fill(buff.begin(), buff.end(), 0); - for (int l = 0; l < k; l++) - { - /* pointer to beginning of column l in matrix a */ - const float* colL = a + lda * l; - /* get multiplier */ - double mult = (double)(alpha * b[ldb * l + j]); - LAU_H::MPaxpy(m, mult, colL, buff.data()); - } - /* Update col j of of result matrix C. */ - /* Get pointer to beginning of column j in C. 
*/ - float* cj = c + ldc * j; - LAU_H::MPscal(m, beta, cj); - for (int i = 0; i < m; i++) - cj[i] += (float)buff[i]; - } - } - else /* transa == 'T'/'C' */ - { - for (int j = 0; j < n; j++) - { - for (int i = 0; i < m; i++) - { - const int pos = ldc * j + i; - const float* ai = a + lda * i; - double sum = 0.; - for (int l = 0; l < k; l++) - { - sum += alpha * ai[l] * b[ldb * l + j]; - } - sum += (double)(beta * c[pos]); - c[pos] = (float)sum; - } - } - } - } - - mpgemm_tm.stop(); -} - // MemorySpace::Device #ifdef HAVE_MAGMA template <> @@ -1014,10 +631,6 @@ void LAU_D::MPgemm(const char transa, const char transb, const int m, } #endif -/////////////////////////////// -// MPgemmNN // -/////////////////////////////// - template template void LinearAlgebraUtils::MPgemmNN(const int m, const int n, @@ -1031,19 +644,122 @@ void LinearAlgebraUtils::MPgemmNN(const int m, const int n, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } +template <> +template <> +void LAU_H::MPgemmNN(const int m, const int n, const int k, const double alpha, + const float* const a, const int lda, const double* const b, const int ldb, + const double beta, float* const c, const int ldc) +{ + MemorySpace::assert_is_host_ptr(a); + MemorySpace::assert_is_host_ptr(b); + MemorySpace::assert_is_host_ptr(c); + +#ifdef MGMOL_USE_BLIS + bligemm_tm.start(); + + // Create matrix objects + // When storing by columns, the row stride is 1 + // When storing by columns, the column stride is also sometimes called the + // leading dimension + obj_t A; + bli_obj_create_with_attached_buffer( + BLIS_FLOAT, m, k, const_cast(a), 1, lda, &A); + + obj_t B; + bli_obj_create_with_attached_buffer( + BLIS_DOUBLE, k, n, const_cast(b), 1, ldb, &B); + + obj_t C; + bli_obj_create_with_attached_buffer( + BLIS_FLOAT, m, n, const_cast(c), 1, ldc, &C); + + obj_t bli_alpha; + bli_obj_create_1x1(BLIS_DOUBLE, &bli_alpha); + bli_setsc(alpha, 0., &bli_alpha); + + obj_t bli_beta; + bli_obj_create_1x1(BLIS_DOUBLE, 
&bli_beta); + bli_setsc(beta, 0., &bli_beta); + + // accumulate results in double precision + bli_obj_set_comp_prec(BLIS_DOUBLE_PREC, &C); + + bli_gemm(&bli_alpha, &A, &B, &bli_beta, &C); + + // Clean up BLIS objects + bli_obj_free(&bli_alpha); + bli_obj_free(&bli_beta); + + bligemm_tm.stop(); +#else + char transa = 'n'; + char transb = 'n'; + + LAU_H::MPgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); +#endif +} + // input in float, computation in double -void MPgemmTN(const int m, const int n, const int k, const double alpha, +template <> +template <> +void LAU_H::MPgemmTN(const int m, const int n, const int k, const double alpha, const float* const a, const int lda, const float* const b, const int ldb, const double beta, double* const c, const int ldc) { + // std::cout << "LAU_H::MPgemmTN" << std::endl; + MemorySpace::assert_is_host_ptr(a); + MemorySpace::assert_is_host_ptr(b); + MemorySpace::assert_is_host_ptr(c); + +#ifdef MGMOL_USE_BLIS + bligemm_tm.start(); + + // Create matrix objects + // When storing by columns, the row stride is 1 + // When storing by columns, the column stride is also sometimes called the + // leading dimension + obj_t A; + bli_obj_create_with_attached_buffer( + BLIS_FLOAT, k, m, const_cast(a), 1, lda, &A); + bli_obj_toggle_trans(&A); + + obj_t B; + bli_obj_create_with_attached_buffer( + BLIS_FLOAT, k, n, const_cast(b), 1, ldb, &B); + obj_t C; + bli_obj_create_with_attached_buffer( + BLIS_DOUBLE, m, n, const_cast(c), 1, ldc, &C); + + obj_t bli_alpha; + bli_obj_create_1x1(BLIS_DOUBLE, &bli_alpha); + bli_setsc(alpha, 0., &bli_alpha); + + obj_t bli_beta; + bli_obj_create_1x1(BLIS_DOUBLE, &bli_beta); + bli_setsc(beta, 0., &bli_beta); + + // accumulate results in double precision + // dafault: precision of C + bli_obj_set_comp_prec(BLIS_DOUBLE_PREC, &C); + bli_gemm(&bli_alpha, &A, &B, &bli_beta, &C); + + // Clean up BLIS objects + bli_obj_free(&bli_alpha); + bli_obj_free(&bli_beta); + + bligemm_tm.stop(); +#else char 
transa = 't'; char transb = 'n'; LAU_H::MPgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); +#endif } // input in float, computation in double -void MPgemmTN(const int m, const int n, const int k, const double alpha, +template <> +template <> +void LAU_H::MPgemmTN(const int m, const int n, const int k, const double alpha, const float* const a, const int lda, const float* const b, const int ldb, const double beta, float* const c, const int ldc) { @@ -1053,12 +769,12 @@ void MPgemmTN(const int m, const int n, const int k, const double alpha, LAU_H::MPgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -/////// additional calls ... may be removed later if unused - +template template -void MPgemmTN(const int m, const int n, const int k, const double alpha, - const T1* const a, const int lda, const T2* const b, const int ldb, - const double beta, T3* const c, const int ldc) +void LinearAlgebraUtils::MPgemmTN(const int m, const int n, + const int k, const double alpha, const T1* const a, const int lda, + const T2* const b, const int ldb, const double beta, T3* const c, + const int ldc) { // if(onpe0)cout<<"template MPgemmNN..."<(const char transa, const double alpha, const float* const a, const int lda, const float* const b, const int ldb, const double beta, double* const c, const int ldc); -template void LAU_H::MPgemmNN(const int m, const int n, - const int k, const double alpha, const float* const a, const int lda, - const double* const b, const int ldb, const double beta, float* const c, +template void LAU_H::MPgemm(const char transa, + const char transb, const int m, const int n, const int k, + const double alpha, const float* const a, const int lda, + const float* const b, const int ldb, const double beta, float* const c, const int ldc); + +// template void LAU_H::MPgemmNN(const int m, const int n, +// const int k, const double alpha, const float* const a, const int lda, +// const double* const b, const int ldb, const double beta, 
float* const c, +// const int ldc); template void LAU_H::MPgemmNN(const int m, const int n, const int k, const double alpha, const double* const a, const int lda, const double* const b, const int ldb, const double beta, double* const c, @@ -1127,14 +849,18 @@ template void LAU_H::MPaxpy(const int len, const double scal, const float* __restrict__ xptr, double* __restrict__ yptr); template void LAU_H::MPaxpy(const int len, const double scal, const float* __restrict__ xptr, float* __restrict__ yptr); + template void LAU_H::MPsyrk(const char uplo, const char trans, const int n, const int k, const double alpha, const double* const a, const int lda, const double beta, float* c, const int ldc); template void LAU_H::MPsyrk(const char uplo, const char trans, const int n, const int k, const double alpha, const float* const a, const int lda, const double beta, double* c, const int ldc); +template void LAU_H::MPsyrk(const char uplo, const char trans, + const int n, const int k, const double alpha, const float* const a, + const int lda, const double beta, float* c, const int ldc); -template void MPgemmTN(const int m, const int n, +template void LAU_H::MPgemmTN(const int m, const int n, const int k, const double alpha, const double* const a, const int lda, const double* const b, const int ldb, const double beta, double* const c, const int ldc); diff --git a/src/linear_algebra/mputils.h b/src/linear_algebra/mputils.h index 17bd0b4b..fa838f17 100644 --- a/src/linear_algebra/mputils.h +++ b/src/linear_algebra/mputils.h @@ -11,8 +11,6 @@ #define MGMOL_MPUTILS_H #include "MGmol_blas1.h" -#include "blas2_c.h" -#include "blas3_c.h" #include "memory_space.h" /* scal */ @@ -100,6 +98,11 @@ struct LinearAlgebraUtils const double alpha, const T1* const a, const int lda, const T2* const b, const int ldb, const double beta, T3* const c, const int ldc); + template + static void MPgemmTN(const int m, const int n, const int k, + const double alpha, const T1* const a, const int lda, const T2* 
const b, + const int ldb, const double beta, T3* const c, const int ldc); + /* mixed-precision scalar times vector. Accumulates results * in double precision and stores as single precision. */ @@ -125,28 +128,12 @@ struct LinearAlgebraUtils static void MPsyrk(const char uplo, const char trans, const int n, const int k, const double alpha, const double* const a, const int lda, const double beta, double* c, const int ldc); - static void MPsyrk(const char uplo, const char trans, const int n, - const int k, const double alpha, const float* const a, const int lda, - const double beta, float* c, const int ldc); template static void MPsyrk(const char uplo, const char trans, const int n, const int k, const double alpha, const T1* const a, const int lda, const double beta, T2* c, const int ldc); }; -void MPgemmTN(const int m, const int n, const int k, const double alpha, - const float* const a, const int lda, const float* const b, const int ldb, - const double beta, double* const c, const int ldc); - -void MPgemmTN(const int m, const int n, const int k, const double alpha, - const float* const a, const int lda, const float* const b, const int ldb, - const double beta, float* const c, const int ldc); - -template -void MPgemmTN(const int m, const int n, const int k, const double alpha, - const T1* const a, const int lda, const T2* const b, const int ldb, - const double beta, T3* const c, const int ldc); - /* trsm */ void Ttrsm(const char, const char, const char, const char, const int, const int, const double, const double* const, const int, double* const, const int); diff --git a/src/linear_algebra/syrk_impl.cc b/src/linear_algebra/syrk_impl.cc new file mode 100644 index 00000000..e1a4833f --- /dev/null +++ b/src/linear_algebra/syrk_impl.cc @@ -0,0 +1,227 @@ +// Copyright (c) 2017, Lawrence Livermore National Security, LLC and +// UT-Battelle, LLC. +// Produced at the Lawrence Livermore National Laboratory and the Oak Ridge +// National Laboratory. 
+// LLNL-CODE-743438 +// All rights reserved. +// This file is part of MGmol. For details, see https://github.com/llnl/mgmol. +// Please also read this link https://github.com/llnl/mgmol/LICENSE + +#include "Timer.h" +#include "mputils.h" + +#include "MGmol_blas1.h" +#include "syrk_impl.h" + +#include "blas3_c.h" + +#include +#include +#include + +Timer mpsyrk_tm("mpsyrk"); +Timer tttsyrk_tm("tttsyrk"); + +using LAU_H = LinearAlgebraUtils; + +template <> +void syrk_impl(const char uplo, const char trans, const int n, const int k, + const double alpha, const float* const a, const int lda, const double beta, + float* c, const int ldc) +{ + mpsyrk_tm.start(); + + if (beta == 1. && (alpha == 0. || n == 0 || k == 0)) return; + + /* case Trans == 'N' */ + if (trans == 'N' || trans == 'n') + { + /* buffer to hold accumulation in double */ + std::vector buff(n); + if (uplo == 'U' || uplo == 'u') + { + for (int j = 0; j < n; j++) + { + const int len = j + 1; + std::fill(buff.begin(), buff.begin() + len, 0.); + for (int l = 0; l < k; l++) + { + /* pointer to beginning of column l in matrix a */ + const float* colL = a + lda * l; + /* get multiplier */ + double mult = static_cast( + alpha * colL[j]); // same as alpha * a[lda*l + j]; + LAU_H::MPaxpy(len, mult, colL, buff.data()); + } + /* Update col j of upper part of matrix C. */ + /* Get pointer to beginning of column j in C. 
*/ + float* cj = c + ldc * j; + LAU_H::MPscal(len, beta, cj); + for (int i = 0; i < len; i++) + cj[i] += static_cast(buff[i]); + } + } + else /* uplo = 'L' or 'l' */ + { + for (int j = 0; j < n; j++) + { + const int len = n - (j + 1); + std::fill(buff.begin(), buff.begin() + len, 0.); + for (int l = 0; l < k; l++) + { + /* pointer to beginning of column l in matrix a */ + const float* colL = a + lda * l + j; + /* get multiplier */ + double mult = static_cast( + alpha * colL[0]); // same as alpha * a[lda*l + j]; + LAU_H::MPaxpy(len, mult, colL, buff.data()); + } + /* Update col j of upper part of matrix C. */ + /* Get pointer to beginning of column j in C. */ + float* cj = c + ldc * j + j; + LAU_H::MPscal(len, beta, cj); + for (int i = 0; i < len; i++) + cj[i] += static_cast(buff[i]); + } + } + } + else /* Trans == 'T' or 'C' */ + { + if (uplo == 'U' || uplo == 'u') + { + for (int j = 0; j < n; j++) + { + const float* __restrict__ aj = a + lda * j; + for (int i = 0; i < j; i++) + { + const int pos = ldc * j + i; + const float* __restrict__ ai = a + lda * i; + double bc = static_cast(c[pos]) * beta; + c[pos] = static_cast( + alpha * LAU_H::MPdot(k, ai, aj) + bc); + } + } + } + else /* uplo = 'L' or 'l' */ + { + for (int j = 0; j < n; j++) + { + const float* __restrict__ aj = a + lda * j; + for (int i = j; i < n; i++) + { + const int pos = ldc * j + i; + const float* __restrict__ ai = a + lda * i; + double bc = static_cast(c[pos]) * beta; + c[pos] = static_cast( + alpha * LAU_H::MPdot(k, ai, aj) + bc); + } + } + } + } + mpsyrk_tm.stop(); +} + +template +void syrk_impl(const char uplo, const char trans, const int n, const int k, + const double alpha, const T1* const a, const int lda, const double beta, + T2* c, const int ldc) +{ + tttsyrk_tm.start(); + + if (beta == 1. && (alpha == 0. 
|| n == 0 || k == 0)) return; + + /* case Trans == 'N' */ + if (trans == 'N' || trans == 'n') + { + /* buffer to hold accumulation in double */ + std::vector buff(n); + if (uplo == 'U' || uplo == 'u') + { + for (int j = 0; j < n; j++) + { + const int len = j + 1; + std::fill(buff.begin(), buff.begin() + len, 0.); + for (int l = 0; l < k; l++) + { + /* pointer to beginning of column l in matrix a */ + const T1* colL = a + lda * l; + /* get multiplier */ + double mult = static_cast( + alpha * colL[j]); // same as alpha * a[lda*l + j]; + LAU_H::MPaxpy(len, mult, colL, buff.data()); + } + /* Update col j of upper part of matrix C. */ + /* Get pointer to beginning of column j in C. */ + T2* cj = c + ldc * j; + LAU_H::MPscal(len, beta, cj); + for (int i = 0; i < len; i++) + cj[i] += (T2)buff[i]; + } + } + else /* uplo = 'L' or 'l' */ + { + for (int j = 0; j < n; j++) + { + const int len = n - (j + 1); + std::fill(buff.begin(), buff.begin() + len, 0.); + for (int l = 0; l < k; l++) + { + /* pointer to beginning of column l in matrix a */ + const T1* colL = a + lda * l + j; + /* get multiplier */ + double mult = static_cast( + alpha * colL[0]); // same as alpha * a[lda*l + j]; + LAU_H::MPaxpy(len, mult, colL, buff.data()); + } + /* Update col j of upper part of matrix C. */ + /* Get pointer to beginning of column j in C. 
*/ + T2* cj = c + ldc * j + j; + LAU_H::MPscal(len, beta, cj); + for (int i = 0; i < len; i++) + cj[i] += (T2)buff[i]; + } + } + } + else /* Trans == 'T' or 'C' */ + { + if (uplo == 'U' || uplo == 'u') + { + for (int j = 0; j < n; j++) + { + const T1* __restrict__ aj = a + lda * j; + for (int i = 0; i < j; i++) + { + const int pos = ldc * j + i; + const T1* __restrict__ ai = a + lda * i; + double bc = static_cast(c[pos]) * beta; + c[pos] + = static_cast(alpha * LAU_H::MPdot(k, ai, aj) + bc); + } + } + } + else /* uplo = 'L' or 'l' */ + { + for (int j = 0; j < n; j++) + { + const T1* __restrict__ aj = a + lda * j; + for (int i = j; i < n; i++) + { + const int pos = ldc * j + i; + const T1* __restrict__ ai = a + lda * i; + double bc = static_cast(c[pos]) * beta; + c[pos] + = static_cast(alpha * LAU_H::MPdot(k, ai, aj) + bc); + } + } + } + } + + tttsyrk_tm.stop(); +} + +template void syrk_impl(const char uplo, const char trans, + const int n, const int k, const double alpha, const double* const a, + const int lda, const double beta, float* c, const int ldc); +template void syrk_impl(const char uplo, const char trans, + const int n, const int k, const double alpha, const float* const a, + const int lda, const double beta, double* c, const int ldc); diff --git a/src/linear_algebra/syrk_impl.h b/src/linear_algebra/syrk_impl.h new file mode 100644 index 00000000..ab3786f4 --- /dev/null +++ b/src/linear_algebra/syrk_impl.h @@ -0,0 +1,13 @@ +// Copyright (c) 2017, Lawrence Livermore National Security, LLC and +// UT-Battelle, LLC. +// Produced at the Lawrence Livermore National Laboratory and the Oak Ridge +// National Laboratory. +// LLNL-CODE-743438 +// All rights reserved. +// This file is part of MGmol. For details, see https://github.com/llnl/mgmol. 
+// Please also read this link https://github.com/llnl/mgmol/LICENSE + +template +void syrk_impl(const char uplo, const char trans, const int n, const int k, + const double alpha, const T1* const a, const int lda, const double beta, + T2* c, const int ldc); diff --git a/src/mgmol_run.cc b/src/mgmol_run.cc index 468d61bd..92390e83 100644 --- a/src/mgmol_run.cc +++ b/src/mgmol_run.cc @@ -20,6 +20,10 @@ #include #include +#ifdef MGMOL_USE_BLIS +#include +#endif + #ifdef _OPENMP #include #endif @@ -39,6 +43,10 @@ int mgmol_init(MPI_Comm comm) assert(mype > -1); MPIdata::onpe0 = (MPIdata::mype == 0); +#ifdef MGMOL_USE_BLIS + bli_init(); +#endif + #ifdef HAVE_MAGMA magma_int_t magmalog = magma_init(); if (magmalog == MAGMA_SUCCESS) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 838ce6a4..df774171 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -19,6 +19,8 @@ add_executable(testDistVector ${CMAKE_SOURCE_DIR}/src/DistMatrix/BlacsContext.cc ${CMAKE_SOURCE_DIR}/src/tools/Timer.cc ${CMAKE_SOURCE_DIR}/src/linear_algebra/mputils.cc + ${CMAKE_SOURCE_DIR}/src/linear_algebra/gemm_impl.cc + ${CMAKE_SOURCE_DIR}/src/linear_algebra/syrk_impl.cc ${CMAKE_SOURCE_DIR}/src/tools/MGmol_MPI.cc ${CMAKE_SOURCE_DIR}/src/tools/mgmol_mpi_tools.cc ${CMAKE_SOURCE_DIR}/src/tools/random.cc @@ -32,6 +34,8 @@ add_executable(testReplicated2DistMatrix ${CMAKE_SOURCE_DIR}/src/local_matrices/SquareLocalMatrices.cc ${CMAKE_SOURCE_DIR}/src/tools/Timer.cc ${CMAKE_SOURCE_DIR}/src/linear_algebra/mputils.cc + ${CMAKE_SOURCE_DIR}/src/linear_algebra/gemm_impl.cc + ${CMAKE_SOURCE_DIR}/src/linear_algebra/syrk_impl.cc ${CMAKE_SOURCE_DIR}/src/tools/MGmol_MPI.cc ${CMAKE_SOURCE_DIR}/src/tools/mgmol_mpi_tools.cc ${CMAKE_SOURCE_DIR}/src/tools/random.cc @@ -43,6 +47,8 @@ add_executable(testDistMatrix ${CMAKE_SOURCE_DIR}/src/DistMatrix/BlacsContext.cc ${CMAKE_SOURCE_DIR}/src/tools/Timer.cc ${CMAKE_SOURCE_DIR}/src/linear_algebra/mputils.cc + ${CMAKE_SOURCE_DIR}/src/linear_algebra/gemm_impl.cc 
+ ${CMAKE_SOURCE_DIR}/src/linear_algebra/syrk_impl.cc ${CMAKE_SOURCE_DIR}/src/tools/MGmol_MPI.cc ${CMAKE_SOURCE_DIR}/src/tools/mgmol_mpi_tools.cc ${CMAKE_SOURCE_DIR}/src/tools/random.cc @@ -54,6 +60,8 @@ add_executable(testConditionDistMatrix ${CMAKE_SOURCE_DIR}/src/DistMatrix/BlacsContext.cc ${CMAKE_SOURCE_DIR}/src/tools/Timer.cc ${CMAKE_SOURCE_DIR}/src/linear_algebra/mputils.cc + ${CMAKE_SOURCE_DIR}/src/linear_algebra/gemm_impl.cc + ${CMAKE_SOURCE_DIR}/src/linear_algebra/syrk_impl.cc ${CMAKE_SOURCE_DIR}/src/tools/MGmol_MPI.cc ${CMAKE_SOURCE_DIR}/src/tools/mgmol_mpi_tools.cc ${CMAKE_SOURCE_DIR}/src/tools/random.cc @@ -68,6 +76,8 @@ add_executable(testConditionDistMatrixPower ${CMAKE_SOURCE_DIR}/src/DistMatrix/BlacsContext.cc ${CMAKE_SOURCE_DIR}/src/tools/Timer.cc ${CMAKE_SOURCE_DIR}/src/linear_algebra/mputils.cc + ${CMAKE_SOURCE_DIR}/src/linear_algebra/gemm_impl.cc + ${CMAKE_SOURCE_DIR}/src/linear_algebra/syrk_impl.cc ${CMAKE_SOURCE_DIR}/src/tools/MGmol_MPI.cc ${CMAKE_SOURCE_DIR}/src/tools/mgmol_mpi_tools.cc ${CMAKE_SOURCE_DIR}/src/tools/random.cc @@ -79,6 +89,8 @@ add_executable(testPower ${CMAKE_SOURCE_DIR}/src/local_matrices/LocalMatrices.cc ${CMAKE_SOURCE_DIR}/src/local_matrices/SquareLocalMatrices.cc ${CMAKE_SOURCE_DIR}/src/linear_algebra/mputils.cc + ${CMAKE_SOURCE_DIR}/src/linear_algebra/gemm_impl.cc + ${CMAKE_SOURCE_DIR}/src/linear_algebra/syrk_impl.cc ${CMAKE_SOURCE_DIR}/src/DistMatrix/DistMatrix.cc ${CMAKE_SOURCE_DIR}/src/DistMatrix/BlacsContext.cc ${CMAKE_SOURCE_DIR}/src/tools/MGmol_MPI.cc @@ -93,6 +105,8 @@ add_executable(testPowerDistMatrix ${CMAKE_SOURCE_DIR}/src/local_matrices/LocalMatrices.cc ${CMAKE_SOURCE_DIR}/src/local_matrices/SquareLocalMatrices.cc ${CMAKE_SOURCE_DIR}/src/linear_algebra/mputils.cc + ${CMAKE_SOURCE_DIR}/src/linear_algebra/gemm_impl.cc + ${CMAKE_SOURCE_DIR}/src/linear_algebra/syrk_impl.cc ${CMAKE_SOURCE_DIR}/src/DistMatrix/DistMatrix.cc ${CMAKE_SOURCE_DIR}/src/DistMatrix/BlacsContext.cc ${CMAKE_SOURCE_DIR}/src/tools/MGmol_MPI.cc 
@@ -123,6 +137,8 @@ add_executable(testVariableSizeMatrix ${CMAKE_SOURCE_DIR}/src/sparse_linear_algebra/Table.cc ${CMAKE_SOURCE_DIR}/src/tools/Timer.cc ${CMAKE_SOURCE_DIR}/src/linear_algebra/mputils.cc + ${CMAKE_SOURCE_DIR}/src/linear_algebra/gemm_impl.cc + ${CMAKE_SOURCE_DIR}/src/linear_algebra/syrk_impl.cc ${CMAKE_SOURCE_DIR}/src/tools/MGmol_MPI.cc ${CMAKE_SOURCE_DIR}/src/tools/mgmol_mpi_tools.cc ${CMAKE_SOURCE_DIR}/tests/ut_main.cc) @@ -138,6 +154,8 @@ add_executable(testTradeGhostValues ${CMAKE_SOURCE_DIR}/src/pb/FDkernels.cc ${CMAKE_SOURCE_DIR}/src/tools/MGmol_MPI.cc ${CMAKE_SOURCE_DIR}/src/linear_algebra/mputils.cc + ${CMAKE_SOURCE_DIR}/src/linear_algebra/gemm_impl.cc + ${CMAKE_SOURCE_DIR}/src/linear_algebra/syrk_impl.cc ${CMAKE_SOURCE_DIR}/src/tools/mgmol_mpi_tools.cc ${CMAKE_SOURCE_DIR}/src/tools/Timer.cc ${CMAKE_SOURCE_DIR}/tests/ut_main.cc) @@ -153,6 +171,8 @@ add_executable(testSetGhostValues ${CMAKE_SOURCE_DIR}/src/pb/FDkernels.cc ${CMAKE_SOURCE_DIR}/src/tools/MGmol_MPI.cc ${CMAKE_SOURCE_DIR}/src/linear_algebra/mputils.cc + ${CMAKE_SOURCE_DIR}/src/linear_algebra/gemm_impl.cc + ${CMAKE_SOURCE_DIR}/src/linear_algebra/syrk_impl.cc ${CMAKE_SOURCE_DIR}/src/tools/mgmol_mpi_tools.cc ${CMAKE_SOURCE_DIR}/src/tools/Timer.cc ${CMAKE_SOURCE_DIR}/tests/ut_main.cc) @@ -172,6 +192,8 @@ add_executable(testLaph4 ${CMAKE_SOURCE_DIR}/src/pb/FDkernels.cc ${CMAKE_SOURCE_DIR}/src/tools/MGmol_MPI.cc ${CMAKE_SOURCE_DIR}/src/linear_algebra/mputils.cc + ${CMAKE_SOURCE_DIR}/src/linear_algebra/gemm_impl.cc + ${CMAKE_SOURCE_DIR}/src/linear_algebra/syrk_impl.cc ${CMAKE_SOURCE_DIR}/src/tools/mgmol_mpi_tools.cc ${CMAKE_SOURCE_DIR}/src/tools/Timer.cc ${CMAKE_SOURCE_DIR}/tests/ut_main.cc) @@ -189,6 +211,8 @@ add_executable(testBatchLaph4 ${CMAKE_SOURCE_DIR}/src/pb/GridFuncVector.cc ${CMAKE_SOURCE_DIR}/src/tools/MGmol_MPI.cc ${CMAKE_SOURCE_DIR}/src/linear_algebra/mputils.cc + ${CMAKE_SOURCE_DIR}/src/linear_algebra/gemm_impl.cc + ${CMAKE_SOURCE_DIR}/src/linear_algebra/syrk_impl.cc 
${CMAKE_SOURCE_DIR}/src/tools/mgmol_mpi_tools.cc ${CMAKE_SOURCE_DIR}/src/pb/MGkernels.cc ${CMAKE_SOURCE_DIR}/src/pb/FDkernels.cc @@ -204,6 +228,8 @@ add_executable(testMGkernels ${CMAKE_SOURCE_DIR}/src/pb/GridFuncVector.cc ${CMAKE_SOURCE_DIR}/src/tools/MGmol_MPI.cc ${CMAKE_SOURCE_DIR}/src/linear_algebra/mputils.cc + ${CMAKE_SOURCE_DIR}/src/linear_algebra/gemm_impl.cc + ${CMAKE_SOURCE_DIR}/src/linear_algebra/syrk_impl.cc ${CMAKE_SOURCE_DIR}/src/tools/mgmol_mpi_tools.cc ${CMAKE_SOURCE_DIR}/src/pb/MGkernels.cc ${CMAKE_SOURCE_DIR}/src/pb/FDkernels.cc @@ -225,6 +251,8 @@ add_executable(testGramMatrix ${CMAKE_SOURCE_DIR}/src/DistMatrix/DistMatrixTools.cc ${CMAKE_SOURCE_DIR}/src/tools/Timer.cc ${CMAKE_SOURCE_DIR}/src/linear_algebra/mputils.cc + ${CMAKE_SOURCE_DIR}/src/linear_algebra/syrk_impl.cc + ${CMAKE_SOURCE_DIR}/src/linear_algebra/gemm_impl.cc ${CMAKE_SOURCE_DIR}/src/tools/MGmol_MPI.cc ${CMAKE_SOURCE_DIR}/src/tools/mgmol_mpi_tools.cc ${CMAKE_SOURCE_DIR}/src/tools/random.cc @@ -244,6 +272,8 @@ add_executable(testDensityMatrix ${CMAKE_SOURCE_DIR}/src/DistMatrix/DistMatrixTools.cc ${CMAKE_SOURCE_DIR}/src/tools/Timer.cc ${CMAKE_SOURCE_DIR}/src/linear_algebra/mputils.cc + ${CMAKE_SOURCE_DIR}/src/linear_algebra/syrk_impl.cc + ${CMAKE_SOURCE_DIR}/src/linear_algebra/gemm_impl.cc ${CMAKE_SOURCE_DIR}/src/tools/MGmol_MPI.cc ${CMAKE_SOURCE_DIR}/src/tools/mgmol_mpi_tools.cc ${CMAKE_SOURCE_DIR}/src/tools/random.cc From e37983b9a11a5ffca035254f0348fcbc246a5a75 Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Wed, 26 Mar 2025 09:46:33 -0400 Subject: [PATCH 34/99] Clean up and fixes Ions (#326) * reset static index counter ion Ion when needed * simplify some loops * make Ions copy constructor private --- src/Ion.cc | 2 + src/Ion.h | 2 + src/Ions.cc | 109 +++++++++++++++++----------------------------- src/Ions.h | 31 +++++++------ tests/testIons.cc | 4 ++ 5 files changed, 66 insertions(+), 82 deletions(-) diff --git a/src/Ion.cc b/src/Ion.cc index 90350477..896a1961 100644 --- 
a/src/Ion.cc +++ b/src/Ion.cc @@ -24,6 +24,8 @@ unsigned short isqrt(unsigned value) static unsigned int _nlproj_gid = 0; static unsigned int _index = 0; +void Ion::resetIndexCount() { _index = 0; } + Ion::Ion(const Species& species, const std::string& name, const double crds[3], const double velocity[3], const bool lock) : name_(name), species_(species), index_(_index), nlproj_gid_(_nlproj_gid) diff --git a/src/Ion.h b/src/Ion.h index 871205a3..cf52db97 100644 --- a/src/Ion.h +++ b/src/Ion.h @@ -314,6 +314,8 @@ class Ion void getKBcoeffs(std::vector& coeffs); double energyDiff( Ion& ion, const double lattice[3], const short bc[3]) const; + + static void resetIndexCount(); }; #endif diff --git a/src/Ions.cc b/src/Ions.cc index 10baad10..f10fb834 100644 --- a/src/Ions.cc +++ b/src/Ions.cc @@ -9,7 +9,6 @@ #include "Ions.h" #include "Control.h" -#include "HDFrestart.h" #include "MGmol_MPI.h" #include "MGmol_blas1.h" #include "MPIdata.h" @@ -20,13 +19,10 @@ #include "mgmol_mpi_tools.h" #include "tools.h" -#include - #include #include #include -#include -#include +#include Timer ions_setupInteractingIons_tm("ions_setupInteractingIons"); Timer ions_setup_tm("ions::setup"); @@ -187,6 +183,8 @@ void Ions::setup() ions_setup_tm.start(); + Ion::resetIndexCount(); + updateListIons(); //#ifndef NDEBUG @@ -394,7 +392,7 @@ void Ions::iiforce(const short bc[3]) std::vector::const_iterator ion1 = local_ions_.begin(); int ion1_index = 0; - ; + while (ion1 != local_ions_.end()) { const double z1 = (*ion1)->getZion(); @@ -1800,7 +1798,7 @@ int Ions::readAtomsFromXYZ( } int Ions::setAtoms( - const std::vector& crds, const std::vector& spec) + const std::vector& crds, const std::vector& anum) { const int natoms = crds.size() / 3; @@ -1813,7 +1811,7 @@ int Ions::setAtoms( while (it != species_.end()) { ++isp; - if (it->getAtomicNumber() == spec[ia]) + if (it->getAtomicNumber() == anum[ia]) { break; } @@ -1823,7 +1821,7 @@ int Ions::setAtoms( for (std::map::iterator itr = 
map_species_.begin(); itr != map_species_.end(); ++itr) { - if (itr->second == spec[ia]) + if (itr->second == anum[ia]) { spname = itr->first; break; @@ -1833,7 +1831,7 @@ int Ions::setAtoms( { (*MPIdata::serr) << "Ions::setAtoms() --- ERROR: unknown " "species for atomic number " - << spec[ia] << std::endl; + << anum[ia] << std::endl; return -1; } @@ -2212,6 +2210,7 @@ void Ions::getLocalPositions(std::vector& tau) const void Ions::getLocalNames(std::vector& names) const { + names.clear(); for (auto& ion : local_ions_) { names.push_back(ion->name()); @@ -2220,6 +2219,7 @@ void Ions::getLocalNames(std::vector& names) const void Ions::getNames(std::vector& names) const { + names.clear(); for (auto& ion : list_ions_) { names.push_back(ion->name()); @@ -2513,7 +2513,6 @@ void Ions::gatherLockedNames( std::vector& names, const int root, const MPI_Comm comm) const { std::vector local_names; - for (auto& ion : local_ions_) { if (ion->locked()) local_names.push_back(ion->name()); @@ -2533,7 +2532,6 @@ void Ions::gatherIndexes( std::vector& indexes, const int root, const MPI_Comm comm) const { std::vector local_indexes; - for (auto& ion : local_ions_) { local_indexes.push_back(ion->index()); @@ -2553,7 +2551,6 @@ void Ions::gatherNLprojIds( std::vector& nlprojids, const int root, const MPI_Comm comm) const { std::vector local_nlprojids; - for (auto& ion : local_ions_) { local_nlprojids.push_back(ion->nlprojid()); @@ -2573,7 +2570,6 @@ void Ions::gatherAtomicNumbers( std::vector& atnumbers, const int root, const MPI_Comm comm) const { std::vector local_atnumbers; - for (auto& ion : local_ions_) { local_atnumbers.push_back(ion->atomic_number()); @@ -2593,7 +2589,6 @@ void Ions::gatherRandStates(std::vector& rstates, const int root, const MPI_Comm comm) const { std::vector local_rstates; - for (auto& ion : local_ions_) { local_rstates.push_back(ion->randomState(0)); @@ -2615,10 +2610,8 @@ void Ions::gatherPositions( std::vector& positions, const int root, const MPI_Comm comm) 
const { std::vector local_positions; - for (auto& ion : local_ions_) { - // get position of local ion double position[3]; ion->getPosition(&position[0]); local_positions.push_back(position[0]); @@ -2640,7 +2633,6 @@ void Ions::gatherPreviousPositions( std::vector& positions, const int root, const MPI_Comm comm) const { std::vector local_positions; - for (auto& ion : local_ions_) { local_positions.push_back(ion->getPreviousPosition(0)); @@ -2662,17 +2654,14 @@ void Ions::gatherForces( std::vector& forces, const int root, const MPI_Comm comm) const { std::vector local_forces; - for (auto& ion : local_ions_) { - // get position of local ion double force[3]; ion->getForce(&force[0]); local_forces.push_back(force[0]); local_forces.push_back(force[1]); local_forces.push_back(force[2]); } - // gather data to PE root std::vector data; mgmol_tools::gatherV(local_forces, data, root, comm); @@ -2687,7 +2676,6 @@ void Ions::gatherVelocities( std::vector& velocities, const int root, const MPI_Comm comm) const { std::vector local_velocities; - for (auto& ion : local_ions_) { local_velocities.push_back(ion->velocity(0)); @@ -2711,13 +2699,10 @@ void Ions::gatherPositions(std::vector& positions, const int root) const positions.resize(3 * num_ions_, 0.); - std::vector::const_iterator ion = local_ions_.begin(); - while (ion != local_ions().end()) + for (auto& ion : local_ions_) { - // get local positions - const int index = (*ion)->index(); - (*ion)->getPosition(&positions[3 * index]); - ++ion; + const int index = ion->index(); + ion->getPosition(&positions[3 * index]); } // gather data to PE root @@ -2726,17 +2711,17 @@ void Ions::gatherPositions(std::vector& positions, const int root) const void Ions::gatherForces(std::vector& forces, const int root) const { + assert(num_ions_ > 0); + MGmol_MPI& mmpi(*(MGmol_MPI::instance())); forces.resize(3 * num_ions_, 0.); - - std::vector::const_iterator ion = local_ions_.begin(); - while (ion != local_ions().end()) + for (auto& ion : 
local_ions_) { - // get local forces - const int index = (*ion)->index(); - (*ion)->getForce(&forces[3 * index]); - ion++; + const int index = ion->index(); + assert(forces.size() >= 3 * index); + assert(index < num_ions_); + ion->getForce(&forces[3 * index]); } // gather data to PE root @@ -2749,15 +2734,10 @@ void Ions::gatherLockedData(std::vector& locked_data, const int root) const MGmol_MPI& mmpi(*(MGmol_MPI::instance())); locked_data.resize(num_ions_, 0); - - std::vector::const_iterator ion = local_ions_.begin(); - while (ion != local_ions().end()) + for (auto& ion : local_ions_) { - // get local ion index - const int index = (*ion)->index(); - if ((*ion)->locked()) locked_data[index] = 1; - - ++ion; + const int index = ion->index(); + if (ion->locked()) locked_data[index] = 1; } // gather data to PE root @@ -3214,11 +3194,9 @@ void Ions::updateTaupInteractingIons() void Ions::clearLists() { local_ions_.clear(); - std::vector::iterator ion = list_ions_.begin(); - while (ion != list_ions_.end()) + for (auto& ion : list_ions_) { - delete *ion; - ion++; + delete ion; } list_ions_.clear(); } @@ -3335,24 +3313,21 @@ void Ions::initStepperData() { clearStepperData(); - std::vector::iterator lion = local_ions_.begin(); - while (lion != local_ions_.end()) + for (auto& lion : local_ions_) { - local_names_.push_back((*lion)->name()); - atmove_.push_back(!(*lion)->locked()); - pmass_.push_back((*lion)->getMass()); - gids_.push_back((*lion)->index()); + local_names_.push_back(lion->name()); + atmove_.push_back(!lion->locked()); + pmass_.push_back(lion->getMass()); + gids_.push_back(lion->index()); for (short i = 0; i < 3; i++) { - taum_.push_back((*lion)->getPreviousPosition(i)); - tau0_.push_back((*lion)->position(i)); - fion_.push_back((*lion)->force(i)); - velocity_.push_back((*lion)->velocity(i)); - rand_states_.push_back((*lion)->randomState(i)); + taum_.push_back(lion->getPreviousPosition(i)); + tau0_.push_back(lion->position(i)); + fion_.push_back(lion->force(i)); 
+ velocity_.push_back(lion->velocity(i)); + rand_states_.push_back(lion->randomState(i)); } - - lion++; } // initialize taup to enable computing velocities int size_tau = (int)tau0_.size(); @@ -3399,11 +3374,9 @@ void Ions::updateIons() void Ions::shiftIons(const Vector3D& shift) { // update local_ions data - std::vector::iterator ion = local_ions_.begin(); - while (ion != local_ions_.end()) + for (auto& ion : local_ions_) { - (*ion)->shiftPositionXLBOMDTest(shift); - ion++; + ion->shiftPositionXLBOMDTest(shift); } // update various list of ions @@ -3421,12 +3394,10 @@ void Ions::rescaleVelocities(const double factor) std::cout << "Ions::rescaleVelocities() with factor " << factor << std::endl; } - std::vector::iterator ion = local_ions_.begin(); - while (ion != local_ions_.end()) - { - (*ion)->rescaleVelocity(factor); - ion++; + for (auto& ion : local_ions_) + { + ion->rescaleVelocity(factor); } } diff --git a/src/Ions.h b/src/Ions.h index 33b8982b..3420d725 100644 --- a/src/Ions.h +++ b/src/Ions.h @@ -10,20 +10,20 @@ #ifndef MGMOL_IONS_H #define MGMOL_IONS_H -#include -#include -#include -#include -#include - #include "DistributedIonicData.h" +#include "HDFrestart.h" #include "Ion.h" -#include "hdf5.h" -class HDFrestart; +#include +#include +#include class Ions { +private: + /*! + * map species to atomic numbers + */ static std::map map_species_; static int num_ions_; @@ -37,7 +37,7 @@ class Ions std::vector list_ions_; - /* + /*! * ions located in local sub-domain */ std::vector local_ions_; @@ -61,6 +61,13 @@ class Ions bool has_locked_atoms_; + /*! 
+ * Prevent usage of copy constructor by making it private and + * non-implemented + */ + Ions(const Ions&); + void operator=(const Ions&); + void readRestartVelocities(HDFrestart& h5_file); void readRestartRandomStates(HDFrestart& h5_file); void readRestartPositions(HDFrestart& h5_file); @@ -193,11 +200,9 @@ class Ions std::vector& getGids() { return gids_; } void resetForces() { - std::vector::iterator ion = local_ions_.begin(); - while (ion != local_ions_.end()) + for (auto& ion : local_ions_) { - (*ion)->resetForce(); - ion++; + ion->resetForce(); } } void resetPositionsToPrevious(); diff --git a/tests/testIons.cc b/tests/testIons.cc index e23e0e24..39c96f4a 100644 --- a/tests/testIons.cc +++ b/tests/testIons.cc @@ -166,8 +166,12 @@ int main(int argc, char** argv) const double fval = 1.12; for (auto& f : forces) f = fval; + + ions.getNames(names); ions.setLocalForces(forces, names); + ions.printForcesGlobal(std::cout); + int nlocal = ions.getNumLocIons(); std::vector lforces(3 * nlocal); ions.getLocalForces(lforces); From 89630b3fba8e8f0c998bc53433c8e4c904274b2e Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Wed, 26 Mar 2025 13:14:19 -0400 Subject: [PATCH 35/99] Fixes for build without HDF5P (#324) --- src/Ions.cc | 3 ++- tests/CMakeLists.txt | 18 ++++++++++-------- tests/MVP/mvp.cfg | 1 - 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/src/Ions.cc b/src/Ions.cc index f10fb834..ed3d54e1 100644 --- a/src/Ions.cc +++ b/src/Ions.cc @@ -947,10 +947,11 @@ void Ions::initFromRestartFile(HDFrestart& h5_file) num_ions_ = at_names.size(); #ifdef MGMOL_USE_HDF5P if (!h5_file.useHdf5p()) +#endif { mmpi.allreduce(&num_ions_, 1, MPI_SUM); } -#endif + if (onpe0 && ct.verbose > 0) { (*MPIdata::sout) << "Ions::setFromRestartFile(), read " << num_ions_ diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index df774171..865ef354 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -547,14 +547,16 @@ add_test(NAME testMD_D72 
${CMAKE_CURRENT_SOURCE_DIR}/MD_D72/coords.in ${CMAKE_CURRENT_SOURCE_DIR}/MD_D72/lrs.in ${CMAKE_CURRENT_SOURCE_DIR}/../potentials) -add_test(NAME testHDF5single - COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/HDF5single/test.py - ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 4 ${MPIEXEC_PREFLAGS} - ${CMAKE_CURRENT_BINARY_DIR}/../src/mgmol-opt - ${CMAKE_CURRENT_SOURCE_DIR}/HDF5single/mgmol.cfg - ${CMAKE_CURRENT_SOURCE_DIR}/HDF5single/md.cfg - ${CMAKE_CURRENT_SOURCE_DIR}/HDF5single/h2o.xyz - ${CMAKE_CURRENT_SOURCE_DIR}/../potentials) +if(MGMOL_USE_HDF5P) + add_test(NAME testHDF5single + COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/HDF5single/test.py + ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 4 ${MPIEXEC_PREFLAGS} + ${CMAKE_CURRENT_BINARY_DIR}/../src/mgmol-opt + ${CMAKE_CURRENT_SOURCE_DIR}/HDF5single/mgmol.cfg + ${CMAKE_CURRENT_SOURCE_DIR}/HDF5single/md.cfg + ${CMAKE_CURRENT_SOURCE_DIR}/HDF5single/h2o.xyz + ${CMAKE_CURRENT_SOURCE_DIR}/../potentials) +endif() add_test(NAME testMD_MVP COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/MD_MVP/test.py ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 4 ${MPIEXEC_PREFLAGS} diff --git a/tests/MVP/mvp.cfg b/tests/MVP/mvp.cfg index bb3ab52a..868e5703 100644 --- a/tests/MVP/mvp.cfg +++ b/tests/MVP/mvp.cfg @@ -32,4 +32,3 @@ solver=MVP nb_inner_it=2 [Restart] output_level=2 -output_type=single_file From e80193b2095ded2daef4c821961653f3df5dc574 Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Wed, 26 Mar 2025 13:14:37 -0400 Subject: [PATCH 36/99] Fix testRhoVhRestart (#325) --- tests/RhoVhRestart/testRhoVhRestart.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/RhoVhRestart/testRhoVhRestart.cc b/tests/RhoVhRestart/testRhoVhRestart.cc index 09da3320..ce40272d 100644 --- a/tests/RhoVhRestart/testRhoVhRestart.cc +++ b/tests/RhoVhRestart/testRhoVhRestart.cc @@ -46,8 +46,8 @@ int testRhoRestart(MGmolInterface* mgmol_) /* check if the recomputed density is the same */ for (int d = 0; d < 
(int)rho0.size(); d++) { - double error = abs(rho0[d] - rho->rho_[0][d]) / abs(rho0[d]); - if (error > 1e-10) + double error = abs(rho0[d] - rho->rho_[0][d]); + if (error > 1e-10 * abs(rho0[d])) { printf("rank %d, rho[%d]=%.15e, rho0[%d]=%.15e\n", rank, d, rho->rho_[0][d], d, rho0[d]); From bcad1127e11f4cedec5a21f774b661589b4eb8b1 Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Wed, 26 Mar 2025 14:21:30 -0400 Subject: [PATCH 37/99] Fix a few more issues with class Ions (#328) --- src/Ions.cc | 29 +++++++++++------------------ src/Ions.h | 6 +++--- tests/testIons.cc | 31 +++++++++++++++++++++++++------ 3 files changed, 39 insertions(+), 27 deletions(-) diff --git a/src/Ions.cc b/src/Ions.cc index ed3d54e1..3b128b0e 100644 --- a/src/Ions.cc +++ b/src/Ions.cc @@ -109,14 +109,12 @@ Ions::Ions(const double lat[3], const std::vector& sp) : species_(sp) Ions::Ions(const Ions& ions, const double shift[3]) : species_(ions.species_) { - std::vector::const_iterator ion = ions.list_ions_.begin(); - while (ion != ions.list_ions_.end()) + for (const auto& ion : ions.list_ions_) { - Ion* newion = new Ion(**ion); + Ion* newion = new Ion(*ion); newion->shiftPosition(shift); newion->setup(); list_ions_.push_back(newion); - ion++; } for (short i = 0; i < 3; ++i) lattice_[i] = ions.lattice_[i]; @@ -2212,7 +2210,7 @@ void Ions::getLocalPositions(std::vector& tau) const void Ions::getLocalNames(std::vector& names) const { names.clear(); - for (auto& ion : local_ions_) + for (const auto& ion : local_ions_) { names.push_back(ion->name()); } @@ -2220,14 +2218,14 @@ void Ions::getLocalNames(std::vector& names) const void Ions::getNames(std::vector& names) const { - names.clear(); - for (auto& ion : list_ions_) - { - names.push_back(ion->name()); - } + std::vector local_names; + getLocalNames(local_names); + + MGmol_MPI& mmpi = *(MGmol_MPI::instance()); + mmpi.allGatherV(local_names, names); } -void Ions::getPositions(std::vector& tau) +void Ions::getPositions(std::vector& tau) const 
{ std::vector tau_local(3 * local_ions_.size()); @@ -2237,10 +2235,9 @@ void Ions::getPositions(std::vector& tau) mmpi.allGatherV(tau_local, tau); } -void Ions::getAtomicNumbers(std::vector& atnumbers) +void Ions::getAtomicNumbers(std::vector& atnumbers) const { std::vector local_atnumbers; - for (auto& ion : local_ions_) { local_atnumbers.push_back(ion->atomic_number()); @@ -2250,15 +2247,11 @@ void Ions::getAtomicNumbers(std::vector& atnumbers) mmpi.allGatherV(local_atnumbers, atnumbers); } -void Ions::getForces(std::vector& forces) +void Ions::getForces(std::vector& forces) const { std::vector forces_local(3 * local_ions_.size()); - getLocalForces(forces_local); - int n = getNumIons(); - forces.resize(3 * n); - MGmol_MPI& mmpi = *(MGmol_MPI::instance()); mmpi.allGatherV(forces_local, forces); } diff --git a/src/Ions.h b/src/Ions.h index 3420d725..3d382eb7 100644 --- a/src/Ions.h +++ b/src/Ions.h @@ -292,10 +292,10 @@ class Ions void getLocalPositions(std::vector& tau) const; void getLocalNames(std::vector& names) const; void getNames(std::vector& names) const; - void getPositions(std::vector& tau); - void getAtomicNumbers(std::vector& atnumbers); + void getPositions(std::vector& tau) const; + void getAtomicNumbers(std::vector& atnumbers) const; - void getForces(std::vector& forces); + void getForces(std::vector& forces) const; void getLocalForces(std::vector& tau) const; /*! 
diff --git a/tests/testIons.cc b/tests/testIons.cc index 39c96f4a..6bea5d64 100644 --- a/tests/testIons.cc +++ b/tests/testIons.cc @@ -107,6 +107,7 @@ int main(int argc, char** argv) ions.getAtomicNumbers(anumbers); if (myrank == 0) { + std::cout << "Positions:" << std::endl; int i = 0; for (auto& position : positions) { @@ -121,7 +122,7 @@ int main(int argc, char** argv) MPI_Barrier(MPI_COMM_WORLD); // swap x and z - for (size_t i = 0; i < positions.size() - 2; i++) + for (size_t i = 0; i < positions.size() - 2; i += 3) { double x = positions[i]; double z = positions[i + 2]; @@ -162,11 +163,13 @@ int main(int argc, char** argv) MPI_Barrier(MPI_COMM_WORLD); std::vector forces(3 * na); - // arbitrary value - const double fval = 1.12; + // set forces to a different arbitrary value for each component + int i = 0; for (auto& f : forces) - f = fval; - + { + f = (double)i; + i++; + } ions.getNames(names); ions.setLocalForces(forces, names); @@ -177,13 +180,29 @@ int main(int argc, char** argv) ions.getLocalForces(lforces); for (auto& f : lforces) { - if (std::abs(f - fval) > 1.e-14) + if (std::fmod(f, 1.) 
> 1.e-14) { std::cerr << "f = " << f << std::endl; MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); } } + ions.getForces(forces); + if (myrank == 0) + for (auto f0 = forces.begin(); f0 != forces.end(); f0++) + { + std::cout << "f0 = " << *f0 << std::endl; + for (auto f1 = f0 + 1; f1 != forces.end(); f1++) + { + // make sure each force component is different + if (std::abs(*f0 - *f1) < 1.e-14) + { + std::cerr << "f0 = " << *f0 << ", f1 = " << *f1 + << std::endl; + MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); + } + } + } mpirc = MPI_Finalize(); if (mpirc != MPI_SUCCESS) { From a78e260d4e19416217cd68a1e72d0f4a422ec5a6 Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Thu, 27 Mar 2025 14:46:11 -0400 Subject: [PATCH 38/99] Introduce new constructor for class Ions (#330) * modify various functions to use argument of type Ions instead of MGmol::ions_ --- src/ABPG.cc | 6 +- src/DFTsolver.cc | 4 +- src/DavidsonSolver.cc | 8 +- src/Energy.cc | 29 ++- src/Energy.h | 12 +- src/GrassmanLineMinimization.cc | 4 +- src/HamiltonianMVPSolver.cc | 14 +- src/Ions.cc | 193 ++++++++---------- src/Ions.h | 19 +- src/MGmol.cc | 55 ++--- src/MGmol.h | 8 +- src/MGmolInterface.h | 3 - src/MVPSolver.cc | 4 +- src/PolakRibiereSolver.cc | 8 +- src/lbfgsrlx.cc | 1 - src/quench.cc | 4 +- .../testRestartEnergyAndForces.cc | 4 - 17 files changed, 183 insertions(+), 193 deletions(-) diff --git a/src/ABPG.cc b/src/ABPG.cc index 62cb6ba1..107edc63 100644 --- a/src/ABPG.cc +++ b/src/ABPG.cc @@ -38,7 +38,7 @@ void ABPG::setup(T& orbitals) // // orthof=true: wants orthonormalized updated wave functions template -int ABPG::updateWF(T& orbitals, Ions& /*ions*/, const double precond_factor, +int ABPG::updateWF(T& orbitals, Ions& ions, const double precond_factor, const bool /*orthof*/, T& work_orbitals, const bool accelerate, const bool print_res, const double atol) { @@ -51,8 +51,8 @@ int ABPG::updateWF(T& orbitals, Ions& /*ions*/, const double precond_factor, T res("Residual", orbitals, false); const bool 
check_res = (atol > 0.); - double normRes = mgmol_strategy_->computeResidual( - orbitals, work_orbitals, res, (print_res || check_res), check_res); + double normRes = mgmol_strategy_->computeResidual(orbitals, work_orbitals, + ions, res, (print_res || check_res), check_res); if (normRes < atol && check_res) { abpg_nl_update_tm_.stop(); diff --git a/src/DFTsolver.cc b/src/DFTsolver.cc index 91c4a0f6..206fba91 100644 --- a/src/DFTsolver.cc +++ b/src/DFTsolver.cc @@ -196,8 +196,8 @@ double DFTsolver::evaluateEnergy( // Get the new total energy const double ts = 0.5 * proj_matrices_->computeEntropy(); // in [Ha] - eks_history_[0] - = energy_->evaluateTotal(ts, proj_matrices_, orbitals, print_flag, os_); + eks_history_[0] = energy_->evaluateTotal( + ts, proj_matrices_, ions_, orbitals, print_flag, os_); sum_eig_[1] = sum_eig_[0]; sum_eig_[0] = 2. * proj_matrices_->getEigSum(); // 2.*sum in [Ry] diff --git a/src/DavidsonSolver.cc b/src/DavidsonSolver.cc index 0dc88ef4..a049fde7 100644 --- a/src/DavidsonSolver.cc +++ b/src/DavidsonSolver.cc @@ -500,7 +500,7 @@ int DavidsonSolver::solve( ts0 = evalEntropy(projmatrices, (ct.verbose > 1), os_); e0 = energy_->evaluateTotal( - ts0, projmatrices, orbitals, printE, os_); + ts0, projmatrices, ions_, orbitals, printE, os_); retval = checkConvergence(e0, outer_it, ct.conv_tol); if (retval == 0 || (outer_it == ct.max_electronic_steps)) @@ -549,7 +549,7 @@ int DavidsonSolver::solve( ts0 = evalEntropy(proj_mat2N_.get(), (ct.verbose > 1), os_); e0 = energy_->evaluateTotal( - ts0, proj_mat2N_.get(), orbitals, printE, os_); + ts0, proj_mat2N_.get(), ions_, orbitals, printE, os_); } // 2N x 2N target... 
@@ -621,8 +621,8 @@ int DavidsonSolver::solve( const double ts1 = evalEntropy(proj_mat2N_.get(), (ct.verbose > 2), os_); - const double e1 = energy_->evaluateTotal( - ts1, proj_mat2N_.get(), orbitals, ct.verbose - 1, os_); + const double e1 = energy_->evaluateTotal(ts1, proj_mat2N_.get(), + ions_, orbitals, ct.verbose - 1, os_); // line minimization beta = minQuadPolynomial(e0, e1, de0, (ct.verbose > 2), os_); diff --git a/src/Energy.cc b/src/Energy.cc index 1cee6d67..f53dbc93 100644 --- a/src/Energy.cc +++ b/src/Energy.cc @@ -25,11 +25,10 @@ #define RY2HA 0.5 template -Energy::Energy(const pb::Grid& mygrid, const Ions& ions, - const Potentials& pot, const Electrostatic& es, const Rho& rho, - const XConGrid& xc, SpreadPenaltyInterface* spread_penalty) +Energy::Energy(const pb::Grid& mygrid, const Potentials& pot, + const Electrostatic& es, const Rho& rho, const XConGrid& xc, + SpreadPenaltyInterface* spread_penalty) : mygrid_(mygrid), - ions_(ions), pot_(pot), es_(es), rho_(rho), @@ -59,7 +58,7 @@ double Energy::getEVrhoRho() const } template -double Energy::evaluateEnergyIonsInVext() +double Energy::evaluateEnergyIonsInVext(Ions& ions) { double energy = 0.; @@ -69,12 +68,12 @@ double Energy::evaluateEnergyIonsInVext() //(*MPIdata::sout)<<"Energy::evaluateEnergyIonsInVext()"< positions; - positions.reserve(3 * ions_.local_ions().size()); + positions.reserve(3 * ions.local_ions().size()); // loop over ions int nions = 0; - std::vector::const_iterator ion = ions_.local_ions().begin(); - while (ion != ions_.local_ions().end()) + std::vector::const_iterator ion = ions.local_ions().begin(); + while (ion != ions.local_ions().end()) { (*ion)->getPosition(position); positions.push_back(position[0]); @@ -88,9 +87,9 @@ double Energy::evaluateEnergyIonsInVext() pot_.getValVext(positions, val); // loop over ions again - ion = ions_.local_ions().begin(); + ion = ions.local_ions().begin(); int ion_index = 0; - while (ion != ions_.local_ions().end()) + while (ion != 
ions.local_ions().end()) { const double z = (*ion)->getZion(); // int ion_index=(*ion)->index(); @@ -112,16 +111,16 @@ double Energy::evaluateEnergyIonsInVext() template double Energy::evaluateTotal(const double ts, // in [Ha] - ProjectedMatricesInterface* projmatrices, const T& phi, const int verbosity, - std::ostream& os) + ProjectedMatricesInterface* projmatrices, Ions& ions, const T& phi, + const int verbosity, std::ostream& os) { eval_te_tm_.start(); Control& ct = *(Control::instance()); - const double eself = ions_.energySelf(); - const double ediff = ions_.energyDiff(ct.bcPoisson); - const double eipot = evaluateEnergyIonsInVext(); + const double eself = ions.energySelf(); + const double ediff = ions.energyDiff(ct.bcPoisson); + const double eipot = evaluateEnergyIonsInVext(ions); const double eigsum = 0.5 * projmatrices->getExpectationH(); diff --git a/src/Energy.h b/src/Energy.h index e6cafd98..d55d4040 100644 --- a/src/Energy.h +++ b/src/Energy.h @@ -11,6 +11,7 @@ #define MGMOL_ENERGY_H #include "Grid.h" +#include "Ions.h" #include "Rho.h" #include "SpreadPenaltyInterface.h" #include "Timer.h" @@ -20,7 +21,6 @@ #include class Potentials; -class Ions; class Electrostatic; class ProjectedMatricesInterface; class XConGrid; @@ -29,7 +29,6 @@ template class Energy { const pb::Grid& mygrid_; - const Ions& ions_; const Potentials& pot_; const Electrostatic& es_; const Rho& rho_; @@ -45,16 +44,15 @@ class Energy double getEVrhoRho() const; public: - Energy(const pb::Grid&, const Ions&, const Potentials&, - const Electrostatic&, const Rho&, const XConGrid&, - SpreadPenaltyInterface*); + Energy(const pb::Grid&, const Potentials&, const Electrostatic&, + const Rho&, const XConGrid&, SpreadPenaltyInterface*); static Timer eval_te_tm() { return eval_te_tm_; } - double evaluateTotal(const double ts, ProjectedMatricesInterface*, + double evaluateTotal(const double ts, ProjectedMatricesInterface*, Ions&, const T& phi, const int, std::ostream&); - double 
evaluateEnergyIonsInVext(); + double evaluateEnergyIonsInVext(Ions&); void saveVofRho(); }; diff --git a/src/GrassmanLineMinimization.cc b/src/GrassmanLineMinimization.cc index a83d6b08..f642e0f4 100644 --- a/src/GrassmanLineMinimization.cc +++ b/src/GrassmanLineMinimization.cc @@ -29,7 +29,7 @@ Timer GrassmanLineMinimization::update_states_tm_("Grassman_update_states"); // // orthof=true: wants orthonormalized updated wave functions template -int GrassmanLineMinimization::updateWF(T& orbitals, Ions& /*ions*/, +int GrassmanLineMinimization::updateWF(T& orbitals, Ions& ions, const double precond_factor, const bool orthof, T& work_orbitals, const bool accelerate, const bool print_res, const double atol) { @@ -61,7 +61,7 @@ int GrassmanLineMinimization::updateWF(T& orbitals, Ions& /*ions*/, // Update wavefunctions const bool check_res = (atol > 0.); double normRes = mgmol_strategy_->computeResidual(orbitals, work_orbitals, - *new_grad_, (print_res || check_res), check_res); + ions, *new_grad_, (print_res || check_res), check_res); if (normRes < atol && check_res) { nl_update_tm_.stop(); diff --git a/src/HamiltonianMVPSolver.cc b/src/HamiltonianMVPSolver.cc index 4bc017dc..f90dd228 100644 --- a/src/HamiltonianMVPSolver.cc +++ b/src/HamiltonianMVPSolver.cc @@ -160,8 +160,8 @@ int HamiltonianMVPSolver::solve( // compute energy at origin const int printE = (ct.verbose > 1) ? 1 : 0; - double e0 - = energy_->evaluateTotal(ts0, projmatrices, orbitals, printE, os_); + double e0 = energy_->evaluateTotal( + ts0, projmatrices, ions_, orbitals, printE, os_); // // compute energy at end for new H @@ -189,8 +189,8 @@ int HamiltonianMVPSolver::solve( projmatrices->setHB2H(); // compute energy at end (beta=1.) 
- double e1 - = energy_->evaluateTotal(ts1, projmatrices, orbitals, printE, os_); + double e1 = energy_->evaluateTotal( + ts1, projmatrices, ions_, orbitals, printE, os_); // // evaluate energy at mid-point @@ -226,8 +226,8 @@ int HamiltonianMVPSolver::solve( projmatrices->setHB2H(); // compute energy at midpoint - double ei - = energy_->evaluateTotal(tsi, projmatrices, orbitals, printE, os_); + double ei = energy_->evaluateTotal( + tsi, projmatrices, ions_, orbitals, printE, os_); // line minimization double beta @@ -285,7 +285,7 @@ int HamiltonianMVPSolver::solve( // compute energy at end (beta=1.) ei = energy_->evaluateTotal( - tsi, projmatrices, orbitals, printE, os_); + tsi, projmatrices, ions_, orbitals, printE, os_); // line minimization beta = minQuadPolynomialFrom3values( diff --git a/src/Ions.cc b/src/Ions.cc index 3b128b0e..ef9d687c 100644 --- a/src/Ions.cc +++ b/src/Ions.cc @@ -70,8 +70,18 @@ void writeData2d(HDFrestart& h5f_file, std::string datasetname, } } -Ions::Ions(const double lat[3], const std::vector& sp) : species_(sp) +Ions::Ions(const double lat[3], const std::vector& sp) + : species_(sp), setup_(false), has_locked_atoms_(false) { + setupSubdomains(lat); +} + +void Ions::setupSubdomains(const double lat[3]) +{ + MGmol_MPI& mmpi = *(MGmol_MPI::instance()); + Mesh* mymesh = Mesh::instance(); + const pb::PEenv& myPEenv = mymesh->peenv(); + for (short i = 0; i < 3; i++) { assert(lat[i] > 0.); @@ -79,15 +89,10 @@ Ions::Ions(const double lat[3], const std::vector& sp) : species_(sp) { (*MPIdata::serr) << "Ions constructor: lattice[" << i << "]=" << lat[i] << "!!!" 
<< std::endl; - exit(2); + mmpi.abort(); } lattice_[i] = lat[i]; } - setup_ = false; - has_locked_atoms_ = false; - - Mesh* mymesh = Mesh::instance(); - const pb::PEenv& myPEenv = mymesh->peenv(); for (short i = 0; i < 3; i++) div_lattice_[i] = lattice_[i] / (double)(myPEenv.n_mpi_task(i)); @@ -107,6 +112,20 @@ Ions::Ions(const double lat[3], const std::vector& sp) : species_(sp) MPI_Cart_shift(cart_comm_, dir, disp, &source_[dir], &dest_[dir]); } +Ions::Ions(const std::vector& p, const std::vector& anum, + const double lat[3], const std::vector& sp) + : species_(sp) +{ + setupSubdomains(lat); + + double rmax = getMaxListRadius(); + setupListIonsBoundaries(rmax); + + num_ions_ = setAtoms(p, anum); + + setup(); +} + Ions::Ions(const Ions& ions, const double shift[3]) : species_(ions.species_) { for (const auto& ion : ions.list_ions_) @@ -181,8 +200,6 @@ void Ions::setup() ions_setup_tm.start(); - Ion::resetIndexCount(); - updateListIons(); //#ifndef NDEBUG @@ -197,7 +214,7 @@ void Ions::setup() has_locked_atoms_ = hasLockedAtoms(); // initialize data for constraints - setupContraintsData(interacting_ions_); + setupContraintsData(); computeMaxNumProjs(); @@ -297,13 +314,13 @@ void Ions::setupInteractingIons() // setup arrays to be used in constraints enforcement // using references to local_ions and extra "dummy" data -void Ions::setupContraintsData(std::vector& ions_for_constraints) +void Ions::setupContraintsData() { Control& ct = *(Control::instance()); if (ct.verbose > 0) printWithTimeStamp("Ions::setupContraintsData()...", std::cout); - const int nnloc = ions_for_constraints.size() - local_ions_.size(); + const int nnloc = interacting_ions_.size() - local_ions_.size(); // std::cout<<"interacting_ions_.size()="<::finalEnergy() // Get the total energy const double ts = 0.5 * proj_matrices_->computeEntropy(); // in [Ha] total_energy_ = energy_->evaluateTotal( - ts, proj_matrices_.get(), *current_orbitals_, 2, os_); + ts, proj_matrices_.get(), *ions_, 
*current_orbitals_, 2, os_); } template @@ -828,7 +827,7 @@ void MGmol::setupPotentials(Ions& ions) pot.initialize(ions); if (ct.verbose > 0) printWithTimeStamp("Setup kbpsi...", os_); - g_kbpsi_->setup(*ions_); + g_kbpsi_->setup(ions); electrostat_->setupRhoc(pot.rho_comp()); @@ -1170,8 +1169,8 @@ void MGmol::precond_mg(OrbitalsType& phi) template double MGmol::computeResidual(OrbitalsType& orbitals, - OrbitalsType& work_orbitals, OrbitalsType& res, const bool print_residual, - const bool norm_res) + OrbitalsType& work_orbitals, Ions& ions, OrbitalsType& res, + const bool print_residual, const bool norm_res) { assert(orbitals.getIterativeIndex() >= 0); @@ -1190,7 +1189,7 @@ double MGmol::computeResidual(OrbitalsType& orbitals, // get H*psi stored in work_orbitals.psi // and psi^T H psi in Hij - getHpsiAndTheta(*ions_, orbitals, work_orbitals); + getHpsiAndTheta(ions, orbitals, work_orbitals); double norm2Res = computeConstraintResidual( orbitals, work_orbitals, res, print_residual, norm_res); @@ -1428,17 +1427,22 @@ double MGmol::evaluateEnergyAndForces(Orbitals* orbitals, Control& ct = *(Control::instance()); - ions_->setPositions(tau, atnumbers); + // create a new temporary Ions object to be used for + // energy end forces calculation + Mesh* mymesh = Mesh::instance(); + const pb::Grid& mygrid = mymesh->grid(); + const double lattice[3] = { mygrid.ll(0), mygrid.ll(1), mygrid.ll(2) }; + Ions ions(tau, atnumbers, lattice, ions_->getSpecies()); - setupPotentials(*ions_); + setupPotentials(ions); double eks = 0.; OrbitalsType* dorbitals = dynamic_cast(orbitals); - quench(*dorbitals, *ions_, ct.max_electronic_steps, 20, eks); + quench(*dorbitals, ions, ct.max_electronic_steps, 20, eks); - force(*dorbitals, *ions_); + force(*dorbitals, ions); - ions_->getForces(forces); + ions.getForces(forces); return eks; } @@ -1450,37 +1454,42 @@ double MGmol::evaluateDMandEnergyAndForces(Orbitals* orbitals, { OrbitalsType* dorbitals = dynamic_cast(orbitals); - 
ions_->setPositions(tau, atnumbers); + // create a new temporary Ions object to be used for + // energy end forces calculation + Mesh* mymesh = Mesh::instance(); + const pb::Grid& mygrid = mymesh->grid(); + const double lattice[3] = { mygrid.ll(0), mygrid.ll(1), mygrid.ll(2) }; + Ions ions(tau, atnumbers, lattice, ions_->getSpecies()); - setupPotentials(*ions_); + setupPotentials(ions); // initialize electronic density rho_->update(*dorbitals); // initialize potential - update_pot(*ions_); + update_pot(ions); // initialize projected matrices - updateHmatrix(*dorbitals, *ions_); + updateHmatrix(*dorbitals, ions); proj_matrices_->updateThetaAndHB(); // compute DM std::shared_ptr> dm_strategy( DMStrategyFactory>::create(comm_, os_, *ions_, + dist_matrix::DistMatrix>::create(comm_, os_, ions, rho_.get(), energy_.get(), electrostat_.get(), this, proj_matrices_.get(), dorbitals)); dm_strategy->update(*dorbitals); // evaluate energy and forces - double ts = 0.; - double eks - = energy_->evaluateTotal(ts, proj_matrices_.get(), *dorbitals, 2, os_); + double ts = 0.; + double eks = energy_->evaluateTotal( + ts, proj_matrices_.get(), ions, *dorbitals, 2, os_); - force(*dorbitals, *ions_); + force(*dorbitals, ions); - ions_->getForces(forces); + ions.getForces(forces); return eks; } diff --git a/src/MGmol.h b/src/MGmol.h index 12197dfd..c85e7b59 100644 --- a/src/MGmol.h +++ b/src/MGmol.h @@ -335,17 +335,13 @@ class MGmol : public MGmolInterface void precond_mg(OrbitalsType& orbitals); void setGamma(const pb::Lap& lapOper, const Potentials& pot); double computeResidual(OrbitalsType& orbitals, OrbitalsType& work_orbitals, - OrbitalsType& res, const bool print_residual, const bool norm_res); + Ions& ions, OrbitalsType& res, const bool print_residual, + const bool norm_res); void applyAOMMprojection(OrbitalsType&); void force(OrbitalsType& orbitals, Ions& ions) { forces_->force(orbitals, ions); } - void setPositions(const std::vector& positions, - const std::vector& atnumbers) - { 
- ions_->setPositions(positions, atnumbers); - } /* * simply dump current state diff --git a/src/MGmolInterface.h b/src/MGmolInterface.h index dc54a8e7..9a9bf8a6 100644 --- a/src/MGmolInterface.h +++ b/src/MGmolInterface.h @@ -40,9 +40,6 @@ class MGmolInterface virtual void getAtomicPositions(std::vector& tau) = 0; virtual void getAtomicNumbers(std::vector& an) = 0; - virtual void setPositions(const std::vector& positions, - const std::vector& atnumbers) - = 0; virtual std::shared_ptr getProjectedMatrices() = 0; virtual void dumpRestart() = 0; diff --git a/src/MVPSolver.cc b/src/MVPSolver.cc index 7ab9f665..6990e29d 100644 --- a/src/MVPSolver.cc +++ b/src/MVPSolver.cc @@ -256,7 +256,7 @@ int MVPSolver::solve(OrbitalsType& orbitals) const double ts0 = evalEntropyMVP(current_proj_mat, (ct.verbose > 1), os_); const double e0 = energy_->evaluateTotal( - ts0, current_proj_mat, orbitals, printE, os_); + ts0, current_proj_mat, ions_, orbitals, printE, os_); MatrixType target("target", numst_, numst_); @@ -309,7 +309,7 @@ int MVPSolver::solve(OrbitalsType& orbitals) const double ts1 = evalEntropyMVP(proj_mat_work_, (ct.verbose > 2), os_); const double e1 = energy_->evaluateTotal( - ts1, proj_mat_work_, orbitals, ct.verbose - 1, os_); + ts1, proj_mat_work_, ions_, orbitals, ct.verbose - 1, os_); // line minimization const double beta diff --git a/src/PolakRibiereSolver.cc b/src/PolakRibiereSolver.cc index 6b5f88de..3f46a00e 100644 --- a/src/PolakRibiereSolver.cc +++ b/src/PolakRibiereSolver.cc @@ -243,9 +243,9 @@ double PolakRibiereSolver::evaluateEnergy( const OrbitalsType& orbitals, const bool print_flag) { // Get the new total energy - const double ts = 0.5 * proj_matrices_->computeEntropy(); // in [Ha] - const double energy - = energy_->evaluateTotal(ts, proj_matrices_, orbitals, print_flag, os_); + const double ts = 0.5 * proj_matrices_->computeEntropy(); // in [Ha] + const double energy = energy_->evaluateTotal( + ts, proj_matrices_, ions_, orbitals, print_flag, os_); 
// Control& ct(*(Control::instance())); // if( ct.verbose>2 && onpe0 )os_<<"energy="<::solve(OrbitalsType& orbitals, // evaluate residuals, preconditioned residuals for current orbitals double normRes = mgmol_strategy_->computeResidual(orbitals, - work_orbitals, *r_k_, (print_res || ct.checkResidual()), + work_orbitals, ions_, *r_k_, (print_res || ct.checkResidual()), ct.checkResidual()); if (normRes < ct.conv_tol && ct.checkResidual()) { diff --git a/src/lbfgsrlx.cc b/src/lbfgsrlx.cc index 6698eef0..ce05c1a3 100644 --- a/src/lbfgsrlx.cc +++ b/src/lbfgsrlx.cc @@ -14,7 +14,6 @@ #include "Energy.h" #include "Ions.h" #include "LBFGS.h" -#include "LBFGS_IonicStepper.h" #include "LocalizationRegions.h" #include "MGmol.h" #include "MGmol_blas1.h" diff --git a/src/quench.cc b/src/quench.cc index 073798f6..6eed74bb 100644 --- a/src/quench.cc +++ b/src/quench.cc @@ -600,8 +600,8 @@ int MGmol::quench(OrbitalsType& orbitals, Ions& ions, << " TS [Ha] = " << ts << std::endl; } } - last_eks - = energy_->evaluateTotal(ts, proj_matrices_.get(), orbitals, 2, os_); + last_eks = energy_->evaluateTotal( + ts, proj_matrices_.get(), ions, orbitals, 2, os_); if (ct.computeCondGramMD()) { diff --git a/tests/RestartEnergyAndForces/testRestartEnergyAndForces.cc b/tests/RestartEnergyAndForces/testRestartEnergyAndForces.cc index 3027ab55..b1fb0094 100644 --- a/tests/RestartEnergyAndForces/testRestartEnergyAndForces.cc +++ b/tests/RestartEnergyAndForces/testRestartEnergyAndForces.cc @@ -124,8 +124,6 @@ int main(int argc, char** argv) } } - mgmol->setPositions(positions, anumbers); - Mesh* mymesh = Mesh::instance(); const pb::Grid& mygrid = mymesh->grid(); const pb::PEenv& myPEenv = mymesh->peenv(); @@ -195,8 +193,6 @@ int main(int argc, char** argv) } } - mgmol->setPositions(positions, anumbers); - // // evaluate energy and forces with wavefunctions just read // From c4817b8a1028923d71458a69a95ee05cd9486826 Mon Sep 17 00:00:00 2001 From: "Siu Wun \"Tony\" Cheung" Date: Thu, 27 Mar 2025 
16:44:32 -0700 Subject: [PATCH 39/99] Add cleanup and MGmolInterface (#331) --- src/MGmolInterface.h | 1 + tests/WFEnergyAndForces/testWFEnergyAndForces.cc | 1 + 2 files changed, 2 insertions(+) diff --git a/src/MGmolInterface.h b/src/MGmolInterface.h index 9a9bf8a6..ec913944 100644 --- a/src/MGmolInterface.h +++ b/src/MGmolInterface.h @@ -25,6 +25,7 @@ class MGmolInterface virtual int setupConstraintsFromInput(const std::string input_file) = 0; virtual void setup() = 0; virtual void run() = 0; + virtual void cleanup() = 0; virtual double evaluateEnergyAndForces(const std::vector& tau, const std::vector& atnumbers, std::vector& forces) diff --git a/tests/WFEnergyAndForces/testWFEnergyAndForces.cc b/tests/WFEnergyAndForces/testWFEnergyAndForces.cc index c9e28d48..64765c7a 100644 --- a/tests/WFEnergyAndForces/testWFEnergyAndForces.cc +++ b/tests/WFEnergyAndForces/testWFEnergyAndForces.cc @@ -190,6 +190,7 @@ int main(int argc, char** argv) } } + mgmol->cleanup(); delete mgmol; } // close main scope From a5f6f3a9906c8e4cee1bfe7a7ce2197531ee8f83 Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Fri, 28 Mar 2025 10:58:24 -0400 Subject: [PATCH 40/99] Add function to set local forces (#333) * based on matching coordinates to local ions --- src/Ions.cc | 33 +++++++++++++++++++++++++++ src/Ions.h | 6 +++++ tests/testIons.cc | 58 ++++++++++++++++++++++++++++++++++++----------- 3 files changed, 84 insertions(+), 13 deletions(-) diff --git a/src/Ions.cc b/src/Ions.cc index ef9d687c..462373a9 100644 --- a/src/Ions.cc +++ b/src/Ions.cc @@ -1322,6 +1322,39 @@ void Ions::setLocalForces( } } +void Ions::setLocalForces( + const std::vector& forces, const std::vector& coords) +{ + assert(forces.size() == coords.size()); + + // tolerance can be pretty loose, as long as it does not allow mix up + // with coordinates of other atoms + const double tol = 1.e-2; + + // loop over global list of forces and coordinates + std::vector::const_iterator cit = coords.begin(); + for (auto fit = 
forces.begin(); fit != forces.end(); fit += 3) + { + // find possible matching ion + for (auto& ion : local_ions_) + { + double p[3]; + ion->getPosition(&p[0]); + double d2 = (p[0] - (*cit)) * (p[0] - (*cit)) + + (p[1] - (*(cit + 1))) * (p[0] - (*(cit + 1))) + + (p[2] - (*(cit + 2))) * (p[0] - (*(cit + 2))); + double d = std::sqrt(d2); + if (d < tol) + { + ion->set_force(0, *fit); + ion->set_force(1, *(fit + 1)); + ion->set_force(2, *(fit + 2)); + } + } + cit += 3; + } +} + // Writes out the postions of the ions and the current forces on them by root void Ions::printForcesGlobal(std::ostream& os, const int root) const { diff --git a/src/Ions.h b/src/Ions.h index f39e1fed..aaf48e79 100644 --- a/src/Ions.h +++ b/src/Ions.h @@ -308,6 +308,12 @@ class Ions void setLocalForces(const std::vector& forces, const std::vector& names); + /*! + * set forces for ions in local_ions_ based on coordinates matching + */ + void setLocalForces( + const std::vector& forces, const std::vector& coords); + void syncData(const std::vector& sp); // void syncNames(const int nions, std::vector& local_names, // std::vector& names); diff --git a/tests/testIons.cc b/tests/testIons.cc index 6bea5d64..33c3cd24 100644 --- a/tests/testIons.cc +++ b/tests/testIons.cc @@ -6,6 +6,29 @@ #include +// check that all forces components have integer values larger than 0 +// and differ from each other +int checkForces(std::vector& forces) +{ + const double tol = 1.e-14; + + for (auto f0 = forces.begin(); f0 != forces.end(); f0++) + { + std::cout << "f0 = " << *f0 << std::endl; + for (auto f1 = f0 + 1; f1 != forces.end(); f1++) + { + // make sure each force component is different + if (std::abs(*f0 - *f1) < tol || *f1 < tol || *f0 < tol) + { + std::cerr << "f0 = " << *f0 << ", f1 = " << *f1 << std::endl; + return 1; + } + } + } + + return 0; +} + int main(int argc, char** argv) { int status = 0; @@ -164,7 +187,7 @@ int main(int argc, char** argv) std::vector forces(3 * na); // set forces to a different 
arbitrary value for each component - int i = 0; + int i = 1; for (auto& f : forces) { f = (double)i; @@ -189,20 +212,29 @@ int main(int argc, char** argv) ions.getForces(forces); if (myrank == 0) - for (auto f0 = forces.begin(); f0 != forces.end(); f0++) + { + int status = checkForces(forces); + if (status > 0) MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); + } + + // test Ions::setLocalForces based on coordinates matching + { + std::vector positions; + std::vector anumbers; + ions.getPositions(positions); + + ions.setLocalForces(forces, positions); + + ions.printForcesGlobal(std::cout); + + ions.getForces(forces); + if (myrank == 0) { - std::cout << "f0 = " << *f0 << std::endl; - for (auto f1 = f0 + 1; f1 != forces.end(); f1++) - { - // make sure each force component is different - if (std::abs(*f0 - *f1) < 1.e-14) - { - std::cerr << "f0 = " << *f0 << ", f1 = " << *f1 - << std::endl; - MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); - } - } + int status = checkForces(forces); + if (status > 0) MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); } + } + mpirc = MPI_Finalize(); if (mpirc != MPI_SUCCESS) { From 4e82239a124bea0128562e627b15a82d07ec2bcb Mon Sep 17 00:00:00 2001 From: "Siu Wun \"Tony\" Cheung" Date: Sun, 30 Mar 2025 10:48:27 -0700 Subject: [PATCH 41/99] Fix setLocalForces (#335) --- src/Ions.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Ions.cc b/src/Ions.cc index 462373a9..70ca5c36 100644 --- a/src/Ions.cc +++ b/src/Ions.cc @@ -1341,8 +1341,8 @@ void Ions::setLocalForces( double p[3]; ion->getPosition(&p[0]); double d2 = (p[0] - (*cit)) * (p[0] - (*cit)) - + (p[1] - (*(cit + 1))) * (p[0] - (*(cit + 1))) - + (p[2] - (*(cit + 2))) * (p[0] - (*(cit + 2))); + + (p[1] - (*(cit + 1))) * (p[1] - (*(cit + 1))) + + (p[2] - (*(cit + 2))) * (p[2] - (*(cit + 2))); double d = std::sqrt(d2); if (d < tol) { From f715dd9961afa5d2589be19e9423a4885a4f8ac9 Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Fri, 4 Apr 2025 15:36:16 -0400 Subject: [PATCH 42/99] 
Fix test WFEnergyAndForces (#336) * writing restart twice in same directory was failing --- tests/WFEnergyAndForces/testWFEnergyAndForces.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/WFEnergyAndForces/testWFEnergyAndForces.cc b/tests/WFEnergyAndForces/testWFEnergyAndForces.cc index 64765c7a..f9039abe 100644 --- a/tests/WFEnergyAndForces/testWFEnergyAndForces.cc +++ b/tests/WFEnergyAndForces/testWFEnergyAndForces.cc @@ -190,6 +190,9 @@ int main(int argc, char** argv) } } + // rename output restart file + ct.out_restart_file = ct.out_restart_file + "1"; + mgmol->cleanup(); delete mgmol; From 89d502e776bd8150c8e127fdb9980741e73ab713 Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Fri, 4 Apr 2025 16:20:50 -0400 Subject: [PATCH 43/99] Add mixing option in MVP (#337) --- src/Control.cc | 4 +- src/MVPSolver.cc | 127 +++++++++++++++++++++++------------------ src/MVPSolver.h | 3 +- src/MVP_DMStrategy.cc | 2 +- tests/CMakeLists.txt | 7 +++ tests/MVP/test.py | 2 + tests/MVPmix/h2o.xyz | 6 ++ tests/MVPmix/mgmol.cfg | 36 ++++++++++++ tests/MVPmix/test.py | 103 +++++++++++++++++++++++++++++++++ 9 files changed, 230 insertions(+), 60 deletions(-) create mode 100644 tests/MVPmix/h2o.xyz create mode 100644 tests/MVPmix/mgmol.cfg create mode 100644 tests/MVPmix/test.py diff --git a/src/Control.cc b/src/Control.cc index 33fd93c4..22de9b27 100644 --- a/src/Control.cc +++ b/src/Control.cc @@ -699,8 +699,8 @@ void Control::setDefaultValues() void Control::adjust() { - // change dm_mix default to 1. if not using Davidson - if (it_algo_type_ != 2 && dm_mix < 0.) dm_mix = 1.; + // change dm_mix default to 1. if not using Davidson or MVP + if ((it_algo_type_ != 2 && DM_solver_ != 1) && dm_mix < 0.) 
dm_mix = 1.; if (nel_ - 2 * numst == 0) { diff --git a/src/MVPSolver.cc b/src/MVPSolver.cc index 6990e29d..770acdd0 100644 --- a/src/MVPSolver.cc +++ b/src/MVPSolver.cc @@ -47,13 +47,14 @@ MVPSolver::MVPSolver(MPI_Comm comm, std::ostream& os, Electrostatic* electrostat, MGmol* mgmol_strategy, const int numst, const double kbT, const std::vector>& global_indexes, - const short n_inner_steps, const bool use_old_dm) + const short n_inner_steps, const double mixing, const bool use_old_dm) : comm_(comm), os_(os), n_inner_steps_(n_inner_steps), use_old_dm_(use_old_dm), ions_(ions), - numst_(numst) + numst_(numst), + mixing_(mixing) { Control& ct = *(Control::instance()); if (onpe0 && ct.verbose > 0) @@ -267,65 +268,79 @@ int MVPSolver::solve(OrbitalsType& orbitals) MatrixType delta_dm("delta_dm", numst_, numst_); delta_dm = target; delta_dm -= dmInit; - - double de0 = evaluateDerivative(dmInit, delta_dm, ts0); - - if (std::abs(de0) < tol_de0 && inner_it > 0) + double beta = 0.; + if (mixing_ > 0.) { - if (onpe0 && ct.verbose > 0) - std::cout << "MVP: de0 = " << de0 - << ", convergence achieved" << std::endl; - break; + beta = mixing_; + if (onpe0 && ct.verbose > 1) + { + os_ << "MVP with beta = " << beta << std::endl; + os_ << std::setprecision(12); + os_ << std::fixed << "MVP inner iteration " << inner_it + << ", E0=" << e0 << std::endl; + } } - - // - // evaluate free energy at beta=1 - // - if (onpe0 && ct.verbose > 2) - std::cout << "MVP --- Target energy..." 
<< std::endl; - proj_mat_work_->setDM(target, orbitals.getIterativeIndex()); - proj_mat_work_->computeOccupationsFromDM(); - if (ct.verbose > 2) proj_mat_work_->printOccupations(os_); - const double nel = proj_mat_work_->getNel(); - if (onpe0 && ct.verbose > 1) - os_ << "MVP --- Number of electrons at beta=1 : " << nel - << std::endl; - - rho_->computeRho(orbitals, target); - - mgmol_strategy_->update_pot(vh_init, ions_); - - energy_->saveVofRho(); - - // update h11 + else { - h11 = h11_nl; - mgmol_strategy_->addHlocal2matrix(orbitals, orbitals, h11); - } - - proj_mat_work_->assignH(h11); - proj_mat_work_->setHB2H(); - - const double ts1 - = evalEntropyMVP(proj_mat_work_, (ct.verbose > 2), os_); - const double e1 = energy_->evaluateTotal( - ts1, proj_mat_work_, ions_, orbitals, ct.verbose - 1, os_); + double de0 = evaluateDerivative(dmInit, delta_dm, ts0); + + if (std::abs(de0) < tol_de0 && inner_it > 0) + { + if (onpe0 && ct.verbose > 0) + std::cout << "MVP: de0 = " << de0 + << ", convergence achieved" << std::endl; + break; + } + + // + // evaluate free energy at beta=1 + // + if (onpe0 && ct.verbose > 2) + std::cout << "MVP --- Target energy..." 
<< std::endl; + proj_mat_work_->setDM(target, orbitals.getIterativeIndex()); + proj_mat_work_->computeOccupationsFromDM(); + if (ct.verbose > 2) proj_mat_work_->printOccupations(os_); + const double nel = proj_mat_work_->getNel(); + if (onpe0 && ct.verbose > 1) + os_ << "MVP --- Number of electrons at beta=1 : " << nel + << std::endl; + + rho_->computeRho(orbitals, target); + + mgmol_strategy_->update_pot(vh_init, ions_); + + energy_->saveVofRho(); + + // update h11 + { + h11 = h11_nl; + mgmol_strategy_->addHlocal2matrix( + orbitals, orbitals, h11); + } + + proj_mat_work_->assignH(h11); + proj_mat_work_->setHB2H(); + + const double ts1 + = evalEntropyMVP(proj_mat_work_, (ct.verbose > 2), os_); + const double e1 = energy_->evaluateTotal(ts1, + proj_mat_work_, ions_, orbitals, ct.verbose - 1, os_); + + // line minimization + beta + = minQuadPolynomial(e0, e1, de0, (ct.verbose > 2), os_); + assert(!std::isnan(beta)); - // line minimization - const double beta - = minQuadPolynomial(e0, e1, de0, (ct.verbose > 2), os_); - assert(!std::isnan(beta)); - - if (onpe0 && ct.verbose > 0) - { - os_ << std::setprecision(12); - os_ << std::fixed << "MVP inner iteration " << inner_it - << ", E0=" << e0 << ", E1=" << e1; - os_ << std::scientific << ", E0'=" << de0 - << " -> beta=" << beta; - os_ << std::endl; + if (onpe0 && ct.verbose > 0) + { + os_ << std::setprecision(12); + os_ << std::fixed << "MVP inner iteration " << inner_it + << ", E0=" << e0 << ", E1=" << e1; + os_ << std::scientific << ", E0'=" << de0 + << " -> beta=" << beta; + os_ << std::endl; + } } - // update DM *work_ = dmInit; work_->axpy(beta, delta_dm); diff --git a/src/MVPSolver.h b/src/MVPSolver.h index 8e7a7c70..5558bcdd 100644 --- a/src/MVPSolver.h +++ b/src/MVPSolver.h @@ -33,6 +33,7 @@ class MVPSolver Ions& ions_; int numst_; + double mixing_; Rho* rho_; Energy* energy_; @@ -56,7 +57,7 @@ class MVPSolver Electrostatic* electrostat, MGmol* mgmol_strategy, const int numst, const double kbT, const std::vector>& 
global_indexes, - const short n_inner_steps, const bool use_old_dm); + const short n_inner_steps, const double mixing, const bool use_old_dm); ~MVPSolver(); int solve(OrbitalsType& orbitals); diff --git a/src/MVP_DMStrategy.cc b/src/MVP_DMStrategy.cc index 8712ae92..899bca46 100644 --- a/src/MVP_DMStrategy.cc +++ b/src/MVP_DMStrategy.cc @@ -54,7 +54,7 @@ int MVP_DMStrategy::update(OrbitalsType& orbitals) MVPSolver solver(comm_, os_, ions_, rho_, energy_, electrostat_, mgmol_strategy_, ct.numst, ct.occ_width, global_indexes_, - ct.dm_inner_steps, use_old_dm_); + ct.dm_inner_steps, ct.dm_mix, use_old_dm_); return solver.solve(orbitals); } diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 865ef354..d90b9ac0 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -500,6 +500,13 @@ add_test(NAME testMVP ${CMAKE_CURRENT_SOURCE_DIR}/MVP/mvp.cfg ${CMAKE_CURRENT_SOURCE_DIR}/MVP/coords.in ${CMAKE_CURRENT_SOURCE_DIR}/../potentials) +add_test(NAME testMVPmix + COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/MVPmix/test.py + ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 4 ${MPIEXEC_PREFLAGS} + ${CMAKE_CURRENT_BINARY_DIR}/../src/mgmol-opt + ${CMAKE_CURRENT_SOURCE_DIR}/MVPmix/mgmol.cfg + ${CMAKE_CURRENT_SOURCE_DIR}/MVPmix/h2o.xyz + ${CMAKE_CURRENT_SOURCE_DIR}/../potentials) add_test(NAME BandGapN2 COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/BandGapN2/test.py ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 3 ${MPIEXEC_PREFLAGS} diff --git a/tests/MVP/test.py b/tests/MVP/test.py index bd5d708e..57d9522e 100644 --- a/tests/MVP/test.py +++ b/tests/MVP/test.py @@ -51,6 +51,8 @@ energies=[] print("Check forces are smaller than tol = {}".format(tol)) for line in lines: + if line.count(b'MVP'): + print(line) if line.count(b'%%'): print(line) words=line.split() diff --git a/tests/MVPmix/h2o.xyz b/tests/MVPmix/h2o.xyz new file mode 100644 index 00000000..d5171c8b --- /dev/null +++ b/tests/MVPmix/h2o.xyz @@ -0,0 +1,6 @@ +3 + +O 0.00 0.00 0.00 +H -0.76 0.59 0.00 +H 0.76 
0.59 0.00 + diff --git a/tests/MVPmix/mgmol.cfg b/tests/MVPmix/mgmol.cfg new file mode 100644 index 00000000..10edc464 --- /dev/null +++ b/tests/MVPmix/mgmol.cfg @@ -0,0 +1,36 @@ +verbosity=2 +xcFunctional=PBE +FDtype=4th +[Mesh] +nx=48 +ny=48 +nz=48 +[Domain] +ox=-4.5 +oy=-4.5 +oz=-4.5 +lx=9. +ly=9. +lz=9. +[Potentials] +pseudopotential=pseudo.O_ONCV_PBE_SG15 +pseudopotential=pseudo.H_ONCV_PBE_SG15 +[Run] +type=QUENCH +[Quench] +solver=PSD +max_steps=100 +atol=1.e-8 +step_length=2. +ortho_freq=10 +[Orbitals] +initial_type=random +initial_width=1.5 +temperature=10. +nempty=5 +[Restart] +output_level=0 +[DensityMatrix] +solver=MVP +nb_inner_it=2 +mixing=0.5 diff --git a/tests/MVPmix/test.py b/tests/MVPmix/test.py new file mode 100644 index 00000000..8ffafdbb --- /dev/null +++ b/tests/MVPmix/test.py @@ -0,0 +1,103 @@ +#!/usr/bin/env python +import sys +import os +import subprocess +import string + +print("Test MVP solver with mixing coefficient...") + +nargs=len(sys.argv) + +mpicmd = sys.argv[1]+" "+sys.argv[2]+" "+sys.argv[3] +for i in range(4,nargs-4): + mpicmd = mpicmd + " "+sys.argv[i] +print("MPI run command: {}".format(mpicmd)) + +exe = sys.argv[nargs-4] +inp = sys.argv[nargs-3] +coords = sys.argv[nargs-2] +print("coordinates file: %s"%coords) + +#create links to potentials files +cwd = os.getcwd() + +dst = 'pseudo.O_ONCV_PBE_SG15' +src = sys.argv[nargs-1] + '/' + dst +if not os.path.exists(cwd+'/'+dst): + print("Create link to %s"%dst) + os.symlink(src, dst) + +dst = 'pseudo.H_ONCV_PBE_SG15' +src = sys.argv[nargs-1] + '/' + dst +if not os.path.exists(cwd+'/'+dst): + print("Create link to %s"%dst) + os.symlink(src, dst) + +#run mgmol +command = "{} {} -c {} -i {}".format(mpicmd,exe,inp,coords) +print("Run command: {}".format(command)) + +output = subprocess.check_output(command,stderr=subprocess.STDOUT,shell=True) + +#analyse mgmol standard output +#make sure force is below tolerance +lines=output.split(b'\n') + +convergence=0 +for line in lines: + if 
line.count(b'DFTsolver:') and line.count(b'convergence'): + convergence=1 + break + +if convergence==0: + print("MVP Solver did not converge") + sys.exit(1) + +flag = 0 +eigenvalues=[] +energies=[] +ecount=0 +for line in lines: + if line.count(b'FERMI'): + flag = 0 + if flag==1: + words=line.split() + for w in words: + eigenvalues.append(eval(w)) + if line.count(b'Eigenvalues'): + flag = 1 + eigenvalues=[] + if line.count(b'%%'): + words=line.split() + e=words[5][0:-1] + print(e) + ecount=ecount+1 + energies.append(eval(e)) +print(energies) + +print(eigenvalues) +tol = 1.e-4 +eigenvalue0 = -0.916 +if abs(eigenvalues[0]-eigenvalue0)>tol: + print("Expected eigenvalue 0 to be {}".format(eigenvalue0)) + sys.exit(1) +eigenvalue8 = 0.219 +if abs(eigenvalues[8]-eigenvalue8)>tol: + print("Expected eigenvalue 8 to be {}".format(eigenvalue8)) + sys.exit(1) + +niterations = ecount +print("MVP solver ran for {} iterations".format(niterations)) +if niterations>180: + print("MVP test FAILED for taking too many iterations") + sys.exit(1) + +print("Check energy...") +last_energy = energies[-1] +print("Energy = {}".format(last_energy)) +if last_energy>-17.16269: + print("Last energy = {}".format(last_energy)) + sys.exit(1) + +print("Test PASSED") +sys.exit(0) From f198397436eb7b4b3bd7e6a72d0fd5f47348623c Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Wed, 9 Apr 2025 10:48:07 -0400 Subject: [PATCH 44/99] Update MVP with mixing (#339) * use tolerance to terminate iterations * tune verbosity --- src/Control.cc | 6 +++++- src/MVPSolver.cc | 34 +++++++++++++++++++--------------- src/MVPSolver.h | 8 +++++++- src/MVP_DMStrategy.cc | 2 +- src/read_config.cc | 4 +++- 5 files changed, 35 insertions(+), 19 deletions(-) diff --git a/src/Control.cc b/src/Control.cc index 22de9b27..57e12336 100644 --- a/src/Control.cc +++ b/src/Control.cc @@ -224,6 +224,7 @@ void Control::print(std::ostream& os) << conv_tol << std::endl; os << std::fixed; os << " Density matrix mixing = " << dm_mix << 
std::endl; + os << " Density matrix tol = " << dm_tol << std::endl; if (DMEigensolver() == DMEigensolverType::Eigensolver) { os << " Density matrix computation algorithm = " @@ -439,7 +440,7 @@ void Control::sync(void) memset(&int_buffer[0], 0, size_int_buffer * sizeof(int)); } - const short size_float_buffer = 43; + const short size_float_buffer = 44; float* float_buffer = new float[size_float_buffer]; if (mype_ == 0) { @@ -485,6 +486,7 @@ void Control::sync(void) float_buffer[40] = threshold_eigenvalue_gram_quench_; float_buffer[41] = pair_mlwf_distance_threshold_; float_buffer[42] = e0_; + float_buffer[43] = dm_tol; } else { @@ -680,6 +682,7 @@ void Control::sync(void) threshold_eigenvalue_gram_quench_ = float_buffer[40]; pair_mlwf_distance_threshold_ = float_buffer[41]; e0_ = float_buffer[42]; + dm_tol = float_buffer[43]; max_electronic_steps_loose_ = max_electronic_steps; delete[] short_buffer; @@ -1720,6 +1723,7 @@ void Control::setOptions(const boost::program_options::variables_map& vm) lrs_extrapolation = 10; dm_mix = vm["DensityMatrix.mixing"].as(); + dm_tol = vm["DensityMatrix.tol"].as(); dm_inner_steps = vm["DensityMatrix.nb_inner_it"].as(); dm_use_old_ = vm["DensityMatrix.use_old"].as() ? 
1 : 0; str = vm["DensityMatrix.algo"].as(); diff --git a/src/MVPSolver.cc b/src/MVPSolver.cc index 770acdd0..1c093e53 100644 --- a/src/MVPSolver.cc +++ b/src/MVPSolver.cc @@ -47,14 +47,16 @@ MVPSolver::MVPSolver(MPI_Comm comm, std::ostream& os, Electrostatic* electrostat, MGmol* mgmol_strategy, const int numst, const double kbT, const std::vector>& global_indexes, - const short n_inner_steps, const double mixing, const bool use_old_dm) + const short n_inner_steps, const double mixing, const double tol_de0, + const bool use_old_dm) : comm_(comm), os_(os), n_inner_steps_(n_inner_steps), use_old_dm_(use_old_dm), ions_(ions), numst_(numst), - mixing_(mixing) + mixing_(mixing), + tol_de0_(tol_de0) { Control& ct = *(Control::instance()); if (onpe0 && ct.verbose > 0) @@ -208,7 +210,6 @@ int MVPSolver::solve(OrbitalsType& orbitals) kbpsi.computeHvnlMatrix(&kbpsi, ions_, h11_nl); - const double tol_de0 = 1.e-12; for (int inner_it = 0; inner_it < n_inner_steps_; inner_it++) { if (onpe0 && ct.verbose > 1) @@ -268,13 +269,26 @@ int MVPSolver::solve(OrbitalsType& orbitals) MatrixType delta_dm("delta_dm", numst_, numst_); delta_dm = target; delta_dm -= dmInit; + + double de0 = evaluateDerivative(dmInit, delta_dm, ts0); + + // check for convergence + if (std::abs(de0) < tol_de0_ && inner_it > 0) + { + if (onpe0 && ct.verbose > 0) + std::cout << "MVP: de0 = " << de0 + << ", convergence achieved" << std::endl; + break; + } + double beta = 0.; if (mixing_ > 0.) 
{ beta = mixing_; - if (onpe0 && ct.verbose > 1) + if (onpe0 && ct.verbose > 0) { - os_ << "MVP with beta = " << beta << std::endl; + if (ct.verbose > 1) + os_ << "MVP with beta = " << beta << std::endl; os_ << std::setprecision(12); os_ << std::fixed << "MVP inner iteration " << inner_it << ", E0=" << e0 << std::endl; @@ -282,16 +296,6 @@ int MVPSolver::solve(OrbitalsType& orbitals) } else { - double de0 = evaluateDerivative(dmInit, delta_dm, ts0); - - if (std::abs(de0) < tol_de0 && inner_it > 0) - { - if (onpe0 && ct.verbose > 0) - std::cout << "MVP: de0 = " << de0 - << ", convergence achieved" << std::endl; - break; - } - // // evaluate free energy at beta=1 // diff --git a/src/MVPSolver.h b/src/MVPSolver.h index 5558bcdd..77a6a345 100644 --- a/src/MVPSolver.h +++ b/src/MVPSolver.h @@ -35,6 +35,11 @@ class MVPSolver int numst_; double mixing_; + /*! + * tolerance on energy slope in inner iterations + */ + double tol_de0_; + Rho* rho_; Energy* energy_; Electrostatic* electrostat_; @@ -57,7 +62,8 @@ class MVPSolver Electrostatic* electrostat, MGmol* mgmol_strategy, const int numst, const double kbT, const std::vector>& global_indexes, - const short n_inner_steps, const double mixing, const bool use_old_dm); + const short n_inner_steps, const double mixing, const double tol_de0, + const bool use_old_dm); ~MVPSolver(); int solve(OrbitalsType& orbitals); diff --git a/src/MVP_DMStrategy.cc b/src/MVP_DMStrategy.cc index 899bca46..3fb53dee 100644 --- a/src/MVP_DMStrategy.cc +++ b/src/MVP_DMStrategy.cc @@ -54,7 +54,7 @@ int MVP_DMStrategy::update(OrbitalsType& orbitals) MVPSolver solver(comm_, os_, ions_, rho_, energy_, electrostat_, mgmol_strategy_, ct.numst, ct.occ_width, global_indexes_, - ct.dm_inner_steps, ct.dm_mix, use_old_dm_); + ct.dm_inner_steps, ct.dm_mix, ct.dm_tol, use_old_dm_); return solver.solve(orbitals); } diff --git a/src/read_config.cc b/src/read_config.cc index 642b1b99..ed0e66e1 100644 --- a/src/read_config.cc +++ b/src/read_config.cc @@ -297,7 
+297,9 @@ int read_config(int argc, char** argv, po::variables_map& vm, po::value()->default_value(0), "Flag for computing new centers from extrapolated orbitals.")( "DensityMatrix.mixing", po::value()->default_value(-1.), - "Mixing coefficient for Density Matrix")("DensityMatrix.solver", + "Mixing coefficient for Density Matrix")("DensityMatrix.tol", + po::value()->default_value(1.e-12), + "Tolerance for Density Matrix convergence")("DensityMatrix.solver", po::value()->default_value("Mixing"), "Algorithm for updating Density Matrix: Mixing, MVP, HMVP")( "DensityMatrix.nb_inner_it", po::value()->default_value(3), From 781d7e563a53fb65ca15b9e50813164a2f322f44 Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Fri, 11 Apr 2025 16:32:34 -0400 Subject: [PATCH 45/99] Update DensityMatrix class (#342) * add extrapolation function * remove unused function * simplify use of iterative index --- src/DavidsonSolver.cc | 12 +-- src/DensityMatrix.cc | 81 +++++++++---------- src/DensityMatrix.h | 40 ++++----- src/DensityMatrixSparse.cc | 15 ++-- src/DensityMatrixSparse.h | 16 ++-- src/EigenDMStrategy.cc | 2 +- src/HamiltonianMVPSolver.cc | 22 ++--- src/MGmol.cc | 4 +- src/MVPSolver.cc | 12 +-- src/NonOrthoDMStrategy.cc | 6 +- src/ProjectedMatrices.cc | 52 ++++++------ src/ProjectedMatrices.h | 40 ++++----- src/ProjectedMatrices2N.cc | 4 +- src/ProjectedMatrices2N.h | 4 +- src/ProjectedMatricesInterface.h | 22 ++--- src/ProjectedMatricesSparse.h | 9 +-- .../testDMandEnergyAndForces.cc | 2 +- .../testRestartEnergyAndForces.cc | 2 +- tests/testDensityMatrix.cc | 4 +- 19 files changed, 154 insertions(+), 195 deletions(-) diff --git a/src/DavidsonSolver.cc b/src/DavidsonSolver.cc index a049fde7..c34b6396 100644 --- a/src/DavidsonSolver.cc +++ b/src/DavidsonSolver.cc @@ -206,8 +206,8 @@ double DavidsonSolver::evaluateDerivative( const double dbeta = 0.0001; *work2N_ = dm2Ninit; work2N_->axpy(dbeta, delta_dm); - // proj_mat2N_->setDM(*work2N_,orbitals.getIterativeIndex()); - 
proj_mat2N_->setDM(*work2N_, -1); + + proj_mat2N_->setDM(*work2N_); proj_mat2N_->computeOccupationsFromDM(); const double tsd0e = evalEntropy(proj_mat2N_.get(), false, os_); @@ -250,7 +250,7 @@ void DavidsonSolver::buildTarget2N_MVP( // if( onpe0 )os_<<"Compute X2N..."<setHB2H(); - proj_mat2N_->updateDM(orbitals_index); + proj_mat2N_->updateDM(); target = proj_mat2N_->dm(); @@ -584,7 +584,7 @@ int DavidsonSolver::solve( // if (mmpi.PE0() && ct.verbose > 2) os_ << "Target energy..." << std::endl; - proj_mat2N_->setDM(target, orbitals.getIterativeIndex()); + proj_mat2N_->setDM(target); proj_mat2N_->computeOccupationsFromDM(); double nel = proj_mat2N_->getNel(); if (mmpi.PE0() && ct.verbose > 2) @@ -643,7 +643,7 @@ int DavidsonSolver::solve( // update DM *work2N_ = dm2Ninit; work2N_->axpy(beta, delta_dm); - proj_mat2N_->setDM(*work2N_, orbitals.getIterativeIndex()); + proj_mat2N_->setDM(*work2N_); if (inner_it < ct.dm_inner_steps - 1) { @@ -809,7 +809,7 @@ int DavidsonSolver::solve( = dynamic_cast*>( orbitals.getProjMatrices()); assert(pmat); - pmat->buildDM(new_occ, orbitals.getIterativeIndex()); + pmat->buildDM(new_occ); if (retval == 0) break; diff --git a/src/DensityMatrix.cc b/src/DensityMatrix.cc index 78d7008e..7be19aab 100644 --- a/src/DensityMatrix.cc +++ b/src/DensityMatrix.cc @@ -22,8 +22,6 @@ const double factor_kernel4dot = 10.; -#define PROCRUSTES 0 - #define MGMOL_DENSITYMATRIX_FAIL(X) \ { \ std::cerr << "DensityMatrix failure:" << std::endl; \ @@ -36,7 +34,7 @@ const double factor_kernel4dot = 10.; template DensityMatrix::DensityMatrix(const int ndim) : dim_(ndim), - orbitals_index_(-1), + update_index_(-1), occ_uptodate_(false), uniform_occ_(false), stripped_(false) @@ -66,8 +64,8 @@ DensityMatrix::~DensityMatrix() } template -void DensityMatrix::build(const MatrixType& zmat, - const std::vector& occ, const int new_orbitals_index) +void DensityMatrix::build( + const MatrixType& zmat, const std::vector& occ) { #ifdef PRINT_OPERATIONS MGmol_MPI& mmpi = 
*(MGmol_MPI::instance()); @@ -98,32 +96,30 @@ void DensityMatrix::build(const MatrixType& zmat, work_->symm('r', 'l', 1., gamma, zmat, 0.); kernel4dot_->gemm('n', 't', 1., *work_, zmat, 0.); - stripped_ = false; - orbitals_index_ = new_orbitals_index; + stripped_ = false; + update_index_++; } template -void DensityMatrix::build( - const MatrixType& zmat, const int new_orbitals_index) +void DensityMatrix::build(const MatrixType& zmat) { - build(zmat, occupation_, new_orbitals_index); + build(zmat, occupation_); } // build diagonal matrix template -void DensityMatrix::build( - const std::vector& occ, const int new_orbitals_index) +void DensityMatrix::build(const std::vector& occ) { assert(dm_ != nullptr); assert(!occ.empty()); setOccupations(occ); - build(new_orbitals_index); + build(); } template -void DensityMatrix::build(const int new_orbitals_index) +void DensityMatrix::build() { MGmol_MPI& mmpi = *(MGmol_MPI::instance()); #ifdef PRINT_OPERATIONS @@ -148,13 +144,12 @@ void DensityMatrix::build(const int new_orbitals_index) * std::min(1., factor_kernel4dot * occupation_[i])); kernel4dot_->setDiagonal(w); - stripped_ = false; - orbitals_index_ = new_orbitals_index; + stripped_ = false; + update_index_++; } template -void DensityMatrix::setUniform( - const double nel, const int new_orbitals_index) +void DensityMatrix::setUniform(const double nel) { assert(!occupation_.empty()); @@ -171,22 +166,13 @@ void DensityMatrix::setUniform( uniform_occ_ = true; - build(occupation_, new_orbitals_index); -} - -template -void DensityMatrix::buildFromBlock(const MatrixType& block00) -{ - dm_->clear(); - dm_->assign(block00, 0, 0); - dm_->print(std::cout, 0, 0, 25, 25); + build(occupation_); } template void DensityMatrix::rotate( const MatrixType& rotation_matrix, const bool flag_eigen) { - if (!flag_eigen) { MatrixType invU(rotation_matrix); @@ -204,6 +190,8 @@ void DensityMatrix::rotate( invU.getrs('n', tmp, ipiv); *dm_ = tmp; + + update_index_++; } } @@ -370,8 +358,7 @@ double 
DensityMatrix::computeEntropy() const } template -void DensityMatrix::setto2InvS( - const MatrixType& invS, const int orbitals_index) +void DensityMatrix::setto2InvS(const MatrixType& invS) { *dm_ = invS; dm_->scal(orbital_occupation_); @@ -382,8 +369,8 @@ void DensityMatrix::setto2InvS( occupation_[st] = 1.; occ_uptodate_ = true; } - uniform_occ_ = false; - orbitals_index_ = orbitals_index; + uniform_occ_ = false; + update_index_++; } template @@ -400,8 +387,7 @@ void DensityMatrix::stripS(const MatrixType& ls) } template -void DensityMatrix::dressUpS( - const MatrixType& ls, const int new_orbitals_index) +void DensityMatrix::dressUpS(const MatrixType& ls) { assert(stripped_); @@ -410,10 +396,11 @@ void DensityMatrix::dressUpS( *dm_ = *work_; ls.trtrs('l', 't', 'n', *dm_); - orbitals_index_ = new_orbitals_index; - occ_uptodate_ = false; - uniform_occ_ = false; - stripped_ = false; + update_index_++; + + occ_uptodate_ = false; + uniform_occ_ = false; + stripped_ = false; } // dm_ -> u*dm_*u^T @@ -424,6 +411,8 @@ void DensityMatrix::transform(const MatrixType& u) { work_->gemm('n', 't', 1., *dm_, u, 0.); dm_->gemm('n', 'n', 1., u, *work_, 0.); + + update_index_++; } template @@ -434,13 +423,21 @@ double DensityMatrix::getExpectation(const MatrixType& A) } template -void DensityMatrix::mix( - const double mix, const MatrixType& matA, const int new_orbitals_index) +void DensityMatrix::mix(const double mix, const MatrixType& matA) { dm_->scal(1. - mix); - dm_->axpy(mix, matA); - orbitals_index_ = new_orbitals_index; + + update_index_++; +} + +template +void DensityMatrix::linearExtrapolate(const MatrixType& previous_dm) +{ + dm_->scal(2.); + dm_->axpy(-1., previous_dm); + + update_index_++; } template diff --git a/src/DensityMatrix.h b/src/DensityMatrix.h index 1d09e79e..8ce74542 100644 --- a/src/DensityMatrix.h +++ b/src/DensityMatrix.h @@ -31,7 +31,10 @@ class DensityMatrix MatrixType* kernel4dot_; MatrixType* work_; - int orbitals_index_; + /*! 
+ * Keep track of changes, incremented every time dm_ is updated + */ + int update_index_; bool occ_uptodate_; bool uniform_occ_; @@ -46,16 +49,16 @@ class DensityMatrix DensityMatrix& operator=(const DensityMatrix&); DensityMatrix(const DensityMatrix&); - void build(const int new_orbitals_index); + void build(); public: DensityMatrix(const int ndim); ~DensityMatrix(); - void setUniform(const double nel, const int new_orbitals_index); + void setUniform(const double nel); - int getOrbitalsIndex() const { return orbitals_index_; } + int getIndex() const { return update_index_; } bool occupationsUptodate() const { return occ_uptodate_; } bool fromUniformOccupations() const { return uniform_occ_; } @@ -84,10 +87,10 @@ class DensityMatrix const MatrixType& kernel4dot() const { return *kernel4dot_; } - void setMatrix(const MatrixType& mat, const int orbitals_index) + void setMatrix(const MatrixType& mat) { - *dm_ = mat; - orbitals_index_ = orbitals_index; + *dm_ = mat; + update_index_++; setDummyOcc(); @@ -112,7 +115,7 @@ class DensityMatrix uniform_occ_ = false; stripped_ = false; - orbitals_index_ = 0; + update_index_ = 0; } void getOccupations(std::vector& occ) const @@ -126,22 +129,19 @@ class DensityMatrix void setOccupations(const std::vector& occ); - void setto2InvS(const MatrixType& invS, const int orbitals_index); + void setto2InvS(const MatrixType& invS); void stripS(const MatrixType& ls); - void dressUpS(const MatrixType& ls, const int new_orbitals_index); + void dressUpS(const MatrixType& ls); // dm_ -> u*dm_*u^T void transform(const MatrixType& u); - void buildFromBlock(const MatrixType& block00); - double computeEntropy() const; void computeOccupations(const MatrixType& ls); - void build(const std::vector& occ, const int new_orbitals_index); - void build(const MatrixType& z, const int new_orbitals_index); - void build(const MatrixType& z, const std::vector& occ, - const int new_orbitals_index); + void build(const std::vector& occ); + void build(const 
MatrixType& z); + void build(const MatrixType& z, const std::vector& occ); void rotate(const MatrixType& rotation_matrix, const bool flag_eigen); void printOccupations(std::ostream& os) const; @@ -149,8 +149,12 @@ class DensityMatrix void diagonalize( const char eigv, std::vector& occ, MatrixType& vect); double getExpectation(const MatrixType& A); - void mix( - const double mix, const MatrixType& matA, const int new_orbitals_index); + void mix(const double mix, const MatrixType& matA); + + /*! + * dm <- dm + (dm-previous_dm) = 2.*dm - previous_dm + */ + void linearExtrapolate(const MatrixType& previous_dm); int write(HDFrestart& h5f_file, std::string& name); int read(HDFrestart& h5f_file, std::string& name); diff --git a/src/DensityMatrixSparse.cc b/src/DensityMatrixSparse.cc index 4217b8ac..e554501a 100644 --- a/src/DensityMatrixSparse.cc +++ b/src/DensityMatrixSparse.cc @@ -26,7 +26,7 @@ DensityMatrixSparse::DensityMatrixSparse( MGmol_MPI& mmpi = *(MGmol_MPI::instance()); orbital_occupation_ = mmpi.nspin() > 1 ? 1. 
: 2.; - orbitals_index_ = -1; + update_index_ = -1; if (dim_ > 0) { @@ -44,27 +44,24 @@ DensityMatrixSparse::~DensityMatrixSparse() } } -void DensityMatrixSparse::setUniform(const double nel, const int orbitals_index) +void DensityMatrixSparse::setUniform(const double nel) { const double occ = (double)((double)nel / (double)dim_); assert(occ < 1.01); - orbitals_index_ = orbitals_index; + update_index_++; (*dm_).reset(); const double uval = (double)occ * orbital_occupation_; for (std::vector::const_iterator st = locvars_.begin(); st != locvars_.end(); ++st) (*dm_).insertMatrixElement(*st, *st, uval, INSERT, true); - - return; } -void DensityMatrixSparse::setto2InvS( - const VariableSizeMatrix& invS, const int orbitals_index) +void DensityMatrixSparse::setto2InvS(const VariableSizeMatrix& invS) { *dm_ = invS; dm_->scale(orbital_occupation_); - orbitals_index_ = orbitals_index; + update_index_++; } // build density matrix, given computed locally centered data void DensityMatrixSparse::assembleMatrixFromCenteredData( @@ -92,7 +89,7 @@ void DensityMatrixSparse::assembleMatrixFromCenteredData( dtor_DM.updateLocalRows((*dm_)); gather_DM_tm_.stop(); - orbitals_index_ = orbitals_index; + update_index_ = orbitals_index; } // compute trace of dot product dm_ . 
vsmat double DensityMatrixSparse::getTraceDotProductWithMat( diff --git a/src/DensityMatrixSparse.h b/src/DensityMatrixSparse.h index b574bd0b..170057fa 100644 --- a/src/DensityMatrixSparse.h +++ b/src/DensityMatrixSparse.h @@ -32,7 +32,7 @@ class DensityMatrixSparse VariableSizeMatrix* dm_; - int orbitals_index_; + int update_index_; double orbital_occupation_; @@ -43,16 +43,14 @@ class DensityMatrixSparse ~DensityMatrixSparse(); - void setUniform(const double nel, const int new_orbitals_index); + void setUniform(const double nel); - void setto2InvS( - const VariableSizeMatrix& invS, const int orbitals_index); - int getOrbitalsIndex() const { return orbitals_index_; } - void setMatrix( - const VariableSizeMatrix& mat, const int orbitals_index) + void setto2InvS(const VariableSizeMatrix& invS); + int getIndex() const { return update_index_; } + void setMatrix(const VariableSizeMatrix& mat) { - *dm_ = mat; - orbitals_index_ = orbitals_index; + *dm_ = mat; + update_index_++; } void assembleMatrixFromCenteredData(const std::vector& data, const std::vector& locRowIds, const int* globalColIds, diff --git a/src/EigenDMStrategy.cc b/src/EigenDMStrategy.cc index 8fd8443a..358ed5f9 100644 --- a/src/EigenDMStrategy.cc +++ b/src/EigenDMStrategy.cc @@ -37,7 +37,7 @@ int EigenDMStrategy::update(OrbitalsType& orbitals) = dynamic_cast< ProjectedMatrices>*>( proj_matrices_); - pmat->updateDMwithEigenstatesAndRotate(orbitals.getIterativeIndex(), zz); + pmat->updateDMwithEigenstatesAndRotate(zz); // if( onpe0 && ct.verbose>2 ) // (*MPIdata::sout)<<"get_dm_diag: rotate orbitals "<::solve( ProjMatrixType* projmatrices = dynamic_cast(orbitals.getProjMatrices()); - int iterative_index = 0; - // save computed vh for a fair energy "comparison" with vh computed // in close neigborhood const pb::GridFunc vh_init(electrostat_->getVh()); @@ -134,13 +132,11 @@ int HamiltonianMVPSolver::solve( // // evaluate energy at origin // - iterative_index++; - projmatrices->assignH(*hmatrix_); 
projmatrices->setHB2H(); // update DM and compute entropy - projmatrices->updateDM(iterative_index); + projmatrices->updateDM(); double ts0 = evalEntropyMVP(projmatrices, true, os_); // Update density rho_->update(orbitals); @@ -168,10 +164,8 @@ int HamiltonianMVPSolver::solve( // MatrixType htarget(projmatrices->getH()); - iterative_index++; - // update DM and compute entropy - projmatrices->updateDM(iterative_index); + projmatrices->updateDM(); double ts1 = evalEntropyMVP(projmatrices, true, os_); // Update density rho_->update(orbitals); @@ -204,10 +198,8 @@ int HamiltonianMVPSolver::solve( projmatrices->assignH(h11); projmatrices->setHB2H(); - iterative_index++; - // update DM and entropy - projmatrices->updateDM(iterative_index); + projmatrices->updateDM(); double tsi = evalEntropyMVP(projmatrices, true, os_); // Update density @@ -262,10 +254,8 @@ int HamiltonianMVPSolver::solve( projmatrices->assignH(h11); projmatrices->setHB2H(); - iterative_index++; - // update DM and entropy - projmatrices->updateDM(iterative_index); + projmatrices->updateDM(); tsi = evalEntropyMVP(projmatrices, true, os_); // Update density @@ -325,9 +315,7 @@ int HamiltonianMVPSolver::solve( projmatrices->assignH(*hmatrix_); projmatrices->setHB2H(); - iterative_index++; - - projmatrices->updateDM(iterative_index); + projmatrices->updateDM(); // Generate new density rho_->update(orbitals); diff --git a/src/MGmol.cc b/src/MGmol.cc index 018a07a2..b3db1b77 100644 --- a/src/MGmol.cc +++ b/src/MGmol.cc @@ -364,9 +364,7 @@ int MGmol::initial() // initialize Rho if (ct.verbose > 0) printWithTimeStamp("Initialize Rho...", os_); - if (ct.restart_info <= 1) - proj_matrices_->setDMuniform( - ct.getNelSpin(), current_orbitals_->getIterativeIndex()); + if (ct.restart_info <= 1) proj_matrices_->setDMuniform(ct.getNelSpin()); rho_->setup(ct.getOrthoType(), current_orbitals_->getOverlappingGids()); diff --git a/src/MVPSolver.cc b/src/MVPSolver.cc index 1c093e53..4f9165a5 100644 --- a/src/MVPSolver.cc 
+++ b/src/MVPSolver.cc @@ -104,8 +104,8 @@ double MVPSolver::evaluateDerivative( const double dbeta = 0.001; *work_ = dmInit; work_->axpy(dbeta, delta_dm); - // proj_mat_work_->setDM(*work_,orbitals.getIterativeIndex()); - proj_mat_work_->setDM(*work_, -1); + + proj_mat_work_->setDM(*work_); proj_mat_work_->computeOccupationsFromDM(); const double tsd0e = evalEntropyMVP(proj_mat_work_, false, os_); @@ -145,7 +145,7 @@ void MVPSolver::buildTarget_MVP( // proj_mat_work_->setHB2H(); - proj_mat_work_->updateDM(orbitals_index); + proj_mat_work_->updateDM(); target = proj_mat_work_->dm(); if (ct.verbose > 2) @@ -301,7 +301,7 @@ int MVPSolver::solve(OrbitalsType& orbitals) // if (onpe0 && ct.verbose > 2) std::cout << "MVP --- Target energy..." << std::endl; - proj_mat_work_->setDM(target, orbitals.getIterativeIndex()); + proj_mat_work_->setDM(target); proj_mat_work_->computeOccupationsFromDM(); if (ct.verbose > 2) proj_mat_work_->printOccupations(os_); const double nel = proj_mat_work_->getNel(); @@ -353,7 +353,7 @@ int MVPSolver::solve(OrbitalsType& orbitals) { *work_ = target; } - proj_mat_work_->setDM(*work_, orbitals.getIterativeIndex()); + proj_mat_work_->setDM(*work_); if (inner_it < n_inner_steps_ - 1) { @@ -373,7 +373,7 @@ int MVPSolver::solve(OrbitalsType& orbitals) ProjectedMatrices* projmatrices = dynamic_cast*>( orbitals.getProjMatrices()); - projmatrices->setDM(*work_, orbitals.getIterativeIndex()); + projmatrices->setDM(*work_); projmatrices->setEigenvalues(proj_mat_work_->getEigenvalues()); projmatrices->assignH(proj_mat_work_->getH()); projmatrices->setHB2H(); diff --git a/src/NonOrthoDMStrategy.cc b/src/NonOrthoDMStrategy.cc index 3835b039..28a8f1d5 100644 --- a/src/NonOrthoDMStrategy.cc +++ b/src/NonOrthoDMStrategy.cc @@ -31,7 +31,7 @@ void NonOrthoDMStrategy::initialize(OrbitalsType& orbitals) (*MPIdata::sout) << "NonOrthoDMStrategy::initialize()..." 
<< std::endl; } - proj_matrices_->updateDM(orbitals.getIterativeIndex()); + proj_matrices_->updateDM(); } template @@ -59,11 +59,11 @@ int NonOrthoDMStrategy::update(OrbitalsType& orbitals) if (mix_ < 1.) proj_matrices_->saveDM(); // compute new density matrix - proj_matrices_->updateDM(orbitals.getIterativeIndex()); + proj_matrices_->updateDM(); if (mix_ < 1.) { - proj_matrices_->updateDMwithRelax(mix_, orbitals.getIterativeIndex()); + proj_matrices_->updateDMwithRelax(mix_); } if (ct.verbose > 2) diff --git a/src/ProjectedMatrices.cc b/src/ProjectedMatrices.cc index 74ff01ed..87c9fd48 100644 --- a/src/ProjectedMatrices.cc +++ b/src/ProjectedMatrices.cc @@ -271,7 +271,7 @@ void ProjectedMatrices::setDMto2InvS() if (mmpi.instancePE0() && ct.verbose > 1) std::cout << "ProjectedMatrices::setDMto2InvS()..." << std::endl; - dm_->setto2InvS(gm_->getInverse(), gm_->getAssociatedOrbitalsIndex()); + dm_->setto2InvS(gm_->getInverse()); } template @@ -296,30 +296,28 @@ void ProjectedMatrices::solveGenEigenProblem( } template -void ProjectedMatrices::buildDM( - const MatrixType& z, const int orbitals_index) +void ProjectedMatrices::buildDM(const MatrixType& z) { - dm_->build(z, orbitals_index); + dm_->build(z); } template -void ProjectedMatrices::buildDM(const MatrixType& z, - const std::vector& occ, const int orbitals_index) +void ProjectedMatrices::buildDM( + const MatrixType& z, const std::vector& occ) { - dm_->build(z, occ, orbitals_index); + dm_->build(z, occ); } template void ProjectedMatrices::buildDM( - const std::vector& occ, const int orbitals_index) + const std::vector& occ) { - dm_->build(occ, orbitals_index); + dm_->build(occ); } // Use Chebyshev approximation to compute chemical potential and density matrix template -void ProjectedMatrices::updateDMwithChebApproximation( - const int iterative_index) +void ProjectedMatrices::updateDMwithChebApproximation() { MGmol_MPI& mmpi = *(MGmol_MPI::instance()); Control& ct = *(Control::instance()); @@ -354,15 +352,14 @@ 
void ProjectedMatrices::updateDMwithChebApproximation( } // compute chemical potential and density matrix with Chebyshev // approximation. - double final_mu = computeChemicalPotentialAndDMwithChebyshev( - order, emin, emax, iterative_index); + double final_mu + = computeChemicalPotentialAndDMwithChebyshev(order, emin, emax); if (mmpi.instancePE0() && ct.verbose > 1) std::cout << "Final mu_ = " << final_mu << " [Ha]" << std::endl; } template -void ProjectedMatrices::updateDMwithEigenstates( - const int iterative_index) +void ProjectedMatrices::updateDMwithEigenstates() { MGmol_MPI& mmpi = *(MGmol_MPI::instance()); Control& ct = *(Control::instance()); @@ -381,14 +378,14 @@ void ProjectedMatrices::updateDMwithEigenstates( // Build the density matrix X // X = Z * gamma * Z^T - buildDM(zz, iterative_index); + buildDM(zz); } //"replicated" implementation of SP2. // Theta is replicated on each MPI task, and SP2 solve run independently // by each MPI task template -void ProjectedMatrices::updateDMwithSP2(const int iterative_index) +void ProjectedMatrices::updateDMwithSP2() { MGmol_MPI& mmpi = *(MGmol_MPI::instance()); Control& ct = *(Control::instance()); @@ -433,21 +430,21 @@ void ProjectedMatrices::updateDMwithSP2(const int iterative_index) MatrixType dm("dm", dim_, dim_); sp2.getDM(dm, gm_->getInverse()); - dm_->setMatrix(dm, iterative_index); + dm_->setMatrix(dm); } template -void ProjectedMatrices::updateDM(const int iterative_index) +void ProjectedMatrices::updateDM() { Control& ct = *(Control::instance()); MGmol_MPI& mmpi = *(MGmol_MPI::instance()); if (ct.DMEigensolver() == DMEigensolverType::Eigensolver) - updateDMwithEigenstates(iterative_index); + updateDMwithEigenstates(); else if (ct.DMEigensolver() == DMEigensolverType::Chebyshev) - updateDMwithChebApproximation(iterative_index); + updateDMwithChebApproximation(); else if (ct.DMEigensolver() == DMEigensolverType::SP2) - updateDMwithSP2(iterative_index); + updateDMwithSP2(); else { std::cerr << "Eigensolver 
not available in " @@ -470,7 +467,7 @@ void ProjectedMatrices::updateDM(const int iterative_index) template void ProjectedMatrices::updateDMwithEigenstatesAndRotate( - const int iterative_index, MatrixType& zz) + MatrixType& zz) { // solves generalized eigenvalue problem // and return solution in zz @@ -479,7 +476,7 @@ void ProjectedMatrices::updateDMwithEigenstatesAndRotate( rotateAll(zz, true); - dm_->build(zz, iterative_index); + dm_->build(zz); } template @@ -614,7 +611,7 @@ void ProjectedMatrices::dressupDM() if (mmpi.instancePE0()) std::cout << "ProjectedMatrices::dressupDM()" << std::endl; #endif - dm_->dressUpS(gm_->getCholeskyL(), gm_->getAssociatedOrbitalsIndex()); + dm_->dressUpS(gm_->getCholeskyL()); } template @@ -999,8 +996,7 @@ double ProjectedMatrices::computeTraceInvSmultMatMultTheta( template double ProjectedMatrices::computeChemicalPotentialAndDMwithChebyshev( - const int order, const double emin, const double emax, - const int iterative_index) + const int order, const double emin, const double emax) { assert(emax > emin); assert(nel_ >= 0.); @@ -1175,7 +1171,7 @@ ProjectedMatrices::computeChemicalPotentialAndDMwithChebyshev( dm.gemm('N', 'N', 1., tmp, gm_->getInverse(), 0.); double orbital_occupation = mmpi.nspin() > 1 ? 1. 
: 2.; dm.scal(orbital_occupation); - dm_->setMatrix(dm, iterative_index); + dm_->setMatrix(dm); return mu_; } diff --git a/src/ProjectedMatrices.h b/src/ProjectedMatrices.h index f2881763..142dbf70 100644 --- a/src/ProjectedMatrices.h +++ b/src/ProjectedMatrices.h @@ -80,8 +80,8 @@ class ProjectedMatrices : public ProjectedMatricesInterface { computeChemicalPotentialAndOccupations(width_, dim_); } - double computeChemicalPotentialAndDMwithChebyshev(const int order, - const double emin, const double emax, const int iterative_index); + double computeChemicalPotentialAndDMwithChebyshev( + const int order, const double emin, const double emax); protected: // indexes corresponding to valid function in each subdomain @@ -197,12 +197,9 @@ class ProjectedMatrices : public ProjectedMatricesInterface int getDMMatrixIndex() const override { assert(dm_); - return dm_->getOrbitalsIndex(); - } - void setDMuniform(const double nel, const int orbitals_index) override - { - dm_->setUniform(nel, orbitals_index); + return dm_->getIndex(); } + void setDMuniform(const double nel) override { dm_->setUniform(nel); } int dim() const { return dim_; } void computeInvS() override; @@ -280,10 +277,9 @@ class ProjectedMatrices : public ProjectedMatricesInterface } void setDMto2InvS() override; - void buildDM(const MatrixType& z, const int orbitals_index); - void buildDM(const MatrixType& z, const std::vector&, - const int orbitals_index); - void buildDM(const std::vector&, const int orbitals_index); + void buildDM(const MatrixType& z); + void buildDM(const MatrixType& z, const std::vector&); + void buildDM(const std::vector&); double getEigSum() override; double getExpectation(const MatrixType& A); @@ -323,12 +319,11 @@ class ProjectedMatrices : public ProjectedMatricesInterface int readDM(HDFrestart& h5f_file) override; int readWFDM(HDFrestart& h5f_file); void printEigenvalues(std::ostream& os) const; - void updateDM(const int iterative_index) override; - void updateDMwithEigenstates(const 
int iterative_index); - void updateDMwithSP2(const int iterative_index); - void updateDMwithEigenstatesAndRotate( - const int iterative_index, MatrixType& zz); - void updateDMwithChebApproximation(const int iterative_index) override; + void updateDM() override; + void updateDMwithEigenstates(); + void updateDMwithSP2(); + void updateDMwithEigenstatesAndRotate(MatrixType& zz); + void updateDMwithChebApproximation() override; void computeChemicalPotentialAndOccupations( const double width, const int max_numst) { @@ -354,16 +349,16 @@ class ProjectedMatrices : public ProjectedMatricesInterface void resetDM() override { - dm_->setMatrix(*mat_X_old_, 0); + dm_->setMatrix(*mat_X_old_); dm_->stripS(*mat_L_old_); } - void updateDMwithRelax(const double mix, const int itindex) override + void updateDMwithRelax(const double mix) override { // cout<<"ProjectedMatrices::updateDMwithRelax()..."<mix(mix, *mat_X_old_, itindex); + dm_->mix(mix, *mat_X_old_); } SquareLocalMatrices getReplicatedDM(); @@ -414,10 +409,7 @@ class ProjectedMatrices : public ProjectedMatricesInterface pmat.gemm('n', 'n', 1.0, mat, *theta_, 0.); } MatrixType& getMatHB() { return *matHB_; } - void setDM(const MatrixType& mat, const int orbitals_index) - { - dm_->setMatrix(mat, orbitals_index); - } + void setDM(const MatrixType& mat) { dm_->setMatrix(mat); } void setEigenvalues(const std::vector& eigenvalues) { memcpy(eigenvalues_.data(), eigenvalues.data(), diff --git a/src/ProjectedMatrices2N.cc b/src/ProjectedMatrices2N.cc index b0696f94..479ee4f0 100644 --- a/src/ProjectedMatrices2N.cc +++ b/src/ProjectedMatrices2N.cc @@ -39,7 +39,7 @@ void ProjectedMatrices2N::assignBlocksH( template void ProjectedMatrices2N::iterativeUpdateDMwithEigenstates( - const double occ_width, const int iterative_index, const bool flag_reduce_T) + const double occ_width, const bool flag_reduce_T) { MGmol_MPI& mmpi = *(MGmol_MPI::instance()); @@ -64,7 +64,7 @@ void ProjectedMatrices2N::iterativeUpdateDMwithEigenstates( 
(*MPIdata::sout) << "MVP target with mu = " << ProjectedMatricesInterface::mu_ << " [Ry]" << std::endl; - ProjectedMatrices::buildDM(*work2N_, iterative_index); + ProjectedMatrices::buildDM(*work2N_); } template class ProjectedMatrices2N>; diff --git a/src/ProjectedMatrices2N.h b/src/ProjectedMatrices2N.h index 69ca2838..a87bf83f 100644 --- a/src/ProjectedMatrices2N.h +++ b/src/ProjectedMatrices2N.h @@ -26,8 +26,8 @@ class ProjectedMatrices2N : public ProjectedMatrices void assignBlocksH(MatrixType&, MatrixType&, MatrixType&, MatrixType&); - void iterativeUpdateDMwithEigenstates(const double occ_width, - const int iterative_index, const bool flag_reduce_T = true); + void iterativeUpdateDMwithEigenstates( + const double occ_width, const bool flag_reduce_T = true); void diagonalizeDM(std::vector& occ, MatrixType& vect) { // we are assuming Gram matrix=identity diff --git a/src/ProjectedMatricesInterface.h b/src/ProjectedMatricesInterface.h index c52595e1..53508a2e 100644 --- a/src/ProjectedMatricesInterface.h +++ b/src/ProjectedMatricesInterface.h @@ -199,10 +199,10 @@ class ProjectedMatricesInterface : public ChebyshevApproximationFunction virtual double computeEntropy() = 0; virtual double getEigSum() = 0; - virtual void updateTheta() = 0; - virtual void computeInvB() = 0; - virtual void printGramMM(std::ofstream& tfile) = 0; - virtual void setDMuniform(const double nel, const int orbitals_index) = 0; + virtual void updateTheta() = 0; + virtual void computeInvB() = 0; + virtual void printGramMM(std::ofstream& tfile) = 0; + virtual void setDMuniform(const double nel) = 0; virtual double dotProductWithInvS( const SquareLocalMatrices& ss) @@ -258,10 +258,9 @@ class ProjectedMatricesInterface : public ChebyshevApproximationFunction } virtual void saveDM() { exitWithErrorMessage("saveDM"); } virtual void resetDM() { exitWithErrorMessage("resetDM"); } - virtual void updateDMwithRelax(const double mix, const int itindex) + virtual void updateDMwithRelax(const double mix) 
{ (void)mix; - (void)itindex; exitWithErrorMessage("updateDMwithRelax"); } @@ -297,18 +296,11 @@ class ProjectedMatricesInterface : public ChebyshevApproximationFunction return 0; } - virtual void updateDMwithChebApproximation(const int iterative_index) + virtual void updateDMwithChebApproximation() { - (void)iterative_index; - exitWithErrorMessage("updateDMwithChebApproximation"); } - virtual void updateDM(const int iterative_index) - { - (void)iterative_index; - - exitWithErrorMessage("updateDM"); - } + virtual void updateDM() { exitWithErrorMessage("updateDM"); } virtual void setDMto2InvS() { exitWithErrorMessage("setDMto2InvS"); } virtual void initializeMatB( diff --git a/src/ProjectedMatricesSparse.h b/src/ProjectedMatricesSparse.h index fd3f6918..fdbb4f88 100644 --- a/src/ProjectedMatricesSparse.h +++ b/src/ProjectedMatricesSparse.h @@ -170,15 +170,12 @@ class ProjectedMatricesSparse : public ProjectedMatricesInterface /* scale H */ // (*matHB_).scale(vel_); } - void setDMuniform(const double nel, const int orbitals_index) override - { - dm_->setUniform(nel, orbitals_index); - } + void setDMuniform(const double nel) override { dm_->setUniform(nel); } int getDMMatrixIndex() const override { assert(dm_ != nullptr); - return dm_->getOrbitalsIndex(); + return dm_->getIndex(); } int getGramMatrixIndex() const { @@ -253,7 +250,7 @@ class ProjectedMatricesSparse : public ProjectedMatricesInterface void setDMto2InvS() override { assert(invS_ != nullptr); - dm_->setto2InvS(invS_->getInvS(), invS_->getGramMatrixOrbitalsIndex()); + dm_->setto2InvS(invS_->getInvS()); } void computeInvS() override diff --git a/tests/DMandEnergyAndForces/testDMandEnergyAndForces.cc b/tests/DMandEnergyAndForces/testDMandEnergyAndForces.cc index 1cf1cf88..a5a086fe 100644 --- a/tests/DMandEnergyAndForces/testDMandEnergyAndForces.cc +++ b/tests/DMandEnergyAndForces/testDMandEnergyAndForces.cc @@ -172,7 +172,7 @@ int main(int argc, char** argv) // // reset initial DM to test iterative solve for 
it - projmatrices->setDMuniform(ct.getNelSpin(), 0); + projmatrices->setDMuniform(ct.getNelSpin()); ct.dm_inner_steps = 50; eks = mgmol->evaluateDMandEnergyAndForces( &orbitals, positions, anumbers, forces); diff --git a/tests/RestartEnergyAndForces/testRestartEnergyAndForces.cc b/tests/RestartEnergyAndForces/testRestartEnergyAndForces.cc index b1fb0094..058b0a8e 100644 --- a/tests/RestartEnergyAndForces/testRestartEnergyAndForces.cc +++ b/tests/RestartEnergyAndForces/testRestartEnergyAndForces.cc @@ -162,7 +162,7 @@ int main(int argc, char** argv) orbitals.setIterativeIndex(1); // set initial DM with uniform occupations - projmatrices->setDMuniform(ct.getNelSpin(), 0); + projmatrices->setDMuniform(ct.getNelSpin()); projmatrices->printDM(std::cout); // swap H and O to make sure order of atoms in list does not matter diff --git a/tests/testDensityMatrix.cc b/tests/testDensityMatrix.cc index 8a3649c1..6a65c995 100644 --- a/tests/testDensityMatrix.cc +++ b/tests/testDensityMatrix.cc @@ -95,10 +95,10 @@ TEST_CASE( // setup density matrix DensityMatrix dm(n); - dm.setMatrix(matK, 0); + dm.setMatrix(matK); dm.stripS(ls); - dm.dressUpS(ls, 1); + dm.dressUpS(ls); const MatrixType& newM = dm.getMatrix(); if (myrank == 0) std::cout << "new M" << std::endl; From 47cba98972fad2cc08e704e1736bb13ebd9fddcf Mon Sep 17 00:00:00 2001 From: "Siu Wun \"Tony\" Cheung" Date: Mon, 14 Apr 2025 07:34:23 -0700 Subject: [PATCH 46/99] Fixing DM tolerance (#341) * Fix Density Matrix tolerance read --------- Co-authored-by: Jean-Luc Fattebert --- src/Control.cc | 5 ++--- src/read_config.cc | 5 +---- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/src/Control.cc b/src/Control.cc index 57e12336..208a8975 100644 --- a/src/Control.cc +++ b/src/Control.cc @@ -224,7 +224,8 @@ void Control::print(std::ostream& os) << conv_tol << std::endl; os << std::fixed; os << " Density matrix mixing = " << dm_mix << std::endl; - os << " Density matrix tol = " << dm_tol << std::endl; + os << 
std::setprecision(4) << std::scientific + << " Density matrix tol = " << dm_tol << std::endl; if (DMEigensolver() == DMEigensolverType::Eigensolver) { os << " Density matrix computation algorithm = " @@ -1743,8 +1744,6 @@ void Control::setOptions(const boost::program_options::variables_map& vm) else dm_algo_ = 2; - dm_tol = vm["DensityMatrix.tol"].as(); - str = vm["DensityMatrix.solver"].as(); if (str.compare("Mixing") == 0) DM_solver_ = 0; if (str.compare("MVP") == 0) DM_solver_ = 1; diff --git a/src/read_config.cc b/src/read_config.cc index ed0e66e1..54bf98f8 100644 --- a/src/read_config.cc +++ b/src/read_config.cc @@ -321,10 +321,7 @@ int read_config(int argc, char** argv, po::variables_map& vm, po::value()->default_value(100), "Maximum number of iterations for power method " "to compute interval for Chebyshev " - "approximation of density matrix. ")("DensityMatrix.tol", - po::value()->default_value(1.e-7), - "tolerance, used in iterative DM computation convergence " - "criteria"); + "approximation of density matrix. 
"); po::options_description cmdline_options; cmdline_options.add(generic); From ffc087cd7396ed0b9247a864cfe3df44794f5ac7 Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Fri, 11 Jul 2025 18:04:04 -0400 Subject: [PATCH 47/99] Rm oversubscribe from CMakeLists.txt (#346) * already set in build script --- tests/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index d90b9ac0..61f4c3e5 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -547,7 +547,7 @@ add_test(NAME ReplicatedSP2 ${CMAKE_CURRENT_SOURCE_DIR}/../potentials) add_test(NAME testMD_D72 COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/MD_D72/test.py - ${MPIEXEC} --oversubscribe ${MPIEXEC_NUMPROC_FLAG} 5 ${MPIEXEC_PREFLAGS} + ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 5 ${MPIEXEC_PREFLAGS} ${CMAKE_CURRENT_BINARY_DIR}/../src/mgmol-opt ${CMAKE_CURRENT_SOURCE_DIR}/MD_D72/mgmol_quench.cfg ${CMAKE_CURRENT_SOURCE_DIR}/MD_D72/mgmol_md.cfg @@ -601,7 +601,7 @@ add_test(NAME ChebyshevMVP if(NOT ${MGMOL_WITH_MAGMA}) add_test(NAME testShortSighted COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/ShortSighted/test.py - ${MPIEXEC} --oversubscribe ${MPIEXEC_NUMPROC_FLAG} 5 ${MPIEXEC_PREFLAGS} + ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 5 ${MPIEXEC_PREFLAGS} ${CMAKE_CURRENT_BINARY_DIR}/../src/mgmol-opt ${CMAKE_CURRENT_SOURCE_DIR}/ShortSighted/quench.cfg ${CMAKE_CURRENT_SOURCE_DIR}/ShortSighted/md.cfg From 089787685b10c5261d43212f90c2accd0b243f86 Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Mon, 14 Jul 2025 13:38:48 -0400 Subject: [PATCH 48/99] Adapt to new environment on condo @ ORNL (#348) --- scripts/build_condo-mod.sh | 8 +++----- scripts/build_condo.sh | 21 +++++---------------- scripts/modules.condo | 9 ++++----- scripts/modules.condo-mod | 10 ++++------ 4 files changed, 16 insertions(+), 32 deletions(-) diff --git a/scripts/build_condo-mod.sh b/scripts/build_condo-mod.sh index d9abe034..4dfe5fc6 100755 --- 
a/scripts/build_condo-mod.sh +++ b/scripts/build_condo-mod.sh @@ -3,9 +3,6 @@ #Before compiling, load the following modules: source scripts/modules.condo-mod -# We need to define the cmake blas vendor option here to find the right one. -BLAS_VENDOR=OpenBLAS - MGMOL_ROOT=`pwd` INSTALL_DIR=${MGMOL_ROOT}/install @@ -15,16 +12,17 @@ BUILD_DIR=${MGMOL_ROOT}/build mkdir -p ${BUILD_DIR} cd ${BUILD_DIR} +SCALAPACK_DIR=/home/q8j/Software/ScaLapack/scalapack-2.2.2 + # call cmake cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_DIR} \ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_CXX_COMPILER=mpiCC \ -DCMAKE_Fortran_COMPILER=mpif77 \ - -DBLA_VENDOR=${BLAS_VENDOR} \ -DMGMOL_USE_HDF5P=OFF \ -DMGMOL_WITH_CLANG_FORMAT=ON \ -DCMAKE_PREFIX_PATH=${HOME}/bin \ - -DSCALAPACK_LIBRARY="${SCALAPACK_DIR}/lib/libscalapack.a;/lib64/libgfortran.so.3" \ + -DSCALAPACK_LIBRARY="${SCALAPACK_DIR}/lib/libscalapack.a;/lib64/libgfortran.so.5" \ -DMPIEXEC_EXECUTABLE=${OPENMPI_DIR}/bin/mpiexec \ .. diff --git a/scripts/build_condo.sh b/scripts/build_condo.sh index 3a349cbe..68aa16dc 100755 --- a/scripts/build_condo.sh +++ b/scripts/build_condo.sh @@ -1,41 +1,30 @@ #/bin/bash ## An example script to build on ONRL condo systems (CADES). -## This script assumes intel/ mkl libraries are being used. #Before compiling, load the following modules: source scripts/modules.condo -# set some environment variables using loaded module path -export SCALAPACK_ROOT=${MKLROOT} - -# We need to define the cmake blas vendor option here to find the right one. 
-BLAS_VENDOR=Intel10_64lp - -# manually set the location of BLACS libraries for scalapack -BLACS_LIB=${MKLROOT}/lib/intel64 - MGMOL_ROOT=`pwd` -INSTALL_DIR=${MGMOL_ROOT}/mgmol_install +INSTALL_DIR=${MGMOL_ROOT}/install mkdir -p ${INSTALL_DIR} BUILD_DIR=${MGMOL_ROOT}/build mkdir -p ${BUILD_DIR} cd ${BUILD_DIR} +SCALAPACK_DIR=/home/q8j/Software/ScaLapack/scalapack-2.2.2 + # call cmake cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_DIR} \ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_CXX_COMPILER=mpiCC \ -DCMAKE_Fortran_COMPILER=mpif77 \ - -DHDF5_LIBRARIES=${HDF5_DIR}/lib/libhdf5.so \ - -DHDF5_HL_LIBRARIES=${HDF5_DIR}/lib/libhdf5_hl.so \ - -DHDF5_INCLUDE_DIRS=${HDF5_DIR}/include \ - -DBLA_VENDOR=${BLAS_VENDOR} \ + -DMGMOL_USE_HDF5P=OFF \ -DMGMOL_WITH_CLANG_FORMAT=ON \ -DCMAKE_PREFIX_PATH=${HOME}/bin \ + -DSCALAPACK_LIBRARY="${SCALAPACK_DIR}/lib/libscalapack.a;/lib64/libgfortran.so.5" \ -DMPIEXEC_EXECUTABLE=${OPENMPI_DIR}/bin/mpiexec \ - -DSCALAPACK_BLACS_LIBRARY=${BLACS_LIB}/libmkl_blacs_openmpi_lp64.so \ .. 
# call make install diff --git a/scripts/modules.condo b/scripts/modules.condo index 6f27e62b..de36562f 100644 --- a/scripts/modules.condo +++ b/scripts/modules.condo @@ -1,6 +1,5 @@ -module load PE-intel/3.0 -module load boost/1.67.0-pe3 -module load mkl -module load hdf5_parallel/1.10.3 -module load cmake/3.18.4 +module load hdf5 +module load boost +module load cmake module load python +module load openblas diff --git a/scripts/modules.condo-mod b/scripts/modules.condo-mod index 18377905..de36562f 100644 --- a/scripts/modules.condo-mod +++ b/scripts/modules.condo-mod @@ -1,7 +1,5 @@ -module load PE-gnu/3.0 -module load hdf5-parallel/1.8.20 +module load hdf5 module load boost -module load cmake/3.20.3 -module load python/3.6.6 -module load openBLAS/0.2.19 -module load scalapack/2.0.2 +module load cmake +module load python +module load openblas From 7d89625fd44c9035284f7c8877c26184d8e3f0c4 Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Tue, 15 Jul 2025 10:24:48 -0400 Subject: [PATCH 49/99] Remove unused function --- src/MGmol.h | 1 - src/readInput.cc | 42 ------------------------------------------ 2 files changed, 43 deletions(-) diff --git a/src/MGmol.h b/src/MGmol.h index c85e7b59..9ce4b81f 100644 --- a/src/MGmol.h +++ b/src/MGmol.h @@ -225,7 +225,6 @@ class MGmol : public MGmolInterface void printEigAndOcc(); - int readCoordinates(std::ifstream* tfile, const bool cell_relative); int readCoordinates(const std::string& filename, const bool cell_relative); double computeConstraintResidual(OrbitalsType& orbitals, const OrbitalsType& hphi, OrbitalsType& res, const bool print_residual, diff --git a/src/readInput.cc b/src/readInput.cc index 79fbecf1..8c9f9e03 100644 --- a/src/readInput.cc +++ b/src/readInput.cc @@ -162,48 +162,6 @@ int MGmol::readLRsFromInput(std::ifstream* tfile) return ct.numst; } -template -int MGmol::readCoordinates( - std::ifstream* tfile, const bool cell_relative) -{ - Control& ct = *(Control::instance()); - if (ct.verbose > 0) 
printWithTimeStamp("Read atomic coordinates...", os_); - Mesh* mymesh = Mesh::instance(); - const pb::Grid& mygrid = mymesh->grid(); - - const double lattice[3] = { mygrid.ll(0), mygrid.ll(1), mygrid.ll(2) }; - - // setup ions - const std::vector& sp(ct.getSpecies()); - ions_.reset(new Ions(lattice, sp)); - - if (ct.restart_info > 0 - && ct.override_restart == 0) // read restart ionic positions - { - if (ct.restart_info > 0) - { - if (onpe0 && ct.verbose > 0) - { - os_ << "Initialize ionic positions from restart file " - << ct.restart_file << std::endl; - } - ions_->initFromRestartFile(*h5f_file_); - } - } - else - { - // Coordinates and species type for each ion. - int info = ions_->readAtoms(tfile, cell_relative); - - return info; - } - - const int num_ions = ions_->getNumIons(); - if (onpe0) os_ << num_ions << " ions in simulation" << std::endl; - - return 0; -} - template int MGmol::readCoordinates( const std::string& filename, const bool cell_relative) From c0dd3b32b157fe47e12286745afef2c63de63715 Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Tue, 15 Jul 2025 11:41:11 -0400 Subject: [PATCH 50/99] Clean up use of coords file when restarting * use coordinates file to override atomic coordinates when using restart wavefunctions --- src/Control.cc | 74 +++++++++++++++++++------------------- src/Control.h | 1 - src/main.cc | 10 +++--- src/md.cc | 2 +- src/readInput.cc | 21 +++++------ src/read_config.cc | 17 +++++---- tests/LBFGS/test.py | 2 +- tests/MD_D72/test.py | 2 +- tests/MD_MVP/test.py | 2 +- tests/RhoVhRestart/test.py | 4 +-- 10 files changed, 64 insertions(+), 71 deletions(-) diff --git a/src/Control.cc b/src/Control.cc index 208a8975..2980ce33 100644 --- a/src/Control.cc +++ b/src/Control.cc @@ -112,7 +112,6 @@ Control::Control() out_restart_file_type = -1; spread_radius = -1.; iprint_residual = -1; - override_restart = -1; dot_product_type = -1; spread_penalty_damping_ = -1; spread_penalty_alpha_ = -1; @@ -386,7 +385,7 @@ void 
Control::sync(void) short_buffer[47] = out_restart_file_naming_strategy; short_buffer[48] = enforceVmass0; short_buffer[49] = dm_inner_steps; - short_buffer[50] = override_restart; + short_buffer[50] = -1; short_buffer[51] = fgmres_kim; short_buffer[52] = fgmres_maxits; short_buffer[53] = ilu_type; @@ -599,41 +598,41 @@ void Control::sync(void) out_restart_file_naming_strategy = short_buffer[47]; enforceVmass0 = short_buffer[48]; dm_inner_steps = short_buffer[49]; - override_restart = short_buffer[50]; - fgmres_kim = short_buffer[51]; - fgmres_maxits = short_buffer[52]; - ilu_type = short_buffer[53]; - ilu_lof = short_buffer[54]; - ilu_maxfil = short_buffer[55]; - coloring_algo_ = short_buffer[56]; - diel_flag_ = short_buffer[57]; - poisson_pc_nu1 = short_buffer[58]; - poisson_pc_nu2 = short_buffer[59]; - poisson_pc_nlev = short_buffer[60]; - system_charge_ = short_buffer[61]; - md_print_freq = short_buffer[62]; - use_kernel_functions = short_buffer[63]; - ngpts_[0] = short_buffer[64]; - ngpts_[1] = short_buffer[65]; - ngpts_[2] = short_buffer[66]; - computeCondGram_ = short_buffer[67]; - lrs_extrapolation = short_buffer[68]; - parallel_transport = (bool)short_buffer[69]; - with_spin_ = (bool)short_buffer[70]; - conv_criterion_ = short_buffer[71]; - load_balancing_max_iterations = short_buffer[72]; - load_balancing_modulo = short_buffer[73]; - write_clusters = short_buffer[74]; - DM_solver_ = short_buffer[75]; - dm_algo_ = short_buffer[80]; - dm_approx_order = short_buffer[81]; - dm_approx_ndigits = short_buffer[82]; - dm_approx_power_maxits = short_buffer[83]; - spread_penalty_type_ = short_buffer[84]; - dm_use_old_ = short_buffer[85]; - max_electronic_steps_tight_ = short_buffer[86]; - hartree_reset_ = short_buffer[88]; - MD_last_step_ = short_buffer[89]; + //... 
= short_buffer[50]; + fgmres_kim = short_buffer[51]; + fgmres_maxits = short_buffer[52]; + ilu_type = short_buffer[53]; + ilu_lof = short_buffer[54]; + ilu_maxfil = short_buffer[55]; + coloring_algo_ = short_buffer[56]; + diel_flag_ = short_buffer[57]; + poisson_pc_nu1 = short_buffer[58]; + poisson_pc_nu2 = short_buffer[59]; + poisson_pc_nlev = short_buffer[60]; + system_charge_ = short_buffer[61]; + md_print_freq = short_buffer[62]; + use_kernel_functions = short_buffer[63]; + ngpts_[0] = short_buffer[64]; + ngpts_[1] = short_buffer[65]; + ngpts_[2] = short_buffer[66]; + computeCondGram_ = short_buffer[67]; + lrs_extrapolation = short_buffer[68]; + parallel_transport = (bool)short_buffer[69]; + with_spin_ = (bool)short_buffer[70]; + conv_criterion_ = short_buffer[71]; + load_balancing_max_iterations = short_buffer[72]; + load_balancing_modulo = short_buffer[73]; + write_clusters = short_buffer[74]; + DM_solver_ = short_buffer[75]; + dm_algo_ = short_buffer[80]; + dm_approx_order = short_buffer[81]; + dm_approx_ndigits = short_buffer[82]; + dm_approx_power_maxits = short_buffer[83]; + spread_penalty_type_ = short_buffer[84]; + dm_use_old_ = short_buffer[85]; + max_electronic_steps_tight_ = short_buffer[86]; + hartree_reset_ = short_buffer[88]; + MD_last_step_ = short_buffer[89]; poisson_lap_type_ = static_cast(short_buffer[90]); numst = int_buffer[0]; @@ -1787,7 +1786,6 @@ void Control::setOptions(const boost::program_options::variables_map& vm) // options not available in configure file lr_updates_type = 0; precond_factor_computed = false; - override_restart = 0; mix_pot = 1.; project_out_psd = 0; multipole_order = 1; diff --git a/src/Control.h b/src/Control.h index 9481cc24..c9a128b3 100644 --- a/src/Control.h +++ b/src/Control.h @@ -555,7 +555,6 @@ class Control short out_restart_file_naming_strategy; short restart_file_type; short out_restart_file_type; - short override_restart; short verbose; diff --git a/src/main.cc b/src/main.cc index 3c6830a0..621b72de 
100644 --- a/src/main.cc +++ b/src/main.cc @@ -58,7 +58,7 @@ int main(int argc, char** argv) /* * read runtime parameters */ - std::string input_filename(""); + std::string coords_filename(""); std::string lrs_filename; std::string constraints_filename(""); @@ -70,7 +70,7 @@ int main(int argc, char** argv) // read from PE0 only if (MPIdata::onpe0) { - read_config(argc, argv, vm, input_filename, lrs_filename, + read_config(argc, argv, vm, coords_filename, lrs_filename, constraints_filename, total_spin, with_spin); } @@ -89,7 +89,7 @@ int main(int argc, char** argv) int ret = ct.checkOptions(); if (ret < 0) return ret; - mmpi.bcastGlobal(input_filename); + mmpi.bcastGlobal(coords_filename); mmpi.bcastGlobal(lrs_filename); // Enter main scope @@ -97,10 +97,10 @@ int main(int argc, char** argv) MGmolInterface* mgmol; if (ct.isLocMode()) mgmol = new MGmol(global_comm, *MPIdata::sout, - input_filename, lrs_filename, constraints_filename); + coords_filename, lrs_filename, constraints_filename); else mgmol = new MGmol(global_comm, *MPIdata::sout, - input_filename, lrs_filename, constraints_filename); + coords_filename, lrs_filename, constraints_filename); mgmol->setup(); diff --git a/src/md.cc b/src/md.cc index edccdb17..b34ca8c8 100644 --- a/src/md.cc +++ b/src/md.cc @@ -333,7 +333,7 @@ void MGmol::md(OrbitalsType** orbitals, Ions& ions) constraints_->printConstraints(os_); - if (ct.restart_info > 0 && !ct.override_restart) + if (ct.restart_info > 0) { if (onpe0) os_ << "Use restart file to initialize MD..." 
<< std::endl; stepper->init(*h5f_file_); diff --git a/src/readInput.cc b/src/readInput.cc index 8c9f9e03..bd6a7dd1 100644 --- a/src/readInput.cc +++ b/src/readInput.cc @@ -164,7 +164,7 @@ int MGmol::readLRsFromInput(std::ifstream* tfile) template int MGmol::readCoordinates( - const std::string& filename, const bool cell_relative) + const std::string& coords_filename, const bool cell_relative) { Control& ct = *(Control::instance()); if (ct.verbose > 0) printWithTimeStamp("Read atomic coordinates...", os_); @@ -177,29 +177,26 @@ int MGmol::readCoordinates( const std::vector& sp(ct.getSpecies()); ions_.reset(new Ions(lattice, sp)); - if (ct.restart_info > 0 - && ct.override_restart == 0) // read restart ionic positions + if (ct.restart_info > 0 && coords_filename.empty()) { - if (ct.restart_info > 0) + // read restart atomic positions + if (onpe0 && ct.verbose > 0) { - if (onpe0 && ct.verbose > 0) - { - os_ << "Initialize ionic positions from restart file " - << ct.restart_file << std::endl; - } - ions_->initFromRestartFile(*h5f_file_); + os_ << "Initialize atomic positions from restart file " + << ct.restart_file << std::endl; } + ions_->initFromRestartFile(*h5f_file_); } else { // Coordinates and species type for each ion. 
- int info = ions_->readAtoms(filename, cell_relative); + int info = ions_->readAtoms(coords_filename, cell_relative); return info; } const int num_ions = ions_->getNumIons(); - if (onpe0) os_ << num_ions << " ions in simulation" << std::endl; + if (onpe0) os_ << num_ions << " atoms in simulation" << std::endl; return 0; } diff --git a/src/read_config.cc b/src/read_config.cc index 54bf98f8..3ca9ec69 100644 --- a/src/read_config.cc +++ b/src/read_config.cc @@ -19,7 +19,7 @@ namespace po = boost::program_options; int read_config(int argc, char** argv, po::variables_map& vm, - std::string& input_file, std::string& lrs_filename, + std::string& coords_filename, std::string& lrs_filename, std::string& constraints_filename, float& total_spin, bool& with_spin) { // use configure file if it can be found @@ -35,9 +35,9 @@ int read_config(int argc, char** argv, po::variables_map& vm, "help,h", "produce help message")("check", "check input")( "config,c", po::value(&config_file)->default_value("mgmol.cfg"), - "name of a file of a configuration.")("atomicCoordinates,i", - po::value>(), - "coordinates filename")("LRsFilename,l", + "name of configuration file")("atomicCoordinates,i", + po::value(&coords_filename), + "atomic coordinates filename")("LRsFilename,l", po::value(&lrs_filename), "LRs filename"); // Declare a group of options (with default when appropriate) that @@ -149,9 +149,8 @@ int read_config(int argc, char** argv, po::variables_map& vm, "Tolerance on forces for Geometry optimization")( "GeomOpt.max_steps", po::value()->default_value(1), "max. 
number of Geometry optimization steps")("GeomOpt.dt", - po::value(), "Delta t for trial pseudo-time steps")( - "atomicCoordinates", po::value>(), - "coordinates filename")("Thermostat.type", + po::value(), + "Delta t for trial pseudo-time steps")("Thermostat.type", po::value()->default_value("Langevin"), "Thermostat type")("Thermostat.temperature", po::value()->default_value(-1.), "Thermostat temperature")( @@ -379,9 +378,9 @@ int read_config(int argc, char** argv, po::variables_map& vm, } if (vm.count("atomicCoordinates")) { - input_file + coords_filename = vm["atomicCoordinates"].as>()[0]; - std::cout << "Input files is: " << input_file << "\n"; + std::cout << "Coordinates files is: " << coords_filename << "\n"; } else { diff --git a/tests/LBFGS/test.py b/tests/LBFGS/test.py index 324fd129..7150b3ce 100755 --- a/tests/LBFGS/test.py +++ b/tests/LBFGS/test.py @@ -56,7 +56,7 @@ os.remove('wave.out') os.symlink(restart_file, 'wave.out') -command = "{} {} -c {} -i {} -l {}".format(mpicmd,exe,inp2,coords,lrs) +command = "{} {} -c {} -l {}".format(mpicmd,exe,inp2,lrs) print(command) output = subprocess.check_output(command,shell=True) diff --git a/tests/MD_D72/test.py b/tests/MD_D72/test.py index 3f98b2bd..22773b6b 100755 --- a/tests/MD_D72/test.py +++ b/tests/MD_D72/test.py @@ -54,7 +54,7 @@ os.remove('wave.out') os.symlink(restart_file, 'wave.out') -command = "{} {} -c {} -i {}".format(mpicmd,exe,inp2,coords) +command = "{} {} -c {}".format(mpicmd,exe,inp2) output2 = subprocess.check_output(command,shell=True) #remove created files diff --git a/tests/MD_MVP/test.py b/tests/MD_MVP/test.py index 47b5462d..c767031e 100755 --- a/tests/MD_MVP/test.py +++ b/tests/MD_MVP/test.py @@ -51,7 +51,7 @@ os.rename(restart_file, 'snapshotMVP') #run MGmol - command = "{} {} -c {} -i {}".format(mpicmd,exe,inp2,coords) + command = "{} {} -c {}".format(mpicmd,exe,inp2) output2 = subprocess.check_output(command,shell=True) #remove used restart files diff --git a/tests/RhoVhRestart/test.py 
b/tests/RhoVhRestart/test.py index a34b962f..0d14383e 100755 --- a/tests/RhoVhRestart/test.py +++ b/tests/RhoVhRestart/test.py @@ -53,7 +53,7 @@ sys.exit(1) #run MD -command = "{} {} -c {} -i {}".format(mpicmd,mgmol_exe,input2,coords) +command = "{} {} -c {}".format(mpicmd,mgmol_exe,input2) print("Run command: {}".format(command)) output = subprocess.check_output(command,shell=True) lines=output.split(b'\n') @@ -68,7 +68,7 @@ sys.exit(1) #run test -command = "{} {} -c {} -i {}".format(mpicmd,test_exe,input3,coords) +command = "{} {} -c {}".format(mpicmd,test_exe,input3) print("Run command: {}".format(command)) output = subprocess.check_output(command,shell=True) lines=output.split(b'\n') From a8be389ff3df7f15070d1d29344b7e20c7145ee0 Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Tue, 15 Jul 2025 13:16:26 -0400 Subject: [PATCH 51/99] Add test restart with override atomic coordinates --- tests/CMakeLists.txt | 9 ++++ tests/OverrideCoords/mgmol.cfg | 27 +++++++++++ tests/OverrideCoords/restart.cfg | 24 +++++++++ tests/OverrideCoords/shifted.xyz | 8 +++ tests/OverrideCoords/sih4.xyz | 8 +++ tests/OverrideCoords/test.py | 83 ++++++++++++++++++++++++++++++++ 6 files changed, 159 insertions(+) create mode 100644 tests/OverrideCoords/mgmol.cfg create mode 100644 tests/OverrideCoords/restart.cfg create mode 100644 tests/OverrideCoords/shifted.xyz create mode 100644 tests/OverrideCoords/sih4.xyz create mode 100755 tests/OverrideCoords/test.py diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 61f4c3e5..01805cd7 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -456,6 +456,15 @@ add_test(NAME testSiH4 ${CMAKE_CURRENT_SOURCE_DIR}/SiH4/mgmol.cfg ${CMAKE_CURRENT_SOURCE_DIR}/SiH4/sih4.xyz ${CMAKE_CURRENT_SOURCE_DIR}/../potentials) +add_test(NAME testOverrideCoords + COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/OverrideCoords/test.py + ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 4 ${MPIEXEC_PREFLAGS} + 
${CMAKE_CURRENT_BINARY_DIR}/../src/mgmol-opt + ${CMAKE_CURRENT_SOURCE_DIR}/OverrideCoords/mgmol.cfg + ${CMAKE_CURRENT_SOURCE_DIR}/OverrideCoords/restart.cfg + ${CMAKE_CURRENT_SOURCE_DIR}/OverrideCoords/sih4.xyz + ${CMAKE_CURRENT_SOURCE_DIR}/OverrideCoords/shifted.xyz + ${CMAKE_CURRENT_SOURCE_DIR}/../potentials) add_test(NAME testCl2 COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/Cl2_ONCVPSP_LDA/test.py ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 4 ${MPIEXEC_PREFLAGS} diff --git a/tests/OverrideCoords/mgmol.cfg b/tests/OverrideCoords/mgmol.cfg new file mode 100644 index 00000000..fad77b2d --- /dev/null +++ b/tests/OverrideCoords/mgmol.cfg @@ -0,0 +1,27 @@ +verbosity=2 +xcFunctional=LDA +[Mesh] +nx=40 +ny=40 +nz=40 +[Domain] +ox=-6.75 +oy=-6.75 +oz=-6.75 +lx=13.5 +ly=13.5 +lz=13.5 +[Potentials] +pseudopotential=pseudo.Si +pseudopotential=pseudo.H +[Run] +type=QUENCH +[Quench] +max_steps=45 +atol=1.e-9 +num_lin_iterations=2 +[Orbitals] +initial_type=Gaussian +initial_width=2. +[Restart] +output_level=3 diff --git a/tests/OverrideCoords/restart.cfg b/tests/OverrideCoords/restart.cfg new file mode 100644 index 00000000..0715e6b1 --- /dev/null +++ b/tests/OverrideCoords/restart.cfg @@ -0,0 +1,24 @@ +verbosity=2 +xcFunctional=LDA +[Mesh] +nx=40 +ny=40 +nz=40 +[Domain] +ox=-6.75 +oy=-6.75 +oz=-6.75 +lx=13.5 +ly=13.5 +lz=13.5 +[Potentials] +pseudopotential=pseudo.Si +pseudopotential=pseudo.H +[Run] +type=QUENCH +[Quench] +max_steps=25 +atol=1.e-8 +[Restart] +input_level=3 +input_filename=wave.out diff --git a/tests/OverrideCoords/shifted.xyz b/tests/OverrideCoords/shifted.xyz new file mode 100644 index 00000000..089658f8 --- /dev/null +++ b/tests/OverrideCoords/shifted.xyz @@ -0,0 +1,8 @@ +5 +SiH4 molecule (coordinates in Angstrom) +Si 0.0 0.0 0.0 +H 0.885 0.885 0.935 +H -0.885 -0.885 0.885 +H -0.885 0.885 -0.885 +H 0.885 -0.885 -0.885 + diff --git a/tests/OverrideCoords/sih4.xyz b/tests/OverrideCoords/sih4.xyz new file mode 100644 index 00000000..b3f921e3 --- /dev/null 
+++ b/tests/OverrideCoords/sih4.xyz @@ -0,0 +1,8 @@ +5 +SiH4 molecule (coordinates in Angstrom) +Si 0.0 0.0 0.0 +H 0.885 0.885 0.885 +H -0.885 -0.885 0.885 +H -0.885 0.885 -0.885 +H 0.885 -0.885 -0.885 + diff --git a/tests/OverrideCoords/test.py b/tests/OverrideCoords/test.py new file mode 100755 index 00000000..2de3fd4f --- /dev/null +++ b/tests/OverrideCoords/test.py @@ -0,0 +1,83 @@ +#!/usr/bin/env python +import sys +import os +import subprocess +import string +import shutil + +print("Test Override coordinates...") + +nargs=len(sys.argv) + +mpicmd = sys.argv[1]+" "+sys.argv[2]+" "+sys.argv[3] +for i in range(4,nargs-6): + mpicmd = mpicmd + " "+sys.argv[i] +print("MPI run command: {}".format(mpicmd)) + +exe = sys.argv[nargs-6] +inp1 = sys.argv[nargs-5] +inp2 = sys.argv[nargs-4] +coords1 = sys.argv[nargs-3] +coords2 = sys.argv[nargs-2] +print("coordinates file: %s"%coords1) + +#create links to potentials files +dst1 = 'pseudo.Si' +dst2 = 'pseudo.H' +src1 = sys.argv[nargs-1] + '/' + dst1 +src2 = sys.argv[nargs-1] + '/' + dst2 + +if not os.path.exists(dst1): + print("Create link to %s"%dst1) + os.symlink(src1, dst1) +if not os.path.exists(dst2): + print("Create link to %s"%dst2) + os.symlink(src2, dst2) + +#run quench +command = "{} {} -c {} -i {}".format(mpicmd,exe,inp1,coords1) +print("Run command: {}".format(command)) +output1 = subprocess.check_output(command,shell=True) +lines=output1.split(b'\n') + +#analyse output of quench +for line in lines: + if line.count(b'%%'): + print(line) + +#run quench with shifted coordinates +command = "ls -ld snapshot* | awk '{ print $9 }' | tail -n1" +print(command) +restart_file = subprocess.check_output(command,shell=True) +restart_file=str(restart_file[:-1],'utf-8') +print(restart_file) + +try: + os.symlink(restart_file, 'wave.out') +except FileExistsError: + os.remove('wave.out') + os.symlink(restart_file, 'wave.out') + +command = "{} {} -c {} -i {}".format(mpicmd,exe,inp2,coords2) +print(command) +output2 = 
subprocess.check_output(command,shell=True) + +#remove created files +shutil.rmtree(restart_file) +os.remove('wave.out') + +#analyse mgmol standard output +lines=output2.split(b'\n') + +flag = 0 +for line in lines: + if line.count(b'%%'): + print(line) + if line.count(b'achieved'): + flag=1 + +if flag==0: + print("second run did not converge...") + sys.exit(1) + +sys.exit(0) From de6eb6baa56577fb73217d175ac1d7c6df6c3f5e Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Mon, 14 Jul 2025 11:26:05 -0400 Subject: [PATCH 52/99] Add some missing include files * required for gcc12 --- tests/testSetGhostValues.cc | 2 ++ tests/testTradeGhostValues.cc | 2 ++ 2 files changed, 4 insertions(+) diff --git a/tests/testSetGhostValues.cc b/tests/testSetGhostValues.cc index c3680087..90aafde0 100644 --- a/tests/testSetGhostValues.cc +++ b/tests/testSetGhostValues.cc @@ -12,6 +12,8 @@ #include "catch.hpp" +#include + TEST_CASE("Set ghost values", "[set ghosts") { const double origin[3] = { 0., 0., 0. 
}; diff --git a/tests/testTradeGhostValues.cc b/tests/testTradeGhostValues.cc index 069b9374..8294aaec 100644 --- a/tests/testTradeGhostValues.cc +++ b/tests/testTradeGhostValues.cc @@ -13,6 +13,8 @@ #include "catch.hpp" +#include + // function of periodicity nx, ny, nz double cos3(const int i, const int j, const int k, const int nx, const int ny, const int nz) From 1b06e02b7d48e5a2b7530c96b7762a210f29a386 Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Tue, 22 Jul 2025 16:34:00 -0400 Subject: [PATCH 53/99] Print MVP timers --- src/MGmol.cc | 4 ++++ src/MVPSolver.cc | 8 ++------ src/MVPSolver.h | 3 +-- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/src/MGmol.cc b/src/MGmol.cc index b3db1b77..d895aabc 100644 --- a/src/MGmol.cc +++ b/src/MGmol.cc @@ -45,6 +45,7 @@ #include "MGmol.h" #include "MLWFTransform.h" #include "MPIdata.h" +#include "MVPSolver.h" #include "MasksSet.h" #include "Mesh.h" #include "OrbitalsPreconditioning.h" @@ -900,6 +901,9 @@ void MGmol::printTimers() AndersonMix::update_tm().print(os_); proj_matrices_->printTimers(os_); ShortSightedInverse::printTimers(os_); + if (std::is_same::value) + MVPSolver>::printTimers(os_); VariableSizeMatrixInterface::printTimers(os_); DataDistribution::printTimers(os_); PackedCommunicationBuffer::printTimers(os_); diff --git a/src/MVPSolver.cc b/src/MVPSolver.cc index 4f9165a5..699a01f3 100644 --- a/src/MVPSolver.cc +++ b/src/MVPSolver.cc @@ -398,12 +398,8 @@ int MVPSolver::solve(OrbitalsType& orbitals) template void MVPSolver::printTimers(std::ostream& os) { - if (onpe0) - { - os << std::setprecision(2) << std::fixed << std::endl; - solve_tm_.print(os); - target_tm_.print(os); - } + solve_tm_.print(os); + target_tm_.print(os); } template class MVPSolver class ProjectedMatrices; @@ -67,7 +66,7 @@ class MVPSolver ~MVPSolver(); int solve(OrbitalsType& orbitals); - void printTimers(std::ostream& os); + static void printTimers(std::ostream& os); }; #endif From 
1bada0b413eda5bcc0082ca3de86ad072173580c Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Mon, 11 Aug 2025 21:57:11 -0400 Subject: [PATCH 54/99] Fix initialization for Dirichelt BC * was not correct for other than Gaussian functions initialization --- src/ExtendedGridOrbitals.cc | 6 ++++++ tests/CMakeLists.txt | 1 - tests/SpinO2LDA/mgmol.cfg | 2 +- tests/SpinO2LDA/test.py | 9 ++++----- 4 files changed, 11 insertions(+), 7 deletions(-) diff --git a/src/ExtendedGridOrbitals.cc b/src/ExtendedGridOrbitals.cc index 3e887dad..91862141 100644 --- a/src/ExtendedGridOrbitals.cc +++ b/src/ExtendedGridOrbitals.cc @@ -1827,6 +1827,12 @@ void ExtendedGridOrbitals::initWF( } } + // needs to mask one layer of values when using 0 BC for + // wavefunctions the next two lines do that + setDataWithGhosts(); + trade_boundaries(); + setToDataWithGhosts(); + resetIterativeIndex(); if (onpe0 && ct.verbose > 2) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 01805cd7..ddb8d230 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -500,7 +500,6 @@ add_test(NAME testSpinO2LDA ${CMAKE_CURRENT_BINARY_DIR}/../src/mgmol-opt ${CMAKE_CURRENT_SOURCE_DIR}/SpinO2LDA/mgmol.cfg ${CMAKE_CURRENT_SOURCE_DIR}/SpinO2LDA/coords.in - ${CMAKE_CURRENT_SOURCE_DIR}/SpinO2LDA/lrs.in ${CMAKE_CURRENT_SOURCE_DIR}/../potentials) add_test(NAME testMVP COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/MVP/test.py diff --git a/tests/SpinO2LDA/mgmol.cfg b/tests/SpinO2LDA/mgmol.cfg index ee2385e0..11326e2f 100644 --- a/tests/SpinO2LDA/mgmol.cfg +++ b/tests/SpinO2LDA/mgmol.cfg @@ -22,7 +22,7 @@ max_steps=100 atol=1.e-7 MLWC=true [Orbitals] -initial_type=Gaussian +initial_type=Random initial_width=1. 
bcx=0 bcy=0 diff --git a/tests/SpinO2LDA/test.py b/tests/SpinO2LDA/test.py index d6fa4242..77557ff0 100755 --- a/tests/SpinO2LDA/test.py +++ b/tests/SpinO2LDA/test.py @@ -13,10 +13,9 @@ mpicmd = mpicmd + " "+sys.argv[i] print("MPI run command: {}".format(mpicmd)) -exe = sys.argv[nargs-5] -inp = sys.argv[nargs-4] -coords = sys.argv[nargs-3] -lrs = sys.argv[nargs-2] +exe = sys.argv[nargs-4] +inp = sys.argv[nargs-3] +coords = sys.argv[nargs-2] print("coordinates file: %s"%coords) #create links to potentials files @@ -28,7 +27,7 @@ os.symlink(src1, dst1) #run mgmol -command = "{} {} -c {} -i {} -l {}".format(mpicmd,exe,inp,coords,lrs) +command = "{} {} -c {} -i {}".format(mpicmd,exe,inp,coords) print("Run command: {}".format(command)) output = subprocess.check_output(command,shell=True) From bd702b06f728093d0d1c0d5d723ec4645a6eda4b Mon Sep 17 00:00:00 2001 From: "Fattebert J.-L." Date: Tue, 29 Jul 2025 15:19:57 -0400 Subject: [PATCH 55/99] Modify two utils scripts --- util/compareForces.py | 118 +++++++++++----------------------- util/plotConvergenceEnergy.py | 62 ++++++++++-------- 2 files changed, 74 insertions(+), 106 deletions(-) diff --git a/util/compareForces.py b/util/compareForces.py index 829300a3..0297d5e8 100644 --- a/util/compareForces.py +++ b/util/compareForces.py @@ -11,6 +11,7 @@ #------------------------------------------------------------------------------- import sys, string from math import sqrt +import matplotlib.pyplot as plt input1=open(sys.argv[1],'r') input2=open(sys.argv[2],'r') @@ -20,28 +21,20 @@ frame=eval(sys.argv[3]) print( 'Input argument: Frame=',frame ) -L1=input1.readlines() -L2=input2.readlines() - -star='*' +lines1=input1.readlines() +lines2=input2.readlines() ############################################## # count number atoms def getNumAtoms(lines): - searchterm1='## ' - searchterm2='FORCES' - searchterm3='Forces' found_current_line=0 already_found_one=0 na=0 flag=0 for line in lines: ## loop over lines of file - num_matches1 = 
line.count(searchterm1) - num_matches2 = line.count(searchterm2) - num_matches3 = line.count(searchterm3) - if num_matches2 or num_matches3: + if line.count('FORCES') or line.count('Forces'): flag=1 - if num_matches1 & flag==1: + if line.count('## ') & flag==1: #print 'line=',line found_current_line=1 already_found_one =1 @@ -54,13 +47,12 @@ def getNumAtoms(lines): ############################################## -na1=getNumAtoms(L1) -na2=getNumAtoms(L2) +na1=getNumAtoms(lines1) +na2=getNumAtoms(lines2) print( 'N atoms in file1=', na1) print( 'N atoms in file2=', na2) - ############################################## def getForces(names,coords,forces,lines,fframe): @@ -113,25 +105,16 @@ def getForces(names,coords,forces,lines,fframe): ############################################## -forces1=[] -coords1=[] -names1=[] -for i in range(0,na1): - forces1.append(0) - coords1.append(0) - names1.append(0) - -forces2=[] -coords2=[] -names2=[] -for i in range(0,na2): - forces2.append(0) - coords2.append(0) - names2.append(0) - +forces1=[0]*na1 +coords1=[0]*na1 +names1=[0]*na1 + +forces2=[0]*na2 +coords2=[0]*na2 +names2=[0]*na2 -getForces(names1,coords1,forces1,L1,frame) -getForces(names2,coords2,forces2,L2,frame) +getForces(names1,coords1,forces1,lines1,frame) +getForces(names2,coords2,forces2,lines2,frame) mindf=100. maxdf=0. @@ -142,40 +125,7 @@ def getForces(names,coords,forces,lines,fframe): imax=0 jmax=0 dff=[] -bin=[] -for i in range(0,10): - bin.append(0) - -############################################## -def subtractAverageForce(forces): - avgx=0. - avgy=0. - avgz=0. 
- na=len(forces) - for i in range(na): - word=string.split(forces[i]) - fx=eval(word[0]) - fy=eval(word[1]) - fz=eval(word[2]) - avgx=avgx+fx - avgy=avgy+fy - avgz=avgz+fz - - avgx=avgx/na - avgy=avgy/na - avgz=avgz/na - - for i in range(na): - word=string.split(forces[i]) - fx=eval(word[0])-avgx - fy=eval(word[1])-avgy - fz=eval(word[2])-avgz - forces[i]=str(fx)+'\t'+str(fy)+'\t'+str(fz) - -############################################## - -#subtractAverageForce(forces1) -#subtractAverageForce(forces2) +bins=[0] * 10 na=0 for i in range(na1): @@ -209,16 +159,19 @@ def subtractAverageForce(forces): mindf=df na=na+1 print (names1[i],': delta f=',df) + print ('na=',na) avg=avg/na - -print ('N atoms =', na) -print ('Avg. df=',avgx,avgy,avgz) -print ('Avg. |df|=',avg) -print ('Min. df=',mindf) -print ('Max. df=',maxdf) -print ('df max for atom ',names1[imax],' and ',names2[jmax]) -print ('Forces atoms with largest force difference:') +avgx=avgx/na +avgy=avgy/na +avgz=avgz/na + +print ('N atoms = ', na) +print ('Avg. df = ',avgx,avgy,avgz) +print ('Avg. |df| = ',avg) +print ('Min. |df| = ',mindf) +print ('Max. 
|df| = ',maxdf) +print ('Atoms with largest force difference:') filename1=sys.argv[1] filename1=filename1.ljust(15) filename2=sys.argv[2] @@ -230,10 +183,15 @@ def subtractAverageForce(forces): for j in range(na): a=(dff[j]-mindf)/delf b=int(a) - bin[b]=bin[b]+1 + bins[b]=bins[b]+1 for i in range(0,10): - print (mindf+(i+0.5)*delf, bin[i]) - -#for j in range(na): -# print (dff[j]) + print (mindf+(i+0.5)*delf, bins[i]) + +plt.hist(dff, bins=10, edgecolor="black") +plt.ticklabel_format(axis='x', style='sci', scilimits=(0,0)) +plt.xlabel('force error magnitude [Ha/Bohr]',fontsize=12) +plt.ylabel('frequency',fontsize=12) +plt.xticks(fontsize=12) +plt.yticks(fontsize=12) +plt.savefig('errorForces.png', dpi=100) diff --git a/util/plotConvergenceEnergy.py b/util/plotConvergenceEnergy.py index d4e93c50..b89f9a3f 100644 --- a/util/plotConvergenceEnergy.py +++ b/util/plotConvergenceEnergy.py @@ -9,35 +9,45 @@ import sys, string import matplotlib.pyplot as plt -energies=[] +conv_energy=10000. -inputfile=open(sys.argv[1],'r') -lines=inputfile.readlines() +markers=['r.--','b.--','g.--'] -flag=0 -nst=0 -conv_energy=10000. 
-for line in lines: - if line.count( 'Number of states'): - words=line.split() - nst=eval(words[4]) - num_matches1 = line.count('ENERGY') - num_matches2 = line.count('%%') - if num_matches1 & num_matches2: - words=line.split() - energy=eval(words[5][:-1]) - energies.append(energy) - conv_energy=energy - -deltaes=[] -for energy in energies: - deltaes.append((energy-conv_energy)/nst) - -plt.plot(deltaes,'r.--') -plt.ylabel('error Eks/orbital [Ry]') -plt.xlabel('outer iterations') -plt.axis([0.,len(deltaes),10.*deltaes[-2],deltaes[0]]) +i=0 +for filename in sys.argv[1:]: + energies=[] + + inputfile=open(filename,'r') + lines=inputfile.readlines() + + flag=0 + na=0 + for line in lines: + if line.count('Number of ions'): + words=line.split() + na=eval(words[4]) + print('na = {}'.format(na)) + if line.count('ENERGY') & line.count('%%'): + words=line.split() + energy=eval(words[5][:-1]) + energies.append(energy) + if conv_energy>energy: + conv_energy=energy + + print('Reference energy [Ha/atom] = {}'.format(conv_energy/na)) + deltaes=[] + for energy in energies: + deltaes.append((energy-conv_energy)/na) + + plt.plot(deltaes,markers[i]) + plt.axis([0.,len(deltaes),10.*deltaes[-2],deltaes[0]]) + i=i+1 + +plt.ylabel('error Eks/atom [Ha]', fontsize=12) +plt.xlabel('outer iterations', fontsize=12) plt.yscale('log') +plt.xticks(fontsize=12) +plt.yticks(fontsize=12) #plt.show() plt.savefig('errorEnergy.png', dpi=100) From 461cd7b39e5ca5e301376856b810ceee7827e2cb Mon Sep 17 00:00:00 2001 From: "Fattebert J.-L." 
Date: Mon, 18 Aug 2025 11:13:27 -0400 Subject: [PATCH 56/99] Link tests with Lapack --- tests/CMakeLists.txt | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index ddb8d230..4e4a365d 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -662,7 +662,7 @@ if(${MAGMA_FOUND}) target_link_libraries(testConditionDistMatrix PRIVATE ${SCALAPACK_LIBRARIES} ${BLAS_LIBRARIES} MPI::MPI_CXX OpenMP::OpenMP_CXX PkgConfig::MAGMA) target_link_libraries(testConditionDistMatrixPower PRIVATE - ${SCALAPACK_LIBRARIES} ${BLAS_LIBRARIES} + ${SCALAPACK_LIBRARIES} ${BLAS_LIBRARIES} MPI::MPI_CXX OpenMP::OpenMP_CXX PkgConfig::MAGMA) target_link_libraries(testReplicatedMatrix PRIVATE MPI::MPI_CXX OpenMP::OpenMP_CXX PkgConfig::MAGMA) @@ -698,19 +698,20 @@ if(${MAGMA_FOUND}) endif() else() target_link_libraries(testDistVector PRIVATE ${SCALAPACK_LIBRARIES} - ${BLAS_LIBRARIES} MPI::MPI_CXX OpenMP::OpenMP_CXX) + ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES} MPI::MPI_CXX OpenMP::OpenMP_CXX) target_link_libraries(testReplicated2DistMatrix PRIVATE ${SCALAPACK_LIBRARIES} - ${BLAS_LIBRARIES} MPI::MPI_CXX OpenMP::OpenMP_CXX) + ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES} MPI::MPI_CXX OpenMP::OpenMP_CXX) target_link_libraries(testDistMatrix PRIVATE ${SCALAPACK_LIBRARIES} - ${BLAS_LIBRARIES} MPI::MPI_CXX OpenMP::OpenMP_CXX) + ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES} MPI::MPI_CXX OpenMP::OpenMP_CXX) target_link_libraries(testConditionDistMatrix PRIVATE ${SCALAPACK_LIBRARIES} - ${BLAS_LIBRARIES} MPI::MPI_CXX OpenMP::OpenMP_CXX) + ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES} MPI::MPI_CXX OpenMP::OpenMP_CXX) target_link_libraries(testConditionDistMatrixPower PRIVATE - ${SCALAPACK_LIBRARIES} ${BLAS_LIBRARIES} + ${SCALAPACK_LIBRARIES} ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES} MPI::MPI_CXX OpenMP::OpenMP_CXX) - target_link_libraries(testPower PRIVATE ${BLAS_LIBRARIES} ${SCALAPACK_LIBRARIES} + target_link_libraries(testPower PRIVATE 
${LAPACK_LIBRARIES} ${BLAS_LIBRARIES} + ${SCALAPACK_LIBRARIES} MPI::MPI_CXX OpenMP::OpenMP_CXX) - target_link_libraries(testPowerDistMatrix PRIVATE ${BLAS_LIBRARIES} + target_link_libraries(testPowerDistMatrix PRIVATE ${BLAS_LIBRARIES} ${LAPACK_LIBRARIES} ${SCALAPACK_LIBRARIES} MPI::MPI_CXX OpenMP::OpenMP_CXX) target_link_libraries(testAndersonMix PRIVATE ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES} @@ -728,9 +729,9 @@ else() target_link_libraries(testMGkernels PRIVATE ${BLAS_LIBRARIES} MPI::MPI_CXX OpenMP::OpenMP_CXX) target_link_libraries(testGramMatrix PRIVATE ${SCALAPACK_LIBRARIES} - ${BLAS_LIBRARIES} MPI::MPI_CXX OpenMP::OpenMP_CXX) + ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES} MPI::MPI_CXX OpenMP::OpenMP_CXX) target_link_libraries(testDensityMatrix PRIVATE ${SCALAPACK_LIBRARIES} - ${BLAS_LIBRARIES} MPI::MPI_CXX OpenMP::OpenMP_CXX) + ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES} MPI::MPI_CXX OpenMP::OpenMP_CXX) endif() set_tests_properties(testSiH4 PROPERTIES REQUIRED_FILES From a78cdc14f344002112602605b1c2c97e9e4303ff Mon Sep 17 00:00:00 2001 From: "Fattebert J.-L." 
Date: Thu, 21 Aug 2025 09:20:59 -0400 Subject: [PATCH 57/99] Fix mixed-precision to use ps-projectors in double --- src/global.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/global.h b/src/global.h index a23e4e3b..54353f24 100644 --- a/src/global.h +++ b/src/global.h @@ -23,19 +23,16 @@ typedef double ORBDTYPE; /* lmasktype sets the data type for the mask coeffs */ typedef ORBDTYPE lmasktype; -// typedef float lmasktype; typedef double RHODTYPE; -// typedef float RHODTYPE; typedef double MATDTYPE; typedef float MGPRECONDTYPE; typedef double POTDTYPE; -// typedef float POTDTYPE; -typedef ORBDTYPE KBPROJDTYPE; +typedef double KBPROJDTYPE; typedef float POISSONPRECONDTYPE; From 7947675347f7ec8e745c8fed313f2404d26c1ef4 Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Thu, 21 Aug 2025 08:17:55 -0400 Subject: [PATCH 58/99] Template axpy functions to better use BLAS1 --- src/ABPG.cc | 2 +- src/AndersonMix.cc | 8 ++++---- src/BlockVector.h | 3 ++- src/DavidsonSolver.cc | 2 +- src/ExtendedGridOrbitals.cc | 20 +++++++++++------- src/ExtendedGridOrbitals.h | 8 +++++--- src/GrassmanCG.cc | 9 +++------ src/Hamiltonian.cc | 2 +- src/MGmol.cc | 4 +++- src/OrbitalsPreconditioning.cc | 2 +- src/computeHij.cc | 6 +++--- src/linear_algebra/mputils.cc | 37 +++++++++++++++++++++++++++------- src/linear_algebra/mputils.h | 6 ++---- src/pb/GridFunc.cc | 2 +- src/pb/GridFunc.h | 2 +- src/pb/GridFuncVector.cc | 16 ++++++++++----- src/pb/GridFuncVector.h | 4 +++- 17 files changed, 85 insertions(+), 48 deletions(-) diff --git a/src/ABPG.cc b/src/ABPG.cc index 107edc63..de501c44 100644 --- a/src/ABPG.cc +++ b/src/ABPG.cc @@ -127,7 +127,7 @@ void ABPG::update_states(T& orbitals, T& res, T& work_orbitals, else { // Preconditioned Power Method - orbitals.axpy(alpha, res); + orbitals.axpy((ORBDTYPE)alpha, res); if (ct.getOrthoType() == OrthoType::Orthonormal) orbitals.orthonormalizeLoewdin(false); diff --git a/src/AndersonMix.cc b/src/AndersonMix.cc index 
8311c69b..7edae7d3 100644 --- a/src/AndersonMix.cc +++ b/src/AndersonMix.cc @@ -261,7 +261,7 @@ void AndersonMix::update(T& f, T& work, ostream& os, const bool verbose) for (int j = 0; j < mm_; j++) { - x_.axpy(theta_[j], *xi_[j]); + x_.axpy((ORBDTYPE)theta_[j], *xi_[j]); } // update xi_ for next step // restart @@ -288,7 +288,7 @@ void AndersonMix::update(T& f, T& work, ostream& os, const bool verbose) for (int j = 0; j < mm_; j++) { - f.axpy(theta_[j], *fi_[j]); + f.axpy((ORBDTYPE)theta_[j], *fi_[j]); } // update fi_ for next step @@ -309,9 +309,9 @@ void AndersonMix::update(T& f, T& work, ostream& os, const bool verbose) #endif // update x_ if (mm_ > 0) - x_.axpy(beta_, f); + x_.axpy((ORBDTYPE)beta_, f); else - x_.axpy(1., f); + x_.axpy((ORBDTYPE)1., f); postprocessUpdate(); diff --git a/src/BlockVector.h b/src/BlockVector.h index c9627466..47cbef2f 100644 --- a/src/BlockVector.h +++ b/src/BlockVector.h @@ -109,7 +109,8 @@ class BlockVector deallocate_storage(); } - void axpy(const double alpha, const BlockVector& bv) + template + void axpy(const ScalarType2 alpha, const BlockVector& bv) { assert(storage_ != nullptr); assert(bv.storage_ != nullptr); diff --git a/src/DavidsonSolver.cc b/src/DavidsonSolver.cc index c34b6396..b5557f7e 100644 --- a/src/DavidsonSolver.cc +++ b/src/DavidsonSolver.cc @@ -710,7 +710,7 @@ int DavidsonSolver::solve( // eigenvalues of DM orbitals.multiply_by_matrix(dm12); work_orbitals.multiply_by_matrix(dm22); - orbitals.axpy(1., work_orbitals); + orbitals.axpy((ORBDTYPE)1., work_orbitals); orbitals.incrementIterativeIndex(); orbitals.incrementIterativeIndex(); work_orbitals.incrementIterativeIndex(2); diff --git a/src/ExtendedGridOrbitals.cc b/src/ExtendedGridOrbitals.cc index 91862141..21ec5a7e 100644 --- a/src/ExtendedGridOrbitals.cc +++ b/src/ExtendedGridOrbitals.cc @@ -199,8 +199,9 @@ void ExtendedGridOrbitals::assign(const ExtendedGridOrbitals& orbitals) assign_tm_.stop(); } +template void ExtendedGridOrbitals::axpy( - const double 
alpha, const ExtendedGridOrbitals& orbitals) + const CoeffType alpha, const ExtendedGridOrbitals& orbitals) { axpy_tm_.start(); @@ -1519,10 +1520,9 @@ void ExtendedGridOrbitals::normalize() // modify argument orbitals, by projecting out its component // along ExtendedGridOrbitals -void ExtendedGridOrbitals::projectOut( - ExtendedGridOrbitals& orbitals, const double scale) +void ExtendedGridOrbitals::projectOut(ExtendedGridOrbitals& orbitals) { - projectOut(orbitals.psi(0), lda_, scale); + projectOut(orbitals.psi(0), lda_); #if 0 // test if projection is now 0 @@ -1535,8 +1535,7 @@ void ExtendedGridOrbitals::projectOut( orbitals.incrementIterativeIndex(); } -void ExtendedGridOrbitals::projectOut( - ORBDTYPE* const array, const int lda, const double scale) +void ExtendedGridOrbitals::projectOut(ORBDTYPE* const array, const int lda) { assert(lda > 1); assert(loc_numpt_ > 0); @@ -1584,7 +1583,7 @@ void ExtendedGridOrbitals::projectOut( MemorySpace::Memory::copy_view_to_host( parray, parray_size, parray_host_view); - double minus = -1. 
* scale; + ORBDTYPE minus = -1.; for (int j = 0; j < numst_; j++) LinearAlgebraUtils::MPaxpy(loc_numpt_, minus, tproduct + j * loc_numpt_, parray_host_view + j * lda); @@ -1863,6 +1862,13 @@ void ExtendedGridOrbitals::initWF( #endif } +template void ExtendedGridOrbitals::axpy( + const double alpha, const ExtendedGridOrbitals&); +#ifdef MGMOL_USE_MIXEDP +template void ExtendedGridOrbitals::axpy( + const float alpha, const ExtendedGridOrbitals&); +#endif + template void ExtendedGridOrbitals::setDataWithGhosts( pb::GridFuncVector* data_wghosts); template void ExtendedGridOrbitals::setDataWithGhosts( diff --git a/src/ExtendedGridOrbitals.h b/src/ExtendedGridOrbitals.h index c1a718d0..1a80d770 100644 --- a/src/ExtendedGridOrbitals.h +++ b/src/ExtendedGridOrbitals.h @@ -86,7 +86,7 @@ class ExtendedGridOrbitals : public Orbitals // // private functions // - void projectOut(ORBDTYPE* const, const int, const double scale = 1.); + void projectOut(ORBDTYPE* const, const int); void multiply_by_matrix( const DISTMATDTYPE* const, ORBDTYPE*, const int) const; @@ -334,7 +334,7 @@ class ExtendedGridOrbitals : public Orbitals block_vector_.scal(alpha); incrementIterativeIndex(); } - void projectOut(ExtendedGridOrbitals&, const double scale = 1.); + void projectOut(ExtendedGridOrbitals&); void normalize(); void orthonormalize2states(const int st1, const int st2); @@ -350,7 +350,9 @@ class ExtendedGridOrbitals : public Orbitals } void initGauss(const double, const std::shared_ptr); - virtual void axpy(const double alpha, const ExtendedGridOrbitals&); + + template + void axpy(const CoeffType alpha, const ExtendedGridOrbitals&); void app_mask(const int, pb::GridFunc&, const short) const {}; diff --git a/src/GrassmanCG.cc b/src/GrassmanCG.cc index d963bb36..3d74b9a3 100644 --- a/src/GrassmanCG.cc +++ b/src/GrassmanCG.cc @@ -237,9 +237,6 @@ void GrassmanCG::parallelTransportUpdate(const double /*lambda*/, T& phi) { Control& ct = *(Control::instance()); - // const double fact = lambda; - 
const double fact = 1.; - // update history data T* gradptr; // update gradient information @@ -255,13 +252,13 @@ void GrassmanCG::parallelTransportUpdate(const double /*lambda*/, T& phi) if (ct.parallel_transport) { // compute G_old = G - lambda*(Phi*S^{-1}*Phi^T*G).*corrmasks - phi.projectOut(*GrassmanLineMinimization::grad_, fact); + phi.projectOut(*GrassmanLineMinimization::grad_); GrassmanLineMinimization::grad_->applyCorrMask(true); // compute MG_old = MG - lambda*(Phi*S^{-1}*Phi^T*MG).*masks - phi.projectOut(*GrassmanLineMinimization::pcgrad_, fact); + phi.projectOut(*GrassmanLineMinimization::pcgrad_); GrassmanLineMinimization::pcgrad_->applyMask(true); // update preconditioned search direction information - phi.projectOut(*GrassmanLineMinimization::sdir_, fact); + phi.projectOut(*GrassmanLineMinimization::sdir_); GrassmanLineMinimization::sdir_->applyMask(true); } diff --git a/src/Hamiltonian.cc b/src/Hamiltonian.cc index 5a5d2a53..0bd773eb 100644 --- a/src/Hamiltonian.cc +++ b/src/Hamiltonian.cc @@ -127,7 +127,7 @@ void Hamiltonian::applyLocal(const int ncolors, T& phi, T& hphi) // gfvw1 = -Lap*phi gfv_phi->applyLap(0, gfvw1); // gfv_work1 = -Lap*phi + B*V*psi - gfv_work1.axpy(1., gfvw1); + gfv_work1.axpy((ORBDTYPE)1., gfvw1); // set hpsi data without ghosts hphi.setPsi(gfv_work1); } diff --git a/src/MGmol.cc b/src/MGmol.cc index d895aabc..8afa1331 100644 --- a/src/MGmol.cc +++ b/src/MGmol.cc @@ -95,6 +95,7 @@ extern Timer mpsyrk_tm; extern Timer tttsyrk_tm; extern Timer mpdot_tm; extern Timer ttdot_tm; +extern Timer loopaxpy_tm; extern Timer get_NOLMO_tm; extern Timer get_MLWF_tm; extern Timer md_iterations_tm; @@ -871,6 +872,7 @@ void MGmol::printTimers() tttsyrk_tm.print(os_); mpdot_tm.print(os_); ttdot_tm.print(os_); + loopaxpy_tm.print(os_); dist_matrix::SubMatrices::printTimers(os_); @@ -1266,7 +1268,7 @@ void MGmol::computeResidualUsingHPhi(OrbitalsType& psi, } // res = (B*phi*theta - H*phi) in [Ry] - res.axpy(-1., hphi); + 
res.axpy((ORBDTYPE)(-1.), hphi); } get_res_tm_.stop(); diff --git a/src/OrbitalsPreconditioning.cc b/src/OrbitalsPreconditioning.cc index 82754f60..3a5061df 100644 --- a/src/OrbitalsPreconditioning.cc +++ b/src/OrbitalsPreconditioning.cc @@ -101,7 +101,7 @@ void OrbitalsPreconditioning::precond_mg(T& orbitals) // store residual in GridFuncVector container // used for ghost values (no ghost values needed) orbitals.setDataWithGhosts(gfv_work2_); - gfv_work_->axpy(gamma_, *gfv_work2_); + gfv_work_->axpy((MGPRECONDTYPE)gamma_, *gfv_work2_); // block-implemented preconditioner precond_->mg(*gfv_work_, *gfv_work2_, lap_type_, 0); diff --git a/src/computeHij.cc b/src/computeHij.cc index 75b8ef86..50d26d90 100644 --- a/src/computeHij.cc +++ b/src/computeHij.cc @@ -334,8 +334,8 @@ void MGmol::computeHnlPhiAndAdd2HPhi(Ions& ions, memory_space_type>::copy_view_to_host(hpsi, numpt, hpsi_host_view); - LinearAlgebraUtils::MPaxpy( - numpt, 1., work.data() + numpt * icolor, hpsi_host_view); + LinearAlgebraUtils::MPaxpy(numpt, + (ORBDTYPE)1., work.data() + numpt * icolor, hpsi_host_view); MemorySpace::Memory::copy_view_to_dev(hpsi_host_view, numpt, @@ -360,7 +360,7 @@ void MGmol::computeHnlPhiAndAdd2HPhi(Ions& ions, hpsi_host_view); LinearAlgebraUtils::MPaxpy( - numpt, 1., hnl, hpsi_host_view); + numpt, (ORBDTYPE)1., hnl, hpsi_host_view); MemorySpace::Memory::copy_view_to_dev(hpsi_host_view, diff --git a/src/linear_algebra/mputils.cc b/src/linear_algebra/mputils.cc index 487d1367..53daee46 100644 --- a/src/linear_algebra/mputils.cc +++ b/src/linear_algebra/mputils.cc @@ -40,6 +40,7 @@ Timer ssyrk_tm("ssyrk"); Timer mpdot_tm("mpdot"); Timer ttdot_tm("ttdot"); +Timer loopaxpy_tm("loopaxpy"); /* Function definitions. 
See mputils.h for comments */ @@ -227,6 +228,7 @@ double LAU_D::MPdot( /////////////////////////////// // MemorySpace::Host template <> +template <> void LAU_H::MPaxpy(const int len, double scal, const double* __restrict__ xptr, double* __restrict__ yptr) { @@ -238,17 +240,33 @@ void LAU_H::MPaxpy(const int len, double scal, const double* __restrict__ xptr, } template <> -template -void LAU_H::MPaxpy(const int len, double scal, const T1* __restrict__ xptr, - T2* __restrict__ yptr) +template <> +void LAU_H::MPaxpy(const int len, float scal, const float* __restrict__ xptr, + float* __restrict__ yptr) { MemorySpace::assert_is_host_ptr(xptr); MemorySpace::assert_is_host_ptr(yptr); + + const int one = 1; + SAXPY(&len, &scal, xptr, &one, yptr, &one); +} + +template <> +template +void LAU_H::MPaxpy( + const int len, T0 scal, const T1* __restrict__ xptr, T2* __restrict__ yptr) +{ + loopaxpy_tm.start(); + + MemorySpace::assert_is_host_ptr(xptr); + MemorySpace::assert_is_host_ptr(yptr); #pragma omp parallel for simd for (int k = 0; k < len; k++) { yptr[k] += static_cast(scal * static_cast(xptr[k])); } + + loopaxpy_tm.stop(); } // MemorySpace::Device @@ -845,10 +863,15 @@ template double LAU_H::MPdot( const int len, const double* const xptr, const float* const yptr); template double LAU_H::MPdot( const int len, const float* const xptr, const double* const yptr); -template void LAU_H::MPaxpy(const int len, const double scal, - const float* __restrict__ xptr, double* __restrict__ yptr); -template void LAU_H::MPaxpy(const int len, const double scal, - const float* __restrict__ xptr, float* __restrict__ yptr); +template void LAU_H::MPaxpy(const int len, + const double scal, const float* __restrict__ xptr, + double* __restrict__ yptr); +template void LAU_H::MPaxpy(const int len, + const float scal, const float* __restrict__ xptr, + double* __restrict__ yptr); +template void LAU_H::MPaxpy(const int len, + const double scal, const float* __restrict__ xptr, + float* __restrict__ 
yptr); template void LAU_H::MPsyrk(const char uplo, const char trans, const int n, const int k, const double alpha, const double* const a, diff --git a/src/linear_algebra/mputils.h b/src/linear_algebra/mputils.h index fa838f17..d3f65afe 100644 --- a/src/linear_algebra/mputils.h +++ b/src/linear_algebra/mputils.h @@ -119,10 +119,8 @@ struct LinearAlgebraUtils /* mixed-precision vector times scalar plus vector. Accumulates results * in double precision and stores in single precision. */ - static void MPaxpy(const int len, double scal, - const double* __restrict__ xptr, double* __restrict__ yptr); - template - static void MPaxpy(const int len, double scal, const T1* __restrict__ xptr, + template + static void MPaxpy(const int len, T0 scal, const T1* __restrict__ xptr, T2* __restrict__ yptr); static void MPsyrk(const char uplo, const char trans, const int n, diff --git a/src/pb/GridFunc.cc b/src/pb/GridFunc.cc index c8bcf1df..f883d0d7 100644 --- a/src/pb/GridFunc.cc +++ b/src/pb/GridFunc.cc @@ -521,7 +521,7 @@ void GridFunc::scal(const double alpha) } template -void GridFunc::axpy(const double alpha, const GridFunc& vv) +void GridFunc::axpy(const T alpha, const GridFunc& vv) { assert(vv.grid_.sizeg() == grid_.sizeg()); diff --git a/src/pb/GridFunc.h b/src/pb/GridFunc.h index 699ed294..ab72c86d 100644 --- a/src/pb/GridFunc.h +++ b/src/pb/GridFunc.h @@ -192,7 +192,7 @@ class GridFunc : public GridFuncInterface GridFunc& operator/=(const GridFunc& B); - void axpy(const double alpha, const GridFunc& vv); + void axpy(const T alpha, const GridFunc& vv); void scal(const double alpha); void prod(const GridFunc& A, const GridFunc& B); void diff(const GridFunc& A, const GridFunc& B); diff --git a/src/pb/GridFuncVector.cc b/src/pb/GridFuncVector.cc index 64ffd867..7d9e375e 100644 --- a/src/pb/GridFuncVector.cc +++ b/src/pb/GridFuncVector.cc @@ -1649,8 +1649,8 @@ GridFuncVector::operator-=( assert(func.grid_.ghost_pt() == grid_.ghost_pt()); assert(this != &func); - 
LinearAlgebraUtils::MPaxpy( - nfunc_ * grid_.sizeg(), -1., func.memory_.get(), memory_.get()); + LinearAlgebraUtils::MPaxpy(nfunc_ * grid_.sizeg(), + (ScalarType)(-1.), func.memory_.get(), memory_.get()); updated_boundaries_ = (func.updated_boundaries_ && updated_boundaries_); @@ -1658,8 +1658,9 @@ GridFuncVector::operator-=( } template -void GridFuncVector::axpy( - const double alpha, const GridFuncVector& func) +template +void GridFuncVector::axpy(const ScalarType2 alpha, + const GridFuncVector& func) { LinearAlgebraUtils::MPaxpy( nfunc_ * grid_.sizeg(), alpha, func.memory_.get(), memory_.get()); @@ -2419,7 +2420,7 @@ void GridFuncVector::jacobi(const int type, { applyLap(type, w); w -= B; - axpy(-1. * jacobiFactor, w); + axpy((ScalarType)(-1. * jacobiFactor), w); set_updated_boundaries(false); } @@ -2459,6 +2460,11 @@ template void GridFuncVector::pointwiseProduct( GridFuncVector& A, const GridFunc& B); template void GridFuncVector::pointwiseProduct( GridFuncVector& A, const GridFunc& B); + +template void GridFuncVector::axpy( + const float alpha, const GridFuncVector& func); +template void GridFuncVector::axpy( + const double alpha, const GridFuncVector& func); #ifdef HAVE_MAGMA template class GridFuncVector; template class GridFuncVector; diff --git a/src/pb/GridFuncVector.h b/src/pb/GridFuncVector.h index 4798adea..ed69b3b0 100644 --- a/src/pb/GridFuncVector.h +++ b/src/pb/GridFuncVector.h @@ -462,7 +462,9 @@ class GridFuncVector void set_updated_boundaries(const bool flag) { updated_boundaries_ = flag; } GridFuncVector& operator-=( const GridFuncVector& func); - void axpy(const double alpha, + + template + void axpy(const ScalarType2 alpha, const GridFuncVector& func); template From 2bb915c87cc4daf16af502d6a11534c05288a0f3 Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Thu, 21 Aug 2025 21:58:36 -0400 Subject: [PATCH 59/99] Replace Li PBE potential with ONCV --- potentials/pseudo.Li_ONCV_PBE_SG15 | 1915 ++++++++++++++++++++++++++++ 
potentials/pseudo.Li_pbe | 1248 ------------------ 2 files changed, 1915 insertions(+), 1248 deletions(-) create mode 100644 potentials/pseudo.Li_ONCV_PBE_SG15 delete mode 100644 potentials/pseudo.Li_pbe diff --git a/potentials/pseudo.Li_ONCV_PBE_SG15 b/potentials/pseudo.Li_ONCV_PBE_SG15 new file mode 100644 index 00000000..4713d8a0 --- /dev/null +++ b/potentials/pseudo.Li_ONCV_PBE_SG15 @@ -0,0 +1,1915 @@ +# This pseudopotential file has been produced using the code +# ONCVPSP (Optimized Norm-Conserving Vanderbilt PSeudopotential) +# scalar-relativistic version 2.1.1, 03/26/2014 by D. R. Hamann +# The code is available through a link at URL www.mat-simresearch.com. +# Documentation with the package provides a full description of the +# input data below. +# +# While it is not required under the terms of the GNU GPL, it is +# suggested that you cite D. R. Hamann, Phys. Rev. B 88, 085117 (2013) +# in any publication using these pseudopotentials. +# +# Copyright 2015 The Regents of the University of California +# +# This work is licensed under the Creative Commons Attribution-ShareAlike +# 4.0 International License. To view a copy of this license, visit +# http://creativecommons.org/licenses/by-sa/4.0/ or send a letter to +# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. +# +# This pseudopotential is part of the Schlipf-Gygi norm-conserving +# pseudopotential library. Its construction parameters were tuned to +# reproduce materials of a training set with very high accuracy and +# should be suitable as a general purpose pseudopotential to treat a +# variety of different compounds. For details of the construction and +# testing of the pseudopotential please refer to: +# +# M. Schlipf, F. Gygi, Comp. Phys. Comm. 196, 36 (2015) +# http://dx.doi.org/10.1016/j.cpc.2015.05.011 +# +# We kindly ask that you include this reference in all publications +# associated to this pseudopotential. 
+# +# +# Input file for PP generation: +# +# # ATOM AND REFERENCE CONFIGURATION +# # atsym z nc nv iexc psfile +# Li 3.00 0 2 4 fpmd +# # +# # n l f energy (Ha) +# 1 0 2.00 +# 2 0 1.00 +# # +# # PSEUDOPOTENTIAL AND OPTIMIZATION +# # lmax +# 1 +# # +# # l, rc, ep, ncon, nbas, qcut +# 0 1.74553 -1.90562 5 8 7.75888 +# 1 1.40824 0.11292 5 8 11.11410 +# # +# # LOCAL POTENTIAL +# # lloc, lpopt, rc(5), dvloc0 +# 4 5 0.89499 0.00000 +# # +# # VANDERBILT-KLEINMAN-BYLANDER PROJECTORs +# # l, nproj, debl +# 0 2 1.80000 +# 1 2 1.03347 +# # +# # MODEL CORE CHARGE +# # icmod, fcfact +# 0 0.00000 +# # +# # LOG DERIVATIVE ANALYSIS +# # epsh1, epsh2, depsh +# -5.00 3.00 0.02 +# # +# # OUTPUT GRID +# # rlmax, drl +# 6.00 0.01 +# # +# # TEST CONFIGURATIONS +# # ncnf +# 0 +# # nvcnf +# # n l f +# +Li_ONCV_PBE-1 +# +Silver +#radii of balls and covalent bonds +1.0 2.7 +# Nlcc flag +0 +# Atomic number +3 +# Atomic mass +6.94000006 +# Number of valence electrons +3 +#Gaussian core charge parameter rc +1.0 +# Number of potentials +3 +# l-value for state which is local, then type of potential format +2 3 +# Local potential radius +3.2 +# Non-local potential radius +3.2 +# number of points in radial grid +602 +# VANDERBILT-KLEINMAN-BYLANDER PROJECTORs +# l, nproj +0 2 -0.4707813485E+01 -0.2191592421E+01 +1 2 -0.1677821370E+01 -0.1382610420E+00 +# l= 0 +0.0 -7.7593005895 -1.0192109936 +0.01 -7.7568112882 -1.0194882249 +0.02 -7.7493621217 -1.0202971423 +0.03 -7.7369338327 -1.0216609501 +0.04 -7.7195071639 -1.0236028526 +0.05 -7.6970556726 -1.0261549101 +0.06 -7.6695462423 -1.0293576177 +0.07 -7.6369397322 -1.0332593707 +0.08 -7.5991917591 -1.037915821 +0.09 -7.5562536037 -1.0433891309 +0.1 -7.508073233 -1.0497471309 +0.11 -7.4545964294 -1.0570623897 +0.12 -7.3957680159 -1.0654112041 +0.13 -7.3315331656 -1.0748725194 +0.14 -7.2618387832 -1.0855267901 +0.15 -7.186634945 -1.0974547908 +0.16 -7.1058763841 -1.1107363896 +0.17 -7.0195240059 -1.1254492955 +0.18 -6.9275464193 -1.141667791 +0.19 
-6.8299214694 -1.1594614629 +0.2 -6.7266377513 -1.1788939458 +0.21 -6.6176961029 -1.200021683 +0.22 -6.5031110416 -1.2228927294 +0.23 -6.3829121491 -1.2475455963 +0.24 -6.2571453715 -1.2740081622 +0.25 -6.1258742523 -1.3022966399 +0.26 -5.9891810223 -1.332414651 +0.27 -5.8471676284 -1.3643523576 +0.28 -5.6999565818 -1.3980857296 +0.29 -5.5476917005 -1.4335759024 +0.3 -5.3905386942 -1.4707686566 +0.31 -5.2286855923 -1.5095940253 +0.32 -5.0623429859 -1.5499660461 +0.33 -4.8917441534 -1.5917826115 +0.34 -4.7171449574 -1.6349254961 +0.35 -4.5388235441 -1.6792605403 +0.36 -4.3570799614 -1.7246379061 +0.37 -4.172235486 -1.7708925549 +0.38 -3.9846317723 -1.8178448665 +0.39 -3.7946299563 -1.8653013044 +0.4 -3.6026094194 -1.9130553363 +0.41 -3.4089664609 -1.9608884314 +0.42 -3.214112807 -2.0085711862 +0.43 -3.0184739587 -2.055864575 +0.44 -2.8224873949 -2.1025213123 +0.45 -2.626600643 -2.1482873175 +0.46 -2.4312692288 -2.1929032714 +0.47 -2.2369545193 -2.2361062538 +0.48 -2.0441214814 -2.2776314393 +0.49 -1.8532363685 -2.3172138591 +0.5 -1.6647643041 -2.3545902195 +0.51 -1.4791669005 -2.3895006957 +0.52 -1.2968998099 -2.4216907774 +0.53 -1.118410149 -2.450913204 +0.54 -0.94413434687 -2.4769295482 +0.55 -0.77449530271 -2.4995123575 +0.56 -0.60990047503 -2.5184465315 +0.57 -0.45073918141 -2.5335313282 +0.58 -0.29738077311 -2.5445816652 +0.59 -0.15017199575 -2.5514300525 +0.6 -0.0094358826149 -2.5539273235 +0.61 0.12453146732 -2.5519449868 +0.62 0.25146099181 -2.5453751729 +0.63 0.37111352658 -2.5341326766 +0.64 0.48328079855 -2.5181555598 +0.65 0.58778581947 -2.4974053018 +0.66 0.68448513053 -2.4718683148 +0.67 0.77326913801 -2.4415560143 +0.68 0.85406228696 -2.4065047639 +0.69 0.9268231688 -2.3667757677 +0.7 0.99154622952 -2.3224560865 +0.71 1.0482603331 -2.2736573657 +0.72 1.0970287085 -2.2205155535 +0.73 1.1379485353 -2.1631903396 +0.74 1.1711500525 -2.1018642456 +0.75 1.1967959926 -2.036741917 +0.76 1.2150805992 -1.9680490979 +0.77 1.2262277331 -1.8960309893 +0.78 
1.2304896364 -1.820951035 +0.79 1.228145274 -1.7430894102 +0.8 1.2194985196 -1.6627413965 +0.81 1.2048762016 -1.5802156579 +0.82 1.184626025 -1.4958324308 +0.83 1.1591143867 -1.4099216449 +0.84 1.1287241012 -1.3228209881 +0.85 1.0938520569 -1.2348739334 +0.86 1.05490682 -1.1464277436 +0.87 1.0123062069 -1.0578314692 +0.88 0.96647484347 -0.96943395791 +0.89 0.91784174161 -0.88158190682 +0.9 0.86683791898 -0.79461798042 +0.91 0.81389319634 -0.70887759925 +0.92 0.7594323121 -0.62468477328 +0.93 0.70387167633 -0.54234914361 +0.94 0.64761702996 -0.4621644887 +0.95 0.59106122966 -0.38440732114 +0.96 0.53458281949 -0.30933630656 +0.97 0.4785423228 -0.23718904247 +0.98 0.42328195918 -0.16818279962 +0.99 0.36912362332 -0.10251308529 +1.0 0.31636746756 -0.040352882291 +1.01 0.26529068328 0.018147948585 +1.02 0.21614684997 0.072862757827 +1.03 0.16916539076 0.12368816752 +1.04 0.12454771649 0.17054715475 +1.05 0.082470373831 0.2133856147 +1.06 0.043083689239 0.25217324203 +1.07 0.0065120756055 0.28690287693 +1.08 -0.02714463863 0.31758903479 +1.09 -0.057816978622 0.34427244982 +1.1 -0.085458809175 0.36701361535 +1.11 -0.11004858788 0.3858935715 +1.12 -0.13158628508 0.40101111338 +1.13 -0.1500994279 0.41248727342 +1.14 -0.16563634513 0.42045937767 +1.15 -0.178265114 0.4250799321 +1.16 -0.18807046663 0.42651410161 +1.17 -0.19516078332 0.42494426276 +1.18 -0.19965807561 0.42056210722 +1.19 -0.20169758712 0.41356821114 +1.2 -0.20142872217 0.40417225404 +1.21 -0.19901390278 0.39259116996 +1.22 -0.19462294096 0.37904535273 +1.23 -0.18843296654 0.36375871026 +1.24 -0.18062956949 0.3469570493 +1.25 -0.17140042562 0.32886496448 +1.26 -0.16093611844 0.30970691787 +1.27 -0.14942835147 0.28970204679 +1.28 -0.13706676851 0.2690647395 +1.29 -0.12404067387 0.24800651356 +1.3 -0.11053235807 0.22672597944 +1.31 -0.096719595481 0.20541541404 +1.32 -0.082775967686 0.18426029179 +1.33 -0.068861416613 0.16342787686 +1.34 -0.0551310275 0.14307944398 +1.35 -0.04172908387 0.12336253621 +1.36 
-0.028783936218 0.10440594115 +1.37 -0.016420371472 0.086334177332 +1.38 -0.0047416778062 0.069247108056 +1.39 0.0061586502908 0.053233909192 +1.4 0.016196242387 0.038373444503 +1.41 0.025311653016 0.024717655508 +1.42 0.033446675253 0.012317164084 +1.43 0.040568099018 0.0011960148746 +1.44 0.046652083258 -0.0086313620653 +1.45 0.051685603931 -0.017161923985 +1.46 0.055679271064 -0.024416714472 +1.47 0.058641496585 -0.030414623402 +1.48 0.060610312422 -0.035204332823 +1.49 0.061621969933 -0.038832637114 +1.5 0.061731691095 -0.04136515654 +1.51 0.06100215529 -0.042874613882 +1.52 0.059503512477 -0.043441079158 +1.53 0.057318988046 -0.043156859831 +1.54 0.054531363789 -0.042113991585 +1.55 0.051236871402 -0.040416609638 +1.56 0.047528779041 -0.038166294726 +1.57 0.043507889336 -0.035470691094 +1.58 0.039273079584 -0.032435823961 +1.59 0.034923278225 -0.029166627307 +1.6 0.030555075345 -0.025765776124 +1.61 0.026260024876 -0.022330178375 +1.62 0.022123590707 -0.018950671038 +1.63 0.018223882204 -0.015710358314 +1.64 0.014626616423 -0.012680135019 +1.65 0.011389900708 -0.0099231517255 +1.66 0.0085521986237 -0.0074834592058 +1.67 0.0061449383254 -0.0053980868807 +1.68 0.0041740658843 -0.0036795505535 +1.69 0.0026383564748 -0.0023333398664 +1.7 0.0015094773016 -0.0013390589523 +1.71 0.00074981081503 -0.00066738518164 +1.72 0.00029951000811 -0.00026781274301 +1.73 7.9879263209e-05 -7.2256702049e-05 +1.74 1.5661696141e-05 -1.487504535e-05 +1.75 6.2099461977e-06 -6.0427619678e-06 +1.76 -2.071124021e-06 1.6637105816e-06 +1.77 -7.5368369057e-07 6.9971701676e-07 +1.78 -9.3400344889e-07 8.671251814e-07 +1.79 0.0 0.0 +1.8 0.0 0.0 +1.81 0.0 0.0 +1.82 0.0 0.0 +1.83 0.0 0.0 +1.84 0.0 0.0 +1.85 0.0 0.0 +1.86 0.0 0.0 +1.87 0.0 0.0 +1.88 0.0 0.0 +1.89 0.0 0.0 +1.9 0.0 0.0 +1.91 0.0 0.0 +1.92 0.0 0.0 +1.93 0.0 0.0 +1.94 0.0 0.0 +1.95 0.0 0.0 +1.96 0.0 0.0 +1.97 0.0 0.0 +1.98 0.0 0.0 +1.99 0.0 0.0 +2.0 0.0 0.0 +2.01 0.0 0.0 +2.02 0.0 0.0 +2.03 0.0 0.0 +2.04 0.0 0.0 +2.05 0.0 0.0 +2.06 
0.0 0.0 +2.07 0.0 0.0 +2.08 0.0 0.0 +2.09 0.0 0.0 +2.1 0.0 0.0 +2.11 0.0 0.0 +2.12 0.0 0.0 +2.13 0.0 0.0 +2.14 0.0 0.0 +2.15 0.0 0.0 +2.16 0.0 0.0 +2.17 0.0 0.0 +2.18 0.0 0.0 +2.19 0.0 0.0 +2.2 0.0 0.0 +2.21 0.0 0.0 +2.22 0.0 0.0 +2.23 0.0 0.0 +2.24 0.0 0.0 +2.25 0.0 0.0 +2.26 0.0 0.0 +2.27 0.0 0.0 +2.28 0.0 0.0 +2.29 0.0 0.0 +2.3 0.0 0.0 +2.31 0.0 0.0 +2.32 0.0 0.0 +2.33 0.0 0.0 +2.34 0.0 0.0 +2.35 0.0 0.0 +2.36 0.0 0.0 +2.37 0.0 0.0 +2.38 0.0 0.0 +2.39 0.0 0.0 +2.4 0.0 0.0 +2.41 0.0 0.0 +2.42 0.0 0.0 +2.43 0.0 0.0 +2.44 0.0 0.0 +2.45 0.0 0.0 +2.46 0.0 0.0 +2.47 0.0 0.0 +2.48 0.0 0.0 +2.49 0.0 0.0 +2.5 0.0 0.0 +2.51 0.0 0.0 +2.52 0.0 0.0 +2.53 0.0 0.0 +2.54 0.0 0.0 +2.55 0.0 0.0 +2.56 0.0 0.0 +2.57 0.0 0.0 +2.58 0.0 0.0 +2.59 0.0 0.0 +2.6 0.0 0.0 +2.61 0.0 0.0 +2.62 0.0 0.0 +2.63 0.0 0.0 +2.64 0.0 0.0 +2.65 0.0 0.0 +2.66 0.0 0.0 +2.67 0.0 0.0 +2.68 0.0 0.0 +2.69 0.0 0.0 +2.7 0.0 0.0 +2.71 0.0 0.0 +2.72 0.0 0.0 +2.73 0.0 0.0 +2.74 0.0 0.0 +2.75 0.0 0.0 +2.76 0.0 0.0 +2.77 0.0 0.0 +2.78 0.0 0.0 +2.79 0.0 0.0 +2.8 0.0 0.0 +2.81 0.0 0.0 +2.82 0.0 0.0 +2.83 0.0 0.0 +2.84 0.0 0.0 +2.85 0.0 0.0 +2.86 0.0 0.0 +2.87 0.0 0.0 +2.88 0.0 0.0 +2.89 0.0 0.0 +2.9 0.0 0.0 +2.91 0.0 0.0 +2.92 0.0 0.0 +2.93 0.0 0.0 +2.94 0.0 0.0 +2.95 0.0 0.0 +2.96 0.0 0.0 +2.97 0.0 0.0 +2.98 0.0 0.0 +2.99 0.0 0.0 +3.0 0.0 0.0 +3.01 0.0 0.0 +3.02 0.0 0.0 +3.03 0.0 0.0 +3.04 0.0 0.0 +3.05 0.0 0.0 +3.06 0.0 0.0 +3.07 0.0 0.0 +3.08 0.0 0.0 +3.09 0.0 0.0 +3.1 0.0 0.0 +3.11 0.0 0.0 +3.12 0.0 0.0 +3.13 0.0 0.0 +3.14 0.0 0.0 +3.15 0.0 0.0 +3.16 0.0 0.0 +3.17 0.0 0.0 +3.18 0.0 0.0 +3.19 0.0 0.0 +3.2 0.0 0.0 +3.21 0.0 0.0 +3.22 0.0 0.0 +3.23 0.0 0.0 +3.24 0.0 0.0 +3.25 0.0 0.0 +3.26 0.0 0.0 +3.27 0.0 0.0 +3.28 0.0 0.0 +3.29 0.0 0.0 +3.3 0.0 0.0 +3.31 0.0 0.0 +3.32 0.0 0.0 +3.33 0.0 0.0 +3.34 0.0 0.0 +3.35 0.0 0.0 +3.36 0.0 0.0 +3.37 0.0 0.0 +3.38 0.0 0.0 +3.39 0.0 0.0 +3.4 0.0 0.0 +3.41 0.0 0.0 +3.42 0.0 0.0 +3.43 0.0 0.0 +3.44 0.0 0.0 +3.45 0.0 0.0 +3.46 0.0 0.0 +3.47 0.0 0.0 +3.48 0.0 0.0 +3.49 0.0 0.0 
+3.5 0.0 0.0 +3.51 0.0 0.0 +3.52 0.0 0.0 +3.53 0.0 0.0 +3.54 0.0 0.0 +3.55 0.0 0.0 +3.56 0.0 0.0 +3.57 0.0 0.0 +3.58 0.0 0.0 +3.59 0.0 0.0 +3.6 0.0 0.0 +3.61 0.0 0.0 +3.62 0.0 0.0 +3.63 0.0 0.0 +3.64 0.0 0.0 +3.65 0.0 0.0 +3.66 0.0 0.0 +3.67 0.0 0.0 +3.68 0.0 0.0 +3.69 0.0 0.0 +3.7 0.0 0.0 +3.71 0.0 0.0 +3.72 0.0 0.0 +3.73 0.0 0.0 +3.74 0.0 0.0 +3.75 0.0 0.0 +3.76 0.0 0.0 +3.77 0.0 0.0 +3.78 0.0 0.0 +3.79 0.0 0.0 +3.8 0.0 0.0 +3.81 0.0 0.0 +3.82 0.0 0.0 +3.83 0.0 0.0 +3.84 0.0 0.0 +3.85 0.0 0.0 +3.86 0.0 0.0 +3.87 0.0 0.0 +3.88 0.0 0.0 +3.89 0.0 0.0 +3.9 0.0 0.0 +3.91 0.0 0.0 +3.92 0.0 0.0 +3.93 0.0 0.0 +3.94 0.0 0.0 +3.95 0.0 0.0 +3.96 0.0 0.0 +3.97 0.0 0.0 +3.98 0.0 0.0 +3.99 0.0 0.0 +4.0 0.0 0.0 +4.01 0.0 0.0 +4.02 0.0 0.0 +4.03 0.0 0.0 +4.04 0.0 0.0 +4.05 0.0 0.0 +4.06 0.0 0.0 +4.07 0.0 0.0 +4.08 0.0 0.0 +4.09 0.0 0.0 +4.1 0.0 0.0 +4.11 0.0 0.0 +4.12 0.0 0.0 +4.13 0.0 0.0 +4.14 0.0 0.0 +4.15 0.0 0.0 +4.16 0.0 0.0 +4.17 0.0 0.0 +4.18 0.0 0.0 +4.19 0.0 0.0 +4.2 0.0 0.0 +4.21 0.0 0.0 +4.22 0.0 0.0 +4.23 0.0 0.0 +4.24 0.0 0.0 +4.25 0.0 0.0 +4.26 0.0 0.0 +4.27 0.0 0.0 +4.28 0.0 0.0 +4.29 0.0 0.0 +4.3 0.0 0.0 +4.31 0.0 0.0 +4.32 0.0 0.0 +4.33 0.0 0.0 +4.34 0.0 0.0 +4.35 0.0 0.0 +4.36 0.0 0.0 +4.37 0.0 0.0 +4.38 0.0 0.0 +4.39 0.0 0.0 +4.4 0.0 0.0 +4.41 0.0 0.0 +4.42 0.0 0.0 +4.43 0.0 0.0 +4.44 0.0 0.0 +4.45 0.0 0.0 +4.46 0.0 0.0 +4.47 0.0 0.0 +4.48 0.0 0.0 +4.49 0.0 0.0 +4.5 0.0 0.0 +4.51 0.0 0.0 +4.52 0.0 0.0 +4.53 0.0 0.0 +4.54 0.0 0.0 +4.55 0.0 0.0 +4.56 0.0 0.0 +4.57 0.0 0.0 +4.58 0.0 0.0 +4.59 0.0 0.0 +4.6 0.0 0.0 +4.61 0.0 0.0 +4.62 0.0 0.0 +4.63 0.0 0.0 +4.64 0.0 0.0 +4.65 0.0 0.0 +4.66 0.0 0.0 +4.67 0.0 0.0 +4.68 0.0 0.0 +4.69 0.0 0.0 +4.7 0.0 0.0 +4.71 0.0 0.0 +4.72 0.0 0.0 +4.73 0.0 0.0 +4.74 0.0 0.0 +4.75 0.0 0.0 +4.76 0.0 0.0 +4.77 0.0 0.0 +4.78 0.0 0.0 +4.79 0.0 0.0 +4.8 0.0 0.0 +4.81 0.0 0.0 +4.82 0.0 0.0 +4.83 0.0 0.0 +4.84 0.0 0.0 +4.85 0.0 0.0 +4.86 0.0 0.0 +4.87 0.0 0.0 +4.88 0.0 0.0 +4.89 0.0 0.0 +4.9 0.0 0.0 +4.91 0.0 0.0 +4.92 0.0 0.0 +4.93 0.0 
0.0 +4.94 0.0 0.0 +4.95 0.0 0.0 +4.96 0.0 0.0 +4.97 0.0 0.0 +4.98 0.0 0.0 +4.99 0.0 0.0 +5.0 0.0 0.0 +5.01 0.0 0.0 +5.02 0.0 0.0 +5.03 0.0 0.0 +5.04 0.0 0.0 +5.05 0.0 0.0 +5.06 0.0 0.0 +5.07 0.0 0.0 +5.08 0.0 0.0 +5.09 0.0 0.0 +5.1 0.0 0.0 +5.11 0.0 0.0 +5.12 0.0 0.0 +5.13 0.0 0.0 +5.14 0.0 0.0 +5.15 0.0 0.0 +5.16 0.0 0.0 +5.17 0.0 0.0 +5.18 0.0 0.0 +5.19 0.0 0.0 +5.2 0.0 0.0 +5.21 0.0 0.0 +5.22 0.0 0.0 +5.23 0.0 0.0 +5.24 0.0 0.0 +5.25 0.0 0.0 +5.26 0.0 0.0 +5.27 0.0 0.0 +5.28 0.0 0.0 +5.29 0.0 0.0 +5.3 0.0 0.0 +5.31 0.0 0.0 +5.32 0.0 0.0 +5.33 0.0 0.0 +5.34 0.0 0.0 +5.35 0.0 0.0 +5.36 0.0 0.0 +5.37 0.0 0.0 +5.38 0.0 0.0 +5.39 0.0 0.0 +5.4 0.0 0.0 +5.41 0.0 0.0 +5.42 0.0 0.0 +5.43 0.0 0.0 +5.44 0.0 0.0 +5.45 0.0 0.0 +5.46 0.0 0.0 +5.47 0.0 0.0 +5.48 0.0 0.0 +5.49 0.0 0.0 +5.5 0.0 0.0 +5.51 0.0 0.0 +5.52 0.0 0.0 +5.53 0.0 0.0 +5.54 0.0 0.0 +5.55 0.0 0.0 +5.56 0.0 0.0 +5.57 0.0 0.0 +5.58 0.0 0.0 +5.59 0.0 0.0 +5.6 0.0 0.0 +5.61 0.0 0.0 +5.62 0.0 0.0 +5.63 0.0 0.0 +5.64 0.0 0.0 +5.65 0.0 0.0 +5.66 0.0 0.0 +5.67 0.0 0.0 +5.68 0.0 0.0 +5.69 0.0 0.0 +5.7 0.0 0.0 +5.71 0.0 0.0 +5.72 0.0 0.0 +5.73 0.0 0.0 +5.74 0.0 0.0 +5.75 0.0 0.0 +5.76 0.0 0.0 +5.77 0.0 0.0 +5.78 0.0 0.0 +5.79 0.0 0.0 +5.8 0.0 0.0 +5.81 0.0 0.0 +5.82 0.0 0.0 +5.83 0.0 0.0 +5.84 0.0 0.0 +5.85 0.0 0.0 +5.86 0.0 0.0 +5.87 0.0 0.0 +5.88 0.0 0.0 +5.89 0.0 0.0 +5.9 0.0 0.0 +5.91 0.0 0.0 +5.92 0.0 0.0 +5.93 0.0 0.0 +5.94 0.0 0.0 +5.95 0.0 0.0 +5.96 0.0 0.0 +5.97 0.0 0.0 +5.98 0.0 0.0 +5.99 0.0 0.0 +6.0 0.0 0.0 +6.01 0.0 0.0 +# l= 1 +0.0 0.0 0.0 +0.01 0.29066480787 -0.12766802783 +0.02 0.58059469457 -0.25370368503 +0.03 0.86905459105 -0.37649810362 +0.04 1.1553091839 -0.49448908497 +0.05 1.4386229206 -0.60618359966 +0.06 1.7182601654 -0.71017929906 +0.07 1.9934855498 -0.80518473143 +0.08 2.2635645602 -0.89003797427 +0.09 2.5277643986 -0.96372341716 +0.1 2.7853551471 -1.0253864565 +0.11 3.0356112595 -1.0743458929 +0.12 3.2778133999 -1.1101038562 +0.13 3.5112506355 -1.132353118 +0.14 3.7352229868 -1.1409816885 
+0.15 3.9490443308 -1.1360746358 +0.16 4.1520456404 -1.1179131031 +0.17 4.3435785405 -1.0869705414 +0.18 4.5230191502 -1.043906215 +0.19 4.6897721746 -0.98955607228 +0.2 4.8432751971 -0.92492112794 +0.21 4.9830031356 -0.85115349716 +0.22 5.1084727845 -0.76954032576 +0.23 5.219247403 -0.68148580784 +0.24 5.3149412635 -0.58849160184 +0.25 5.3952241285 -0.49213581023 +0.26 5.4598255167 -0.39405106569 +0.27 5.5085387903 -0.29590166122 +0.28 5.541224879 -0.19936048846 +0.29 5.5578156616 -0.10608575913 +0.3 5.5583168965 -0.017698005217 +0.31 5.5428106467 0.06424234575 +0.32 5.5114571261 0.1382563903 +0.33 5.4644959792 0.20296728607 +0.34 5.4022468522 0.25711902545 +0.35 5.325109272 0.29959336152 +0.36 5.2335618541 0.32942535652 +0.37 5.1281607036 0.34581612529 +0.38 5.0095370906 0.34814322996 +0.39 4.8783943935 0.33596941846 +0.4 4.7355042613 0.30904776792 +0.41 4.5817020837 0.26732465609 +0.42 4.4178817653 0.21094005537 +0.43 4.2449898494 0.14022512555 +0.44 4.0640190434 0.055697201533 +0.45 3.8760012064 -0.041947731618 +0.46 3.6819998663 -0.15184494879 +0.47 3.4831023478 -0.27297311531 +0.48 3.2804117938 -0.40416833694 +0.49 3.0750382363 -0.54414020192 +0.5 2.8680904422 -0.69148938787 +0.51 2.6606674962 -0.84472613781 +0.52 2.4538500308 -1.0022901441 +0.53 2.2486913509 -1.1625719248 +0.54 2.0462109192 -1.323931525 +0.55 1.8473843041 -1.4847227682 +0.56 1.653138125 -1.6433107038 +0.57 1.4643414037 -1.7980948263 +0.58 1.281800928 -1.947525902 +0.59 1.1062538905 -2.0901278939 +0.6 0.93836579773 -2.2245096048 +0.61 0.77872264836 -2.3493884415 +0.62 0.62783196803 -2.4635949995 +0.63 0.48611725026 -2.5660924179 +0.64 0.35391756909 -2.6559840633 +0.65 0.23148844511 -2.7325176066 +0.66 0.11899991646 -2.7950966015 +0.67 0.016538750486 -2.8432814671 +0.68 -0.075888903222 -2.8767889702 +0.69 -0.15835461263 -2.8954907864 +0.7 -0.23100553111 -2.8994156318 +0.71 -0.29405849442 -2.8887392669 +0.72 -0.34779536494 -2.8637786145 +0.73 -0.39255757684 -2.8249835899 +0.74 -0.42874014339 
-2.7729270037 +0.75 -0.45678532652 -2.7082941525 +0.76 -0.47717572969 -2.6318706129 +0.77 -0.49042753238 -2.5445289574 +0.78 -0.4970835879 -2.447216249 +0.79 -0.49770653893 -2.3409410988 +0.8 -0.49287207593 -2.2267610065 +0.81 -0.48316244483 -2.1057702977 +0.82 -0.46916030733 -1.9790889658 +0.83 -0.45144305243 -1.8478527201 +0.84 -0.43057765246 -1.7132045264 +0.85 -0.40711614974 -1.57628791 +0.86 -0.38159185278 -1.4382422733 +0.87 -0.35451631218 -1.3002004539 +0.88 -0.32637713717 -1.1632887249 +0.89 -0.29763676098 -1.0286299149 +0.9 -0.26873225925 -0.89735010789 +0.91 -0.24007124473 -0.77054582678 +0.92 -0.21202347663 -0.64921538628 +0.93 -0.18491499163 -0.53421702583 +0.94 -0.15902690746 -0.42626728118 +0.95 -0.13459495726 -0.32594113668 +0.96 -0.11181120497 -0.23367796686 +0.97 -0.090820056384 -0.14977127266 +0.98 -0.071724461213 -0.074387465455 +0.99 -0.05458647007 -0.0075684657988 +1.0 -0.039429767264 0.050760037837 +1.01 -0.026242635758 0.10077910624 +1.02 -0.014981534149 0.1427654233 +1.03 -0.0055743761777 0.17708106785 +1.04 0.0020772571196 0.20416531617 +1.05 0.0080912125581 0.2245148205 +1.06 0.012601877251 0.23867416975 +1.07 0.015756248457 0.24722388705 +1.08 0.017710687794 0.25077081678 +1.09 0.01862572475 0.24993011099 +1.1 0.018662681782 0.24531711912 +1.11 0.017980644302 0.23753738864 +1.12 0.016735251642 0.22718372487 +1.13 0.015071425965 0.21481153372 +1.14 0.013123583416 0.20094132022 +1.15 0.011014308211 0.18605454018 +1.16 0.0088554209722 0.17059765277 +1.17 0.0067375505894 0.15494682671 +1.18 0.0047378191094 0.13943452685 +1.19 0.0029193524458 0.1243477471 +1.2 0.001328262614 0.10991769715 +1.21 -7.6992942738e-06 0.096314572727 +1.22 -0.0010722476971 0.083670780504 +1.23 -0.0018609144401 0.07207986554 +1.24 -0.0023860261071 0.06157688782 +1.25 -0.0026664363232 0.052173793152 +1.26 -0.0027284955274 0.043856886321 +1.27 -0.0026075696808 0.036575485324 +1.28 -0.0023404917442 0.030270092219 +1.29 -0.0019675321965 0.024867237243 +1.3 
-0.0015277073539 0.020284143588 +1.31 -0.0010579085108 0.016439036083 +1.32 -0.00059366211672 0.013247738376 +1.33 -0.00016101283768 0.010637243204 +1.34 0.00021686756196 0.0085367398153 +1.35 0.00052471216314 0.0068851056495 +1.36 0.00075756576343 0.0056302865355 +1.37 0.00091293900599 0.0047214403435 +1.38 0.0010007352126 0.0041128940905 +1.39 0.0010335306184 0.0037478485963 +1.4 0.0010886959567 0.0037639012113 +1.41 0.00091991355586 0.0031664945096 +1.42 0.00029615236778 0.0010139679602 +1.43 -6.1278603913e-05 -0.00021121956431 +1.44 -2.2865795105e-05 -7.88154914e-05 +1.45 0.0 0.0 +1.46 0.0 0.0 +1.47 0.0 0.0 +1.48 0.0 0.0 +1.49 0.0 0.0 +1.5 0.0 0.0 +1.51 0.0 0.0 +1.52 0.0 0.0 +1.53 0.0 0.0 +1.54 0.0 0.0 +1.55 0.0 0.0 +1.56 0.0 0.0 +1.57 0.0 0.0 +1.58 0.0 0.0 +1.59 0.0 0.0 +1.6 0.0 0.0 +1.61 0.0 0.0 +1.62 0.0 0.0 +1.63 0.0 0.0 +1.64 0.0 0.0 +1.65 0.0 0.0 +1.66 0.0 0.0 +1.67 0.0 0.0 +1.68 0.0 0.0 +1.69 0.0 0.0 +1.7 0.0 0.0 +1.71 0.0 0.0 +1.72 0.0 0.0 +1.73 0.0 0.0 +1.74 0.0 0.0 +1.75 0.0 0.0 +1.76 0.0 0.0 +1.77 0.0 0.0 +1.78 0.0 0.0 +1.79 0.0 0.0 +1.8 0.0 0.0 +1.81 0.0 0.0 +1.82 0.0 0.0 +1.83 0.0 0.0 +1.84 0.0 0.0 +1.85 0.0 0.0 +1.86 0.0 0.0 +1.87 0.0 0.0 +1.88 0.0 0.0 +1.89 0.0 0.0 +1.9 0.0 0.0 +1.91 0.0 0.0 +1.92 0.0 0.0 +1.93 0.0 0.0 +1.94 0.0 0.0 +1.95 0.0 0.0 +1.96 0.0 0.0 +1.97 0.0 0.0 +1.98 0.0 0.0 +1.99 0.0 0.0 +2.0 0.0 0.0 +2.01 0.0 0.0 +2.02 0.0 0.0 +2.03 0.0 0.0 +2.04 0.0 0.0 +2.05 0.0 0.0 +2.06 0.0 0.0 +2.07 0.0 0.0 +2.08 0.0 0.0 +2.09 0.0 0.0 +2.1 0.0 0.0 +2.11 0.0 0.0 +2.12 0.0 0.0 +2.13 0.0 0.0 +2.14 0.0 0.0 +2.15 0.0 0.0 +2.16 0.0 0.0 +2.17 0.0 0.0 +2.18 0.0 0.0 +2.19 0.0 0.0 +2.2 0.0 0.0 +2.21 0.0 0.0 +2.22 0.0 0.0 +2.23 0.0 0.0 +2.24 0.0 0.0 +2.25 0.0 0.0 +2.26 0.0 0.0 +2.27 0.0 0.0 +2.28 0.0 0.0 +2.29 0.0 0.0 +2.3 0.0 0.0 +2.31 0.0 0.0 +2.32 0.0 0.0 +2.33 0.0 0.0 +2.34 0.0 0.0 +2.35 0.0 0.0 +2.36 0.0 0.0 +2.37 0.0 0.0 +2.38 0.0 0.0 +2.39 0.0 0.0 +2.4 0.0 0.0 +2.41 0.0 0.0 +2.42 0.0 0.0 +2.43 0.0 0.0 +2.44 0.0 0.0 +2.45 0.0 0.0 +2.46 0.0 0.0 
+2.47 0.0 0.0 +2.48 0.0 0.0 +2.49 0.0 0.0 +2.5 0.0 0.0 +2.51 0.0 0.0 +2.52 0.0 0.0 +2.53 0.0 0.0 +2.54 0.0 0.0 +2.55 0.0 0.0 +2.56 0.0 0.0 +2.57 0.0 0.0 +2.58 0.0 0.0 +2.59 0.0 0.0 +2.6 0.0 0.0 +2.61 0.0 0.0 +2.62 0.0 0.0 +2.63 0.0 0.0 +2.64 0.0 0.0 +2.65 0.0 0.0 +2.66 0.0 0.0 +2.67 0.0 0.0 +2.68 0.0 0.0 +2.69 0.0 0.0 +2.7 0.0 0.0 +2.71 0.0 0.0 +2.72 0.0 0.0 +2.73 0.0 0.0 +2.74 0.0 0.0 +2.75 0.0 0.0 +2.76 0.0 0.0 +2.77 0.0 0.0 +2.78 0.0 0.0 +2.79 0.0 0.0 +2.8 0.0 0.0 +2.81 0.0 0.0 +2.82 0.0 0.0 +2.83 0.0 0.0 +2.84 0.0 0.0 +2.85 0.0 0.0 +2.86 0.0 0.0 +2.87 0.0 0.0 +2.88 0.0 0.0 +2.89 0.0 0.0 +2.9 0.0 0.0 +2.91 0.0 0.0 +2.92 0.0 0.0 +2.93 0.0 0.0 +2.94 0.0 0.0 +2.95 0.0 0.0 +2.96 0.0 0.0 +2.97 0.0 0.0 +2.98 0.0 0.0 +2.99 0.0 0.0 +3.0 0.0 0.0 +3.01 0.0 0.0 +3.02 0.0 0.0 +3.03 0.0 0.0 +3.04 0.0 0.0 +3.05 0.0 0.0 +3.06 0.0 0.0 +3.07 0.0 0.0 +3.08 0.0 0.0 +3.09 0.0 0.0 +3.1 0.0 0.0 +3.11 0.0 0.0 +3.12 0.0 0.0 +3.13 0.0 0.0 +3.14 0.0 0.0 +3.15 0.0 0.0 +3.16 0.0 0.0 +3.17 0.0 0.0 +3.18 0.0 0.0 +3.19 0.0 0.0 +3.2 0.0 0.0 +3.21 0.0 0.0 +3.22 0.0 0.0 +3.23 0.0 0.0 +3.24 0.0 0.0 +3.25 0.0 0.0 +3.26 0.0 0.0 +3.27 0.0 0.0 +3.28 0.0 0.0 +3.29 0.0 0.0 +3.3 0.0 0.0 +3.31 0.0 0.0 +3.32 0.0 0.0 +3.33 0.0 0.0 +3.34 0.0 0.0 +3.35 0.0 0.0 +3.36 0.0 0.0 +3.37 0.0 0.0 +3.38 0.0 0.0 +3.39 0.0 0.0 +3.4 0.0 0.0 +3.41 0.0 0.0 +3.42 0.0 0.0 +3.43 0.0 0.0 +3.44 0.0 0.0 +3.45 0.0 0.0 +3.46 0.0 0.0 +3.47 0.0 0.0 +3.48 0.0 0.0 +3.49 0.0 0.0 +3.5 0.0 0.0 +3.51 0.0 0.0 +3.52 0.0 0.0 +3.53 0.0 0.0 +3.54 0.0 0.0 +3.55 0.0 0.0 +3.56 0.0 0.0 +3.57 0.0 0.0 +3.58 0.0 0.0 +3.59 0.0 0.0 +3.6 0.0 0.0 +3.61 0.0 0.0 +3.62 0.0 0.0 +3.63 0.0 0.0 +3.64 0.0 0.0 +3.65 0.0 0.0 +3.66 0.0 0.0 +3.67 0.0 0.0 +3.68 0.0 0.0 +3.69 0.0 0.0 +3.7 0.0 0.0 +3.71 0.0 0.0 +3.72 0.0 0.0 +3.73 0.0 0.0 +3.74 0.0 0.0 +3.75 0.0 0.0 +3.76 0.0 0.0 +3.77 0.0 0.0 +3.78 0.0 0.0 +3.79 0.0 0.0 +3.8 0.0 0.0 +3.81 0.0 0.0 +3.82 0.0 0.0 +3.83 0.0 0.0 +3.84 0.0 0.0 +3.85 0.0 0.0 +3.86 0.0 0.0 +3.87 0.0 0.0 +3.88 0.0 0.0 +3.89 0.0 0.0 +3.9 0.0 
0.0 +3.91 0.0 0.0 +3.92 0.0 0.0 +3.93 0.0 0.0 +3.94 0.0 0.0 +3.95 0.0 0.0 +3.96 0.0 0.0 +3.97 0.0 0.0 +3.98 0.0 0.0 +3.99 0.0 0.0 +4.0 0.0 0.0 +4.01 0.0 0.0 +4.02 0.0 0.0 +4.03 0.0 0.0 +4.04 0.0 0.0 +4.05 0.0 0.0 +4.06 0.0 0.0 +4.07 0.0 0.0 +4.08 0.0 0.0 +4.09 0.0 0.0 +4.1 0.0 0.0 +4.11 0.0 0.0 +4.12 0.0 0.0 +4.13 0.0 0.0 +4.14 0.0 0.0 +4.15 0.0 0.0 +4.16 0.0 0.0 +4.17 0.0 0.0 +4.18 0.0 0.0 +4.19 0.0 0.0 +4.2 0.0 0.0 +4.21 0.0 0.0 +4.22 0.0 0.0 +4.23 0.0 0.0 +4.24 0.0 0.0 +4.25 0.0 0.0 +4.26 0.0 0.0 +4.27 0.0 0.0 +4.28 0.0 0.0 +4.29 0.0 0.0 +4.3 0.0 0.0 +4.31 0.0 0.0 +4.32 0.0 0.0 +4.33 0.0 0.0 +4.34 0.0 0.0 +4.35 0.0 0.0 +4.36 0.0 0.0 +4.37 0.0 0.0 +4.38 0.0 0.0 +4.39 0.0 0.0 +4.4 0.0 0.0 +4.41 0.0 0.0 +4.42 0.0 0.0 +4.43 0.0 0.0 +4.44 0.0 0.0 +4.45 0.0 0.0 +4.46 0.0 0.0 +4.47 0.0 0.0 +4.48 0.0 0.0 +4.49 0.0 0.0 +4.5 0.0 0.0 +4.51 0.0 0.0 +4.52 0.0 0.0 +4.53 0.0 0.0 +4.54 0.0 0.0 +4.55 0.0 0.0 +4.56 0.0 0.0 +4.57 0.0 0.0 +4.58 0.0 0.0 +4.59 0.0 0.0 +4.6 0.0 0.0 +4.61 0.0 0.0 +4.62 0.0 0.0 +4.63 0.0 0.0 +4.64 0.0 0.0 +4.65 0.0 0.0 +4.66 0.0 0.0 +4.67 0.0 0.0 +4.68 0.0 0.0 +4.69 0.0 0.0 +4.7 0.0 0.0 +4.71 0.0 0.0 +4.72 0.0 0.0 +4.73 0.0 0.0 +4.74 0.0 0.0 +4.75 0.0 0.0 +4.76 0.0 0.0 +4.77 0.0 0.0 +4.78 0.0 0.0 +4.79 0.0 0.0 +4.8 0.0 0.0 +4.81 0.0 0.0 +4.82 0.0 0.0 +4.83 0.0 0.0 +4.84 0.0 0.0 +4.85 0.0 0.0 +4.86 0.0 0.0 +4.87 0.0 0.0 +4.88 0.0 0.0 +4.89 0.0 0.0 +4.9 0.0 0.0 +4.91 0.0 0.0 +4.92 0.0 0.0 +4.93 0.0 0.0 +4.94 0.0 0.0 +4.95 0.0 0.0 +4.96 0.0 0.0 +4.97 0.0 0.0 +4.98 0.0 0.0 +4.99 0.0 0.0 +5.0 0.0 0.0 +5.01 0.0 0.0 +5.02 0.0 0.0 +5.03 0.0 0.0 +5.04 0.0 0.0 +5.05 0.0 0.0 +5.06 0.0 0.0 +5.07 0.0 0.0 +5.08 0.0 0.0 +5.09 0.0 0.0 +5.1 0.0 0.0 +5.11 0.0 0.0 +5.12 0.0 0.0 +5.13 0.0 0.0 +5.14 0.0 0.0 +5.15 0.0 0.0 +5.16 0.0 0.0 +5.17 0.0 0.0 +5.18 0.0 0.0 +5.19 0.0 0.0 +5.2 0.0 0.0 +5.21 0.0 0.0 +5.22 0.0 0.0 +5.23 0.0 0.0 +5.24 0.0 0.0 +5.25 0.0 0.0 +5.26 0.0 0.0 +5.27 0.0 0.0 +5.28 0.0 0.0 +5.29 0.0 0.0 +5.3 0.0 0.0 +5.31 0.0 0.0 +5.32 0.0 0.0 +5.33 0.0 0.0 +5.34 
0.0 0.0 +5.35 0.0 0.0 +5.36 0.0 0.0 +5.37 0.0 0.0 +5.38 0.0 0.0 +5.39 0.0 0.0 +5.4 0.0 0.0 +5.41 0.0 0.0 +5.42 0.0 0.0 +5.43 0.0 0.0 +5.44 0.0 0.0 +5.45 0.0 0.0 +5.46 0.0 0.0 +5.47 0.0 0.0 +5.48 0.0 0.0 +5.49 0.0 0.0 +5.5 0.0 0.0 +5.51 0.0 0.0 +5.52 0.0 0.0 +5.53 0.0 0.0 +5.54 0.0 0.0 +5.55 0.0 0.0 +5.56 0.0 0.0 +5.57 0.0 0.0 +5.58 0.0 0.0 +5.59 0.0 0.0 +5.6 0.0 0.0 +5.61 0.0 0.0 +5.62 0.0 0.0 +5.63 0.0 0.0 +5.64 0.0 0.0 +5.65 0.0 0.0 +5.66 0.0 0.0 +5.67 0.0 0.0 +5.68 0.0 0.0 +5.69 0.0 0.0 +5.7 0.0 0.0 +5.71 0.0 0.0 +5.72 0.0 0.0 +5.73 0.0 0.0 +5.74 0.0 0.0 +5.75 0.0 0.0 +5.76 0.0 0.0 +5.77 0.0 0.0 +5.78 0.0 0.0 +5.79 0.0 0.0 +5.8 0.0 0.0 +5.81 0.0 0.0 +5.82 0.0 0.0 +5.83 0.0 0.0 +5.84 0.0 0.0 +5.85 0.0 0.0 +5.86 0.0 0.0 +5.87 0.0 0.0 +5.88 0.0 0.0 +5.89 0.0 0.0 +5.9 0.0 0.0 +5.91 0.0 0.0 +5.92 0.0 0.0 +5.93 0.0 0.0 +5.94 0.0 0.0 +5.95 0.0 0.0 +5.96 0.0 0.0 +5.97 0.0 0.0 +5.98 0.0 0.0 +5.99 0.0 0.0 +6.0 0.0 0.0 +6.01 0.0 0.0 +# local +0.0 -7.3824473704E+00 +0.01 -7.3812697659E+00 +0.02 -7.3777245837E+00 +0.03 -7.3718243749E+00 +0.04 -7.3635816905E+00 +0.05 -7.3530139295E+00 +0.06 -7.3401431711E+00 +0.07 -7.3249959683E+00 +0.08 -7.3076031080E+00 +0.09 -7.2879993436E+00 +0.1 -7.2662231076E+00 +0.11 -7.2423162111E+00 +0.12 -7.2163235363E+00 +0.13 -7.1882927290E+00 +0.14 -7.1582738976E+00 +0.15 -7.1263193213E+00 +0.16 -7.0924831749E+00 +0.17 -7.0568212706E+00 +0.18 -7.0193908209E+00 +0.19 -6.9802502231E+00 +0.2 -6.9394588668E+00 +0.21 -6.8970769636E+00 +0.22 -6.8531653983E+00 +0.23 -6.8077856012E+00 +0.24 -6.7609994384E+00 +0.25 -6.7128691203E+00 +0.26 -6.6634571231E+00 +0.27 -6.6128261248E+00 +0.28 -6.5610389507E+00 +0.29 -6.5081585274E+00 +0.3 -6.4542478436E+00 +0.31 -6.3993699152E+00 +0.32 -6.3435877532E+00 +0.33 -6.2869643332E+00 +0.34 -6.2295625648E+00 +0.35 -6.1714452603E+00 +0.36 -6.1126751004E+00 +0.37 -6.0533145978E+00 +0.38 -5.9934260574E+00 +0.39 -5.9330715308E+00 +0.4 -5.8723127672E+00 +0.41 -5.8112111589E+00 +0.42 -5.7498276813E+00 +0.43 -5.6882228258E+00 
+0.44 -5.6264565276E+00 +0.45 -5.5645880861E+00 +0.46 -5.5026760781E+00 +0.47 -5.4407782641E+00 +0.48 -5.3789514911E+00 +0.49 -5.3172515715E+00 +0.5 -5.2557331733E+00 +0.51 -5.1944496983E+00 +0.52 -5.1334531413E+00 +0.53 -5.0727939327E+00 +0.54 -5.0125208304E+00 +0.55 -4.9526806984E+00 +0.56 -4.8933184053E+00 +0.57 -4.8344766015E+00 +0.58 -4.7761955947E+00 +0.59 -4.7185131158E+00 +0.6 -4.6614642490E+00 +0.61 -4.6050811188E+00 +0.62 -4.5493929153E+00 +0.63 -4.4944256168E+00 +0.64 -4.4402019202E+00 +0.65 -4.3867412382E+00 +0.66 -4.3340595177E+00 +0.67 -4.2821692757E+00 +0.68 -4.2310796697E+00 +0.69 -4.1807965917E+00 +0.7 -4.1313226501E+00 +0.71 -4.0826574704E+00 +0.72 -4.0347978888E+00 +0.73 -3.9877382038E+00 +0.74 -3.9414704800E+00 +0.75 -3.8959848485E+00 +0.76 -3.8512698499E+00 +0.77 -3.8073128101E+00 +0.78 -3.7641001660E+00 +0.79 -3.7216177873E+00 +0.8 -3.6798512666E+00 +0.81 -3.6387861674E+00 +0.82 -3.5984082176E+00 +0.83 -3.5587034427E+00 +0.84 -3.5196582305E+00 +0.85 -3.4812593249E+00 +0.86 -3.4434937476E+00 +0.87 -3.4063486484E+00 +0.88 -3.3698110888E+00 +0.89 -3.3338677493E+00 +0.9 -3.2985045609E+00 +0.91 -3.2637075627E+00 +0.92 -3.2294647053E+00 +0.93 -3.1957666631E+00 +0.94 -3.1626064086E+00 +0.95 -3.1299788012E+00 +0.96 -3.0978801763E+00 +0.97 -3.0663080784E+00 +0.98 -3.0352607939E+00 +0.99 -3.0047370809E+00 +1.0 -2.9747358888E+00 +1.01 -2.9452561151E+00 +1.02 -2.9162964073E+00 +1.03 -2.8878550086E+00 +1.04 -2.8599295343E+00 +1.05 -2.8325169529E+00 +1.06 -2.8056135042E+00 +1.07 -2.7792146759E+00 +1.08 -2.7533152247E+00 +1.09 -2.7279090107E+00 +1.1 -2.7029892279E+00 +1.11 -2.6785483969E+00 +1.12 -2.6545784706E+00 +1.13 -2.6310707370E+00 +1.14 -2.6080160518E+00 +1.15 -2.5854049000E+00 +1.16 -2.5632274752E+00 +1.17 -2.5414736594E+00 +1.18 -2.5201332293E+00 +1.19 -2.4991958733E+00 +1.2 -2.4786512316E+00 +1.21 -2.4584890008E+00 +1.22 -2.4386989558E+00 +1.23 -2.4192709473E+00 +1.24 -2.4001950501E+00 +1.25 -2.3814614823E+00 +1.26 -2.3630605841E+00 +1.27 
-2.3449830012E+00 +1.28 -2.3272195138E+00 +1.29 -2.3097610277E+00 +1.3 -2.2925987334E+00 +1.31 -2.2757238961E+00 +1.32 -2.2591279027E+00 +1.33 -2.2428023049E+00 +1.34 -2.2267386815E+00 +1.35 -2.2109286973E+00 +1.36 -2.1953640511E+00 +1.37 -2.1800364538E+00 +1.38 -2.1649376273E+00 +1.39 -2.1500592840E+00 +1.4 -2.1353931293E+00 +1.41 -2.1209308669E+00 +1.42 -2.1066642024E+00 +1.43 -2.0925848762E+00 +1.44 -2.0786847192E+00 +1.45 -2.0649555800E+00 +1.46 -2.0513896667E+00 +1.47 -2.0379790929E+00 +1.48 -2.0247167043E+00 +1.49 -2.0115954015E+00 +1.5 -1.9986087910E+00 +1.51 -1.9857511909E+00 +1.52 -1.9730172462E+00 +1.53 -1.9604033418E+00 +1.54 -1.9479054899E+00 +1.55 -1.9355225868E+00 +1.56 -1.9232526393E+00 +1.57 -1.9110969000E+00 +1.58 -1.8990559460E+00 +1.59 -1.8871329022E+00 +1.6 -1.8753306466E+00 +1.61 -1.8636535889E+00 +1.62 -1.8521057833E+00 +1.63 -1.8406918545E+00 +1.64 -1.8294152997E+00 +1.65 -1.8182794584E+00 +1.66 -1.8072855050E+00 +1.67 -1.7964339215E+00 +1.68 -1.7857226516E+00 +1.69 -1.7751484466E+00 +1.7 -1.7647067259E+00 +1.71 -1.7543913992E+00 +1.72 -1.7441975973E+00 +1.73 -1.7341198736E+00 +1.74 -1.7241551769E+00 +1.75 -1.7143025836E+00 +1.76 -1.7045606872E+00 +1.77 -1.6949290865E+00 +1.78 -1.6854065806E+00 +1.79 -1.6759901769E+00 +1.8 -1.6666775032E+00 +1.81 -1.6574675168E+00 +1.82 -1.6483588188E+00 +1.83 -1.6393496966E+00 +1.84 -1.6304384659E+00 +1.85 -1.6216236523E+00 +1.86 -1.6129037164E+00 +1.87 -1.6042771878E+00 +1.88 -1.5957425861E+00 +1.89 -1.5872984657E+00 +1.9 -1.5789434174E+00 +1.91 -1.5706760161E+00 +1.92 -1.5624949244E+00 +1.93 -1.5543987361E+00 +1.94 -1.5463861762E+00 +1.95 -1.5384559029E+00 +1.96 -1.5306066597E+00 +1.97 -1.5228371950E+00 +1.98 -1.5151462616E+00 +1.99 -1.5075327030E+00 +2.0 -1.4999952835E+00 +2.01 -1.4925329139E+00 +2.02 -1.4851444346E+00 +2.03 -1.4778287552E+00 +2.04 -1.4705848143E+00 +2.05 -1.4634115164E+00 +2.06 -1.4563078779E+00 +2.07 -1.4492728445E+00 +2.08 -1.4423054491E+00 +2.09 -1.4354047221E+00 +2.1 
-1.4285696813E+00 +2.11 -1.4217994342E+00 +2.12 -1.4150930238E+00 +2.13 -1.4084495764E+00 +2.14 -1.4018682130E+00 +2.15 -1.3953480370E+00 +2.16 -1.3888882403E+00 +2.17 -1.3824879562E+00 +2.18 -1.3761463800E+00 +2.19 -1.3698627240E+00 +2.2 -1.3636361553E+00 +2.21 -1.3574659450E+00 +2.22 -1.3513513123E+00 +2.23 -1.3452915018E+00 +2.24 -1.3392858081E+00 +2.25 -1.3333334744E+00 +2.26 -1.3274338163E+00 +2.27 -1.3215861463E+00 +2.28 -1.3157897421E+00 +2.29 -1.3100439737E+00 +2.3 -1.3043481656E+00 +2.31 -1.2987016554E+00 +2.32 -1.2931038351E+00 +2.33 -1.2875540522E+00 +2.34 -1.2820516981E+00 +2.35 -1.2765961859E+00 +2.36 -1.2711868863E+00 +2.37 -1.2658232390E+00 +2.38 -1.2605046772E+00 +2.39 -1.2552305958E+00 +2.4 -1.2500004767E+00 +2.41 -1.2448137690E+00 +2.42 -1.2396699045E+00 +2.43 -1.2345683891E+00 +2.44 -1.2295086930E+00 +2.45 -1.2244902830E+00 +2.46 -1.2195126858E+00 +2.47 -1.2145753938E+00 +2.48 -1.2096779044E+00 +2.49 -1.2048197648E+00 +2.5 -1.2000004895E+00 +2.51 -1.1952196023E+00 +2.52 -1.1904766709E+00 +2.53 -1.1857712322E+00 +2.54 -1.1811028319E+00 +2.55 -1.1764710586E+00 +2.56 -1.1718754712E+00 +2.57 -1.1673156341E+00 +2.58 -1.1627911562E+00 +2.59 -1.1583016187E+00 +2.6 -1.1538466012E+00 +2.61 -1.1494257329E+00 +2.62 -1.1450386175E+00 +2.63 -1.1406848461E+00 +2.64 -1.1363640685E+00 +2.65 -1.1320759107E+00 +2.66 -1.1278199728E+00 +2.67 -1.1235959244E+00 +2.68 -1.1194034091E+00 +2.69 -1.1152420487E+00 +2.7 -1.1111115170E+00 +2.71 -1.1070114792E+00 +2.72 -1.1029415795E+00 +2.73 -1.0989014927E+00 +2.74 -1.0948909057E+00 +2.75 -1.0909094852E+00 +2.76 -1.0869569047E+00 +2.77 -1.0830328722E+00 +2.78 -1.0791370775E+00 +2.79 -1.0752691907E+00 +2.8 -1.0714289403E+00 +2.81 -1.0676160318E+00 +2.82 -1.0638301530E+00 +2.83 -1.0600710295E+00 +2.84 -1.0563383877E+00 +2.85 -1.0526319386E+00 +2.86 -1.0489513980E+00 +2.87 -1.0452965144E+00 +2.88 -1.0416670199E+00 +2.89 -1.0380626259E+00 +2.9 -1.0344830951E+00 +2.91 -1.0309281744E+00 +2.92 -1.0273975975E+00 +2.93 
-1.0238911131E+00 +2.94 -1.0204084904E+00 +2.95 -1.0169494860E+00 +2.96 -1.0135138357E+00 +2.97 -1.0101013272E+00 +2.98 -1.0067117293E+00 +2.99 -1.0033448003E+00 +3.0 -1.0000003097E+00 +3.01 -9.9667804911E-01 +3.02 -9.9337779726E-01 +3.03 -9.9009931515E-01 +3.04 -9.8684240459E-01 +3.05 -9.8360685818E-01 +3.06 -9.8039246029E-01 +3.07 -9.7719899065E-01 +3.08 -9.7402626440E-01 +3.09 -9.7087408053E-01 +3.1 -9.6774222652E-01 +3.11 -9.6463050840E-01 +3.12 -9.6153874392E-01 +3.13 -9.5846674140E-01 +3.14 -9.5541429264E-01 +3.15 -9.5238122645E-01 +3.16 -9.4936736350E-01 +3.17 -9.4637251945E-01 +3.18 -9.4339649617E-01 +3.19 -9.4043913696E-01 +3.2 -9.3750026760E-01 +3.21 -9.3457970844E-01 +3.22 -9.3167727899E-01 +3.23 -9.2879282694E-01 +3.24 -9.2592618620E-01 +3.25 -9.2307718251E-01 +3.26 -9.2024565026E-01 +3.27 -9.1743144193E-01 +3.28 -9.1463439914E-01 +3.29 -9.1185435366E-01 +3.3 -9.0909115210E-01 +3.31 -9.0634465218E-01 +3.32 -9.0361470291E-01 +3.33 -9.0090114265E-01 +3.34 -8.9820382763E-01 +3.35 -8.9552262138E-01 +3.36 -8.9285737995E-01 +3.37 -8.9020794879E-01 +3.38 -8.8757419136E-01 +3.39 -8.8495597752E-01 +3.4 -8.8235317003E-01 +3.41 -8.7976562191E-01 +3.42 -8.7719320159E-01 +3.43 -8.7463578579E-01 +3.44 -8.7209324365E-01 +3.45 -8.6956543622E-01 +3.46 -8.6705223479E-01 +3.47 -8.6455352341E-01 +3.48 -8.6206917731E-01 +3.49 -8.5959906600E-01 +3.5 -8.5714306169E-01 +3.51 -8.5470105614E-01 +3.52 -8.5227293040E-01 +3.53 -8.4985856290E-01 +3.54 -8.4745782498E-01 +3.55 -8.4507061639E-01 +3.56 -8.4269682374E-01 +3.57 -8.4033633363E-01 +3.58 -8.3798901852E-01 +3.59 -8.3565478279E-01 +3.6 -8.3333351949E-01 +3.61 -8.3102512052E-01 +3.62 -8.2872946795E-01 +3.63 -8.2644646108E-01 +3.64 -8.2417600238E-01 +3.65 -8.2191798877E-01 +3.66 -8.1967231254E-01 +3.67 -8.1743886634E-01 +3.68 -8.1521756218E-01 +3.69 -8.1300830181E-01 +3.7 -8.1081098693E-01 +3.71 -8.0862550619E-01 +3.72 -8.0645177730E-01 +3.73 -8.0428970779E-01 +3.74 -8.0213920396E-01 +3.75 -8.0000016527E-01 +3.76 
-7.9787249939E-01 +3.77 -7.9575612448E-01 +3.78 -7.9365095120E-01 +3.79 -7.9155689023E-01 +3.8 -7.8947383894E-01 +3.81 -7.8740172506E-01 +3.82 -7.8534046368E-01 +3.83 -7.8328996962E-01 +3.84 -7.8125015177E-01 +3.85 -7.7922092485E-01 +3.86 -7.7720221538E-01 +3.87 -7.7519394216E-01 +3.88 -7.7319602398E-01 +3.89 -7.7120836876E-01 +3.9 -7.6923090699E-01 +3.91 -7.6726356352E-01 +3.92 -7.6530626095E-01 +3.93 -7.6335891957E-01 +3.94 -7.6142145427E-01 +3.95 -7.5949380177E-01 +3.96 -7.5757588828E-01 +3.97 -7.5566763999E-01 +3.98 -7.5376897721E-01 +3.99 -7.5187982665E-01 +4.0 -7.5000012482E-01 +4.01 -7.4812980134E-01 +4.02 -7.4626878584E-01 +4.03 -7.4441699946E-01 +4.04 -7.4257437862E-01 +4.05 -7.4074086012E-01 +4.06 -7.3891637689E-01 +4.07 -7.3710086183E-01 +4.08 -7.3529423757E-01 +4.09 -7.3349644823E-01 +4.1 -7.3170743153E-01 +4.11 -7.2992712353E-01 +4.12 -7.2815546025E-01 +4.13 -7.2639236646E-01 +4.14 -7.2463779202E-01 +4.15 -7.2289167623E-01 +4.16 -7.2115395812E-01 +4.17 -7.1942457633E-01 +4.18 -7.1770345938E-01 +4.19 -7.1599056003E-01 +4.2 -7.1428582013E-01 +4.21 -7.1258918153E-01 +4.22 -7.1090058579E-01 +4.23 -7.0921996440E-01 +4.24 -7.0754727262E-01 +4.25 -7.0588245501E-01 +4.26 -7.0422545614E-01 +4.27 -7.0257622058E-01 +4.28 -7.0093468249E-01 +4.29 -6.9930079886E-01 +4.3 -6.9767451726E-01 +4.31 -6.9605578483E-01 +4.32 -6.9444454872E-01 +4.33 -6.9284074715E-01 +4.34 -6.9124433646E-01 +4.35 -6.8965526802E-01 +4.36 -6.8807349144E-01 +4.37 -6.8649895633E-01 +4.38 -6.8493160544E-01 +4.39 -6.8337139305E-01 +4.4 -6.8181827480E-01 +4.41 -6.8027220264E-01 +4.42 -6.7873312854E-01 +4.43 -6.7720100026E-01 +4.44 -6.7567576872E-01 +4.45 -6.7415739420E-01 +4.46 -6.7264583091E-01 +4.47 -6.7114103304E-01 +4.48 -6.6964295385E-01 +4.49 -6.6815153971E-01 +4.5 -6.6666675588E-01 +4.51 -6.6518855869E-01 +4.52 -6.6371690447E-01 +4.53 -6.6225174956E-01 +4.54 -6.6079304326E-01 +4.55 -6.5934074757E-01 +4.56 -6.5789482363E-01 +4.57 -6.5645522981E-01 +4.58 -6.5502192447E-01 +4.59 
-6.5359486328E-01 +4.6 -6.5217400096E-01 +4.61 -6.5075930467E-01 +4.62 -6.4935073471E-01 +4.63 -6.4794825138E-01 +4.64 -6.4655181499E-01 +4.65 -6.4516137866E-01 +4.66 -6.4377690915E-01 +4.67 -6.4239837075E-01 +4.68 -6.4102572560E-01 +4.69 -6.3965893587E-01 +4.7 -6.3829796182E-01 +4.71 -6.3694276063E-01 +4.72 -6.3559330341E-01 +4.73 -6.3424955407E-01 +4.74 -6.3291147654E-01 +4.75 -6.3157903473E-01 +4.76 -6.3025218749E-01 +4.77 -6.2893090085E-01 +4.78 -6.2761514429E-01 +4.79 -6.2630488341E-01 +4.8 -6.2500008380E-01 +4.81 -6.2370071106E-01 +4.82 -6.2240672338E-01 +4.83 -6.2111809390E-01 +4.84 -6.1983479105E-01 +4.85 -6.1855678202E-01 +4.86 -6.1728403402E-01 +4.87 -6.1601651350E-01 +4.88 -6.1475418088E-01 +4.89 -6.1349701253E-01 +4.9 -6.1224497718E-01 +4.91 -6.1099804356E-01 +4.92 -6.0975618039E-01 +4.93 -6.0851935437E-01 +4.94 -6.0728753037E-01 +4.95 -6.0606068480E-01 +4.96 -6.0483878785E-01 +4.97 -6.0362180971E-01 +4.98 -6.0240972057E-01 +4.99 -6.0120248790E-01 +5.0 -6.0000007964E-01 +5.01 -5.9880247275E-01 +5.02 -5.9760963882E-01 +5.03 -5.9642154943E-01 +5.04 -5.9523817614E-01 +5.05 -5.9405948777E-01 +5.06 -5.9288545395E-01 +5.07 -5.9171605272E-01 +5.08 -5.9055125698E-01 +5.09 -5.8939103965E-01 +5.1 -5.8823537362E-01 +5.11 -5.8708422951E-01 +5.12 -5.8593757739E-01 +5.13 -5.8479539688E-01 +5.14 -5.8365766214E-01 +5.15 -5.8252434734E-01 +5.16 -5.8139542663E-01 +5.17 -5.8027087294E-01 +5.18 -5.7915065560E-01 +5.19 -5.7803475621E-01 +5.2 -5.7692315016E-01 +5.21 -5.7581581280E-01 +5.22 -5.7471271951E-01 +5.23 -5.7361384566E-01 +5.24 -5.7251915960E-01 +5.25 -5.7142864450E-01 +5.26 -5.7034227717E-01 +5.27 -5.6926003411E-01 +5.28 -5.6818189184E-01 +5.29 -5.6710782688E-01 +5.3 -5.6603781082E-01 +5.31 -5.6497182354E-01 +5.32 -5.6390984492E-01 +5.33 -5.6285185259E-01 +5.34 -5.6179782416E-01 +5.35 -5.6074773722E-01 +5.36 -5.5970156703E-01 +5.37 -5.5865928922E-01 +5.38 -5.5762088710E-01 +5.39 -5.5658633932E-01 +5.4 -5.5555562455E-01 +5.41 -5.5452872144E-01 +5.42 
-5.5350560863E-01 +5.43 -5.5248625863E-01 +5.44 -5.5147065646E-01 +5.45 -5.5045878243E-01 +5.46 -5.4945061618E-01 +5.47 -5.4844613737E-01 +5.48 -5.4744532564E-01 +5.49 -5.4644815794E-01 +5.5 -5.4545461295E-01 +5.51 -5.4446467529E-01 +5.52 -5.4347832555E-01 +5.53 -5.4249554433E-01 +5.54 -5.4151631224E-01 +5.55 -5.4054060985E-01 +5.56 -5.3956841245E-01 +5.57 -5.3859970546E-01 +5.58 -5.3763447159E-01 +5.59 -5.3667269235E-01 +5.6 -5.3571434922E-01 +5.61 -5.3475942372E-01 +5.62 -5.3380789633E-01 +5.63 -5.3285974433E-01 +5.64 -5.3191495541E-01 +5.65 -5.3097351196E-01 +5.66 -5.3003539632E-01 +5.67 -5.2910059086E-01 +5.68 -5.2816907795E-01 +5.69 -5.2724083735E-01 +5.7 -5.2631585067E-01 +5.71 -5.2539410476E-01 +5.72 -5.2447558280E-01 +5.73 -5.2356026797E-01 +5.74 -5.2264814347E-01 +5.75 -5.2173919248E-01 +5.76 -5.2083339456E-01 +5.77 -5.1993073445E-01 +5.78 -5.1903119862E-01 +5.79 -5.1813477104E-01 +5.8 -5.1724143568E-01 +5.81 -5.1635117650E-01 +5.82 -5.1546397748E-01 +5.83 -5.1457981849E-01 +5.84 -5.1369868619E-01 +5.85 -5.1282056719E-01 +5.86 -5.1194544619E-01 +5.87 -5.1107330792E-01 +5.88 -5.1020413708E-01 +5.89 -5.0933791839E-01 +5.9 -5.0847463252E-01 +5.91 -5.0761426693E-01 +5.92 -5.0675680882E-01 +5.93 -5.0590224363E-01 +5.94 -5.0505055676E-01 +5.95 -5.0420173367E-01 +5.96 -5.0335575976E-01 +5.97 -5.0251261692E-01 +5.98 -5.0167229238E-01 +5.99 -5.0083477440E-01 +6.0 -5.0000004908E-01 +6.01 -4.9916810253E-01 diff --git a/potentials/pseudo.Li_pbe b/potentials/pseudo.Li_pbe deleted file mode 100644 index 0f6b1473..00000000 --- a/potentials/pseudo.Li_pbe +++ /dev/null @@ -1,1248 +0,0 @@ -# Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at -# the Lawrence Livermore National Laboratory. -# LLNL-CODE-743438 -# All rights reserved. -# This file is part of MGmol. For details, see https://github.com/llnl/mgmol. 
-# Please also read this link https://github.com/llnl/mgmol/LICENSE -# -# obtained from Tadashi, 6/26/03 -# 3.00 1 2 6 1.80 : z nc nv iexc rnlc -# 1 0 2.00 : n l f -# 2 0 1.00 -# 2 1 0.00 -#2 t : lmax s_pp_def -#Generated on 20020124, at 165109.318, time zone -0800 -# Short description of the species type. One line only! -Li PBE (TM) -#color -Yellow -#radii of balls and covalent bonds -1.0 2.7 -# Nlcc flag -0 -# Atomic number. -3 -# Atomic mass. -6.941 -# Number of valence electrons. -1 -# Gaussian core charge parameter rc. -1. -# Number of potentials -3 -# l-value for state which is local -2 0 -# Local potential radius -3.2 -# Non-local potential radius -3.2 -# number of points in radial grid -401 -# log mesh parameter -0. -# radial grid, potential, and reference state for l=0 -0.00 0.565792564331E+00 0.135186864319E+00 -0.01 0.565791454518E+00 0.135189278893E+00 -0.02 0.565791845509E+00 0.135197551572E+00 -0.03 0.565791763062E+00 0.135211380681E+00 -0.04 0.565790306539E+00 0.135230751862E+00 -0.05 0.565786677278E+00 0.135255663484E+00 -0.06 0.565780127388E+00 0.135286116357E+00 -0.07 0.565769959504E+00 0.135322112269E+00 -0.08 0.565755529963E+00 0.135363653613E+00 -0.09 0.565736250559E+00 0.135410743262E+00 -0.10 0.565711588291E+00 0.135463384514E+00 -0.11 0.565681063234E+00 0.135521581061E+00 -0.12 0.565644244804E+00 0.135585336970E+00 -0.13 0.565600746712E+00 0.135654656666E+00 -0.14 0.565550220900E+00 0.135729544918E+00 -0.15 0.565492350674E+00 0.135810006826E+00 -0.16 0.565426843285E+00 0.135896047804E+00 -0.17 0.565353422123E+00 0.135987673570E+00 -0.18 0.565271818696E+00 0.136084890124E+00 -0.19 0.565181764520E+00 0.136187703737E+00 -0.20 0.565082983059E+00 0.136296120931E+00 -0.21 0.564975181781E+00 0.136410148461E+00 -0.22 0.564858044434E+00 0.136529793298E+00 -0.23 0.564731223610E+00 0.136655062607E+00 -0.24 0.564594333622E+00 0.136785963726E+00 -0.25 0.564446943740E+00 0.136922504144E+00 -0.26 0.564288571936E+00 0.137064691479E+00 -0.27 
0.564118678928E+00 0.137212533455E+00 -0.28 0.563936662662E+00 0.137366037873E+00 -0.29 0.563741853541E+00 0.137525212589E+00 -0.30 0.563533509606E+00 0.137690065485E+00 -0.31 0.563310812655E+00 0.137860604445E+00 -0.32 0.563072864807E+00 0.138036837317E+00 -0.33 0.562818684862E+00 0.138218771894E+00 -0.34 0.562547206034E+00 0.138406415876E+00 -0.35 0.562257273690E+00 0.138599776840E+00 -0.36 0.561947643382E+00 0.138798862207E+00 -0.37 0.561616979300E+00 0.139003679209E+00 -0.38 0.561263853377E+00 0.139214234852E+00 -0.39 0.560886744621E+00 0.139430535882E+00 -0.40 0.560484038770E+00 0.139652588747E+00 -0.41 0.560054028277E+00 0.139880399559E+00 -0.42 0.559594912621E+00 0.140113974057E+00 -0.43 0.559104798899E+00 0.140353317563E+00 -0.44 0.558581702687E+00 0.140598434946E+00 -0.45 0.558023549151E+00 0.140849330574E+00 -0.46 0.557428174385E+00 0.141106008280E+00 -0.47 0.556793326974E+00 0.141368471307E+00 -0.48 0.556116669765E+00 0.141636722275E+00 -0.49 0.555395781616E+00 0.141910763126E+00 -0.50 0.554628159196E+00 0.142190595081E+00 -0.51 0.553811219296E+00 0.142476218590E+00 -0.52 0.552942301215E+00 0.142767633284E+00 -0.53 0.552018669332E+00 0.143064837933E+00 -0.54 0.551037515446E+00 0.143367830394E+00 -0.55 0.549995959902E+00 0.143676607543E+00 -0.56 0.548891055043E+00 0.143991165230E+00 -0.57 0.547719788628E+00 0.144311498246E+00 -0.58 0.546479086011E+00 0.144637600267E+00 -0.59 0.545165810161E+00 0.144969463751E+00 -0.60 0.543776766801E+00 0.145307079924E+00 -0.61 0.542308708013E+00 0.145650438766E+00 -0.62 0.540758330765E+00 0.145999528858E+00 -0.63 0.539122281362E+00 0.146354337355E+00 -0.64 0.537397160708E+00 0.146714850032E+00 -0.65 0.535579521189E+00 0.147081051057E+00 -0.66 0.533665872432E+00 0.147452923007E+00 -0.67 0.531652685710E+00 0.147830446936E+00 -0.68 0.529536389730E+00 0.148213602015E+00 -0.69 0.527313380424E+00 0.148602365785E+00 -0.70 0.524980019018E+00 0.148996713944E+00 -0.71 0.522532634142E+00 0.149396620169E+00 -0.72 0.519967529235E+00 
0.149802056452E+00 -0.73 0.517280977558E+00 0.150212992433E+00 -0.74 0.514469233415E+00 0.150629396008E+00 -0.75 0.511528527844E+00 0.151051232676E+00 -0.76 0.508455077338E+00 0.151478465879E+00 -0.77 0.505245083810E+00 0.151911056790E+00 -0.78 0.501894740707E+00 0.152348964123E+00 -0.79 0.498400235520E+00 0.152792144466E+00 -0.80 0.494757756833E+00 0.153240551576E+00 -0.81 0.490963495756E+00 0.153694137184E+00 -0.82 0.487013658197E+00 0.154152849868E+00 -0.83 0.482904461604E+00 0.154616636225E+00 -0.84 0.478632156517E+00 0.155085439443E+00 -0.85 0.474193015478E+00 0.155559200707E+00 -0.86 0.469583367112E+00 0.156037857623E+00 -0.87 0.464799575834E+00 0.156521345662E+00 -0.88 0.459838088204E+00 0.157009596699E+00 -0.89 0.454695408356E+00 0.157502540157E+00 -0.90 0.449368146101E+00 0.158000102088E+00 -0.91 0.443853007998E+00 0.158502205487E+00 -0.92 0.438146816772E+00 0.159008770539E+00 -0.93 0.432246560465E+00 0.159519713461E+00 -0.94 0.426149332525E+00 0.160034948440E+00 -0.95 0.419852488361E+00 0.160554384787E+00 -0.96 0.413353478708E+00 0.161077930227E+00 -0.97 0.406650093670E+00 0.161605487462E+00 -0.98 0.399740278545E+00 0.162136956937E+00 -0.99 0.392622296566E+00 0.162672235269E+00 -1.00 0.385294753890E+00 0.163211214957E+00 -1.01 0.377756446910E+00 0.163753786565E+00 -1.02 0.370006748510E+00 0.164299834901E+00 -1.03 0.362045188428E+00 0.164849243263E+00 -1.04 0.353871866418E+00 0.165401890109E+00 -1.05 0.345487329269E+00 0.165957650099E+00 -1.06 0.336892389378E+00 0.166516395913E+00 -1.07 0.328088698274E+00 0.167077994011E+00 -1.08 0.319078106137E+00 0.167642309399E+00 -1.09 0.309863131855E+00 0.168209202623E+00 -1.10 0.300447014480E+00 0.168778529429E+00 -1.11 0.290833172522E+00 0.169350144431E+00 -1.12 0.281026035523E+00 0.169923895960E+00 -1.13 0.271030466904E+00 0.170499629654E+00 -1.14 0.260851691962E+00 0.171077188959E+00 -1.15 0.250496025034E+00 0.171656410739E+00 -1.16 0.239969993102E+00 0.172237130569E+00 -1.17 0.229280689189E+00 0.172819180503E+00 
-1.18 0.218436152166E+00 0.173402386705E+00 -1.19 0.207444532175E+00 0.173986574636E+00 -1.20 0.196314615787E+00 0.174571565342E+00 -1.21 0.185055919212E+00 0.175157174817E+00 -1.22 0.173678024343E+00 0.175743218552E+00 -1.23 0.162191051234E+00 0.176329507323E+00 -1.24 0.150605631539E+00 0.176915847422E+00 -1.25 0.138932431680E+00 0.177502044754E+00 -1.26 0.127182423849E+00 0.178087900753E+00 -1.27 0.115366871347E+00 0.178673212598E+00 -1.28 0.103497049682E+00 0.179257777174E+00 -1.29 0.915842538990E-01 0.179841387652E+00 -1.30 0.796398016438E-01 0.180423832900E+00 -1.31 0.676750093549E-01 0.181004901516E+00 -1.32 0.557009920576E-01 0.181584379560E+00 -1.33 0.437285173400E-01 0.182162048711E+00 -1.34 0.317683183110E-01 0.182737690216E+00 -1.35 0.198308852390E-01 0.183311084098E+00 -1.36 0.792590861738E-02 0.183882006356E+00 -1.37 -0.393708546836E-02 0.184450232159E+00 -1.38 -0.157486995633E-01 0.185015536359E+00 -1.39 -0.275006304569E-01 0.185577691084E+00 -1.40 -0.391851278030E-01 0.186136467090E+00 -1.41 -0.507944647328E-01 0.186691635103E+00 -1.42 -0.623218306353E-01 0.187242964906E+00 -1.43 -0.737617135724E-01 0.187790224949E+00 -1.44 -0.851087002317E-01 0.188333183579E+00 -1.45 -0.963577282132E-01 0.188871609148E+00 -1.46 -0.107505672167E+00 0.189405270031E+00 -1.47 -0.118550071471E+00 0.189933934611E+00 -1.48 -0.129488490830E+00 0.190457371286E+00 -1.49 -0.140319972566E+00 0.190975349528E+00 -1.50 -0.151045845842E+00 0.191487640461E+00 -1.51 -0.161667636136E+00 0.191994015352E+00 -1.52 -0.172187159032E+00 0.192494245800E+00 -1.53 -0.182609370427E+00 0.192988106996E+00 -1.54 -0.192941147155E+00 0.193475376329E+00 -1.55 -0.203189393657E+00 0.193955831219E+00 -1.56 -0.213362239971E+00 0.194429250486E+00 -1.57 -0.223473528480E+00 0.194895419469E+00 -1.58 -0.233538764526E+00 0.195354125410E+00 -1.59 -0.243573455806E+00 0.195805155550E+00 -1.60 -0.253594887442E+00 0.196248300351E+00 -1.61 -0.263625336286E+00 0.196683359312E+00 -1.62 -0.273687948990E+00 
0.197110133506E+00 -1.63 -0.283805890193E+00 0.197528424027E+00 -1.64 -0.294006393132E+00 0.197938037034E+00 -1.65 -0.304325896966E+00 0.198338790136E+00 -1.66 -0.314802106618E+00 0.198730502522E+00 -1.67 -0.325472685410E+00 0.199112993395E+00 -1.68 -0.336362632216E+00 0.199486087955E+00 -1.69 -0.347466516274E+00 0.199849625803E+00 -1.70 -0.358774430088E+00 0.200203448658E+00 -1.71 -0.370276466935E+00 0.200547398238E+00 -1.72 -0.381968774596E+00 0.200881321501E+00 -1.73 -0.393868162029E+00 0.201205083278E+00 -1.74 -0.405995841654E+00 0.201518552209E+00 -1.75 -0.418373025891E+00 0.201821596933E+00 -1.76 -0.430971618014E+00 0.202114089057E+00 -1.77 -0.443433002131E+00 0.202395920073E+00 -1.78 -0.455262722590E+00 0.202666989649E+00 -1.79 -0.465965906466E+00 0.202927197475E+00 -1.80 -0.475063781985E+00 0.203176443982E+00 -1.81 -0.482440981673E+00 0.203414646287E+00 -1.82 -0.488341737783E+00 0.203641738020E+00 -1.83 -0.493025622782E+00 0.203857653514E+00 -1.84 -0.496752196125E+00 0.204062327106E+00 -1.85 -0.499735487640E+00 0.204255700760E+00 -1.86 -0.502041964605E+00 0.204437741166E+00 -1.87 -0.503708252819E+00 0.204608420015E+00 -1.88 -0.504770978080E+00 0.204767708995E+00 -1.89 -0.505268020163E+00 0.204915580746E+00 -1.90 -0.505262699874E+00 0.205052027155E+00 -1.91 -0.504842022132E+00 0.205177058030E+00 -1.92 -0.504093893884E+00 0.205290683859E+00 -1.93 -0.503106222076E+00 0.205392915133E+00 -1.94 -0.501959562238E+00 0.205483766602E+00 -1.95 -0.500691161675E+00 0.205563278114E+00 -1.96 -0.499322462220E+00 0.205631498679E+00 -1.97 -0.497874882550E+00 0.205688477319E+00 -1.98 -0.496369837873E+00 0.205734263063E+00 -1.99 -0.494824369849E+00 0.205768912960E+00 -2.00 -0.493242596565E+00 0.205792507752E+00 -2.01 -0.491626260514E+00 0.205805132539E+00 -2.02 -0.489977104191E+00 0.205806872422E+00 -2.03 -0.488296862439E+00 0.205797812550E+00 -2.04 -0.486585900698E+00 0.205778047088E+00 -2.05 -0.484841644787E+00 0.205747689552E+00 -2.06 -0.483061140110E+00 0.205706855964E+00 
-2.07 -0.481241432069E+00 0.205655662344E+00 -2.08 -0.479379573812E+00 0.205594224744E+00 -2.09 -0.477474160448E+00 0.205522665662E+00 -2.10 -0.475527179642E+00 0.205441121769E+00 -2.11 -0.473541070780E+00 0.205349731625E+00 -2.12 -0.471518273251E+00 0.205248633790E+00 -2.13 -0.469461226893E+00 0.205137966826E+00 -2.14 -0.467373835494E+00 0.205017871416E+00 -2.15 -0.465264719927E+00 0.204888495095E+00 -2.16 -0.463143449330E+00 0.204749986773E+00 -2.17 -0.461019592836E+00 0.204602495359E+00 -2.18 -0.458902719582E+00 0.204446169763E+00 -2.19 -0.456801544181E+00 0.204281158571E+00 -2.20 -0.454718725723E+00 0.204107608072E+00 -2.21 -0.452654307658E+00 0.203925663565E+00 -2.22 -0.450608323068E+00 0.203735470343E+00 -2.23 -0.448580805037E+00 0.203537173700E+00 -2.24 -0.446571782181E+00 0.203330918796E+00 -2.25 -0.444581131791E+00 0.203116846265E+00 -2.26 -0.442608546555E+00 0.202895091216E+00 -2.27 -0.440653707955E+00 0.202665788424E+00 -2.28 -0.438716297476E+00 0.202429072664E+00 -2.29 -0.436795996602E+00 0.202185078711E+00 -2.30 -0.434892501747E+00 0.201933940230E+00 -2.31 -0.433005589696E+00 0.201675784912E+00 -2.32 -0.431135064151E+00 0.201410738448E+00 -2.33 -0.429280728838E+00 0.201138926526E+00 -2.34 -0.427442387480E+00 0.200860474835E+00 -2.35 -0.425619843791E+00 0.200575509039E+00 -2.36 -0.423812900243E+00 0.200284151693E+00 -2.37 -0.422021356849E+00 0.199986519187E+00 -2.38 -0.420245013334E+00 0.199682727186E+00 -2.39 -0.418483669421E+00 0.199372891356E+00 -2.40 -0.416737124834E+00 0.199057127363E+00 -2.41 -0.415005179673E+00 0.198735550730E+00 -2.42 -0.413287645439E+00 0.198408272685E+00 -2.43 -0.411584346817E+00 0.198075399487E+00 -2.44 -0.409895109227E+00 0.197737037116E+00 -2.45 -0.408219758089E+00 0.197393291555E+00 -2.46 -0.406558118823E+00 0.197044268786E+00 -2.47 -0.404910017175E+00 0.196690074596E+00 -2.48 -0.403275286321E+00 0.196330810374E+00 -2.49 -0.401653766815E+00 0.195966573139E+00 -2.50 -0.400045299528E+00 0.195597459721E+00 -2.51 
-0.398449725331E+00 0.195223566953E+00 -2.52 -0.396866885095E+00 0.194844991664E+00 -2.53 -0.395296619878E+00 0.194461830587E+00 -2.54 -0.393738777444E+00 0.194074176827E+00 -2.55 -0.392193213951E+00 0.193682118959E+00 -2.56 -0.390659786087E+00 0.193285745271E+00 -2.57 -0.389138350540E+00 0.192885144050E+00 -2.58 -0.387628763998E+00 0.192480403585E+00 -2.59 -0.386130883161E+00 0.192071612156E+00 -2.60 -0.384644568387E+00 0.191658855933E+00 -2.61 -0.383169688783E+00 0.191242216039E+00 -2.62 -0.381706114736E+00 0.190821772859E+00 -2.63 -0.380253716635E+00 0.190397606776E+00 -2.64 -0.378812364866E+00 0.189969798175E+00 -2.65 -0.377381929818E+00 0.189538427440E+00 -2.66 -0.375962282872E+00 0.189103574373E+00 -2.67 -0.374553303182E+00 0.188665314231E+00 -2.68 -0.373154873564E+00 0.188223720127E+00 -2.69 -0.371766876854E+00 0.187778865164E+00 -2.70 -0.370389195889E+00 0.187330822443E+00 -2.71 -0.369021713505E+00 0.186879665066E+00 -2.72 -0.367664312563E+00 0.186425466121E+00 -2.73 -0.366316879306E+00 0.185968296666E+00 -2.74 -0.364979306898E+00 0.185508223615E+00 -2.75 -0.363651489347E+00 0.185045313373E+00 -2.76 -0.362333320662E+00 0.184579632343E+00 -2.77 -0.361024694850E+00 0.184111246933E+00 -2.78 -0.359725505921E+00 0.183640223546E+00 -2.79 -0.358435648116E+00 0.183166628446E+00 -2.80 -0.357155020623E+00 0.182690524873E+00 -2.81 -0.355883527338E+00 0.182211973191E+00 -2.82 -0.354621072343E+00 0.181731033651E+00 -2.83 -0.353367559722E+00 0.181247766503E+00 -2.84 -0.352122893557E+00 0.180762231996E+00 -2.85 -0.350886977931E+00 0.180274490380E+00 -2.86 -0.349659717323E+00 0.179784601659E+00 -2.87 -0.348441021306E+00 0.179292622661E+00 -2.88 -0.347230802975E+00 0.178798608020E+00 -2.89 -0.346028975496E+00 0.178302612328E+00 -2.90 -0.344835452034E+00 0.177804690173E+00 -2.91 -0.343650145755E+00 0.177304896145E+00 -2.92 -0.342472969825E+00 0.176803284836E+00 -2.93 -0.341303837694E+00 0.176299910654E+00 -2.94 -0.340142667189E+00 0.175794825228E+00 -2.95 -0.338989379581E+00 
0.175288078000E+00 -2.96 -0.337843896235E+00 0.174779718349E+00 -2.97 -0.336706138515E+00 0.174269795659E+00 -2.98 -0.335576027785E+00 0.173758359311E+00 -2.99 -0.334453485410E+00 0.173245458687E+00 -3.00 -0.333338432816E+00 0.172731143128E+00 -3.01 -0.332230794350E+00 0.172215460083E+00 -3.02 -0.331130498471E+00 0.171698454339E+00 -3.03 -0.330037473950E+00 0.171180170481E+00 -3.04 -0.328951649553E+00 0.170660653095E+00 -3.05 -0.327872954052E+00 0.170139946765E+00 -3.06 -0.326801316215E+00 0.169618096076E+00 -3.07 -0.325736664810E+00 0.169095145616E+00 -3.08 -0.324678929621E+00 0.168571139299E+00 -3.09 -0.323628044884E+00 0.168046118099E+00 -3.10 -0.322583946062E+00 0.167520122183E+00 -3.11 -0.321546568616E+00 0.166993191715E+00 -3.12 -0.320515848007E+00 0.166465366863E+00 -3.13 -0.319491719697E+00 0.165936687792E+00 -3.14 -0.318474119147E+00 0.165407194667E+00 -3.15 -0.317462981877E+00 0.164876927617E+00 -3.16 -0.316458245854E+00 0.164345925118E+00 -3.17 -0.315459852357E+00 0.163814223423E+00 -3.18 -0.314467742901E+00 0.163281858622E+00 -3.19 -0.313481858997E+00 0.162748866810E+00 -3.20 -0.312502142161E+00 0.162215284076E+00 -3.21 -0.311528533905E+00 0.161681146515E+00 -3.22 -0.310560975743E+00 0.161146490217E+00 -3.23 -0.309599409400E+00 0.160611351132E+00 -3.24 -0.308643779592E+00 0.160075763154E+00 -3.25 -0.307694033256E+00 0.159539758657E+00 -3.26 -0.306750117382E+00 0.159003369978E+00 -3.27 -0.305811978958E+00 0.158466629455E+00 -3.28 -0.304879564973E+00 0.157929569425E+00 -3.29 -0.303952822418E+00 0.157392222226E+00 -3.30 -0.303031698282E+00 0.156854620193E+00 -3.31 -0.302116139781E+00 0.156316795507E+00 -3.32 -0.301206096938E+00 0.155778778385E+00 -3.33 -0.300301521664E+00 0.155240597728E+00 -3.34 -0.299402365905E+00 0.154702282413E+00 -3.35 -0.298508581607E+00 0.154163861316E+00 -3.36 -0.297620120714E+00 0.153625363312E+00 -3.37 -0.296736935173E+00 0.153086817278E+00 -3.38 -0.295858976930E+00 0.152548252090E+00 -3.39 -0.294986198018E+00 0.152009696560E+00 
-3.40 -0.294118552598E+00 0.151471177992E+00 -3.41 -0.293255997006E+00 0.150932722140E+00 -3.42 -0.292398487674E+00 0.150394354689E+00 -3.43 -0.291545981037E+00 0.149856101326E+00 -3.44 -0.290698433528E+00 0.149317987736E+00 -3.45 -0.289855801582E+00 0.148780039604E+00 -3.46 -0.289018041630E+00 0.148242282617E+00 -3.47 -0.288185110110E+00 0.147704742457E+00 -3.48 -0.287356964427E+00 0.147167444108E+00 -3.49 -0.286533564634E+00 0.146630410633E+00 -3.50 -0.285714871231E+00 0.146093664778E+00 -3.51 -0.284900844716E+00 0.145557229285E+00 -3.52 -0.284091445587E+00 0.145021126897E+00 -3.53 -0.283286634344E+00 0.144485380359E+00 -3.54 -0.282486371485E+00 0.143950012413E+00 -3.55 -0.281690617509E+00 0.143415045802E+00 -3.56 -0.280899333023E+00 0.142880503190E+00 -3.57 -0.280112480541E+00 0.142346405835E+00 -3.58 -0.279330024193E+00 0.141812773802E+00 -3.59 -0.278551928160E+00 0.141279627120E+00 -3.60 -0.277778156624E+00 0.140746985815E+00 -3.61 -0.277008673767E+00 0.140214869917E+00 -3.62 -0.276243443770E+00 0.139683299452E+00 -3.63 -0.275482430816E+00 0.139152294447E+00 -3.64 -0.274725599086E+00 0.138621874932E+00 -3.65 -0.273972913029E+00 0.138092060732E+00 -3.66 -0.273224339180E+00 0.137562870106E+00 -3.67 -0.272479845051E+00 0.137034320576E+00 -3.68 -0.271739398163E+00 0.136506429658E+00 -3.69 -0.271002966034E+00 0.135979214870E+00 -3.70 -0.270270516185E+00 0.135452693728E+00 -3.71 -0.269542016133E+00 0.134926883751E+00 -3.72 -0.268817433400E+00 0.134401802456E+00 -3.73 -0.268096735504E+00 0.133877467359E+00 -3.74 -0.267379890210E+00 0.133353895789E+00 -3.75 -0.266666867178E+00 0.132831103617E+00 -3.76 -0.265957636947E+00 0.132309106040E+00 -3.77 -0.265252170062E+00 0.131787918247E+00 -3.78 -0.264550437068E+00 0.131267555430E+00 -3.79 -0.263852408508E+00 0.130748032780E+00 -3.80 -0.263158054928E+00 0.130229365489E+00 -3.81 -0.262467346872E+00 0.129711568747E+00 -3.82 -0.261780254885E+00 0.129194657745E+00 -3.83 -0.261096749594E+00 0.128678647609E+00 -3.84 
-0.260416803058E+00 0.128163552338E+00 -3.85 -0.259740388525E+00 0.127649384992E+00 -3.86 -0.259067479284E+00 0.127136158600E+00 -3.87 -0.258398048620E+00 0.126623886192E+00 -3.88 -0.257732069821E+00 0.126112580800E+00 -3.89 -0.257069516173E+00 0.125602255452E+00 -3.90 -0.256410360964E+00 0.125092923179E+00 -3.91 -0.255754577480E+00 0.124584597011E+00 -3.92 -0.255102139010E+00 0.124077289978E+00 -3.93 -0.254453019423E+00 0.123571014637E+00 -3.94 -0.253807194213E+00 0.123065782233E+00 -3.95 -0.253164639154E+00 0.122561603780E+00 -3.96 -0.252525330019E+00 0.122058490298E+00 -3.97 -0.251889242580E+00 0.121556452803E+00 -3.98 -0.251256352611E+00 0.121055502313E+00 -3.99 -0.250626635885E+00 0.120555649843E+00 -4.00 -0.250000068176E+00 0.120056906411E+00 -# l=1 -0.00 -0.199396910523E+01 0.161556966777E-02 -0.01 -0.199397020146E+01 0.775445182415E-02 -0.02 -0.199396960647E+01 0.155070355277E-01 -0.03 -0.199396880528E+01 0.232558780395E-01 -0.04 -0.199396788390E+01 0.309991092173E-01 -0.05 -0.199396650225E+01 0.387348606998E-01 -0.06 -0.199396394707E+01 0.464612663670E-01 -0.07 -0.199395913436E+01 0.541764628573E-01 -0.08 -0.199395060889E+01 0.618785901110E-01 -0.09 -0.199393654565E+01 0.695657919318E-01 -0.10 -0.199391475371E+01 0.772362165607E-01 -0.11 -0.199388268249E+01 0.848880172637E-01 -0.12 -0.199383743011E+01 0.925193529328E-01 -0.13 -0.199377575352E+01 0.100128388700E+00 -0.14 -0.199369408000E+01 0.107713296571E+00 -0.15 -0.199358852012E+01 0.115272256065E+00 -0.16 -0.199345488154E+01 0.122803454885E+00 -0.17 -0.199328868373E+01 0.130305089590E+00 -0.18 -0.199308517320E+01 0.137775366304E+00 -0.19 -0.199283933949E+01 0.145212501424E+00 -0.20 -0.199254593134E+01 0.152614722362E+00 -0.21 -0.199219947317E+01 0.159980268301E+00 -0.22 -0.199179428182E+01 0.167307390972E+00 -0.23 -0.199132448336E+01 0.174594355449E+00 -0.24 -0.199078402990E+01 0.181839440973E+00 -0.25 -0.199016671646E+01 0.189040941790E+00 -0.26 -0.198946619790E+01 0.196197167994E+00 -0.27 
-0.198867600548E+01 0.203306446418E+00 -0.28 -0.198778956345E+01 0.210367121553E+00 -0.29 -0.198680020574E+01 0.217377556401E+00 -0.30 -0.198570119197E+01 0.224336133490E+00 -0.31 -0.198448572374E+01 0.231241255780E+00 -0.32 -0.198314696062E+01 0.238091347585E+00 -0.33 -0.198167803570E+01 0.244884855696E+00 -0.34 -0.198007207131E+01 0.251620250260E+00 -0.35 -0.197832219425E+01 0.258296025794E+00 -0.36 -0.197642155090E+01 0.264910702238E+00 -0.37 -0.197436332220E+01 0.271462826027E+00 -0.38 -0.197214073826E+01 0.277950971084E+00 -0.39 -0.196974709290E+01 0.284373739869E+00 -0.40 -0.196717575780E+01 0.290729764430E+00 -0.41 -0.196442019662E+01 0.297017707467E+00 -0.42 -0.196147397879E+01 0.303236263381E+00 -0.43 -0.195833079314E+01 0.309384159336E+00 -0.44 -0.195498446129E+01 0.315460156303E+00 -0.45 -0.195142895085E+01 0.321463050107E+00 -0.46 -0.194765838835E+01 0.327391672453E+00 -0.47 -0.194366707193E+01 0.333244891941E+00 -0.48 -0.193944948369E+01 0.339021615038E+00 -0.49 -0.193500030219E+01 0.344720787118E+00 -0.50 -0.193031441521E+01 0.350341393559E+00 -0.51 -0.192538693137E+01 0.355882460626E+00 -0.52 -0.192021319136E+01 0.361343056332E+00 -0.53 -0.191478877826E+01 0.366722291166E+00 -0.54 -0.190910952830E+01 0.372019318922E+00 -0.55 -0.190317154700E+01 0.377233338412E+00 -0.56 -0.189697121724E+01 0.382363593773E+00 -0.57 -0.189050520500E+01 0.387409374539E+00 -0.58 -0.188377046931E+01 0.392370016395E+00 -0.59 -0.187676428580E+01 0.397244903961E+00 -0.60 -0.186948424447E+01 0.402033469561E+00 -0.61 -0.186192824914E+01 0.406735192459E+00 -0.62 -0.185409455027E+01 0.411349602971E+00 -0.63 -0.184598174535E+01 0.415876281622E+00 -0.64 -0.183758876224E+01 0.420314856160E+00 -0.65 -0.182891490833E+01 0.424665007830E+00 -0.66 -0.181995985754E+01 0.428926468681E+00 -0.67 -0.181072363066E+01 0.433099018381E+00 -0.68 -0.180120666873E+01 0.437182493449E+00 -0.69 -0.179140976784E+01 0.441176777749E+00 -0.70 -0.178133412087E+01 0.445081807632E+00 -0.71 -0.177098134335E+01 
0.448897574436E+00 -0.72 -0.176035339895E+01 0.452624114457E+00 -0.73 -0.174945272362E+01 0.456261524190E+00 -0.74 -0.173828209531E+01 0.459809943221E+00 -0.75 -0.172684475409E+01 0.463269568952E+00 -0.76 -0.171514432153E+01 0.466640645863E+00 -0.77 -0.170318483710E+01 0.469923469825E+00 -0.78 -0.169097077176E+01 0.473118389105E+00 -0.79 -0.167850696631E+01 0.476225796780E+00 -0.80 -0.166579873072E+01 0.479246142230E+00 -0.81 -0.165285170541E+01 0.482179914441E+00 -0.82 -0.163967201851E+01 0.485027660551E+00 -0.83 -0.162626609932E+01 0.487789963618E+00 -0.84 -0.161264086369E+01 0.490467464757E+00 -0.85 -0.159880350863E+01 0.493060838713E+00 -0.86 -0.158476169516E+01 0.495570816242E+00 -0.87 -0.157052335695E+01 0.497998161131E+00 -0.88 -0.155609684261E+01 0.500343688639E+00 -0.89 -0.154149078077E+01 0.502608248877E+00 -0.90 -0.152671413504E+01 0.504792735689E+00 -0.91 -0.151177617157E+01 0.506898082503E+00 -0.92 -0.149668638735E+01 0.508925255651E+00 -0.93 -0.148145460197E+01 0.510875267637E+00 -0.94 -0.146609076644E+01 0.512749152416E+00 -0.95 -0.145060513209E+01 0.514547994413E+00 -0.96 -0.143500803218E+01 0.516272893547E+00 -0.97 -0.141931000002E+01 0.517924995303E+00 -0.98 -0.140352165567E+01 0.519505465520E+00 -0.99 -0.138765368171E+01 0.521015499950E+00 -1.00 -0.137171682784E+01 0.522456326589E+00 -1.01 -0.135572183211E+01 0.523829186432E+00 -1.02 -0.133967938259E+01 0.525135358413E+00 -1.03 -0.132360014615E+01 0.526376131045E+00 -1.04 -0.130749462328E+01 0.527552818877E+00 -1.05 -0.129137318396E+01 0.528666756881E+00 -1.06 -0.127524608249E+01 0.529719288442E+00 -1.07 -0.125912320203E+01 0.530711782976E+00 -1.08 -0.124301433504E+01 0.531645615780E+00 -1.09 -0.122692890393E+01 0.532522175531E+00 -1.10 -0.121087593259E+01 0.533342865316E+00 -1.11 -0.119486433371E+01 0.534109090559E+00 -1.12 -0.117890237696E+01 0.534822268114E+00 -1.13 -0.116299805607E+01 0.535483819740E+00 -1.14 -0.114715911229E+01 0.536095168582E+00 -1.15 -0.113139252677E+01 0.536657741919E+00 
-1.16 -0.111570513188E+01 0.537172967781E+00 -1.17 -0.110010333816E+01 0.537642272106E+00 -1.18 -0.108459285123E+01 0.538067077333E+00 -1.19 -0.106917929299E+01 0.538448805292E+00 -1.20 -0.105386778682E+01 0.538788869531E+00 -1.21 -0.103866288459E+01 0.539088674102E+00 -1.22 -0.102356908576E+01 0.539349621806E+00 -1.23 -0.100859044861E+01 0.539573100004E+00 -1.24 -0.993730611991E+00 0.539760481388E+00 -1.25 -0.978993185348E+00 0.539913137038E+00 -1.26 -0.964381508513E+00 0.540032416287E+00 -1.27 -0.949898664625E+00 0.540119647767E+00 -1.28 -0.935547725070E+00 0.540176158455E+00 -1.29 -0.921331713931E+00 0.540203250042E+00 -1.30 -0.907253600564E+00 0.540202194960E+00 -1.31 -0.893316352845E+00 0.540174263722E+00 -1.32 -0.879523067054E+00 0.540120703001E+00 -1.33 -0.865877060969E+00 0.540042718355E+00 -1.34 -0.852381674353E+00 0.539941511267E+00 -1.35 -0.839040414012E+00 0.539818266920E+00 -1.36 -0.825857336897E+00 0.539674116833E+00 -1.37 -0.812836613054E+00 0.539510181490E+00 -1.38 -0.799982500876E+00 0.539327574741E+00 -1.39 -0.787300010343E+00 0.539127353999E+00 -1.40 -0.774794531171E+00 0.538910548161E+00 -1.41 -0.762471468991E+00 0.538678185071E+00 -1.42 -0.750336848795E+00 0.538431252708E+00 -1.43 -0.738397573437E+00 0.538170682558E+00 -1.44 -0.726660612437E+00 0.537897401812E+00 -1.45 -0.715133170526E+00 0.537612324623E+00 -1.46 -0.703823750797E+00 0.537316293152E+00 -1.47 -0.692741301895E+00 0.537010124859E+00 -1.48 -0.681894790076E+00 0.536694636456E+00 -1.49 -0.671294209853E+00 0.536370601513E+00 -1.50 -0.660951147129E+00 0.536038726824E+00 -1.51 -0.650877324419E+00 0.535699713452E+00 -1.52 -0.641084675033E+00 0.535354255938E+00 -1.53 -0.631587414063E+00 0.535002978224E+00 -1.54 -0.622401152369E+00 0.534646461066E+00 -1.55 -0.613541520876E+00 0.534285284598E+00 -1.56 -0.605025127151E+00 0.533920011360E+00 -1.57 -0.596873130790E+00 0.533551121884E+00 -1.58 -0.589108016559E+00 0.533179072829E+00 -1.59 -0.581752270965E+00 0.532804320813E+00 -1.60 
-0.574829684327E+00 0.532427294001E+00 -1.61 -0.568367708612E+00 0.532048340644E+00 -1.62 -0.562394433817E+00 0.531667795063E+00 -1.63 -0.556937965053E+00 0.531285991434E+00 -1.64 -0.552029825754E+00 0.530903229834E+00 -1.65 -0.547709273908E+00 0.530519733195E+00 -1.66 -0.544016630963E+00 0.530135713842E+00 -1.67 -0.540992174493E+00 0.529751384009E+00 -1.68 -0.538662825517E+00 0.529366928122E+00 -1.69 -0.537023412450E+00 0.528982463790E+00 -1.70 -0.536064042318E+00 0.528598098794E+00 -1.71 -0.535774822856E+00 0.528213940913E+00 -1.72 -0.536151364706E+00 0.527830072080E+00 -1.73 -0.537208057373E+00 0.527446486041E+00 -1.74 -0.538963292648E+00 0.527063157749E+00 -1.75 -0.541435462322E+00 0.526680062154E+00 -1.76 -0.544593363494E+00 0.526297176205E+00 -1.77 -0.548073360083E+00 0.525914490233E+00 -1.78 -0.551375189607E+00 0.525532000072E+00 -1.79 -0.553998169894E+00 0.525149701572E+00 -1.80 -0.555457655643E+00 0.524767580730E+00 -1.81 -0.555630955028E+00 0.524385401135E+00 -1.82 -0.554753540315E+00 0.524002706297E+00 -1.83 -0.553076162742E+00 0.523619030338E+00 -1.84 -0.550849560367E+00 0.523233907458E+00 -1.85 -0.548278361802E+00 0.522847150465E+00 -1.86 -0.545417753899E+00 0.522459475125E+00 -1.87 -0.542292702000E+00 0.522071779811E+00 -1.88 -0.538928171446E+00 0.521684962897E+00 -1.89 -0.535350323025E+00 0.521299798066E+00 -1.90 -0.531609571092E+00 0.520914529285E+00 -1.91 -0.527778908656E+00 0.520525045499E+00 -1.92 -0.523932188652E+00 0.520127145960E+00 -1.93 -0.520143264012E+00 0.519716629920E+00 -1.94 -0.516478462844E+00 0.519291317612E+00 -1.95 -0.512959783423E+00 0.518860935145E+00 -1.96 -0.509593045721E+00 0.518439553716E+00 -1.97 -0.506384046004E+00 0.518041250886E+00 -1.98 -0.503338577018E+00 0.517680092631E+00 -1.99 -0.500457991743E+00 0.517355538615E+00 -2.00 -0.497730523907E+00 0.517023887711E+00 -2.01 -0.495141995677E+00 0.516633505042E+00 -2.02 -0.492678229224E+00 0.516132755734E+00 -2.03 -0.490325041821E+00 0.515470390391E+00 -2.04 -0.488067375131E+00 
0.514664146180E+00 -2.05 -0.485888291177E+00 0.513879849890E+00 -2.06 -0.483770608742E+00 0.513302492540E+00 -2.07 -0.481697146606E+00 0.513117065151E+00 -2.08 -0.479650737031E+00 0.513507336726E+00 -2.09 -0.477616895724E+00 0.514413755885E+00 -2.10 -0.475587042409E+00 0.515241428106E+00 -2.11 -0.473553382938E+00 0.515324177280E+00 -2.12 -0.471508123160E+00 0.513995827296E+00 -2.13 -0.469443469933E+00 0.510590396231E+00 -2.14 -0.467354891330E+00 0.505070826417E+00 -2.15 -0.465248363637E+00 0.499426564744E+00 -2.16 -0.463131975565E+00 0.496054438499E+00 -2.17 -0.461013815826E+00 0.497351274970E+00 -2.18 -0.458901973131E+00 0.505713901448E+00 -2.19 -0.456803865289E+00 0.522551545249E+00 -2.20 -0.454722155776E+00 0.542274836853E+00 -2.21 -0.452657454474E+00 0.556271417976E+00 -2.22 -0.450610363122E+00 0.555916946725E+00 -2.23 -0.448581483460E+00 0.532587081205E+00 -2.24 -0.446571403680E+00 0.477933839130E+00 -2.25 -0.444580252676E+00 0.392976479119E+00 -2.26 -0.442607599052E+00 0.290161422435E+00 -2.27 -0.440652977408E+00 0.182628543819E+00 -2.28 -0.438715922345E+00 0.835177180131E-01 -2.29 -0.436795968465E+00 0.596881975665E-02 -2.30 -0.434892687876E+00 -0.394659416875E-01 -2.31 -0.433005854567E+00 -0.561622612324E-01 -2.32 -0.431135310154E+00 -0.521612906457E-01 -2.33 -0.429280896299E+00 -0.355075455275E-01 -2.34 -0.427442454665E+00 -0.142455414782E-01 -2.35 -0.425619826792E+00 0.360437483446E-02 -2.36 -0.423812838158E+00 0.131641726354E-01 -2.37 -0.422021282430E+00 0.157795085712E-01 -2.38 -0.420244949532E+00 0.135282600823E-01 -2.39 -0.418483629388E+00 0.848830460931E-02 -2.40 -0.416737111923E+00 0.273751959263E-02 -2.41 -0.415005187617E+00 -0.168445464066E-02 -2.42 -0.413287663823E+00 -0.389809672637E-02 -2.43 -0.411584367405E+00 -0.436536722352E-02 -2.44 -0.409895126314E+00 -0.362297646647E-02 -2.45 -0.408219768501E+00 -0.220763478957E-02 -2.46 -0.406558121917E+00 -0.656052527200E-03 -2.47 -0.404910014772E+00 0.509354111090E-03 -2.48 -0.403275281176E+00 
0.109088538927E-02 -2.49 -0.401653761093E+00 0.121332473377E-02 -2.50 -0.400045294740E+00 0.101530302696E-02 -2.51 -0.398449722334E+00 0.635451151194E-03 -2.52 -0.396866884094E+00 0.212399988844E-03 -2.53 -0.395296620431E+00 -0.117284954818E-03 -2.54 -0.393738778816E+00 -0.290998757808E-03 -2.55 -0.392193215548E+00 -0.338703843382E-03 -2.56 -0.390659787484E+00 -0.296208207612E-03 -2.57 -0.389138351480E+00 -0.199319846570E-03 -2.58 -0.387628764393E+00 -0.838467563277E-04 -2.59 -0.386130883093E+00 0.144435142549E-04 -2.60 -0.384644568048E+00 0.718128693218E-04 -2.61 -0.383169688343E+00 0.932981649777E-04 -2.62 -0.381706114320E+00 0.881485134032E-04 -2.63 -0.380253716325E+00 0.656130267787E-04 -2.64 -0.378812364701E+00 0.349408172851E-04 -2.65 -0.377381929793E+00 0.538099710268E-05 -2.66 -0.375962282942E+00 -0.147460441603E-04 -2.67 -0.374553303297E+00 -0.243872279759E-04 -2.68 -0.373154873686E+00 -0.259115687832E-04 -2.69 -0.371766876956E+00 -0.217080881840E-04 -2.70 -0.370389195955E+00 -0.141658077800E-04 -2.71 -0.369021713532E+00 -0.567374917294E-05 -2.72 -0.367664312556E+00 0.138527741538E-05 -2.73 -0.366316879280E+00 0.553922824606E-05 -2.74 -0.364979306864E+00 0.717773489865E-05 -2.75 -0.363651489315E+00 0.691787543448E-05 -2.76 -0.362333320637E+00 0.537672791490E-05 -2.77 -0.361024694835E+00 0.317137040124E-05 -2.78 -0.359725505916E+00 0.918880954831E-06 -2.79 -0.358435648119E+00 -0.781728797193E-06 -2.80 -0.357155020631E+00 -0.171253804042E-05 -2.81 -0.355883527348E+00 -0.201851340550E-05 -2.82 -0.354621072352E+00 -0.185904454950E-05 -2.83 -0.353367559729E+00 -0.139352112946E-05 -2.84 -0.352122893561E+00 -0.781332802427E-06 -2.85 -0.350886977932E+00 -0.181869225452E-06 -2.86 -0.349659717322E+00 0.254253774246E-06 -2.87 -0.348441021304E+00 0.488841741047E-06 -2.88 -0.347230802973E+00 0.561508611704E-06 -2.89 -0.346028975493E+00 0.513424319757E-06 -2.90 -0.344835452032E+00 0.385758798747E-06 -2.91 -0.343650145754E+00 0.219681982213E-06 -2.92 -0.342472969825E+00 
0.563638036956E-07 -2.93 -0.341303837694E+00 -0.648161282394E-07 -2.94 -0.340142667189E+00 -0.132146637015E-06 -2.95 -0.338989379581E+00 -0.155674930014E-06 -2.96 -0.337843896235E+00 -0.146035093908E-06 -2.97 -0.336706138515E+00 -0.113861215369E-06 -2.98 -0.335576027785E+00 -0.697873810700E-07 -2.99 -0.334453485410E+00 -0.244476776826E-07 -3.00 -0.333338432816E+00 0.116362301878E-07 -3.01 -0.332230794350E+00 0.332200392413E-07 -3.02 -0.331130498471E+00 0.424912932553E-07 -3.03 -0.330037473949E+00 0.421967491380E-07 -3.04 -0.328951649553E+00 0.350831637971E-07 -3.05 -0.327872954052E+00 0.238972941407E-07 -3.06 -0.326801316215E+00 0.113858970767E-07 -3.07 -0.325736664810E+00 0.295729512940E-09 -3.08 -0.324678929621E+00 -0.714980482964E-08 -3.09 -0.323628044884E+00 -0.110280917447E-07 -3.10 -0.322583946062E+00 -0.120485934431E-07 -3.11 -0.321546568616E+00 -0.109207900847E-07 -3.12 -0.320515848007E+00 -0.835416182914E-08 -3.13 -0.319491719697E+00 -0.505818883629E-08 -3.14 -0.318474119147E+00 -0.174235126590E-08 -3.15 -0.317462981877E+00 0.892352064117E-09 -3.16 -0.316458245854E+00 0.250616832160E-08 -3.17 -0.315459852357E+00 0.324772025731E-08 -3.18 -0.314467742901E+00 0.330026474982E-08 -3.19 -0.313481858997E+00 0.284705867768E-08 -3.20 -0.312502142161E+00 0.207135891946E-08 -3.21 -0.311528533905E+00 0.115642235373E-08 -3.22 -0.310560975743E+00 0.285505859048E-09 -3.23 -0.309599409400E+00 -0.367023708206E-09 -3.24 -0.308643779592E+00 -0.752783412892E-09 -3.25 -0.307694033256E+00 -0.916895328784E-09 -3.26 -0.306750117382E+00 -0.906694230615E-09 -3.27 -0.305811978958E+00 -0.769514893121E-09 -3.28 -0.304879564973E+00 -0.552692091033E-09 -3.29 -0.303952822418E+00 -0.303560599085E-09 -3.30 -0.303031698282E+00 -0.694551920121E-10 -3.31 -0.302116139781E+00 0.105027159242E-09 -3.32 -0.301206096938E+00 0.209005933782E-09 -3.33 -0.300301521664E+00 0.254285326489E-09 -3.34 -0.299402365905E+00 0.253091786466E-09 -3.35 -0.298508581607E+00 0.217651762822E-09 -3.36 
-0.297620120714E+00 0.160191704660E-09 -3.37 -0.296736935173E+00 0.929380610875E-10 -3.38 -0.295858976930E+00 0.281172812103E-10 -3.39 -0.294986198018E+00 -0.223482577758E-10 -3.40 -0.294118552598E+00 -0.538239964999E-10 -3.41 -0.293255997006E+00 -0.691304635957E-10 -3.42 -0.292398487674E+00 -0.714257190431E-10 -3.43 -0.291545981037E+00 -0.638678228218E-10 -3.44 -0.290698433528E+00 -0.496148349116E-10 -3.45 -0.289855801582E+00 -0.318248152925E-10 -3.46 -0.289018041630E+00 -0.136558239441E-10 -3.47 -0.288185110110E+00 0.173553880751E-11 -3.48 -0.287356964427E+00 0.121428059404E-10 -3.49 -0.286533564634E+00 0.179457771920E-10 -3.50 -0.285714871231E+00 0.199601712347E-10 -3.51 -0.284900844716E+00 0.190017067414E-10 -3.52 -0.284091445587E+00 0.158861023847E-10 -3.53 -0.283286634344E+00 0.114290768372E-10 -3.54 -0.282486371485E+00 0.644634877170E-11 -3.55 -0.281690617509E+00 0.175363686074E-11 -3.56 -0.280899333023E+00 -0.186377893362E-11 -3.57 -0.280112480541E+00 -0.415138041519E-11 -3.58 -0.279330024193E+00 -0.530553537258E-11 -3.59 -0.278551928160E+00 -0.553694181571E-11 -3.60 -0.277778156624E+00 -0.505629775449E-11 -3.61 -0.277008673767E+00 -0.407430119883E-11 -3.62 -0.276243443770E+00 -0.280165015865E-11 -3.63 -0.275482430816E+00 -0.144904264387E-11 -3.64 -0.274725599086E+00 -0.227176664388E-12 -3.65 -0.273972913029E+00 0.674473239547E-12 -3.66 -0.273224339180E+00 0.123206378389E-11 -3.67 -0.272479845051E+00 0.149956709632E-11 -3.68 -0.271739398163E+00 0.153140592438E-11 -3.69 -0.271002966034E+00 0.138200301561E-11 -3.70 -0.270270516185E+00 0.110578111753E-11 -3.71 -0.269542016133E+00 0.757162977679E-12 -3.72 -0.268817433400E+00 0.390571343597E-12 -3.73 -0.268096735504E+00 0.604289628162E-13 -3.74 -0.267379890210E+00 -0.184404496849E-12 -3.75 -0.266666867178E+00 -0.337946397110E-12 -3.76 -0.265957636947E+00 -0.414143678035E-12 -3.77 -0.265252170062E+00 -0.427053593655E-12 -3.78 -0.264550437068E+00 -0.390733398003E-12 -3.79 -0.263852408508E+00 -0.319240345108E-12 
-3.80 -0.263158054928E+00 -0.226631689003E-12 -3.81 -0.262467346872E+00 -0.126964683718E-12 -3.82 -0.261780254885E+00 -0.342965832851E-13 -3.83 -0.261096749594E+00 0.378573739506E-13 -3.84 -0.260416803058E+00 0.851925298170E-13 -3.85 -0.259740388525E+00 0.111103192424E-12 -3.86 -0.259067479284E+00 0.119220313898E-12 -3.87 -0.258398048620E+00 0.113174846367E-12 -3.88 -0.257732069821E+00 0.965977419553E-13 -3.89 -0.257069516173E+00 0.731199527911E-13 -3.90 -0.256410360964E+00 0.463724310006E-13 -3.91 -0.255754577480E+00 0.199861287103E-13 -3.92 -0.255102139010E+00 -0.240940416128E-14 -3.93 -0.254453019423E+00 -0.182562022275E-13 -3.94 -0.253807194213E+00 -0.279777451605E-13 -3.95 -0.253164639154E+00 -0.325118984437E-13 -3.96 -0.252525330019E+00 -0.327965275603E-13 -3.97 -0.251889242580E+00 -0.297694979937E-13 -3.98 -0.251256352611E+00 -0.243686752270E-13 -3.99 -0.250626635885E+00 -0.175319247436E-13 -4.00 -0.25 -0.101971120268E-13 -# l=2 -0.00 -0.118599184851E+01 0.774278678775E-06 -0.01 -0.118599295299E+01 0.178391526709E-04 -0.02 -0.118599248226E+01 0.713530367340E-04 -0.03 -0.118599221954E+01 0.160530674096E-03 -0.04 -0.118599274729E+01 0.285353702617E-03 -0.05 -0.118599441913E+01 0.445796398141E-03 -0.06 -0.118599741149E+01 0.641825684216E-03 -0.07 -0.118600172437E+01 0.873401140746E-03 -0.08 -0.118600717886E+01 0.114047501335E-02 -0.09 -0.118601341613E+01 0.144299222372E-02 -0.10 -0.118601989861E+01 0.178089038166E-02 -0.11 -0.118602591312E+01 0.215409979855E-02 -0.12 -0.118603057573E+01 0.256254350225E-02 -0.13 -0.118603283812E+01 0.300613725468E-02 -0.14 -0.118603149491E+01 0.348478956938E-02 -0.15 -0.118602519211E+01 0.399840173309E-02 -0.16 -0.118601243609E+01 0.454686782801E-02 -0.17 -0.118599160311E+01 0.513007475739E-02 -0.18 -0.118596094915E+01 0.574790226940E-02 -0.19 -0.118591861998E+01 0.640022298997E-02 -0.20 -0.118586266129E+01 0.708690245283E-02 -0.21 -0.118579102873E+01 0.780779913197E-02 -0.22 -0.118570159794E+01 0.856276447957E-02 -0.23 
-0.118559217439E+01 0.935164296550E-02 -0.24 -0.118546050291E+01 0.101742721171E-01 -0.25 -0.118530427703E+01 0.110304825623E-01 -0.26 -0.118512114806E+01 0.119200980854E-01 -0.27 -0.118490873370E+01 0.128429356703E-01 -0.28 -0.118466462639E+01 0.137988055489E-01 -0.29 -0.118438640131E+01 0.147875112788E-01 -0.30 -0.118407162396E+01 0.158088497765E-01 -0.31 -0.118371785741E+01 0.168626113979E-01 -0.32 -0.118332266912E+01 0.179485800175E-01 -0.33 -0.118288363751E+01 0.190665330607E-01 -0.34 -0.118239835803E+01 0.202162416123E-01 -0.35 -0.118186444894E+01 0.213974704936E-01 -0.36 -0.118127955678E+01 0.226099783354E-01 -0.37 -0.118064136152E+01 0.238535176583E-01 -0.38 -0.117994758136E+01 0.251278349733E-01 -0.39 -0.117919597724E+01 0.264326708779E-01 -0.40 -0.117838435709E+01 0.277677601537E-01 -0.41 -0.117751057983E+01 0.291328318698E-01 -0.42 -0.117657255911E+01 0.305276094907E-01 -0.43 -0.117556826687E+01 0.319518109889E-01 -0.44 -0.117449573665E+01 0.334051489623E-01 -0.45 -0.117335306676E+01 0.348873307561E-01 -0.46 -0.117213842326E+01 0.363980585899E-01 -0.47 -0.117085004272E+01 0.379370296909E-01 -0.48 -0.116948623495E+01 0.395039364323E-01 -0.49 -0.116804538562E+01 0.410984664675E-01 -0.50 -0.116652595902E+01 0.427203028635E-01 -0.51 -0.116492650039E+01 0.443691242607E-01 -0.52 -0.116324563814E+01 0.460446050359E-01 -0.53 -0.116148208578E+01 0.477464154752E-01 -0.54 -0.115963464417E+01 0.494742219334E-01 -0.55 -0.115770220551E+01 0.512276869327E-01 -0.56 -0.115568375463E+01 0.530064693895E-01 -0.57 -0.115357836979E+01 0.548102248388E-01 -0.58 -0.115138522496E+01 0.566386055940E-01 -0.59 -0.114910359649E+01 0.584912608079E-01 -0.60 -0.114673286157E+01 0.603678368013E-01 -0.61 -0.114427249751E+01 0.622679773005E-01 -0.62 -0.114172209175E+01 0.641913234457E-01 -0.63 -0.113908134152E+01 0.661375141023E-01 -0.64 -0.113635004853E+01 0.681061861730E-01 -0.65 -0.113352813436E+01 0.700969745702E-01 -0.66 -0.113061563654E+01 0.721095125922E-01 -0.67 -0.112761270245E+01 
0.741434321723E-01 -0.68 -0.112451961301E+01 0.761983638946E-01 -0.69 -0.112133676246E+01 0.782739374573E-01 -0.70 -0.111806467206E+01 0.803697817228E-01 -0.71 -0.111470399934E+01 0.824855250157E-01 -0.72 -0.111125551451E+01 0.846207953489E-01 -0.73 -0.110772014168E+01 0.867752206302E-01 -0.74 -0.110409891719E+01 0.889484289087E-01 -0.75 -0.110039302964E+01 0.911400486017E-01 -0.76 -0.109660379445E+01 0.933497087518E-01 -0.77 -0.109273266620E+01 0.955770391764E-01 -0.78 -0.108878124434E+01 0.978216708938E-01 -0.79 -0.108475125258E+01 0.100083236023E+00 -0.80 -0.108064457300E+01 0.102361368527E+00 -0.81 -0.107646319922E+01 0.104655703748E+00 -0.82 -0.107220928930E+01 0.106965879546E+00 -0.83 -0.106788510260E+01 0.109291535421E+00 -0.84 -0.106349306065E+01 0.111632314057E+00 -0.85 -0.105903567814E+01 0.113987860060E+00 -0.86 -0.105451561995E+01 0.116357821825E+00 -0.87 -0.104993563846E+01 0.118741850042E+00 -0.88 -0.104529861245E+01 0.121139599630E+00 -0.89 -0.104060750540E+01 0.123550728404E+00 -0.90 -0.103586537117E+01 0.125974898498E+00 -0.91 -0.103107534491E+01 0.128411775973E+00 -0.92 -0.102624060978E+01 0.130861030766E+00 -0.93 -0.102136441501E+01 0.133322338292E+00 -0.94 -0.101645002333E+01 0.135795376934E+00 -0.95 -0.101150072073E+01 0.138279832265E+00 -0.96 -0.100651979077E+01 0.140775392257E+00 -0.97 -0.100151047176E+01 0.143281752716E+00 -0.98 -0.996475979421E+00 0.145798612987E+00 -0.99 -0.991419435787E+00 0.148325678685E+00 -1.00 -0.986343861752E+00 0.150862662186E+00 -1.01 -0.981252201559E+00 0.153409279164E+00 -1.02 -0.976147161015E+00 0.155965255168E+00 -1.03 -0.971031377836E+00 0.158530318359E+00 -1.04 -0.965907204398E+00 0.161104205544E+00 -1.05 -0.960776769296E+00 0.163686660312E+00 -1.06 -0.955642052293E+00 0.166277429919E+00 -1.07 -0.950504560562E+00 0.168876273159E+00 -1.08 -0.945365688317E+00 0.171482951534E+00 -1.09 -0.940226424541E+00 0.174097235116E+00 -1.10 -0.935087321811E+00 0.176718903205E+00 -1.11 -0.929948820315E+00 0.179347737340E+00 
-1.12 -0.924810721985E+00 0.181983531558E+00 -1.13 -0.919672554850E+00 0.184626085265E+00 -1.14 -0.914533615729E+00 0.187275202376E+00 -1.15 -0.909392505591E+00 0.189930700384E+00 -1.16 -0.904247689640E+00 0.192592399440E+00 -1.17 -0.899097279267E+00 0.195260127233E+00 -1.18 -0.893938794645E+00 0.197933724053E+00 -1.19 -0.888769687417E+00 0.200613031716E+00 -1.20 -0.883587041851E+00 0.203297901874E+00 -1.21 -0.878387521033E+00 0.205988197461E+00 -1.22 -0.873167753247E+00 0.208683782551E+00 -1.23 -0.867924121485E+00 0.211384532258E+00 -1.24 -0.862652775564E+00 0.214090332188E+00 -1.25 -0.857349855067E+00 0.216801068889E+00 -1.26 -0.852011478658E+00 0.219516640013E+00 -1.27 -0.846633745083E+00 0.222236953779E+00 -1.28 -0.841212759747E+00 0.224961919159E+00 -1.29 -0.835744854312E+00 0.227691455089E+00 -1.30 -0.830226622222E+00 0.230425492041E+00 -1.31 -0.824654676165E+00 0.233163961223E+00 -1.32 -0.819026007648E+00 0.235906801405E+00 -1.33 -0.813338261648E+00 0.238653964406E+00 -1.34 -0.807589147957E+00 0.241405403337E+00 -1.35 -0.801776712525E+00 0.244161075563E+00 -1.36 -0.795900108388E+00 0.246920952465E+00 -1.37 -0.789958716164E+00 0.249685008302E+00 -1.38 -0.783952073702E+00 0.252453218773E+00 -1.39 -0.777881056375E+00 0.255225571812E+00 -1.40 -0.771747215345E+00 0.258002061534E+00 -1.41 -0.765552128743E+00 0.260782682254E+00 -1.42 -0.759298412548E+00 0.263567435444E+00 -1.43 -0.752990153738E+00 0.266356332725E+00 -1.44 -0.746631551008E+00 0.269149386489E+00 -1.45 -0.740227178084E+00 0.271946611041E+00 -1.46 -0.733783678902E+00 0.274748031246E+00 -1.47 -0.727308407815E+00 0.277553675597E+00 -1.48 -0.720808745004E+00 0.280363572671E+00 -1.49 -0.714293575678E+00 0.283177756057E+00 -1.50 -0.707774114302E+00 0.285996267098E+00 -1.51 -0.701261775297E+00 0.288819147803E+00 -1.52 -0.694768258888E+00 0.291646440747E+00 -1.53 -0.688308359037E+00 0.294478194613E+00 -1.54 -0.681898762150E+00 0.297314461822E+00 -1.55 -0.675556181832E+00 0.300155294851E+00 -1.56 
-0.669298520249E+00 0.303000747167E+00 -1.57 -0.663149219302E+00 0.305850876870E+00 -1.58 -0.657133333599E+00 0.308705743408E+00 -1.59 -0.651275919965E+00 0.311565406231E+00 -1.60 -0.645603703842E+00 0.314429925500E+00 -1.61 -0.640148096818E+00 0.317299363386E+00 -1.62 -0.634941327033E+00 0.320173782409E+00 -1.63 -0.630015639745E+00 0.323053245087E+00 -1.64 -0.625407152057E+00 0.325937813610E+00 -1.65 -0.621160741805E+00 0.328827549434E+00 -1.66 -0.617322491376E+00 0.331722513911E+00 -1.67 -0.613938440710E+00 0.334622768388E+00 -1.68 -0.611041707738E+00 0.337528372705E+00 -1.69 -0.608634361883E+00 0.340439383077E+00 -1.70 -0.606713904788E+00 0.343355855186E+00 -1.71 -0.605277838843E+00 0.346277844714E+00 -1.72 -0.604329475981E+00 0.349205405355E+00 -1.73 -0.603891953410E+00 0.352138584023E+00 -1.74 -0.603992633641E+00 0.355077426184E+00 -1.75 -0.604658879184E+00 0.358021977306E+00 -1.76 -0.605868593113E+00 0.360972281430E+00 -1.77 -0.607268151920E+00 0.363928373028E+00 -1.78 -0.608367678298E+00 0.366890282644E+00 -1.79 -0.608676876397E+00 0.369858040806E+00 -1.80 -0.607721510863E+00 0.372831677651E+00 -1.81 -0.605389833040E+00 0.375811214428E+00 -1.82 -0.601928786013E+00 0.378796663588E+00 -1.83 -0.597600614342E+00 0.381788037212E+00 -1.84 -0.592667549446E+00 0.384785347376E+00 -1.85 -0.587345834007E+00 0.387788601131E+00 -1.86 -0.581702660181E+00 0.390797789237E+00 -1.87 -0.575775077731E+00 0.393812899158E+00 -1.88 -0.569600136421E+00 0.396833918362E+00 -1.89 -0.563216076623E+00 0.399860833869E+00 -1.90 -0.556685294198E+00 0.402893623702E+00 -1.91 -0.550092672352E+00 0.405932257504E+00 -1.92 -0.543523950736E+00 0.408976704600E+00 -1.93 -0.537064869001E+00 0.412026934315E+00 -1.94 -0.530793491582E+00 0.415082910183E+00 -1.95 -0.524742667102E+00 0.418144561635E+00 -1.96 -0.518928742540E+00 0.421211805653E+00 -1.97 -0.513368040699E+00 0.424284559202E+00 -1.98 -0.508076880344E+00 0.427362739258E+00 -1.99 -0.503066489896E+00 0.430446277573E+00 -2.00 -0.498333056113E+00 
0.433535149559E+00 -2.01 -0.493870000820E+00 0.436629338653E+00 -2.02 -0.489670745841E+00 0.439728828291E+00 -2.03 -0.485728699578E+00 0.442833601330E+00 -2.04 -0.482034868339E+00 0.445943536599E+00 -2.05 -0.478575101993E+00 0.449058289616E+00 -2.06 -0.475334583118E+00 0.452177487002E+00 -2.07 -0.472298494288E+00 0.455300755377E+00 -2.08 -0.469452020677E+00 0.458427723058E+00 -2.09 -0.466780864566E+00 0.461558355965E+00 -2.10 -0.464271865955E+00 0.464693362803E+00 -2.11 -0.461912016329E+00 0.467833551179E+00 -2.12 -0.459688307179E+00 0.470979728700E+00 -2.13 -0.457587730167E+00 0.474132702697E+00 -2.14 -0.455597849837E+00 0.477292392253E+00 -2.15 -0.453708076644E+00 0.480455854340E+00 -2.16 -0.451908192121E+00 0.483619570573E+00 -2.17 -0.450187977798E+00 0.486780022569E+00 -2.18 -0.448537215209E+00 0.489933691943E+00 -2.19 -0.446945950304E+00 0.493078451733E+00 -2.20 -0.445406102831E+00 0.496222035250E+00 -2.21 -0.443910401906E+00 0.499376434876E+00 -2.22 -0.442451579858E+00 0.502553659872E+00 -2.23 -0.441022369013E+00 0.505765719502E+00 -2.24 -0.439615529106E+00 0.509023913694E+00 -2.25 -0.438224748900E+00 0.512315499422E+00 -2.26 -0.436844850485E+00 0.515598403486E+00 -2.27 -0.435470724726E+00 0.518828772795E+00 -2.28 -0.434097262488E+00 0.521962754260E+00 -2.29 -0.432719354637E+00 0.524956494790E+00 -2.30 -0.431332196959E+00 0.527790701720E+00 -2.31 -0.429932626448E+00 0.530578276920E+00 -2.32 -0.428518029861E+00 0.533476403724E+00 -2.33 -0.427085794348E+00 0.536642297387E+00 -2.34 -0.425633307061E+00 0.540233173170E+00 -2.35 -0.424157962667E+00 0.544404519130E+00 -2.36 -0.422658132843E+00 0.549087262024E+00 -2.37 -0.421134124346E+00 0.553767561315E+00 -2.38 -0.419586471595E+00 0.557879249305E+00 -2.39 -0.418015709009E+00 0.560856158301E+00 -2.40 -0.416422371007E+00 0.562132120607E+00 -2.41 -0.414807067238E+00 0.561179036658E+00 -2.42 -0.413172689876E+00 0.558623798888E+00 -2.43 -0.411524770445E+00 0.556428853186E+00 -2.44 -0.409868987540E+00 0.556631064871E+00 
-2.45 -0.408211019755E+00 0.561267299260E+00 -2.46 -0.406556545684E+00 0.572374421674E+00 -2.47 -0.404911126755E+00 0.591791053897E+00 -2.48 -0.403277662713E+00 0.616852363173E+00 -2.49 -0.401656409930E+00 0.640421035737E+00 -2.50 -0.400047511271E+00 0.655167709086E+00 -2.51 -0.398451109603E+00 0.653763020717E+00 -2.52 -0.396867347789E+00 0.628877608129E+00 -2.53 -0.395296364384E+00 0.573399635392E+00 -2.54 -0.393738143530E+00 0.488006833101E+00 -2.55 -0.392192476116E+00 0.383126172082E+00 -2.56 -0.390659140825E+00 0.269800281882E+00 -2.57 -0.389137916340E+00 0.159071792047E+00 -2.58 -0.387628581345E+00 0.619833321270E-01 -2.59 -0.386130914625E+00 -0.104383816387E-01 -2.60 -0.384644724824E+00 -0.518994285825E-01 -2.61 -0.383169892024E+00 -0.674269319116E-01 -2.62 -0.381706306759E+00 -0.637052595060E-01 -2.63 -0.380253859567E+00 -0.474187792459E-01 -2.64 -0.378812440982E+00 -0.252518590111E-01 -2.65 -0.377381941540E+00 -0.388886668184E-02 -2.66 -0.375962250749E+00 0.106570211301E-01 -2.67 -0.374553250057E+00 0.176247406435E-01 -2.68 -0.373154817118E+00 0.187263874320E-01 -2.69 -0.371766829565E+00 0.156885163204E-01 -2.70 -0.370389165030E+00 0.102376821332E-01 -2.71 -0.369021701145E+00 0.410043969523E-02 -2.72 -0.367664315580E+00 -0.100114515637E-02 -2.73 -0.366316891373E+00 -0.400322091950E-02 -2.74 -0.364979322534E+00 -0.518737579036E-02 -2.75 -0.363651504417E+00 -0.499957438611E-02 -2.76 -0.362333332375E+00 -0.388578132391E-02 -2.77 -0.361024701759E+00 -0.229196122091E-02 -2.78 -0.359725507922E+00 -0.664078694271E-03 -2.79 -0.358435646413E+00 0.564958318250E-03 -2.80 -0.357155016892E+00 0.123765763105E-02 -2.81 -0.355883522941E+00 0.145878716894E-02 -2.82 -0.354621068294E+00 0.134353843176E-02 -2.83 -0.353367556687E+00 0.100710291929E-02 -2.84 -0.352122891855E+00 0.564672131355E-03 -2.85 -0.350886977534E+00 0.131437567762E-03 -2.86 -0.349659717877E+00 -0.183750151232E-03 -2.87 -0.348441022371E+00 -0.353287750054E-03 -2.88 -0.347230804198E+00 -0.405804368587E-03 
-2.89 -0.346028976614E+00 -0.371053671402E-03 -2.90 -0.344835452874E+00 -0.278789323065E-03 -2.91 -0.343650146234E+00 -0.158764988147E-03 -2.92 -0.342472969948E+00 -0.407343312161E-04 -2.93 -0.341303837553E+00 0.468428576984E-04 -2.94 -0.340142666901E+00 0.955028675911E-04 -2.95 -0.338989379242E+00 0.112506852722E-03 -2.96 -0.337843895916E+00 0.105540107203E-03 -2.97 -0.336706138267E+00 0.822879251472E-04 -2.98 -0.335576027633E+00 0.504356006660E-04 -2.99 -0.334453485357E+00 0.176684278720E-04 -3.00 -0.333338432841E+00 -0.840954696988E-05 -3.01 -0.332230794422E+00 -0.240082462989E-04 -3.02 -0.331130498564E+00 -0.307086161646E-04 -3.03 -0.330037474041E+00 -0.304957480321E-04 -3.04 -0.328951649630E+00 -0.253547333665E-04 -3.05 -0.327872954104E+00 -0.172706636329E-04 -3.06 -0.326801316239E+00 -0.822863029647E-05 -3.07 -0.325736664811E+00 -0.213724822327E-06 -3.08 -0.324678929605E+00 0.516719062528E-05 -3.09 -0.323628044860E+00 0.797004304815E-05 -3.10 -0.322583946036E+00 0.870756343295E-05 -3.11 -0.321546568592E+00 0.789249573812E-05 -3.12 -0.320515847989E+00 0.603758392211E-05 -3.13 -0.319491719686E+00 0.365557194337E-05 -3.14 -0.318474119143E+00 0.125920376033E-05 -3.15 -0.317462981879E+00 -0.644906165976E-06 -3.16 -0.316458245860E+00 -0.181121719618E-05 -3.17 -0.315459852365E+00 -0.234713954674E-05 -3.18 -0.314467742908E+00 -0.238511364751E-05 -3.19 -0.313481859003E+00 -0.205757992833E-05 -3.20 -0.312502142165E+00 -0.149697881904E-05 -3.21 -0.311528533907E+00 -0.835750749488E-06 -3.22 -0.310560975743E+00 -0.206336149515E-06 -3.23 -0.309599409399E+00 0.265249403234E-06 -3.24 -0.308643779590E+00 0.544039381025E-06 -3.25 -0.307694033254E+00 0.662643674918E-06 -3.26 -0.306750117380E+00 0.655271303213E-06 -3.27 -0.305811978956E+00 0.556131284209E-06 -3.28 -0.304879564972E+00 0.399432636205E-06 -3.29 -0.303952822418E+00 0.219384377500E-06 -3.30 -0.303031698281E+00 0.501955263946E-07 -3.31 -0.302116139781E+00 -0.759035198255E-07 -3.32 -0.301206096939E+00 
-0.151049368116E-06 -3.33 -0.300301521665E+00 -0.183772954156E-06 -3.34 -0.299402365906E+00 -0.182910378329E-06 -3.35 -0.298508581607E+00 -0.157297741019E-06 -3.36 -0.297620120714E+00 -0.115771142610E-06 -3.37 -0.296736935173E+00 -0.671666834866E-07 -3.38 -0.295858976930E+00 -0.203204640323E-07 -3.39 -0.294986198018E+00 0.161511692727E-07 -3.40 -0.294118552598E+00 0.388988030802E-07 -3.41 -0.293255997005E+00 0.499608439566E-07 -3.42 -0.292398487674E+00 0.516196336316E-07 -3.43 -0.291545981037E+00 0.461575138351E-07 -3.44 -0.290698433528E+00 0.358568262965E-07 -3.45 -0.289855801582E+00 0.229999127457E-07 -3.46 -0.289018041630E+00 0.986911491232E-08 -3.47 -0.288185110110E+00 -0.125428037123E-08 -3.48 -0.287356964427E+00 -0.877565115624E-08 -3.49 -0.286533564634E+00 -0.129694801298E-07 -3.50 -0.285714871231E+00 -0.144252902199E-07 -3.51 -0.284900844716E+00 -0.137326043547E-07 -3.52 -0.284091445587E+00 -0.114809454622E-07 -3.53 -0.283286634344E+00 -0.825983647053E-08 -3.54 -0.282486371485E+00 -0.465880030772E-08 -3.55 -0.281690617509E+00 -0.126735990183E-08 -3.56 -0.280899333023E+00 0.134695998882E-08 -3.57 -0.280112480541E+00 0.300021811427E-08 -3.58 -0.279330024193E+00 0.383433020796E-08 -3.59 -0.278551928160E+00 0.400156850776E-08 -3.60 -0.277778156624E+00 0.365420525150E-08 -3.61 -0.277008673767E+00 0.294451267703E-08 -3.62 -0.276243443770E+00 0.202476302222E-08 -3.63 -0.275482430816E+00 0.104722852490E-08 -3.64 -0.274725599086E+00 0.164181422917E-09 -3.65 -0.273972913029E+00 -0.487444326584E-09 -3.66 -0.273224339180E+00 -0.890417093271E-09 -3.67 -0.272479845051E+00 -0.108374273518E-08 -3.68 -0.271739398163E+00 -0.110675277501E-08 -3.69 -0.271002966034E+00 -0.998778735436E-09 -3.70 -0.270270516185E+00 -0.799152139149E-09 -3.71 -0.269542016133E+00 -0.547204508836E-09 -3.72 -0.268817433400E+00 -0.282267367184E-09 -3.73 -0.268096735504E+00 -0.436722368792E-10 -3.74 -0.267379890210E+00 0.133269817860E-09 -3.75 -0.266666867178E+00 0.244235122021E-09 -3.76 
-0.265957636947E+00 0.299303181227E-09 -3.77 -0.265252170062E+00 0.308633225411E-09 -3.78 -0.264550437068E+00 0.282384484507E-09 -3.79 -0.263852408508E+00 0.230716188450E-09 -3.80 -0.263158054928E+00 0.163787567173E-09 -3.81 -0.262467346872E+00 0.917578506105E-10 -3.82 -0.261780254885E+00 0.247862686958E-10 -3.83 -0.261096749594E+00 -0.273596653946E-10 -3.84 -0.260416803058E+00 -0.615689591399E-10 -3.85 -0.259740388525E+00 -0.802946916751E-10 -3.86 -0.259067479284E+00 -0.861609656484E-10 -3.87 -0.258398048620E+00 -0.817918837084E-10 -3.88 -0.257732069821E+00 -0.698115485036E-10 -3.89 -0.257069516173E+00 -0.528440626823E-10 -3.90 -0.256410360964E+00 -0.335135288931E-10 -3.91 -0.255754577480E+00 -0.144440497843E-10 -3.92 -0.255102139010E+00 0.174128537650E-11 -3.93 -0.254453019423E+00 0.131938254611E-10 -3.94 -0.253807194213E+00 0.202196208085E-10 -3.95 -0.253164639154E+00 0.234964703026E-10 -3.96 -0.252525330019E+00 0.237021728270E-10 -3.97 -0.251889242580E+00 0.215145272658E-10 -3.98 -0.251256352611E+00 0.176113325026E-10 -3.99 -0.250626635885E+00 0.126703874213E-10 -4.00 -0.25 0.736949090574E-11 From 5e9fcb405e97c69d1dbc6c09a0e3932ae156f43a Mon Sep 17 00:00:00 2001 From: "Fattebert J.-L." 
Date: Tue, 9 Sep 2025 16:31:26 -0400 Subject: [PATCH 60/99] Tune 2 pythons scripts for better looking output --- util/compareTimers.py | 6 +++--- util/replicateCoords.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/util/compareTimers.py b/util/compareTimers.py index e31b2df6..002ccf0b 100644 --- a/util/compareTimers.py +++ b/util/compareTimers.py @@ -54,7 +54,7 @@ print('-----------------------------------------------------------------------------') for timer in reversed(sorted_timers): key=timer[0] - print(key.ljust(50)), - print(str(eval(timers1[key])).ljust(10)), - print(str(eval(timers2[key])).ljust(10)), + print(key.ljust(50), end="") + print(str(eval(timers1[key])).ljust(10), end="") + print(str(eval(timers2[key])).ljust(10), end="") print(str(100.*timer[1]).ljust(20)) diff --git a/util/replicateCoords.py b/util/replicateCoords.py index 37450511..8b95f865 100644 --- a/util/replicateCoords.py +++ b/util/replicateCoords.py @@ -53,11 +53,11 @@ vy=eval(word[7]) vz=eval(word[8]) for i in range(nx): - x=eval(word[2])+i*lx + x=round(eval(word[2])+i*lx,12) for j in range(ny): - y=eval(word[3])+j*ly + y=round(eval(word[3])+j*ly,12) for k in range(nz): - z=eval(word[4])+k*lz + z=round(eval(word[4])+k*lz,12) count_atom = count_atom + 1 name = myspecies + str(count_atom) sp=word[1] From 10f6fab03ccfcdc82cfefd747981b281d8366652 Mon Sep 17 00:00:00 2001 From: "Fattebert J.-L" Date: Wed, 10 Sep 2025 10:14:07 -0400 Subject: [PATCH 61/99] Store wavefunctions in double in KB objects * this allows to used optimized dot functions in mixed precision --- src/KBprojectorSparse.cc | 7 ++++--- src/KBprojectorSparse.h | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/KBprojectorSparse.cc b/src/KBprojectorSparse.cc index 937b95d2..38b4c9bb 100644 --- a/src/KBprojectorSparse.cc +++ b/src/KBprojectorSparse.cc @@ -15,7 +15,7 @@ const double rthreshold = 1.e-5; -std::vector> KBprojectorSparse::work_nlindex_; +std::vector> 
KBprojectorSparse::work_nlindex_; std::vector> KBprojectorSparse::work_proj_; KBprojectorSparse::KBprojectorSparse(const Species& sp) : KBprojector(sp) @@ -133,11 +133,12 @@ void KBprojectorSparse::registerPsi(const short iloc, const ORBDTYPE* const psi) assert(work_nlindex_.size() == static_cast(omp_get_max_threads())); assert(static_cast(thread) < work_nlindex_.size()); - std::vector& work(work_nlindex_[thread]); + std::vector& work(work_nlindex_[thread]); for (int i = 0; i < sizenl; i++) { const int j = rnlindex[i]; - work[i] = psi[j]; + // convert to KBPROJDTYPE if necessary to enable optimized dot products + work[i] = (KBPROJDTYPE)psi[j]; } } diff --git a/src/KBprojectorSparse.h b/src/KBprojectorSparse.h index 552132b6..be3af5a8 100644 --- a/src/KBprojectorSparse.h +++ b/src/KBprojectorSparse.h @@ -30,7 +30,7 @@ class KBprojectorSparse : public KBprojector { // work arrays (1 for each thread) - static std::vector> work_nlindex_; + static std::vector> work_nlindex_; static std::vector> work_proj_; From 0d6dbab21e7867e3762148dcefa6e14968cc92a4 Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Fri, 19 Sep 2025 11:27:27 -0400 Subject: [PATCH 62/99] Remove two unused functions --- src/MGmol.h | 6 ------ src/computeHij.cc | 21 --------------------- 2 files changed, 27 deletions(-) diff --git a/src/MGmol.h b/src/MGmol.h index 9ce4b81f..6eb1c8c0 100644 --- a/src/MGmol.h +++ b/src/MGmol.h @@ -122,12 +122,6 @@ class MGmol : public MGmolInterface void getKBPsiAndHij(OrbitalsType& orbitals_i, OrbitalsType& orbitals_j, Ions& ions, KBPsiMatrixSparse* kbpsi, ProjectedMatricesInterface* projmatrices); - void getKBPsiAndHij(OrbitalsType& orbitals_i, OrbitalsType& orbitals_j, - Ions& ions, KBPsiMatrixSparse* kbpsi, - ProjectedMatricesInterface* projmatrices, - dist_matrix::DistMatrix& hij); - void getKBPsiAndHij(OrbitalsType& orbitals, Ions& ions, - KBPsiMatrixSparse* kbpsi, dist_matrix::DistMatrix& hij); void computeHnlPhiAndAdd2HPhi(Ions& ions, OrbitalsType& phi, 
OrbitalsType& hphi, const KBPsiMatrixSparse* const kbpsi); int dumpMDrestartFile(OrbitalsType& orbitals, Ions& ions, diff --git a/src/computeHij.cc b/src/computeHij.cc index 50d26d90..93f02899 100644 --- a/src/computeHij.cc +++ b/src/computeHij.cc @@ -247,20 +247,6 @@ void MGmol::computeHij(OrbitalsType& orbitals_i, hamiltonian_->addHlocalij(orbitals_i, orbitals_j, projmatrices); } -template -void MGmol::getKBPsiAndHij(OrbitalsType& orbitals_i, - OrbitalsType& orbitals_j, Ions& ions, KBPsiMatrixSparse* kbpsi, - ProjectedMatricesInterface* projmatrices, - dist_matrix::DistMatrix& hij) -{ - kbpsi->computeAll(ions, orbitals_i); - - computeHij(orbitals_i, orbitals_j, ions, kbpsi, hij, true); - - projmatrices->setHiterativeIndex(orbitals_j.getIterativeIndex(), - hamiltonian_->potential().getIterativeIndex()); -} - template void MGmol::getKBPsiAndHij(OrbitalsType& orbitals_i, OrbitalsType& orbitals_j, Ions& ions, KBPsiMatrixSparse* kbpsi, @@ -276,13 +262,6 @@ void MGmol::getKBPsiAndHij(OrbitalsType& orbitals_i, hamiltonian_->potential().getIterativeIndex()); } -template -void MGmol::getKBPsiAndHij(OrbitalsType& orbitals, Ions& ions, - KBPsiMatrixSparse* kbpsi, dist_matrix::DistMatrix& hij) -{ - getKBPsiAndHij(orbitals, orbitals, ions, kbpsi, proj_matrices_.get(), hij); -} - template void MGmol::getKBPsiAndHij(OrbitalsType& orbitals, Ions& ions) { From fe2fc04d1d61ff853e73a7d60ffdcb3e99e00e76 Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Fri, 19 Sep 2025 14:14:53 -0400 Subject: [PATCH 63/99] Simplify and remove unused code --- src/ExtendedGridOrbitals.cc | 32 -------------------------------- src/ExtendedGridOrbitals.h | 4 ---- src/computeHij.cc | 19 +++---------------- 3 files changed, 3 insertions(+), 52 deletions(-) diff --git a/src/ExtendedGridOrbitals.cc b/src/ExtendedGridOrbitals.cc index 21ec5a7e..27d9eb18 100644 --- a/src/ExtendedGridOrbitals.cc +++ b/src/ExtendedGridOrbitals.cc @@ -1239,38 +1239,6 @@ double ExtendedGridOrbitals::dotProduct( return dot; } 
-dist_matrix::DistMatrix ExtendedGridOrbitals::computeProduct( - const ExtendedGridOrbitals& orbitals, const bool transpose) -{ - assert(numst_ > 0); - assert(subdivx_ > 0); - assert(subdivx_ < 1000); - - return computeProduct(orbitals.psi(0), numst_, orbitals.lda_, transpose); -} - -dist_matrix::DistMatrix ExtendedGridOrbitals::computeProduct( - const ORBDTYPE* const array, const int ncol, const int lda, - const bool transpose) -{ - assert(lda > 1); - - dot_product_tm_.start(); - - LocalMatrices ss(subdivx_, numst_, ncol); - - computeLocalProduct(array, lda, ss, transpose); - - LocalMatrices2DistMatrix* sl2dm = LocalMatrices2DistMatrix::instance(); - - dist_matrix::DistMatrix tmp("tmp", numst_, numst_); - sl2dm->accumulate(ss, tmp); - - dot_product_tm_.stop(); - - return tmp; -} - void ExtendedGridOrbitals::orthonormalizeLoewdin(const bool overlap_uptodate, SquareLocalMatrices* matrixTransform, const bool update_matrices) diff --git a/src/ExtendedGridOrbitals.h b/src/ExtendedGridOrbitals.h index 1a80d770..244a150f 100644 --- a/src/ExtendedGridOrbitals.h +++ b/src/ExtendedGridOrbitals.h @@ -122,8 +122,6 @@ class ExtendedGridOrbitals : public Orbitals void initFourier(); void initRand(); - dist_matrix::DistMatrix computeProduct(const ORBDTYPE* const, - const int, const int, const bool transpose = false); ORBDTYPE* psi(const int i) const { return block_vector_.vect(i); } @@ -317,8 +315,6 @@ class ExtendedGridOrbitals : public Orbitals void computeDiagonalElementsDotProduct(const ExtendedGridOrbitals& orbitals, std::vector& ss) const; - dist_matrix::DistMatrix computeProduct( - const ExtendedGridOrbitals&, const bool transpose = false); void computeLocalProduct(const ExtendedGridOrbitals&, LocalMatrices&, const bool transpose = false); diff --git a/src/computeHij.cc b/src/computeHij.cc index 93f02899..16f74653 100644 --- a/src/computeHij.cc +++ b/src/computeHij.cc @@ -43,22 +43,6 @@ void MGmol::addHlocal2matrix(LocGridOrbitals& orbitalsi, computeHij_tm_.stop(); } 
-template <> -template <> -void MGmol::addHlocal2matrix(LocGridOrbitals& orbitalsi, - LocGridOrbitals& orbitalsj, dist_matrix::DistMatrix& H) -{ - computeHij_tm_.start(); - -#if DEBUG - os_ << " addHlocal2matrix()" << endl; -#endif - - hamiltonian_->addHlocal2matrix(orbitalsi, orbitalsj, H); - - computeHij_tm_.stop(); -} - template <> template <> void MGmol::computeHij(LocGridOrbitals& orbitals_i, @@ -439,6 +423,9 @@ template class MGmol; template void MGmol::addHlocal2matrix( ExtendedGridOrbitals& orbitalsi, ExtendedGridOrbitals& orbitalsj, dist_matrix::DistMatrix&); +template void MGmol::addHlocal2matrix( + LocGridOrbitals& orbitalsi, LocGridOrbitals& orbitalsj, + dist_matrix::DistMatrix&); #ifdef HAVE_MAGMA template void MGmol::addHlocal2matrix( ExtendedGridOrbitals& orbitalsi, ExtendedGridOrbitals& orbitalsj, From 2b0c4c31c907792cb5a6806eb57b0b7983298dee Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Fri, 26 Sep 2025 10:31:15 -0400 Subject: [PATCH 64/99] Script to get matrix of wavefunctions from hdf5 --- util/hdf5toMatrix.py | 169 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 169 insertions(+) create mode 100644 util/hdf5toMatrix.py diff --git a/util/hdf5toMatrix.py b/util/hdf5toMatrix.py new file mode 100644 index 00000000..7aa16ff5 --- /dev/null +++ b/util/hdf5toMatrix.py @@ -0,0 +1,169 @@ +# Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at +# the Lawrence Livermore National Laboratory. +# LLNL-CODE-743438 +# All rights reserved. +# This file is part of MGmol. For details, see https://github.com/llnl/mgmol. +# Please also read this link https://github.com/llnl/mgmol/LICENSE +# +import sys +import argparse +import h5py +import numpy as np + + +# Read dataset in hdf5 file. +# Returns an array containing the data. 
+def get_function(filename, datasetname, dims): + + # Check If File is in HDF5 Format + try: + ishdf = h5py.is_hdf5(filename) + except Exception: + print('\nh5py.is_hdf5 unsucessful') + return None + + # If File is not in HDF5 Format, Stop + if( not(ishdf) ): + print('\nInput File ' + filename + ' not in HDF5 Format. Stop.') + return None + + # If Everything Goes Fine, Proceed + else: + + # Open HDF5 File + try: + file_id = h5py.h5f.open(bytes(filename, encoding='utf-8'), + h5py.h5f.ACC_RDONLY, h5py.h5p.DEFAULT) + except Exception: + print('\nHDF5 File: ' + filename + ' Failed to Open') + return None + + # Open Dataset + try: + dset_id = h5py.h5d.open(file_id, bytes(datasetname, encoding='utf-8')) + except Exception: + print('\nHDF5 Dataset: ' + datasetname + ' Failed to Open') + return None + + # Copy of Dataspace for Dataset + try: + filespace = dset_id.get_space() + + except Exception: + print('\ndset_id.get_space() Failed.') + return None + + # Get Dataspace Dimension + ndims = filespace.get_simple_extent_ndims() + # If Dataspace Dimension is not 3, Stop. + if( not(ndims == 3) ): + print('\nProblem with Dataspace Dimension, ndims = ' + str(ndims)) + return None + + # Shape of Dataspace (dims) + dims = dims.tolist() + dims = filespace.get_simple_extent_dims() + + print('Dataspace: Dimensions ' + str( int(dims[0]) ) + ' x ' + + str( int(dims[1]) ) + ' x ' + + str( int(dims[2])) ) + + print('Size: ' + str( int(dims[0] * dims[1] * dims[2]) )) + + # If Size < 1, Stop. 
+ if( int( dims[0] * dims[1] * dims[2] ) < 1 ): + return None + + # Read data -> data + data = np.array(0.0, h5py.h5t.NATIVE_FLOAT) + data.resize( int(dims[0] * dims[1] * dims[2]) , refcheck = False) + + # Dump Data into Numpy Array (data) + try: + status = dset_id.read(h5py.h5s.ALL, h5py.h5s.ALL, data) + except Exception: + print('\ndataset_id.read Failed.') + return 0 + + return ( data, dims ) + + +''' MAIN ''' +# USAGE: +# python hdf5toMM.py file.hdf5 + +def main(): + + h5filename = sys.argv[1] + basename = 'Function' + + # Remove File Extension ( .hdf5 ) + base_filename = h5filename.split('.')[0].strip() + base_filename = base_filename.split('/')[-1] + + # Use base_filename to make .dat filename + output_data_filename = base_filename + '.dat' + print('\noutput_data_filename = ' + output_data_filename) + + arrays = [] + + i = 0 + while i<1000: + # Get data - Call get_function + number = str(i) + while len(number)<4: + number = '0'+number + + datasetname = basename + number + print('\nDataset: ' + datasetname) + + dims = np.arange(0, dtype = h5py.h5t.NATIVE_INT32) # Turns Into a TUPLE + + try: + data, dims = get_function(h5filename, datasetname, dims) + except Exception: + print('\nRead Failed. \nEither the HDF5 File ' + + 'or the Dataset are not Present. Stop.\n') + break + + # If data Empty, Stop. 
+ if( data is None or dims is None ): + print('\nRead Failed.') + return -1 + + arrays.append(data) + + dim = [ int(dims[0]), int(dims[1]), int(dims[2]) ] + + # More Variables + incx = dim[1] * dim[2] + incy = dim[2] + + i = i+1 + + + print('\nWrite data...\n') + + with open(output_data_filename, 'w') as tfile: + nrows = dim[0]*dim[1]*dim[2] + ncols = len(arrays) + tfile.write(str(nrows) + '\t' + str(ncols) ) + + for i in range( dim[0] ): + for j in range( dim[1] ): + for k in range( dim[2] ): + row = (i * incx) + (j * incy) + k + tfile.write('\n') + for l in range(len(arrays)): + data = arrays[l] + if l>0: + tfile.write('\t') + tfile.write(str(data[row] )) + tfile.write('\n') + + return 0 + +# Executes Main Function +if __name__ == '__main__': + + main() From 595070581f7cfc57badde4540f7a2664c869b953 Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Sat, 27 Sep 2025 19:25:47 -0400 Subject: [PATCH 65/99] Opimize code a little bit --- util/hdf5toMatrix.py | 41 +++++++++++++++-------------------------- 1 file changed, 15 insertions(+), 26 deletions(-) diff --git a/util/hdf5toMatrix.py b/util/hdf5toMatrix.py index 7aa16ff5..a11e169c 100644 --- a/util/hdf5toMatrix.py +++ b/util/hdf5toMatrix.py @@ -95,7 +95,7 @@ def get_function(filename, datasetname, dims): def main(): h5filename = sys.argv[1] - basename = 'Function' + field = 'Function' # Remove File Extension ( .hdf5 ) base_filename = h5filename.split('.')[0].strip() @@ -105,7 +105,7 @@ def main(): output_data_filename = base_filename + '.dat' print('\noutput_data_filename = ' + output_data_filename) - arrays = [] + columns = [] i = 0 while i<1000: @@ -114,52 +114,41 @@ def main(): while len(number)<4: number = '0'+number - datasetname = basename + number + datasetname = field + number print('\nDataset: ' + datasetname) dims = np.arange(0, dtype = h5py.h5t.NATIVE_INT32) # Turns Into a TUPLE try: - data, dims = get_function(h5filename, datasetname, dims) + column, dims = get_function(h5filename, datasetname, 
dims) except Exception: print('\nRead Failed. \nEither the HDF5 File ' + 'or the Dataset are not Present. Stop.\n') break # If data Empty, Stop. - if( data is None or dims is None ): + if( column is None or dims is None ): print('\nRead Failed.') return -1 - arrays.append(data) + #add data just read as a column in list of columns + columns.append(column) dim = [ int(dims[0]), int(dims[1]), int(dims[2]) ] - # More Variables - incx = dim[1] * dim[2] - incy = dim[2] - i = i+1 + #build numpy 2d array from all the columns + matrix = columns[0] + for i in range(len(columns)-1): + matrix = np.column_stack((matrix,columns[i+1])) print('\nWrite data...\n') - with open(output_data_filename, 'w') as tfile: - nrows = dim[0]*dim[1]*dim[2] - ncols = len(arrays) - tfile.write(str(nrows) + '\t' + str(ncols) ) - - for i in range( dim[0] ): - for j in range( dim[1] ): - for k in range( dim[2] ): - row = (i * incx) + (j * incy) + k - tfile.write('\n') - for l in range(len(arrays)): - data = arrays[l] - if l>0: - tfile.write('\t') - tfile.write(str(data[row] )) - tfile.write('\n') + nrows = dim[0]*dim[1]*dim[2] + ncols = len(columns) + + np.savetxt('matrix.dat', matrix, delimiter='\t', fmt='%le', header=str(nrows) + '\t' + str(ncols)) return 0 From ef8f6a58a4ace92f7ca52728e07899bfcdf5760b Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Wed, 1 Oct 2025 09:45:59 -0400 Subject: [PATCH 66/99] Rm unused/duplicated functions in LocGridOrbitals --- src/LocGridOrbitals.cc | 45 +----------------------------------------- src/LocGridOrbitals.h | 4 ---- 2 files changed, 1 insertion(+), 48 deletions(-) diff --git a/src/LocGridOrbitals.cc b/src/LocGridOrbitals.cc index 3d14e051..11f2e291 100644 --- a/src/LocGridOrbitals.cc +++ b/src/LocGridOrbitals.cc @@ -1659,16 +1659,7 @@ void LocGridOrbitals::computeDiagonalElementsDotProductLocal( void LocGridOrbitals::computeGram( dist_matrix::DistMatrix& gram_mat) { - SquareLocalMatrices ss( - subdivx_, chromatic_number_); - - getLocalOverlap(ss); - - 
LocalMatrices2DistMatrix* sl2dm = LocalMatrices2DistMatrix::instance(); - - gram_mat.clear(); - - sl2dm->accumulate(ss, gram_mat); + computeGram(*this, gram_mat); } void LocGridOrbitals::computeGram(const LocGridOrbitals& orbitals, @@ -1835,40 +1826,6 @@ double LocGridOrbitals::dotProduct( return dot; } -dist_matrix::DistMatrix LocGridOrbitals::product( - const LocGridOrbitals& orbitals, const bool transpose) -{ - assert(numst_ > 0); - assert(subdivx_ > 0); - assert(subdivx_ < 1000); - - return product( - orbitals.psi(0), orbitals.chromatic_number_, orbitals.lda_, transpose); -} - -dist_matrix::DistMatrix LocGridOrbitals::product( - const ORBDTYPE* const array, const int ncol, const int lda, - const bool transpose) -{ - assert(lda > 1); - - dot_product_tm_.start(); - - LocalMatrices ss( - subdivx_, chromatic_number_, ncol); - - if (chromatic_number_ != 0) computeLocalProduct(array, lda, ss, transpose); - - LocalMatrices2DistMatrix* sl2dm = LocalMatrices2DistMatrix::instance(); - - dist_matrix::DistMatrix tmp("tmp", numst_, numst_); - sl2dm->accumulate(ss, tmp); - - dot_product_tm_.stop(); - - return tmp; -} - void LocGridOrbitals::orthonormalizeLoewdin(const bool overlap_uptodate, SquareLocalMatrices* matrixTransform, const bool update_matrices) diff --git a/src/LocGridOrbitals.h b/src/LocGridOrbitals.h index 951e418f..68d865f3 100644 --- a/src/LocGridOrbitals.h +++ b/src/LocGridOrbitals.h @@ -141,8 +141,6 @@ class LocGridOrbitals : public Orbitals void initFourier(); void initRand(); - dist_matrix::DistMatrix product(const ORBDTYPE* const, - const int, const int, const bool transpose = false); ORBDTYPE* psi(const int i) const { return block_vector_.vect(i); } @@ -359,8 +357,6 @@ class LocGridOrbitals : public Orbitals void computeDiagonalElementsDotProductLocal( const LocGridOrbitals& orbitals, std::vector& ss); - dist_matrix::DistMatrix product( - const LocGridOrbitals&, const bool transpose = false); void computeLocalProduct(const LocGridOrbitals&, 
LocalMatrices&, const bool transpose = false); From 792fcebb0c4bf46921e62f3acdd17781595d132f Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Sun, 19 Oct 2025 10:48:54 -0400 Subject: [PATCH 67/99] Replace some ptr with vector in HDFrestart --- src/HDFrestart.cc | 63 ++++++++++++++++++++--------------------------- src/HDFrestart.h | 5 ++-- 2 files changed, 29 insertions(+), 39 deletions(-) diff --git a/src/HDFrestart.cc b/src/HDFrestart.cc index 1b2c7fb7..34efde9a 100644 --- a/src/HDFrestart.cc +++ b/src/HDFrestart.cc @@ -66,8 +66,6 @@ HDFrestart::~HDFrestart() { if (!closed_) close(); - closeWorkSpace(); - MGmol_MPI& mmpi = *(MGmol_MPI::instance()); mmpi.barrier(); @@ -1147,13 +1145,13 @@ int HDFrestart::getLRs(std::shared_ptr lrs, template <> void HDFrestart::getWorkspace(float*& work_space) { - work_space = work_space_float_; + work_space = work_space_float_.data(); } template <> void HDFrestart::getWorkspace(double*& work_space) { - work_space = work_space_double_; + work_space = work_space_double_.data(); } template <> @@ -1567,12 +1565,12 @@ int HDFrestart::readData( if (precision == 1) { status = H5Dread(dset_id, H5T_NATIVE_FLOAT, memspace, filespace, - plist_id, work_space_float_); + plist_id, work_space_float_.data()); } else { status = H5Dread(dset_id, H5T_NATIVE_DOUBLE, memspace, filespace, - plist_id, work_space_double_); + plist_id, work_space_double_.data()); } if (status < 0) @@ -1604,15 +1602,15 @@ int HDFrestart::readData( { //(*MPIdata::sout)<<"PE: "<= 0); if (precision == 1) status = H5Dwrite(dset_id, H5T_NATIVE_FLOAT, memspace, space_id, - plist_id, work_space_float_); + plist_id, work_space_float_.data()); else status = H5Dwrite(dset_id, H5T_NATIVE_DOUBLE, memspace, space_id, - plist_id, work_space_double_); + plist_id, work_space_double_.data()); if (status < 0) { MGMOL_HDFRESTART_FAIL("H5Dwrite failed!!!"); @@ -1851,21 +1851,12 @@ void HDFrestart::setupWorkSpace() { // if( active_ ) { - const int n = block_[0] * block_[1] * block_[2]; - 
work_space_double_ = new double[n]; - memset(work_space_double_, 0, n * sizeof(double)); - - work_space_float_ = new float[n]; - memset(work_space_float_, 0, n * sizeof(float)); - } -} + const int n = block_[0] * block_[1] * block_[2]; + work_space_double_.resize(n); + memset(work_space_double_.data(), 0, n * sizeof(double)); -void HDFrestart::closeWorkSpace() -{ - // if( active_ ) - { - delete[] work_space_double_; - delete[] work_space_float_; + work_space_float_.resize(n); + memset(work_space_float_.data(), 0, n * sizeof(float)); } } diff --git a/src/HDFrestart.h b/src/HDFrestart.h index 7bba69d0..bf49a0e8 100644 --- a/src/HDFrestart.h +++ b/src/HDFrestart.h @@ -60,8 +60,8 @@ class HDFrestart int bsize_; - double* work_space_double_; - float* work_space_float_; + std::vector work_space_double_; + std::vector work_space_float_; #ifdef MGMOL_USE_HDF5P bool use_hdf5p_; @@ -87,7 +87,6 @@ class HDFrestart template void gatherDataXdir(std::vector& data); - void closeWorkSpace(); void setupWorkSpace(); template From 229ece7553bfc7fd0588633a6f3b6801257f7425 Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Wed, 22 Oct 2025 14:17:25 -0400 Subject: [PATCH 68/99] Replicated Projected Matrices on CPU (#367) * Implement option of using ReplicatedMatrix on CPU --- CMakeLists.txt | 1 + src/CMakeLists.txt | 2 + src/ChebyshevApproximation.cc | 2 - src/Control.cc | 74 +-- src/Control.h | 3 + src/DMStrategyFactory.cc | 24 +- src/DavidsonSolver.cc | 3 +- src/DensityMatrix.cc | 2 - src/DistMatrix/DistMatrix.cc | 4 + src/ExtendedGridOrbitals.cc | 29 +- src/Forces.cc | 2 - src/GramMatrix.cc | 5 +- src/Hamiltonian.cc | 22 +- src/HamiltonianMVPSolver.cc | 2 - src/HamiltonianMVP_DMStrategy.cc | 3 - src/KBPsiMatrixSparse.cc | 15 +- src/LocGridOrbitals.cc | 1 - src/LocalMatrices2ReplicatedMatrix.cc | 88 ++++ src/LocalMatrices2ReplicatedMatrix.h | 62 +++ src/MGmol.cc | 106 ++-- src/MVPSolver.cc | 4 +- src/MVP_DMStrategy.cc | 4 +- src/Power.cc | 3 + src/PowerGen.cc | 2 - 
src/ProjectedMatrices.cc | 43 +- src/ProjectedMatrices2N.cc | 2 - src/ProjectedMatricesMehrstellen.cc | 2 - src/ReplicatedMatrix.cc | 515 +++++++++++++++----- src/ReplicatedMatrix.h | 11 +- src/ReplicatedMatrix2SquareLocalMatrices.cc | 52 ++ src/ReplicatedMatrix2SquareLocalMatrices.h | 53 ++ src/ReplicatedVector.cc | 84 +++- src/ReplicatedVector.h | 7 +- src/ReplicatedWorkSpace.cc | 8 +- src/ReplicatedWorkSpace.h | 8 +- src/Rho.cc | 4 +- src/SP2.cc | 2 - src/computeHij.cc | 40 +- src/linear_algebra/lapack_c.h | 9 +- src/local_matrices/LocalMatrices.h | 2 +- src/quench.cc | 29 +- src/read_config.cc | 4 +- src/setup.cc | 25 +- tests/CMakeLists.txt | 9 + tests/DavidsonReplicated/davidson.cfg | 35 ++ 45 files changed, 1052 insertions(+), 355 deletions(-) create mode 100644 src/LocalMatrices2ReplicatedMatrix.cc create mode 100644 src/LocalMatrices2ReplicatedMatrix.h create mode 100644 src/ReplicatedMatrix2SquareLocalMatrices.cc create mode 100644 src/ReplicatedMatrix2SquareLocalMatrices.h create mode 100644 tests/DavidsonReplicated/davidson.cfg diff --git a/CMakeLists.txt b/CMakeLists.txt index e931b5cd..c1a853f0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -217,6 +217,7 @@ FortranCInterface_HEADER( DGEMM SGEMM DSYMM DSYRK SSYRK DTRMM DTRSM STRSM #lapack DSYEV DPOTRF DPOTRS DPOTRI DSYGST DTRTRS DPOCON DSYGV DLANGE + DGETRF DGETRS DLACPY ) FortranCInterface_HEADER( diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 3e1188fa..27caa790 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -22,6 +22,8 @@ set(SOURCES mgmol_run.cc read_config.cc jade.cc + LocalMatrices2ReplicatedMatrix.cc + ReplicatedMatrix2SquareLocalMatrices.cc DielectricControl.cc ReplicatedMatrix.cc ReplicatedVector.cc diff --git a/src/ChebyshevApproximation.cc b/src/ChebyshevApproximation.cc index 093f1266..9ef65322 100644 --- a/src/ChebyshevApproximation.cc +++ b/src/ChebyshevApproximation.cc @@ -218,6 +218,4 @@ MatrixType ChebyshevApproximation::computeChebyshevApproximation( } 
template class ChebyshevApproximation>; -#ifdef HAVE_MAGMA template class ChebyshevApproximation; -#endif diff --git a/src/Control.cc b/src/Control.cc index 2980ce33..d6d26107 100644 --- a/src/Control.cc +++ b/src/Control.cc @@ -385,7 +385,7 @@ void Control::sync(void) short_buffer[47] = out_restart_file_naming_strategy; short_buffer[48] = enforceVmass0; short_buffer[49] = dm_inner_steps; - short_buffer[50] = -1; + short_buffer[50] = rmatrices; short_buffer[51] = fgmres_kim; short_buffer[52] = fgmres_maxits; short_buffer[53] = ilu_type; @@ -598,41 +598,41 @@ void Control::sync(void) out_restart_file_naming_strategy = short_buffer[47]; enforceVmass0 = short_buffer[48]; dm_inner_steps = short_buffer[49]; - //... = short_buffer[50]; - fgmres_kim = short_buffer[51]; - fgmres_maxits = short_buffer[52]; - ilu_type = short_buffer[53]; - ilu_lof = short_buffer[54]; - ilu_maxfil = short_buffer[55]; - coloring_algo_ = short_buffer[56]; - diel_flag_ = short_buffer[57]; - poisson_pc_nu1 = short_buffer[58]; - poisson_pc_nu2 = short_buffer[59]; - poisson_pc_nlev = short_buffer[60]; - system_charge_ = short_buffer[61]; - md_print_freq = short_buffer[62]; - use_kernel_functions = short_buffer[63]; - ngpts_[0] = short_buffer[64]; - ngpts_[1] = short_buffer[65]; - ngpts_[2] = short_buffer[66]; - computeCondGram_ = short_buffer[67]; - lrs_extrapolation = short_buffer[68]; - parallel_transport = (bool)short_buffer[69]; - with_spin_ = (bool)short_buffer[70]; - conv_criterion_ = short_buffer[71]; - load_balancing_max_iterations = short_buffer[72]; - load_balancing_modulo = short_buffer[73]; - write_clusters = short_buffer[74]; - DM_solver_ = short_buffer[75]; - dm_algo_ = short_buffer[80]; - dm_approx_order = short_buffer[81]; - dm_approx_ndigits = short_buffer[82]; - dm_approx_power_maxits = short_buffer[83]; - spread_penalty_type_ = short_buffer[84]; - dm_use_old_ = short_buffer[85]; - max_electronic_steps_tight_ = short_buffer[86]; - hartree_reset_ = short_buffer[88]; - MD_last_step_ 
= short_buffer[89]; + rmatrices = short_buffer[50]; + fgmres_kim = short_buffer[51]; + fgmres_maxits = short_buffer[52]; + ilu_type = short_buffer[53]; + ilu_lof = short_buffer[54]; + ilu_maxfil = short_buffer[55]; + coloring_algo_ = short_buffer[56]; + diel_flag_ = short_buffer[57]; + poisson_pc_nu1 = short_buffer[58]; + poisson_pc_nu2 = short_buffer[59]; + poisson_pc_nlev = short_buffer[60]; + system_charge_ = short_buffer[61]; + md_print_freq = short_buffer[62]; + use_kernel_functions = short_buffer[63]; + ngpts_[0] = short_buffer[64]; + ngpts_[1] = short_buffer[65]; + ngpts_[2] = short_buffer[66]; + computeCondGram_ = short_buffer[67]; + lrs_extrapolation = short_buffer[68]; + parallel_transport = (bool)short_buffer[69]; + with_spin_ = (bool)short_buffer[70]; + conv_criterion_ = short_buffer[71]; + load_balancing_max_iterations = short_buffer[72]; + load_balancing_modulo = short_buffer[73]; + write_clusters = short_buffer[74]; + DM_solver_ = short_buffer[75]; + dm_algo_ = short_buffer[80]; + dm_approx_order = short_buffer[81]; + dm_approx_ndigits = short_buffer[82]; + dm_approx_power_maxits = short_buffer[83]; + spread_penalty_type_ = short_buffer[84]; + dm_use_old_ = short_buffer[85]; + max_electronic_steps_tight_ = short_buffer[86]; + hartree_reset_ = short_buffer[88]; + MD_last_step_ = short_buffer[89]; poisson_lap_type_ = static_cast(short_buffer[90]); numst = int_buffer[0]; @@ -829,6 +829,7 @@ int Control::checkState() assert(wannier_transform_type == 0 || wannier_transform_type == 1 || wannier_transform_type == 2); assert(tmatrices == 1 || tmatrices == 0); + assert(rmatrices == 1 || rmatrices == 0); assert(mg_levels_ >= -1); assert(rho0_ > 0.); assert(drho0_ > 0.); @@ -1419,6 +1420,7 @@ void Control::setOptions(const boost::program_options::variables_map& vm) if (str.compare("exact") == 0) short_sighted = 0; tmatrices = vm["ProjectedMatrices.printMM"].as() ? 1 : 0; + rmatrices = vm["ProjectedMatrices.replicated"].as() ? 
1 : 0; if (short_sighted) { diff --git a/src/Control.h b/src/Control.h index c9a128b3..7b1bca65 100644 --- a/src/Control.h +++ b/src/Control.h @@ -477,6 +477,9 @@ class Control // transfer matrix flag short tmatrices; + // replicated matrices + short rmatrices; + // Initialization with localized orbitals (1) or not (0) short init_loc; diff --git a/src/DMStrategyFactory.cc b/src/DMStrategyFactory.cc index 6dae6870..d7b1274d 100644 --- a/src/DMStrategyFactory.cc +++ b/src/DMStrategyFactory.cc @@ -33,6 +33,19 @@ DMStrategy* DMStrategyFactory +DMStrategy* DMStrategyFactory::createHamiltonianMVP_DMStrategy(MPI_Comm comm, + std::ostream& os, Ions& ions, Rho* rho, + Energy* energy, Electrostatic* electrostat, + MGmol* mgmol_strategy, + ProjectedMatricesInterface* /*proj_matrices*/, LocGridOrbitals* orbitals, + const bool short_sighted) +{ + std::cerr << "Not implemented" << std::endl; + assert(0 == 1); +} + template <> DMStrategy* DMStrategyFactory>::createHamiltonianMVP_DMStrategy(MPI_Comm @@ -54,24 +67,21 @@ DMStrategy* DMStrategyFactory DMStrategy* DMStrategyFactory::createHamiltonianMVP_DMStrategy(MPI_Comm comm, std::ostream& os, Ions& ions, Rho* rho, Energy* energy, Electrostatic* electrostat, MGmol* mgmol_strategy, - ProjectedMatricesInterface* /*proj_matrices*/, LocGridOrbitals* orbitals, - const bool short_sighted) + ProjectedMatricesInterface* /*proj_matrices*/, + ExtendedGridOrbitals* orbitals, const bool short_sighted) { (void)short_sighted; DMStrategy* dm_strategy = new HamiltonianMVP_DMStrategy, ExtendedGridOrbitals>(comm, os, - ions, rho, energy, electrostat, mgmol_strategy, - orbitals->getOverlappingGids()); + ProjectedMatrices, ExtendedGridOrbitals>( + comm, os, ions, rho, energy, electrostat, mgmol_strategy, orbitals); return dm_strategy; } -#endif diff --git a/src/DavidsonSolver.cc b/src/DavidsonSolver.cc index b5557f7e..b3c22d54 100644 --- a/src/DavidsonSolver.cc +++ b/src/DavidsonSolver.cc @@ -483,6 +483,7 @@ int DavidsonSolver::solve( 
ProjectedMatrices* projmatrices = dynamic_cast*>( orbitals.getProjMatrices()); + assert(projmatrices != nullptr); // get H*psi stored in work_orbitals // h11 computed at the same time @@ -850,6 +851,4 @@ void DavidsonSolver::printTimers(std::ostream& os) template class DavidsonSolver>; -#ifdef HAVE_MAGMA template class DavidsonSolver; -#endif diff --git a/src/DensityMatrix.cc b/src/DensityMatrix.cc index 7be19aab..0214f0c4 100644 --- a/src/DensityMatrix.cc +++ b/src/DensityMatrix.cc @@ -478,6 +478,4 @@ int DensityMatrix::read(HDFrestart& h5f_file, std::string& name) } template class DensityMatrix>; -#ifdef HAVE_MAGMA template class DensityMatrix; -#endif diff --git a/src/DistMatrix/DistMatrix.cc b/src/DistMatrix/DistMatrix.cc index 743bd2a5..a8064fcb 100644 --- a/src/DistMatrix/DistMatrix.cc +++ b/src/DistMatrix/DistMatrix.cc @@ -78,6 +78,8 @@ DistMatrix::DistMatrix(const std::string& name, const int m, const int n) bc_(*default_bc_), comm_global_(default_bc_->comm_global()) { + assert(default_bc_ != nullptr); + resize(m, n, distmatrix_def_block_size_, distmatrix_def_block_size_); } @@ -87,6 +89,8 @@ DistMatrix::DistMatrix(const std::string& name, const int m) bc_(*default_bc_), comm_global_(default_bc_->comm_global()) { + assert(default_bc_ != nullptr); + resize(m, m, distmatrix_def_block_size_, distmatrix_def_block_size_); } diff --git a/src/ExtendedGridOrbitals.cc b/src/ExtendedGridOrbitals.cc index 27d9eb18..e8c49896 100644 --- a/src/ExtendedGridOrbitals.cc +++ b/src/ExtendedGridOrbitals.cc @@ -551,12 +551,12 @@ void ExtendedGridOrbitals::multiply_by_matrix( prod_matrix_tm_.stop(); } -#ifdef HAVE_MAGMA template <> void ExtendedGridOrbitals::multiply_by_matrix(const ReplicatedMatrix& matrix) { prod_matrix_tm_.start(); +#ifdef HAVE_MAGMA magma_trans_t magma_transa = magma_trans_const('n'); magma_trans_t magma_transb = magma_trans_const('n'); @@ -574,10 +574,19 @@ void ExtendedGridOrbitals::multiply_by_matrix(const ReplicatedMatrix& matrix) tmp, numst_ * lda_, 
block_vector_.vect(0)); MemorySpace::Memory::free(tmp); +#else + ORBDTYPE* tmp = MemorySpace::Memory::allocate( + numst_ * lda_); + LinearAlgebraUtils::MPgemmNN(numpt_, numst_, numst_, 1., + block_vector_.vect(0), lda_, matrix.data(), matrix.ld(), 0., tmp, lda_); + + memcpy(block_vector_.vect(0), tmp, numst_ * lda_ * sizeof(ORBDTYPE)); + + MemorySpace::Memory::free(tmp); +#endif prod_matrix_tm_.stop(); } -#endif int ExtendedGridOrbitals::read_hdf5(HDFrestart& h5f_file) { @@ -1258,7 +1267,6 @@ void ExtendedGridOrbitals::orthonormalizeLoewdin(const bool overlap_uptodate, incrementIterativeIndex(); bool multbymat = false; -#ifdef HAVE_MAGMA // try with ReplicatedMatrix first { ProjectedMatrices* projmatrices @@ -1275,7 +1283,6 @@ void ExtendedGridOrbitals::orthonormalizeLoewdin(const bool overlap_uptodate, multbymat = true; } } -#endif if (!multbymat) { ProjectedMatrices>* projmatrices @@ -1687,24 +1694,29 @@ void ExtendedGridOrbitals::addDotWithNcol2Matrix( addDot_tm_.stop(); } -#ifdef HAVE_MAGMA template <> void ExtendedGridOrbitals::addDotWithNcol2Matrix( ExtendedGridOrbitals& Apsi, ReplicatedMatrix& matrix) const { addDot_tm_.start(); + ReplicatedMatrix tmp("tmp", numst_, numst_); + const double vel = grid_.vel(); + +#ifdef HAVE_MAGMA magma_trans_t magma_transa = magma_trans_const('t'); magma_trans_t magma_transb = magma_trans_const('n'); auto& magma_singleton = MagmaSingleton::get_magma_singleton(); - ReplicatedMatrix tmp("tmp", numst_, numst_); - const double vel = grid_.vel(); - magmablas_dgemm(magma_transa, magma_transb, numst_, numst_, numpt_, vel, block_vector_.vect(0), lda_, Apsi.getPsi(0), lda_, 0., tmp.data(), tmp.ld(), magma_singleton.queue_); +#else + LinearAlgebraUtils::MPgemmTN(numst_, numst_, numpt_, vel, + block_vector_.vect(0), lda_, Apsi.getPsi(0), lda_, 0., tmp.data(), + tmp.ld()); +#endif tmp.consolidate(); @@ -1712,7 +1724,6 @@ void ExtendedGridOrbitals::addDotWithNcol2Matrix( addDot_tm_.stop(); } -#endif void 
ExtendedGridOrbitals::computeGlobalIndexes() { diff --git a/src/Forces.cc b/src/Forces.cc index 04e6ffd7..4a7dfb84 100644 --- a/src/Forces.cc +++ b/src/Forces.cc @@ -416,14 +416,12 @@ void Forces::lforce(Ions& ions, RHODTYPE* rho) template SquareLocalMatrices Forces::getReplicatedDM() { -#ifdef HAVE_MAGMA { ProjectedMatrices* projmatrices = dynamic_cast*>( proj_matrices_); if (projmatrices) return projmatrices->getReplicatedDM(); } -#endif { ProjectedMatrices>* projmatrices = dynamic_cast< diff --git a/src/GramMatrix.cc b/src/GramMatrix.cc index 647ed766..1443d450 100644 --- a/src/GramMatrix.cc +++ b/src/GramMatrix.cc @@ -136,14 +136,13 @@ double GramMatrix>::computeCond() return cond; } -#ifdef HAVE_MAGMA template <> double GramMatrix::computeCond() { const double cond = 1; + return cond; } -#endif // mat is overwritten by inv(ls)*mat*inv(ls**T) template @@ -329,7 +328,5 @@ void GramMatrix::applyInv(VectorType& mat) template class GramMatrix>; template void GramMatrix>::applyInv( dist_matrix::DistVector&); -#ifdef HAVE_MAGMA template class GramMatrix; template void GramMatrix::applyInv(ReplicatedVector&); -#endif diff --git a/src/Hamiltonian.cc b/src/Hamiltonian.cc index 0bd773eb..bdd9b166 100644 --- a/src/Hamiltonian.cc +++ b/src/Hamiltonian.cc @@ -90,7 +90,7 @@ void Hamiltonian::applyLocal(const int ncolors, T& phi, T& hphi) #ifdef PRINT_OPERATIONS if (onpe0) (*MPIdata::sout) << "Hamiltonian::applyLocal() for " << ncolors - << " states" << endl; + << " states" << std::endl; #endif const Control& ct = *(Control::instance()); @@ -170,7 +170,8 @@ void Hamiltonian::addHlocal2matrix(LocGridOrbitals& phi1, applyLocal(phi2, force); #ifdef PRINT_OPERATIONS - if (onpe0) (*MPIdata::sout) << "Hamiltonian::addHlocal2matrix()" << endl; + if (onpe0) + (*MPIdata::sout) << "Hamiltonian::addHlocal2matrix()" << std::endl; #endif phi1.addDotWithNcol2Matrix(*hlphi_, hij); @@ -185,7 +186,8 @@ void Hamiltonian::addHlocal2matrix( applyLocal(phi2, force); #ifdef PRINT_OPERATIONS - if 
(onpe0) (*MPIdata::sout) << "Hamiltonian::addHlocal2matrix()" << endl; + if (onpe0) + (*MPIdata::sout) << "Hamiltonian::addHlocal2matrix()" << std::endl; #endif // hij.print(std::cout, 0, 0, 5, 5); @@ -195,7 +197,6 @@ void Hamiltonian::addHlocal2matrix( // hij.print(std::cout, 0, 0, 5, 5); } -#ifdef HAVE_MAGMA template <> template <> void Hamiltonian::addHlocal2matrix( @@ -205,12 +206,13 @@ void Hamiltonian::addHlocal2matrix( applyLocal(phi2, force); #ifdef PRINT_OPERATIONS - if (onpe0) (*MPIdata::sout) << "Hamiltonian::addHlocal2matrix()" << endl; + if (onpe0) + (*MPIdata::sout) << "Hamiltonian::addHlocal2matrix() at line " + << __LINE__ << std::endl; #endif phi1.addDotWithNcol2Matrix(*hlphi_, hij); } -#endif template void Hamiltonian::addHlocalij( @@ -219,7 +221,9 @@ void Hamiltonian::addHlocalij( applyLocal(phi2); #ifdef PRINT_OPERATIONS - if (onpe0) (*MPIdata::sout) << "Hamiltonian::addHLocalij()" << endl; + if (onpe0) + (*MPIdata::sout) << "Hamiltonian::addHLocalij() at line " << __LINE__ + << std::endl; #endif addHlocalij(phi1, proj_matrices); @@ -249,7 +253,9 @@ void Hamiltonian::addHlocal2matrix(LocGridOrbitals& phi1, applyLocal(phi2, force); #ifdef PRINT_OPERATIONS - if (onpe0) (*MPIdata::sout) << "Hamiltonian::addHLocalij()" << endl; + if (onpe0) + (*MPIdata::sout) << "Hamiltonian::addHLocalij() at line " << __LINE__ + << std::endl; #endif SquareLocalMatrices ss( diff --git a/src/HamiltonianMVPSolver.cc b/src/HamiltonianMVPSolver.cc index 23ae9867..891767d1 100644 --- a/src/HamiltonianMVPSolver.cc +++ b/src/HamiltonianMVPSolver.cc @@ -357,7 +357,5 @@ template class HamiltonianMVPSolver, template class HamiltonianMVPSolver, ProjectedMatrices>, ExtendedGridOrbitals>; -#ifdef HAVE_MAGMA template class HamiltonianMVPSolver, ExtendedGridOrbitals>; -#endif diff --git a/src/HamiltonianMVP_DMStrategy.cc b/src/HamiltonianMVP_DMStrategy.cc index be62b894..681568d5 100644 --- a/src/HamiltonianMVP_DMStrategy.cc +++ b/src/HamiltonianMVP_DMStrategy.cc @@ -98,14 +98,11 
@@ void HamiltonianMVP_DMStrategy, ProjectedMatrices>, LocGridOrbitals>; - template class HamiltonianMVP_DMStrategy, ProjectedMatricesSparse, LocGridOrbitals>; template class HamiltonianMVP_DMStrategy, ProjectedMatrices>, ExtendedGridOrbitals>; -#ifdef HAVE_MAGMA template class HamiltonianMVP_DMStrategy, ExtendedGridOrbitals>; -#endif diff --git a/src/KBPsiMatrixSparse.cc b/src/KBPsiMatrixSparse.cc index 8f5b3d49..70859870 100644 --- a/src/KBPsiMatrixSparse.cc +++ b/src/KBPsiMatrixSparse.cc @@ -424,8 +424,6 @@ void KBPsiMatrixSparse::computeHvnlMatrix( ss2dm->accumulate(submat, hij, 0.); } -#ifdef HAVE_MAGMA - template <> void KBPsiMatrixSparse::computeHvnlMatrix( const KBPsiMatrixInterface* const kbpsi2, const Ions& ions, @@ -438,8 +436,6 @@ void KBPsiMatrixSparse::computeHvnlMatrix( hij.consolidate(); } -#endif - // build elements, one atom at a time SquareSubMatrix KBPsiMatrixSparse::computeHvnlMatrix( const KBPsiMatrixInterface* const kbpsi2, const Ions& ions) const @@ -624,9 +620,9 @@ double KBPsiMatrixSparse::getEvnl( return evnl * Ry2Ha; } -template <> -double KBPsiMatrixSparse::getEvnl(const Ions& ions, - ProjectedMatrices>* proj_matrices) +template +double KBPsiMatrixSparse::getEvnl( + const Ions& ions, ProjectedMatrices* proj_matrices) { SquareLocalMatrices dm( proj_matrices->getReplicatedDM()); @@ -742,3 +738,8 @@ template void KBPsiMatrixSparse::computeKBpsi(const Ions& ions, ExtendedGridOrbitals& orbitals, const int first_color, const int nb_colors, const bool flag); template void KBPsiMatrixSparse::computeAll(const Ions&, ExtendedGridOrbitals&); + +template double KBPsiMatrixSparse::getEvnl(const Ions& ions, + ProjectedMatrices>* proj_matrices); +template double KBPsiMatrixSparse::getEvnl( + const Ions& ions, ProjectedMatrices* proj_matrices); diff --git a/src/LocGridOrbitals.cc b/src/LocGridOrbitals.cc index 11f2e291..8db9c1e0 100644 --- a/src/LocGridOrbitals.cc +++ b/src/LocGridOrbitals.cc @@ -736,7 +736,6 @@ void 
LocGridOrbitals::multiply_by_matrix( const dist_matrix::DistMatrix& dmatrix, ORBDTYPE* const product, const int ldp) { - ReplicatedWorkSpace& wspace( ReplicatedWorkSpace::instance()); DISTMATDTYPE* work_matrix = wspace.square_matrix(); diff --git a/src/LocalMatrices2ReplicatedMatrix.cc b/src/LocalMatrices2ReplicatedMatrix.cc new file mode 100644 index 00000000..1b0c94d3 --- /dev/null +++ b/src/LocalMatrices2ReplicatedMatrix.cc @@ -0,0 +1,88 @@ +// Copyright (c) 2017, Lawrence Livermore National Security, LLC and +// UT-Battelle, LLC. +// Produced at the Lawrence Livermore National Laboratory and the Oak Ridge +// National Laboratory. +// LLNL-CODE-743438 +// All rights reserved. +// This file is part of MGmol. For details, see https://github.com/llnl/mgmol. +// Please also read this link https://github.com/llnl/mgmol/LICENSE + +#include "LocalMatrices2ReplicatedMatrix.h" +#include "MGmol_MPI.h" + +LocalMatrices2ReplicatedMatrix* LocalMatrices2ReplicatedMatrix::pinstance_ + = nullptr; +std::vector> LocalMatrices2ReplicatedMatrix::global_indexes_; +double LocalMatrices2ReplicatedMatrix::tol_mat_elements = 1.e-14; + +Timer LocalMatrices2ReplicatedMatrix::convert_tm_( + "LocalMatrices2ReplicatedMatrix::convert"); + +void LocalMatrices2ReplicatedMatrix::convert( + const LocalMatrices& src, ReplicatedMatrix& dst, + const int numst, const double tol) const +{ + (void)tol; + + assert(!global_indexes_.empty()); + + convert_tm_.start(); + + const int subdiv = static_cast(global_indexes_.size()); + + std::vector val(subdiv); + + const short chromatic_number + = static_cast(global_indexes_[0].size()); + + std::vector data(numst * numst); + + // double loop over colors + for (short icolor = 0; icolor < chromatic_number; icolor++) + { + for (short jcolor = 0; jcolor < chromatic_number; jcolor++) + { + // loop over subdomains + for (short iloc = 0; iloc < subdiv; iloc++) + { + const int st1 = global_indexes_[iloc][icolor]; + //(*MPIdata::sout)<<"icolor="< + +// Add matrix 
elements corresponding to subdomains at their right place +// into a ReplicatedMatrix +// Important Note: Neglect contributions smaller than tol! +// (may lead to results dependent on number of CPUs) + +class LocalMatrices2ReplicatedMatrix +{ +private: + static LocalMatrices2ReplicatedMatrix* pinstance_; + + static Timer convert_tm_; + + static std::vector> global_indexes_; + + static double tol_mat_elements; + +public: + static LocalMatrices2ReplicatedMatrix* instance() + { + if (pinstance_ == nullptr) + { + pinstance_ = new LocalMatrices2ReplicatedMatrix(); + } + return pinstance_; + } + + LocalMatrices2ReplicatedMatrix() {} + + static void setup(const std::vector>& gids) + { + global_indexes_ = gids; + } + + void convert(const LocalMatrices& src, + ReplicatedMatrix& dst, const int numst, + const double tol = tol_mat_elements) const; + + void accumulate(const LocalMatrices& src, + ReplicatedMatrix& dst, const double tol = tol_mat_elements) const; + + static void printTimers(std::ostream& os) { convert_tm_.print(os); } +}; + +#endif diff --git a/src/MGmol.cc b/src/MGmol.cc index 8afa1331..1de0ef8d 100644 --- a/src/MGmol.cc +++ b/src/MGmol.cc @@ -39,6 +39,7 @@ #include "KBPsiMatrixSparse.h" #include "LBFGS.h" #include "LocGridOrbitals.h" +#include "LocalMatrices2ReplicatedMatrix.h" #include "LocalizationRegions.h" #include "MDfiles.h" #include "MGkernels.h" @@ -58,6 +59,7 @@ #include "ProjectedMatricesMehrstellen.h" #include "ProjectedMatricesSparse.h" #include "ReplicatedMatrix.h" +#include "ReplicatedMatrix2SquareLocalMatrices.h" #include "ReplicatedVector.h" #include "Rho.h" #include "SP2.h" @@ -236,22 +238,15 @@ int MGmol::initial() // initialize data distribution objects bool with_spin = (mmpi.nspin() > 1); - // we support using ReplicatedMatrix on GPU only for - // a limited set of options -#ifdef HAVE_MAGMA - bool use_replicated_matrix - = !std::is_same::value; -#endif + bool use_replicated_matrix = ct.rmatrices; if (ct.Mehrstellen()) { -#ifdef HAVE_MAGMA if 
(use_replicated_matrix) proj_matrices_.reset( new ProjectedMatricesMehrstellen( ct.numst, with_spin, ct.occ_width)); else -#endif proj_matrices_.reset(new ProjectedMatricesMehrstellen< dist_matrix::DistMatrix>( ct.numst, with_spin, ct.occ_width)); @@ -259,13 +254,10 @@ int MGmol::initial() else if (ct.short_sighted) proj_matrices_.reset(new ProjectedMatricesSparse( ct.numst, ct.occ_width, lrs_, local_cluster_.get())); - else -#ifdef HAVE_MAGMA - if (use_replicated_matrix) + else if (use_replicated_matrix) proj_matrices_.reset(new ProjectedMatrices( ct.numst, with_spin, ct.occ_width)); else -#endif proj_matrices_.reset( new ProjectedMatrices>( ct.numst, with_spin, ct.occ_width)); @@ -463,14 +455,12 @@ int MGmol::initial() updateHmatrix(*current_orbitals_, *ions_); // HMVP algorithm requires that H is initialized -#ifdef HAVE_MAGMA if (use_replicated_matrix) dm_strategy_.reset( DMStrategyFactory::create(comm_, os_, *ions_, rho_.get(), energy_.get(), electrostat_.get(), this, proj_matrices_.get(), current_orbitals_)); else -#endif dm_strategy_.reset(DMStrategyFactory>::create(comm_, os_, *ions_, rho_.get(), energy_.get(), electrostat_.get(), this, @@ -641,10 +631,12 @@ void MGmol::write_header() << (omp_get_max_threads() > 1 ? 
"s " : " "); os_ << "active" << std::endl << std::endl; #endif - - os_ << " ScaLapack block size: " - << dist_matrix::DistMatrix::getBlockSize() - << std::endl; + if (!ct.rmatrices) + { + os_ << " ScaLapack block size: " + << dist_matrix::DistMatrix::getBlockSize() + << std::endl; + } if (!ct.short_sighted) { @@ -752,7 +744,6 @@ void MGmol::printEigAndOcc() && onpe0) { bool printflag = false; -#ifdef HAVE_MAGMA // try with ReplicatedMatrix first { std::shared_ptr> projmatrices @@ -765,7 +756,6 @@ void MGmol::printEigAndOcc() printflag = true; } } -#endif if (!printflag) { std::shared_ptr< @@ -882,6 +872,9 @@ void MGmol::printTimers() dist_matrix::DistMatrix::printTimers(os_); + ReplicatedMatrix2SquareLocalMatrices::printTimers(os_); + LocalMatrices2ReplicatedMatrix::printTimers(os_); + MGmol_MPI::printTimers(os_); g_kbpsi_->printTimers(os_); @@ -941,18 +934,27 @@ void MGmol::printTimers() setup_tm_.print(os_); HDFrestart::printTimers(os_); #ifdef HAVE_MAGMA - PowerGen::printTimers(os_); BlockVector::printTimers(os_); +#endif + PowerGen::printTimers(os_); DavidsonSolver::printTimers(os_); ChebyshevApproximation::printTimers(os_); -#endif PowerGen, dist_matrix::DistVector>::printTimers(os_); BlockVector::printTimers(os_); - DavidsonSolver>::printTimers(os_); - ChebyshevApproximation>::printTimers( - os_); + if (ct.rmatrices) + { + DavidsonSolver::printTimers( + os_); + ChebyshevApproximation::printTimers(os_); + } + else + { + DavidsonSolver>::printTimers(os_); + ChebyshevApproximation< + dist_matrix::DistMatrix>::printTimers(os_); + } OrbitalsPreconditioning::printTimers(os_); MDfiles::printTimers(os_); ChebyshevApproximationInterface::printTimers(os_); @@ -1018,14 +1020,26 @@ double MGmol::get_evnl(const Ions& ions) } else { - std::shared_ptr< - ProjectedMatrices>> - projmatrices = std::dynamic_pointer_cast< - ProjectedMatrices>>( - proj_matrices_); - assert(projmatrices); + if (ct.rmatrices) + { + std::shared_ptr> projmatrices + = std::dynamic_pointer_cast< + 
ProjectedMatrices>(proj_matrices_); + assert(projmatrices); - val = g_kbpsi_->getEvnl(ions, projmatrices.get()); + val = g_kbpsi_->getEvnl(ions, projmatrices.get()); + } + else + { + std::shared_ptr< + ProjectedMatrices>> + projmatrices = std::dynamic_pointer_cast< + ProjectedMatrices>>( + proj_matrices_); + assert(projmatrices); + + val = g_kbpsi_->getEvnl(ions, projmatrices.get()); + } } evnl_tm_.stop(); @@ -1456,6 +1470,8 @@ double MGmol::evaluateDMandEnergyAndForces(Orbitals* orbitals, const std::vector& tau, const std::vector& atnumbers, std::vector& forces) { + Control& ct = *(Control::instance()); + OrbitalsType* dorbitals = dynamic_cast(orbitals); // create a new temporary Ions object to be used for @@ -1478,13 +1494,25 @@ double MGmol::evaluateDMandEnergyAndForces(Orbitals* orbitals, proj_matrices_->updateThetaAndHB(); // compute DM - std::shared_ptr> dm_strategy( - DMStrategyFactory>::create(comm_, os_, ions, - rho_.get(), energy_.get(), electrostat_.get(), this, - proj_matrices_.get(), dorbitals)); + if (ct.rmatrices) + { + std::shared_ptr> dm_strategy( + DMStrategyFactory::create(comm_, + os_, ions, rho_.get(), energy_.get(), electrostat_.get(), this, + proj_matrices_.get(), dorbitals)); - dm_strategy->update(*dorbitals); + dm_strategy->update(*dorbitals); + } + else + { + std::shared_ptr> dm_strategy( + DMStrategyFactory>::create(comm_, os_, ions, + rho_.get(), energy_.get(), electrostat_.get(), this, + proj_matrices_.get(), dorbitals)); + + dm_strategy->update(*dorbitals); + } // evaluate energy and forces double ts = 0.; diff --git a/src/MVPSolver.cc b/src/MVPSolver.cc index 699a01f3..8cd4fc8a 100644 --- a/src/MVPSolver.cc +++ b/src/MVPSolver.cc @@ -404,8 +404,8 @@ void MVPSolver::printTimers(std::ostream& os) template class MVPSolver>; +template class MVPSolver; + template class MVPSolver>; -#ifdef HAVE_MAGMA template class MVPSolver; -#endif diff --git a/src/MVP_DMStrategy.cc b/src/MVP_DMStrategy.cc index 3fb53dee..e8737028 100644 --- 
a/src/MVP_DMStrategy.cc +++ b/src/MVP_DMStrategy.cc @@ -72,8 +72,8 @@ void MVP_DMStrategy::dressDM() } template class MVP_DMStrategy>; +template class MVP_DMStrategy; + template class MVP_DMStrategy>; -#ifdef HAVE_MAGMA template class MVP_DMStrategy; -#endif diff --git a/src/Power.cc b/src/Power.cc index 6a153fa9..58e268d0 100644 --- a/src/Power.cc +++ b/src/Power.cc @@ -9,6 +9,8 @@ #include "GramMatrix.h" #include "LocalVector.h" +#include "ReplicatedMatrix.h" +#include "ReplicatedVector.h" #include "SquareLocalMatrices.h" #include "mputils.h" #include "random.h" @@ -97,3 +99,4 @@ template class Power, SquareLocalMatrices>; template class Power, dist_matrix::DistMatrix>; +// template class Power; diff --git a/src/PowerGen.cc b/src/PowerGen.cc index 5305a8df..89482ca0 100644 --- a/src/PowerGen.cc +++ b/src/PowerGen.cc @@ -187,6 +187,4 @@ void PowerGen::computeGenEigenInterval(MatrixType& mat, template class PowerGen, dist_matrix::DistVector>; -#ifdef HAVE_MAGMA template class PowerGen; -#endif diff --git a/src/ProjectedMatrices.cc b/src/ProjectedMatrices.cc index 87c9fd48..29964c33 100644 --- a/src/ProjectedMatrices.cc +++ b/src/ProjectedMatrices.cc @@ -15,11 +15,13 @@ #include "DistMatrixTools.h" #include "HDFrestart.h" #include "LocalMatrices2DistMatrix.h" +#include "LocalMatrices2ReplicatedMatrix.h" #include "MGmol_MPI.h" #include "Orbitals.h" #include "Power.h" #include "PowerGen.h" #include "ReplicatedMatrix.h" +#include "ReplicatedMatrix2SquareLocalMatrices.h" #include "ReplicatedVector.h" #include "ReplicatedWorkSpace.h" #include "SP2.h" @@ -60,6 +62,7 @@ std::string ProjectedMatrices>::getMatrixType() // // conversion functions from one matrix format into another // +#ifndef HAVE_MAGMA void convert_matrix(const dist_matrix::DistMatrix& src, SquareLocalMatrices& dst) { @@ -67,7 +70,7 @@ void convert_matrix(const dist_matrix::DistMatrix& src, = DistMatrix2SquareLocalMatrices::instance(); dm2sl->convert(src, dst); } -#ifdef HAVE_MAGMA +#else void 
convert_matrix(const dist_matrix::DistMatrix& src, SquareLocalMatrices& dst) { @@ -78,19 +81,24 @@ void convert_matrix(const dist_matrix::DistMatrix& src, dst.assign(tmp); } +#endif +#ifndef HAVE_MAGMA void convert_matrix(const ReplicatedMatrix& src, SquareLocalMatrices& dst) { - src.get(dst.getRawPtr(), dst.m()); -} + assert(dst.m() > 0); + ReplicatedMatrix2SquareLocalMatrices* r2l + = ReplicatedMatrix2SquareLocalMatrices::instance(); + r2l->convert(src, dst); +} +#else void convert_matrix(const ReplicatedMatrix& src, SquareLocalMatrices& dst) { dst.assign(src); } - #endif //=====================================================================// @@ -147,17 +155,16 @@ void ProjectedMatrices>::convert( sl2dm->accumulate(src, dst); } -#ifdef HAVE_MAGMA template <> void ProjectedMatrices::convert( const SquareLocalMatrices& src, ReplicatedMatrix& dst) { - dst.init(src.getSubMatrix(), dim_); + LocalMatrices2ReplicatedMatrix* sl2rm + = LocalMatrices2ReplicatedMatrix::instance(); - dst.consolidate(); + sl2rm->accumulate(src, dst); } -#endif template <> void ProjectedMatrices>::setupMPI( @@ -171,13 +178,17 @@ void ProjectedMatrices>::setupMPI( LocalMatrices2DistMatrix::setup(comm, global_indexes); } -#ifdef HAVE_MAGMA template <> void ProjectedMatrices::setupMPI( const std::vector>& global_indexes) { + MGmol_MPI& mmpi = *(MGmol_MPI::instance()); + MPI_Comm comm = mmpi.commSpin(); + + LocalMatrices2ReplicatedMatrix::setup(global_indexes); + + ReplicatedMatrix2SquareLocalMatrices::setup(global_indexes); } -#endif template void ProjectedMatrices::setup( @@ -253,7 +264,7 @@ template void ProjectedMatrices::applyInvS( SquareLocalMatrices& mat) { - // build DistMatrix from SquareLocalMatrices + // build Matrix from SquareLocalMatrices convert(mat, *work_); gm_->applyInv(*work_); @@ -738,6 +749,7 @@ double ProjectedMatrices::checkCond( template int ProjectedMatrices::writeDM(HDFrestart& h5f_file) { + // std::cout << "ProjectedMatrices::writeDM()..." 
<< std::endl; std::string name("/Density_Matrix"); return dm_->write(h5f_file, name); } @@ -1193,7 +1205,6 @@ void ProjectedMatrices>:: power.computeGenEigenInterval(mat, *gm_, interval, maxits, pad); } -#ifdef HAVE_MAGMA template <> void ProjectedMatrices::computeGenEigenInterval( std::vector& interval, const int maxits, const double pad) @@ -1204,7 +1215,6 @@ void ProjectedMatrices::computeGenEigenInterval( power.computeGenEigenInterval(mat, *gm_, interval, maxits, pad); } -#endif template <> void ProjectedMatrices>::consolidateH() @@ -1228,12 +1238,12 @@ void ProjectedMatrices>::consolidateH() consolidate_H_tm_.stop(); } -#ifdef HAVE_MAGMA template <> void ProjectedMatrices::consolidateH() { consolidate_H_tm_.start(); + // assign SquareLocalMatrices to matH_ matH_->assign(*localHl_); matH_->add(*localHnl_); @@ -1242,7 +1252,6 @@ void ProjectedMatrices::consolidateH() consolidate_H_tm_.stop(); } -#endif template void ProjectedMatrices::updateSubMatX(const MatrixType& dm) @@ -1261,7 +1270,6 @@ ProjectedMatrices>::getReplicatedDM() return sldm; } -#ifdef HAVE_MAGMA template <> SquareLocalMatrices ProjectedMatrices::getReplicatedDM() @@ -1272,9 +1280,6 @@ ProjectedMatrices::getReplicatedDM() return sldm; } -#endif template class ProjectedMatrices>; -#ifdef HAVE_MAGMA template class ProjectedMatrices; -#endif diff --git a/src/ProjectedMatrices2N.cc b/src/ProjectedMatrices2N.cc index 479ee4f0..d13196b2 100644 --- a/src/ProjectedMatrices2N.cc +++ b/src/ProjectedMatrices2N.cc @@ -68,6 +68,4 @@ void ProjectedMatrices2N::iterativeUpdateDMwithEigenstates( } template class ProjectedMatrices2N>; -#ifdef HAVE_MAGMA template class ProjectedMatrices2N; -#endif diff --git a/src/ProjectedMatricesMehrstellen.cc b/src/ProjectedMatricesMehrstellen.cc index 0d2893ba..b12aee94 100644 --- a/src/ProjectedMatricesMehrstellen.cc +++ b/src/ProjectedMatricesMehrstellen.cc @@ -99,6 +99,4 @@ void ProjectedMatricesMehrstellen::rotateAll( template class ProjectedMatricesMehrstellen< 
dist_matrix::DistMatrix>; -#ifdef HAVE_MAGMA template class ProjectedMatricesMehrstellen; -#endif diff --git a/src/ReplicatedMatrix.cc b/src/ReplicatedMatrix.cc index 27c7e31e..48a679a8 100644 --- a/src/ReplicatedMatrix.cc +++ b/src/ReplicatedMatrix.cc @@ -6,25 +6,37 @@ // All rights reserved. // This file is part of MGmol. For details, see https://github.com/llnl/mgmol. // Please also read this link https://github.com/llnl/mgmol/LICENSE -#ifdef HAVE_MAGMA - #include "ReplicatedMatrix.h" - +#include "LocalMatrices2ReplicatedMatrix.h" #include "ReplicatedVector.h" #include "memory_space.h" #include "random.h" +#ifdef HAVE_MAGMA #include "magma_v2.h" +using Memory = MemorySpace::Memory; +constexpr double gpuroundup = 32; +#else +#include "blas3_c.h" +#include "fc_mangle.h" +#include "lapack_c.h" +using Memory = MemorySpace::Memory; +#endif #include -using MemoryDev = MemorySpace::Memory; - -constexpr double gpuroundup = 32; - MPI_Comm ReplicatedMatrix::comm_ = MPI_COMM_NULL; bool ReplicatedMatrix::onpe0_ = false; +static int roundup(const int n) +{ +#ifdef HAVE_MAGMA + return magma_roundup(n, gpuroundup); +#else + return n; +#endif +} + void rotateSym(ReplicatedMatrix& mat, const ReplicatedMatrix& rotation_matrix, ReplicatedMatrix& work) { @@ -35,8 +47,8 @@ void rotateSym(ReplicatedMatrix& mat, const ReplicatedMatrix& rotation_matrix, ReplicatedMatrix::ReplicatedMatrix( const std::string name, const int m, const int n) : dim_(m), - ld_(magma_roundup(dim_, gpuroundup)), - device_data_(MemoryDev::allocate(dim_ * ld_), MemoryDev::free), + ld_(roundup(dim_)), + data_(Memory::allocate(dim_ * ld_), Memory::free), name_(name) { assert(m == n); @@ -46,8 +58,8 @@ ReplicatedMatrix::ReplicatedMatrix( ReplicatedMatrix::ReplicatedMatrix(const std::string name, const int n) : dim_(n), - ld_(magma_roundup(dim_, gpuroundup)), - device_data_(MemoryDev::allocate(dim_ * ld_), MemoryDev::free), + ld_(roundup(n)), + data_(Memory::allocate(dim_ * ld_), Memory::free), name_(name) { 
clear(); @@ -56,26 +68,37 @@ ReplicatedMatrix::ReplicatedMatrix(const std::string name, const int n) ReplicatedMatrix::ReplicatedMatrix(const std::string name, const double* const diagonal, const int m, const int n) : dim_(m), - ld_(magma_roundup(dim_, gpuroundup)), - device_data_(MemoryDev::allocate(dim_ * ld_), MemoryDev::free) + ld_(roundup(dim_)), + data_(Memory::allocate(dim_ * ld_), Memory::free), + name_(name) { clear(); +#ifdef HAVE_MAGMA auto& magma_singleton = MagmaSingleton::get_magma_singleton(); magma_dsetvector( - dim_, diagonal, 1, device_data_.get(), ld_ + 1, magma_singleton.queue_); + dim_, diagonal, 1, data_.get(), ld_ + 1, magma_singleton.queue_); +#else + int ione = 1; + int incy = ld_ + 1; + DCOPY(&dim_, diagonal, &ione, data_.get(), &incy); +#endif } ReplicatedMatrix::ReplicatedMatrix(const ReplicatedMatrix& mat) : dim_(mat.dim_), ld_(mat.ld_), - device_data_(MemoryDev::allocate(dim_ * ld_), MemoryDev::free) + data_(Memory::allocate(dim_ * ld_), Memory::free) { +#ifdef HAVE_MAGMA auto& magma_singleton = MagmaSingleton::get_magma_singleton(); - magma_dcopymatrix(dim_, dim_, mat.device_data_.get(), mat.ld_, - device_data_.get(), ld_, magma_singleton.queue_); + magma_dcopymatrix(dim_, dim_, mat.data_.get(), mat.ld_, data_.get(), ld_, + magma_singleton.queue_); +#else + memcpy(data_.get(), mat.data_.get(), ld_ * dim_ * sizeof(double)); +#endif } ReplicatedMatrix& ReplicatedMatrix::operator=(const ReplicatedMatrix& rhs) @@ -84,12 +107,16 @@ ReplicatedMatrix& ReplicatedMatrix::operator=(const ReplicatedMatrix& rhs) { ld_ = rhs.ld_; dim_ = rhs.dim_; - device_data_.reset(MemoryDev::allocate(dim_ * ld_)); + data_.reset(Memory::allocate(dim_ * ld_)); +#ifdef HAVE_MAGMA auto& magma_singleton = MagmaSingleton::get_magma_singleton(); - magma_dcopymatrix(dim_, dim_, rhs.device_data_.get(), rhs.ld_, - device_data_.get(), ld_, magma_singleton.queue_); + magma_dcopymatrix(dim_, dim_, rhs.data_.get(), rhs.ld_, data_.get(), + ld_, magma_singleton.queue_); +#else 
+ memcpy(data_.get(), rhs.data_.get(), ld_ * dim_ * sizeof(double)); +#endif } return *this; } @@ -99,62 +126,119 @@ ReplicatedMatrix::~ReplicatedMatrix() {} void ReplicatedMatrix::getsub( const ReplicatedMatrix& src, int m, int n, int ia, int ja) { +#ifdef HAVE_MAGMA + auto& magma_singleton = MagmaSingleton::get_magma_singleton(); - magma_dcopymatrix(m, n, src.device_data_.get() + ja * src.ld_ + ia, src.ld_, - device_data_.get(), ld_, magma_singleton.queue_); + magma_dcopymatrix(m, n, src.data_.get() + ja * src.ld_ + ia, src.ld_, + data_.get(), ld_, magma_singleton.queue_); +#else + char uplo = 'a'; + int lda = src.ld_; + int ldb = ld_; + DLACPY(&uplo, &m, &n, src.data_.get() + ja * src.ld_ + ia, &lda, + data_.get(), &ldb); +#endif } void ReplicatedMatrix::consolidate() { assert(comm_ != MPI_COMM_NULL); - std::vector mat(dim_ * dim_); - std::vector mat_sum(dim_ * dim_); + std::vector mat(dim_ * ld_); +#ifdef HAVE_MAGMA + std::vector mat_sum(dim_ * ld_); + double* mat_sum_data = mat_sum.data(); auto& magma_singleton = MagmaSingleton::get_magma_singleton(); // copy from GPU to CPU - magma_dgetmatrix(dim_, dim_, device_data_.get(), ld_, mat.data(), dim_, - magma_singleton.queue_); - + magma_dgetmatrix( + dim_, dim_, data_.get(), ld_, mat.data(), ld_, magma_singleton.queue_); +#else + double* mat_sum_data = data_.get(); + memcpy(mat.data(), data_.get(), dim_ * ld_ * sizeof(double)); +#endif MPI_Allreduce( - mat.data(), mat_sum.data(), dim_ * dim_, MPI_DOUBLE, MPI_SUM, comm_); + mat.data(), mat_sum_data, dim_ * ld_, MPI_DOUBLE, MPI_SUM, comm_); +#ifdef HAVE_MAGMA // copy from CPU to GPU - magma_dsetmatrix(dim_, dim_, mat_sum.data(), dim_, device_data_.get(), ld_, + magma_dsetmatrix(dim_, dim_, mat_sum.data(), ld_, data_.get(), ld_, magma_singleton.queue_); +#endif } void ReplicatedMatrix::assign( const ReplicatedMatrix& src, const int ib, const int jb) { assert(this != &src); +#ifdef HAVE_MAGMA auto& magma_singleton = MagmaSingleton::get_magma_singleton(); - 
magma_dcopymatrix(src.dim_, src.dim_, src.device_data_.get(), src.ld_, - device_data_.get() + jb * ld_ + ib, ld_, magma_singleton.queue_); + magma_dcopymatrix(src.dim_, src.dim_, src.data_.get(), src.ld_, + data_.get() + jb * ld_ + ib, ld_, magma_singleton.queue_); +#else + char uplo = 'a'; + int dim = src.dim_; + int lda = src.ld_; + int ldb = ld_; + DLACPY(&uplo, &dim, &dim, src.data_.get(), &lda, + data_.get() + jb * ld_ + ib, &ldb); +#endif } template <> void ReplicatedMatrix::assign( SquareLocalMatrices& src) { +#ifdef HAVE_MAGMA auto& magma_singleton = MagmaSingleton::get_magma_singleton(); - magma_dsetmatrix(src.m(), src.n(), src.getSubMatrix(), src.n(), - device_data_.get(), ld_, magma_singleton.queue_); + magma_dsetmatrix(src.m(), src.n(), src.getSubMatrix(), src.n(), data_.get(), + ld_, magma_singleton.queue_); +#else + LocalMatrices2ReplicatedMatrix* l2r + = LocalMatrices2ReplicatedMatrix::instance(); + l2r->convert(src, *this, dim_, 0.); +#endif } template <> void ReplicatedMatrix::assign( SquareLocalMatrices& src) { + assert(src.n() == dim_); + + // current implementation restriction + assert(src.nmat() == 1); + +#ifdef HAVE_MAGMA + auto& magma_singleton = MagmaSingleton::get_magma_singleton(); + + magma_dcopymatrix(src.n(), src.n(), src.getRawPtr(), src.n(), data_.get(), + ld_, magma_singleton.queue_); +#else + // copy columns of matrix + for (int j = 0; j < dim_; j++) + memcpy(data_.get() + j * ld_, src.getRawPtr() + j * src.n(), + dim_ * sizeof(double)); +#endif +} + +void ReplicatedMatrix::assign(const double* const src, const int ld) +{ +#ifdef HAVE_MAGMA auto& magma_singleton = MagmaSingleton::get_magma_singleton(); - magma_dcopymatrix(src.n(), src.n(), src.getRawPtr(), src.n(), - device_data_.get(), ld_, magma_singleton.queue_); + magma_dcopymatrix( + dim_, dim_, src, ld, data_.get(), ld_, magma_singleton.queue_); +#else + // copy columns of matrix + for (int j = 0; j < dim_; j++) + memcpy(data_.get() + j * ld_, src + j * ld, dim_ * 
sizeof(double)); +#endif } void ReplicatedMatrix::add(const SquareSubMatrix& mat) @@ -175,8 +259,9 @@ void ReplicatedMatrix::add(const SquareSubMatrix& mat) } } +#ifdef HAVE_MAGMA std::unique_ptr src_dev( - MemoryDev::allocate(dim_ * ld_), MemoryDev::free); + Memory::allocate(dim_ * ld_), Memory::free); auto& magma_singleton = MagmaSingleton::get_magma_singleton(); @@ -185,40 +270,69 @@ void ReplicatedMatrix::add(const SquareSubMatrix& mat) magma_singleton.queue_); // add to object data - magmablas_dgeadd(dim_, dim_, 1., src_dev.get(), ld_, device_data_.get(), - ld_, magma_singleton.queue_); + magmablas_dgeadd(dim_, dim_, 1., src_dev.get(), ld_, data_.get(), ld_, + magma_singleton.queue_); +#else + double* data = data_.get(); + for (int j = 0; j < dim_; j++) + for (int i = 0; i < dim_; i++) + data[i + j * ld_] += src[i + j * n]; +#endif } void ReplicatedMatrix::init(const double* const ha, const int lda) { +#ifdef HAVE_MAGMA auto& magma_singleton = MagmaSingleton::get_magma_singleton(); magma_dsetmatrix( - dim_, dim_, ha, lda, device_data_.get(), ld_, magma_singleton.queue_); + dim_, dim_, ha, lda, data_.get(), ld_, magma_singleton.queue_); +#else + for (int j = 0; j < dim_; j++) + memcpy(data_.get() + ld_ * j, ha + lda * j, dim_ * sizeof(double)); +#endif } void ReplicatedMatrix::get(double* ha, const int lda) const { + assert(ha != nullptr); +#ifdef HAVE_MAGMA auto& magma_singleton = MagmaSingleton::get_magma_singleton(); magma_dgetmatrix( - dim_, dim_, device_data_.get(), ld_, ha, lda, magma_singleton.queue_); + dim_, dim_, data_.get(), ld_, ha, lda, magma_singleton.queue_); +#else + for (int j = 0; j < dim_; j++) + memcpy(ha + lda * j, data_.get() + ld_ * j, dim_ * sizeof(double)); +#endif } void ReplicatedMatrix::getDiagonalValues(double* ha) { +#ifdef HAVE_MAGMA auto& magma_singleton = MagmaSingleton::get_magma_singleton(); - magma_dgetvector( - dim_, device_data_.get(), ld_ + 1, ha, 1, magma_singleton.queue_); + magma_dgetvector(dim_, data_.get(), ld_ + 1, 
ha, 1, magma_singleton.queue_); +#else + int dim = dim_; + int incx = ld_ + 1; + int incy = 1; + DCOPY(&dim, data_.get(), &incx, ha, &incy); +#endif } void ReplicatedMatrix::axpy(const double alpha, const ReplicatedMatrix& a) { +#ifdef HAVE_MAGMA auto& magma_singleton = MagmaSingleton::get_magma_singleton(); - magmablas_dgeadd(dim_, dim_, alpha, a.device_data_.get(), a.ld_, - device_data_.get(), ld_, magma_singleton.queue_); + magmablas_dgeadd(dim_, dim_, alpha, a.data_.get(), a.ld_, data_.get(), ld_, + magma_singleton.queue_); +#else + int dim = dim_ * ld_; + int ione = 1; + DAXPY(&dim, &alpha, a.data_.get(), &ione, data_.get(), &ione); +#endif } void ReplicatedMatrix::setRandom(const double minv, const double maxv) @@ -227,32 +341,52 @@ void ReplicatedMatrix::setRandom(const double minv, const double maxv) generateRandomData(mat, minv, maxv); +#ifdef HAVE_MAGMA auto& magma_singleton = MagmaSingleton::get_magma_singleton(); - magma_dsetmatrix(dim_, dim_, mat.data(), dim_, device_data_.get(), ld_, - magma_singleton.queue_); + magma_dsetmatrix( + dim_, dim_, mat.data(), dim_, data_.get(), ld_, magma_singleton.queue_); +#else + double* data = data_.get(); + for (int j = 0; j < dim_; j++) + for (int i = 0; i < dim_ * ld_; i++) + data[j * ld_ + i] = mat[j * dim_ + i]; +#endif } void ReplicatedMatrix::identity() { +#ifdef HAVE_MAGMA auto& magma_singleton = MagmaSingleton::get_magma_singleton(); - magmablas_dlaset(MagmaFull, dim_, dim_, 0.0, 1.0, device_data_.get(), ld_, + magmablas_dlaset(MagmaFull, dim_, dim_, 0.0, 1.0, data_.get(), ld_, magma_singleton.queue_); +#else + double* data = data_.get(); + memset(data, 0, dim_ * ld_ * sizeof(double)); + for (int i = 0; i < dim_; i++) + data[i * ld_ + i] = 1.; +#endif } void ReplicatedMatrix::scal(const double alpha) { + int size = dim_ * ld_; +#ifdef HAVE_MAGMA auto& magma_singleton = MagmaSingleton::get_magma_singleton(); - magma_dscal( - dim_ * ld_, alpha, device_data_.get(), 1, magma_singleton.queue_); + magma_dscal(size, 
alpha, data_.get(), 1, magma_singleton.queue_); +#else + int ione = 1; + DSCAL(&size, &alpha, data_.get(), &ione); +#endif } // this = alpha * transpose(A) + beta * this void ReplicatedMatrix::transpose( const double alpha, const ReplicatedMatrix& a, const double beta) { +#ifdef HAVE_MAGMA auto& magma_singleton = MagmaSingleton::get_magma_singleton(); double* dwork; @@ -262,54 +396,87 @@ void ReplicatedMatrix::transpose( std::cerr << "magma_dmalloc failed!" << std::endl; } - magmablas_dtranspose(dim_, dim_, a.device_data_.get(), a.ld_, dwork, ld_, - magma_singleton.queue_); + magmablas_dtranspose( + dim_, dim_, a.data_.get(), a.ld_, dwork, ld_, magma_singleton.queue_); - magmablas_dgeadd2(dim_, dim_, alpha, dwork, ld_, beta, device_data_.get(), - ld_, magma_singleton.queue_); + magmablas_dgeadd2(dim_, dim_, alpha, dwork, ld_, beta, data_.get(), ld_, + magma_singleton.queue_); magma_singleton.sync(); magma_free(dwork); +#else + double* data = data_.get(); + double* adata = a.data_.get(); + for (int i = 0; i < dim_; i++) + { + for (int j = 0; j < dim_; j++) + { + data[j * ld_ + i] + = beta * data[j * ld_ + i] + alpha * adata[i * ld_ + j]; + } + } +#endif } void ReplicatedMatrix::gemm(const char transa, const char transb, const double alpha, const ReplicatedMatrix& a, const ReplicatedMatrix& b, const double beta) { +#ifdef HAVE_MAGMA magma_trans_t magma_transa = magma_trans_const(transa); magma_trans_t magma_transb = magma_trans_const(transb); auto& magma_singleton = MagmaSingleton::get_magma_singleton(); magmablas_dgemm(magma_transa, magma_transb, dim_, dim_, dim_, alpha, - a.device_data_.get(), a.ld_, b.device_data_.get(), b.ld_, beta, - device_data_.get(), ld_, magma_singleton.queue_); + a.data_.get(), a.ld_, b.data_.get(), b.ld_, beta, data_.get(), ld_, + magma_singleton.queue_); +#else + int ld = ld_; + int ald = a.ld_; + int bld = b.ld_; + DGEMM(&transa, &transb, &dim_, &dim_, &dim_, &alpha, a.data_.get(), &ald, + b.data_.get(), &bld, &beta, data_.get(), &ld); 
+#endif } void ReplicatedMatrix::symm(const char side, const char uplo, const double alpha, const ReplicatedMatrix& a, const ReplicatedMatrix& b, const double beta) { +#ifdef HAVE_MAGMA magma_side_t magma_side = magma_side_const(side); magma_uplo_t magma_uplo = magma_uplo_const(uplo); auto& magma_singleton = MagmaSingleton::get_magma_singleton(); - magma_dsymm(magma_side, magma_uplo, dim_, dim_, alpha, a.device_data_.get(), - a.ld_, b.device_data_.get(), b.ld_, beta, device_data_.get(), ld_, - magma_singleton.queue_); + magma_dsymm(magma_side, magma_uplo, dim_, dim_, alpha, a.data_.get(), a.ld_, + b.data_.get(), b.ld_, beta, data_.get(), ld_, magma_singleton.queue_); +#else + int ld = ld_; + int ald = a.ld_; + int bld = b.ld_; + DSYMM(&side, &uplo, &dim_, &dim_, &alpha, a.data_.get(), &ald, + b.data_.get(), &bld, &beta, data_.get(), &ld); +#endif } int ReplicatedMatrix::potrf(char uplo) { - assert(device_data_.get()); + assert(data_.get()); + int info; +#ifdef HAVE_MAGMA magma_uplo_t magma_uplo = magma_uplo_const(uplo); - int info; - magma_dpotrf_gpu(magma_uplo, dim_, device_data_.get(), ld_, &info); + magma_dpotrf_gpu(magma_uplo, dim_, data_.get(), ld_, &info); +#else + int ld = ld_; + DPOTRF(&uplo, &dim_, data_.get(), &ld, &info); +#endif if (info != 0) - std::cerr << "magma_dpotrf_gpu failed, info = " << info << std::endl; + std::cerr << "ReplicatedMatrix::potrf() failed, info = " << info + << std::endl; return info; } @@ -317,17 +484,27 @@ int ReplicatedMatrix::potrf(char uplo) void ReplicatedMatrix::getrf(std::vector& ipiv) { int info; - magma_dgetrf_gpu(dim_, dim_, device_data_.get(), ld_, ipiv.data(), &info); +#ifdef HAVE_MAGMA + magma_dgetrf_gpu(dim_, dim_, data_.get(), ld_, ipiv.data(), &info); +#else + int ld = ld_; + DGETRF(&dim_, &dim_, data_.get(), &ld, ipiv.data(), &info); +#endif if (info != 0) std::cerr << "magma_dgetrf_gpu failed, info = " << info << std::endl; } int ReplicatedMatrix::potri(char uplo) { + int info; +#ifdef HAVE_MAGMA magma_uplo_t 
magma_uplo = magma_uplo_const(uplo); - int info; - magma_dpotri_gpu(magma_uplo, dim_, device_data_.get(), ld_, &info); + magma_dpotri_gpu(magma_uplo, dim_, data_.get(), ld_, &info); +#else + int ld = ld_; + DPOTRI(&uplo, &dim_, data_.get(), &ld, &info); +#endif if (info != 0) std::cerr << "magma_dpotri_gpu failed, info = " << info << std::endl; @@ -339,34 +516,51 @@ int ReplicatedMatrix::potri(char uplo) // A = U**T*U or A = L*L**T computed by potrf void ReplicatedMatrix::potrs(char uplo, ReplicatedMatrix& b) { + int info; +#ifdef HAVE_MAGMA magma_uplo_t magma_uplo = magma_uplo_const(uplo); - int info; - magma_dpotrs_gpu(magma_uplo, dim_, dim_, device_data_.get(), ld_, - b.device_data_.get(), b.ld_, &info); - if (info != 0) - std::cerr << "magma_dpotrs_gpu failed, info = " << info << std::endl; + magma_dpotrs_gpu( + magma_uplo, dim_, dim_, data_.get(), ld_, b.data(), b.ld_, &info); +#else + int ld = ld_; + int bld = b.ld_; + DPOTRS(&uplo, &dim_, &dim_, data_.get(), &ld, b.data(), &bld, &info); +#endif + if (info != 0) std::cerr << "dpotrs failed, info = " << info << std::endl; } void ReplicatedMatrix::potrs(char uplo, ReplicatedVector& b) { + int info; +#ifdef HAVE_MAGMA magma_uplo_t magma_uplo = magma_uplo_const(uplo); - int info; magma_dpotrs_gpu( - magma_uplo, dim_, 1, device_data_.get(), ld_, b.data(), dim_, &info); - if (info != 0) - std::cerr << "magma_dpotrs_gpu failed, info = " << info << std::endl; + magma_uplo, dim_, 1, data_.get(), ld_, b.data(), dim_, &info); +#else + int ione = 1; + int ld = ld_; + DPOTRS(&uplo, &dim_, &ione, data_.get(), &ld, b.data(), &dim_, &info); +#endif + if (info != 0) std::cerr << "dpotrs failed, info = " << info << std::endl; } void ReplicatedMatrix::getrs( char trans, ReplicatedMatrix& b, std::vector& ipiv) { + int info; +#ifdef HAVE_MAGMA magma_trans_t magma_trans = magma_trans_const(trans); - int info; - magma_dgetrs_gpu(magma_trans, dim_, dim_, device_data_.get(), ld_, - ipiv.data(), b.device_data_.get(), b.ld_, &info); +
magma_dgetrs_gpu(magma_trans, dim_, dim_, data_.get(), ld_, ipiv.data(), + b.data_.get(), b.ld_, &info); +#else + int ld = ld_; + int bld = b.ld_; + DGETRS(&trans, &dim_, &dim_, data_.get(), &ld, ipiv.data(), b.data_.get(), + &bld, &info); +#endif if (info != 0) std::cerr << "magma_dgetrs_gpu failed, info = " << info << std::endl; } @@ -374,27 +568,37 @@ void ReplicatedMatrix::getrs( void ReplicatedMatrix::syev( char jobz, char uplo, std::vector& evals, ReplicatedMatrix& z) { + int info; +#ifdef HAVE_MAGMA magma_vec_t magma_jobz = magma_vec_const(jobz); magma_uplo_t magma_uplo = magma_uplo_const(uplo); auto& magma_singleton = MagmaSingleton::get_magma_singleton(); // copy matrix into z - magmablas_dlacpy(MagmaFull, dim_, dim_, device_data_.get(), ld_, - z.device_data_.get(), z.ld_, magma_singleton.queue_); + magmablas_dlacpy(MagmaFull, dim_, dim_, data_.get(), ld_, z.data_.get(), + z.ld_, magma_singleton.queue_); magma_int_t nb = magma_get_ssytrd_nb(dim_); magma_int_t lwork = std::max(2 * dim_ + dim_ * nb, 1 + 6 * dim_ + 2 * dim_ * dim_); + int liwork = 3 + 5 * dim_; - int info; std::vector wa(dim_ * dim_); std::vector work(lwork); std::vector iwork(liwork); - magma_dsyevd_gpu(magma_jobz, magma_uplo, dim_, z.device_data_.get(), z.ld_, + magma_dsyevd_gpu(magma_jobz, magma_uplo, dim_, z.data_.get(), z.ld_, evals.data(), wa.data(), dim_, work.data(), lwork, iwork.data(), liwork, &info); +#else + memcpy(z.data_.get(), data_.get(), dim_ * ld_ * sizeof(double)); + int lwork = 3 * dim_ - 1; + std::vector work(lwork); + int zld = z.ld_; + DSYEV(&jobz, &uplo, &dim_, z.data_.get(), &zld, evals.data(), work.data(), + &lwork, &info); +#endif if (info != 0) std::cerr << "magma_dsyevd_gpu failed, info = " << info << std::endl; // for(auto& d : evals)std::cout<(itype); - int info; - magma_dsygst_gpu(magma_itype, magma_uplo, dim_, device_data_.get(), ld_, - b.device_data_.get(), b.ld_, &info); + magma_dsygst_gpu(magma_itype, magma_uplo, dim_, data_.get(), ld_, + b.data_.get(), 
b.ld_, &info); +#else + int ld = ld_; + int bld = b.ld_; + DSYGST(&itype, &uplo, &dim_, data_.get(), &ld, b.data_.get(), &bld, &info); +#endif if (info != 0) std::cerr << "magma_dsygst_gpu failed, info = " << info << std::endl; } @@ -415,6 +625,7 @@ void ReplicatedMatrix::sygst(int itype, char uplo, const ReplicatedMatrix& b) void ReplicatedMatrix::trmm(const char side, const char uplo, const char trans, const char diag, const double alpha, const ReplicatedMatrix& a) { +#ifdef HAVE_MAGMA magma_side_t magma_side = magma_side_const(side); magma_uplo_t magma_uplo = magma_uplo_const(uplo); magma_trans_t magma_trans = magma_trans_const(trans); @@ -423,13 +634,19 @@ void ReplicatedMatrix::trmm(const char side, const char uplo, const char trans, auto& magma_singleton = MagmaSingleton::get_magma_singleton(); magma_dtrmm(magma_side, magma_uplo, magma_trans, magma_diag, dim_, dim_, - alpha, a.device_data_.get(), a.ld_, device_data_.get(), ld_, - magma_singleton.queue_); + alpha, a.data_.get(), a.ld_, data_.get(), ld_, magma_singleton.queue_); +#else + int ld = ld_; + int ald = a.ld_; + DTRMM(&side, &uplo, &trans, &diag, &dim_, &dim_, &alpha, a.data_.get(), + &ald, data_.get(), &ld); +#endif } void ReplicatedMatrix::trtrs(const char uplo, const char trans, const char diag, ReplicatedMatrix& b) const { +#ifdef HAVE_MAGMA magma_uplo_t magma_uplo = magma_uplo_const(uplo); magma_trans_t magma_trans = magma_trans_const(trans); magma_diag_t magma_diag = magma_diag_const(diag); @@ -437,94 +654,142 @@ void ReplicatedMatrix::trtrs(const char uplo, const char trans, const char diag, auto& magma_singleton = MagmaSingleton::get_magma_singleton(); magma_dtrsm(MagmaLeft, magma_uplo, magma_trans, magma_diag, dim_, dim_, 1., - device_data_.get(), ld_, b.device_data_.get(), b.ld_, - magma_singleton.queue_); + data_.get(), ld_, b.data_.get(), b.ld_, magma_singleton.queue_); +#else + double one = 1.; + char side = 'L'; + int ld = ld_; + int bld = b.ld_; + DTRSM(&side, &uplo, &trans, &diag, 
&dim_, &dim_, &one, data_.get(), &ld, + b.data_.get(), &bld); +#endif } // get max in absolute value of column j int ReplicatedMatrix::iamax(const int j, double& val) { +#ifdef HAVE_MAGMA auto& magma_singleton = MagmaSingleton::get_magma_singleton(); - int indx = magma_idamax(dim_, device_data_.get() + j * ld_, 1, - magma_singleton.queue_) - - 1; - magma_dgetvector(dim_, device_data_.get() + j * ld_ + indx, 1, &val, 1, - magma_singleton.queue_); + int indx + = magma_idamax(dim_, data_.get() + j * ld_, 1, magma_singleton.queue_) + - 1; + magma_dgetvector( + 1, data_.get() + j * ld_ + indx, 1, &val, 1, magma_singleton.queue_); + return indx; +#else + int ione = 1; + int indx = IDAMAX(&dim_, data_.get() + j * ld_, &ione) - 1; + val = *(data_.get() + j * ld_ + indx); +#endif return indx; } void ReplicatedMatrix::setVal(const int i, const int j, const double val) { +#ifdef HAVE_MAGMA auto& magma_singleton = MagmaSingleton::get_magma_singleton(); - magma_dsetvector(dim_, &val, 1, device_data_.get() + j * ld_ + i, 1, - magma_singleton.queue_); + // copy exactly one value from the host scalar to element (i,j) + magma_dsetvector( + 1, &val, 1, data_.get() + j * ld_ + i, 1, magma_singleton.queue_); +#else + *(data_.get() + j * ld_ + i) = val; +#endif } void ReplicatedMatrix::setDiagonal(const std::vector& diag_values) { +#ifdef HAVE_MAGMA auto& magma_singleton = MagmaSingleton::get_magma_singleton(); - magma_dsetvector(dim_, diag_values.data(), 1, device_data_.get(), ld_ + 1, + magma_dsetvector(dim_, diag_values.data(), 1, data_.get(), ld_ + 1, magma_singleton.queue_); +#else + double* data = data_.get(); + for (int i = 0; i < dim_; i++) + data[i * (ld_ + 1)] = diag_values[i]; +#endif } double ReplicatedMatrix::trace() const { + const std::vector val(dim_, 1.); +#ifdef HAVE_MAGMA auto& magma_singleton = MagmaSingleton::get_magma_singleton(); // this is a little contorted, but it works for now...
std::unique_ptr tmp_dev( - MemoryDev::allocate(dim_ * ld_), MemoryDev::free); - const std::vector val(dim_, 1.); + Memory::allocate(dim_ * ld_), Memory::free); magma_dsetvector( dim_, val.data(), 1, tmp_dev.get(), 1, magma_singleton.queue_); - return magma_ddot(dim_, device_data_.get(), ld_ + 1, tmp_dev.get(), 1, - magma_singleton.queue_); + return magma_ddot( + dim_, data_.get(), ld_ + 1, tmp_dev.get(), 1, magma_singleton.queue_); +#else + int ione = 1; + int ldp = ld_ + 1; + return DDOT(&dim_, data_.get(), &ldp, val.data(), &ione); +#endif } double ReplicatedMatrix::traceProduct(const ReplicatedMatrix& matrix) const { + double trace = 0.; + +#ifdef HAVE_MAGMA auto& magma_singleton = MagmaSingleton::get_magma_singleton(); - double trace = 0.; for (int i = 0; i < dim_; i++) - trace += magma_ddot(dim_, device_data_.get() + i, ld_, - matrix.device_data_.get() + matrix.ld_ * i, 1, - magma_singleton.queue_); + trace += magma_ddot(dim_, data_.get() + i, ld_, + matrix.data_.get() + matrix.ld_ * i, 1, magma_singleton.queue_); +#else + int ione = 1; + int ld = ld_; + for (int i = 0; i < dim_; i++) + trace += DDOT(&dim_, data_.get() + i, &ld, + matrix.data_.get() + matrix.ld_ * i, &ione); +#endif return trace; } double ReplicatedMatrix::norm(char ty) { +#ifdef HAVE_MAGMA magma_norm_t magma_ty = magma_norm_const(ty); auto& magma_singleton = MagmaSingleton::get_magma_singleton(); - int lwork = dim_; double* dwork; - magma_dmalloc(&dwork, lwork); - double norm_val = magmablas_dlange(magma_ty, dim_, dim_, device_data_.get(), - ld_, dwork, lwork, magma_singleton.queue_); + magma_dmalloc(&dwork, dim_); + double norm_val = magmablas_dlange(magma_ty, dim_, dim_, data_.get(), ld_, + dwork, dim_, magma_singleton.queue_); magma_singleton.sync(); magma_free(dwork); return norm_val; +#else + std::vector dwork(dim_); + int ld = ld_; + return DLANGE(&ty, &dim_, &dim_, data_.get(), &ld, dwork.data()); +#endif } void ReplicatedMatrix::trset(const char uplo) { +#ifdef HAVE_MAGMA auto&
magma_singleton = MagmaSingleton::get_magma_singleton(); std::vector mat(dim_ * dim_); - magma_dgetmatrix(dim_, dim_, device_data_.get(), ld_, mat.data(), dim_, - magma_singleton.queue_); + magma_dgetmatrix( + dim_, dim_, data_.get(), ld_, mat.data(), dim_, magma_singleton.queue_); +#else + double* mat = data_.get(); +#endif if (uplo == 'l' || uplo == 'L') { @@ -539,16 +804,22 @@ void ReplicatedMatrix::trset(const char uplo) mat[i + j * dim_] = 0.; } - magma_dsetmatrix(dim_, dim_, mat.data(), dim_, device_data_.get(), ld_, - magma_singleton.queue_); +#ifdef HAVE_MAGMA + magma_dsetmatrix( + dim_, dim_, mat.data(), dim_, data_.get(), ld_, magma_singleton.queue_); +#endif } void ReplicatedMatrix::clear() { +#ifdef HAVE_MAGMA auto& magma_singleton = MagmaSingleton::get_magma_singleton(); - magmablas_dlaset(MagmaFull, dim_, dim_, 0.0, 0.0, device_data_.get(), ld_, + magmablas_dlaset(MagmaFull, dim_, dim_, 0.0, 0.0, data_.get(), ld_, magma_singleton.queue_); +#else + memset(data_.get(), 0, dim_ * ld_ * sizeof(double)); +#endif } void ReplicatedMatrix::print(std::ostream& os, const int ia, const int ja, @@ -557,13 +828,16 @@ void ReplicatedMatrix::print(std::ostream& os, const int ia, const int ja, const int m = std::min(ma, std::max(dim_ - ia, 0)); const int n = std::min(na, std::max(dim_ - ja, 0)); +#ifdef HAVE_MAGMA auto& magma_singleton = MagmaSingleton::get_magma_singleton(); std::vector mat(dim_ * dim_); - magma_dgetmatrix(dim_, dim_, device_data_.get(), ld_, mat.data(), dim_, - magma_singleton.queue_); - + magma_dgetmatrix( + dim_, dim_, data_.get(), ld_, mat.data(), dim_, magma_singleton.queue_); +#else + const double* const mat = data_.get(); +#endif if (onpe0_) for (int i = ia; i < m; i++) { @@ -573,5 +847,12 @@ void ReplicatedMatrix::print(std::ostream& os, const int ia, const int ja, } } +// add shift to diagonal, to shift eigenvalues +void ReplicatedMatrix::shift(const double shift) +{ + double* mat = data_.get(); + for (int i = 0; i < dim_; i++) + mat[i + i 
* dim_] += shift; +} + void ReplicatedMatrix::printMM(std::ostream& os) const {} -#endif diff --git a/src/ReplicatedMatrix.h b/src/ReplicatedMatrix.h index 79c50599..f34914db 100644 --- a/src/ReplicatedMatrix.h +++ b/src/ReplicatedMatrix.h @@ -9,8 +9,6 @@ #ifndef MGMOL_REPLICATEDMATRIX_H #define MGMOL_REPLICATEDMATRIX_H -#ifdef HAVE_MAGMA - class ReplicatedVector; #include "SquareLocalMatrices.h" #include "SquareSubMatrix.h" @@ -32,7 +30,7 @@ class ReplicatedMatrix size_t ld_; // matrix data - std::unique_ptr device_data_; + std::unique_ptr data_; std::string name_; @@ -63,7 +61,7 @@ class ReplicatedMatrix std::string name() { return name_; } - double* const data() const { return device_data_.get(); } + double* data() const { return data_.get(); } int m() const { return dim_; } @@ -78,6 +76,8 @@ class ReplicatedMatrix } ReplicatedMatrix& operator=(const ReplicatedMatrix& rhs); + void assign(const double* const src, const int ld); + void assign(const ReplicatedMatrix& src, const int ib, const int jb); template @@ -126,6 +126,7 @@ class ReplicatedMatrix int iamax(const int j, double& val); double norm(char ty); double traceProduct(const ReplicatedMatrix&) const; + void shift(const double); void print( std::ostream& os, const int, const int, const int, const int) const; @@ -138,5 +139,3 @@ class ReplicatedMatrix void rotateSym(ReplicatedMatrix&, const ReplicatedMatrix&, ReplicatedMatrix&); #endif - -#endif diff --git a/src/ReplicatedMatrix2SquareLocalMatrices.cc b/src/ReplicatedMatrix2SquareLocalMatrices.cc new file mode 100644 index 00000000..3a2b0855 --- /dev/null +++ b/src/ReplicatedMatrix2SquareLocalMatrices.cc @@ -0,0 +1,52 @@ +// Copyright (c) 2017, Lawrence Livermore National Security, LLC and +// UT-Battelle, LLC. +// Produced at the Lawrence Livermore National Laboratory and the Oak Ridge +// National Laboratory. +// LLNL-CODE-743438 +// All rights reserved. +// This file is part of MGmol. For details, see https://github.com/llnl/mgmol. 
+// Please also read this link https://github.com/llnl/mgmol/LICENSE + +#include "ReplicatedMatrix2SquareLocalMatrices.h" + +ReplicatedMatrix2SquareLocalMatrices* + ReplicatedMatrix2SquareLocalMatrices::pinstance_ + = nullptr; +std::vector> + ReplicatedMatrix2SquareLocalMatrices::global_indexes_; + +Timer ReplicatedMatrix2SquareLocalMatrices::convert_tm_( + "ReplicatedMatrix2SquareLocalMatrices::convert"); + +void ReplicatedMatrix2SquareLocalMatrices::convert(const ReplicatedMatrix& rmat, + SquareLocalMatrices& lmat) +{ + convert_tm_.start(); + + const short nd = lmat.nmat(); + const int dim = lmat.n(); + const int nst = rmat.ld(); + + for (short i = 0; i < nd; i++) + { + double* dst = lmat.getSubMatrix(i); + double* src = rmat.data(); + for (int jj = 0; jj < dim; jj++) + { + const int st2 = global_indexes_[i][jj]; + if (st2 != -1) + { + for (int ii = 0; ii < dim; ii++) + { + const int st1 = global_indexes_[i][ii]; + if (st1 != -1) + { + dst[ii + dim * jj] = src[st1 + nst * st2]; + } + } + } + } + } + + convert_tm_.stop(); +} diff --git a/src/ReplicatedMatrix2SquareLocalMatrices.h b/src/ReplicatedMatrix2SquareLocalMatrices.h new file mode 100644 index 00000000..b3e05005 --- /dev/null +++ b/src/ReplicatedMatrix2SquareLocalMatrices.h @@ -0,0 +1,53 @@ +// Copyright (c) 2017, Lawrence Livermore National Security, LLC and +// UT-Battelle, LLC. +// Produced at the Lawrence Livermore National Laboratory and the Oak Ridge +// National Laboratory. +// LLNL-CODE-743438 +// All rights reserved. +// This file is part of MGmol. For details, see https://github.com/llnl/mgmol. 
+// Please also read this link https://github.com/llnl/mgmol/LICENSE + +#ifndef MGMOL_ReplicatedMatrix2SquareLocalMatrices_H +#define MGMOL_ReplicatedMatrix2SquareLocalMatrices_H + +#include "ReplicatedMatrix.h" +#include "SquareLocalMatrices.h" +#include "Timer.h" + +#include +#include + +class ReplicatedMatrix2SquareLocalMatrices +{ + static ReplicatedMatrix2SquareLocalMatrices* pinstance_; + + static Timer convert_tm_; + + static std::vector> global_indexes_; + +public: + static ReplicatedMatrix2SquareLocalMatrices* instance() + { + if (pinstance_ == nullptr) + { + pinstance_ = new ReplicatedMatrix2SquareLocalMatrices(); + } + return pinstance_; + } + + ReplicatedMatrix2SquareLocalMatrices() {} + + static void setup(const std::vector>& gids) + { + global_indexes_ = gids; + } + + ~ReplicatedMatrix2SquareLocalMatrices() {} + + void convert(const ReplicatedMatrix& dmat, + SquareLocalMatrices& lmat); + + static void printTimers(std::ostream& os) { convert_tm_.print(os); } +}; + +#endif diff --git a/src/ReplicatedVector.cc b/src/ReplicatedVector.cc index 85fdb69e..1d9c495c 100644 --- a/src/ReplicatedVector.cc +++ b/src/ReplicatedVector.cc @@ -6,92 +6,132 @@ // All rights reserved. // This file is part of MGmol. For details, see https://github.com/llnl/mgmol. 
// Please also read this link https://github.com/llnl/mgmol/LICENSE -#ifdef HAVE_MAGMA - #include "ReplicatedVector.h" #include "memory_space.h" +#ifdef HAVE_MAGMA #include "magma_v2.h" +#else +#include "blas2_c.h" +#endif +#ifdef HAVE_MAGMA using MemoryDev = MemorySpace::Memory; +#else +using MemoryDev = MemorySpace::Memory; +#endif ReplicatedVector::ReplicatedVector(const std::string name, const int n) - : dim_(n), device_data_(MemoryDev::allocate(dim_), MemoryDev::free) + : dim_(n), data_(MemoryDev::allocate(dim_), MemoryDev::free) { } ReplicatedVector::ReplicatedVector(const ReplicatedVector& v) - : dim_(v.dim_), device_data_(MemoryDev::allocate(dim_), MemoryDev::free) + : dim_(v.dim_), data_(MemoryDev::allocate(dim_), MemoryDev::free) { +#ifdef HAVE_MAGMA auto& magma_singleton = MagmaSingleton::get_magma_singleton(); - magma_dcopy(dim_, v.device_data_.get(), 1, device_data_.get(), 1, - magma_singleton.queue_); + magma_dcopy(dim_, v.data_.get(), 1, data_.get(), 1, magma_singleton.queue_); +#else + memcpy(data_.get(), v.data_.get(), dim_ * sizeof(double)); +#endif } ReplicatedVector::ReplicatedVector(const std::vector& v) - : dim_(v.size()), device_data_(MemoryDev::allocate(dim_), MemoryDev::free) + : dim_(v.size()), data_(MemoryDev::allocate(dim_), MemoryDev::free) { +#ifdef HAVE_MAGMA auto& magma_singleton = MagmaSingleton::get_magma_singleton(); - magma_dsetvector( - dim_, v.data(), 1, device_data_.get(), 1, magma_singleton.queue_); + magma_dsetvector(dim_, v.data(), 1, data_.get(), 1, magma_singleton.queue_); +#else + memcpy(data_.get(), v.data(), dim_ * sizeof(double)); +#endif } ReplicatedVector& ReplicatedVector::operator=(const ReplicatedVector& src) { +#ifdef HAVE_MAGMA auto& magma_singleton = MagmaSingleton::get_magma_singleton(); - magma_dcopy(dim_, src.device_data_.get(), 1, device_data_.get(), 1, - magma_singleton.queue_); + magma_dcopy( + dim_, src.data_.get(), 1, data_.get(), 1, magma_singleton.queue_); +#else + memcpy(data_.get(), 
src.data_.get(), dim_ * sizeof(double)); +#endif return *this; } void ReplicatedVector::axpy(const double alpha, const ReplicatedVector& x) { +#ifdef HAVE_MAGMA auto& magma_singleton = MagmaSingleton::get_magma_singleton(); - magma_daxpy(dim_, alpha, x.device_data_.get(), 1, device_data_.get(), 1, - magma_singleton.queue_); + magma_daxpy( + dim_, alpha, x.data_.get(), 1, data_.get(), 1, magma_singleton.queue_); +#else + int ione = 1; + double a = alpha; + DAXPY(&dim_, &a, x.data_.get(), &ione, data_.get(), &ione); +#endif } void ReplicatedVector::clear() { +#ifdef HAVE_MAGMA auto& magma_singleton = MagmaSingleton::get_magma_singleton(); std::vector zero(dim_, 0.); magma_dsetvector( - dim_, zero.data(), 1, device_data_.get(), 1, magma_singleton.queue_); + dim_, zero.data(), 1, data_.get(), 1, magma_singleton.queue_); +#else + memset(data_.get(), 0., dim_ * sizeof(double)); +#endif } double ReplicatedVector::nrm2() { +#ifdef HAVE_MAGMA auto& magma_singleton = MagmaSingleton::get_magma_singleton(); - return magma_dnrm2(dim_, device_data_.get(), 1, magma_singleton.queue_); + return magma_dnrm2(dim_, data_.get(), 1, magma_singleton.queue_); +#else + int ione = 1; + return DNRM2(&dim_, data_.get(), &ione); +#endif } double ReplicatedVector::dot(const ReplicatedVector& v) { +#ifdef HAVE_MAGMA auto& magma_singleton = MagmaSingleton::get_magma_singleton(); - return magma_ddot(dim_, device_data_.get(), 1, v.device_data_.get(), 1, - magma_singleton.queue_); + return magma_ddot( + dim_, data_.get(), 1, v.data_.get(), 1, magma_singleton.queue_); +#else + int ione = 1; + return DDOT(&dim_, data_.get(), &ione, v.data_.get(), &ione); +#endif } void ReplicatedVector::gemv(const char trans, const double alpha, const ReplicatedMatrix& a, const ReplicatedVector& b, const double beta) { +#ifdef HAVE_MAGMA magma_trans_t magma_trans = magma_trans_const(trans); auto& magma_singleton = MagmaSingleton::get_magma_singleton(); - magmablas_dgemv(magma_trans, dim_, dim_, alpha, 
a.device_data_.get(), a.ld_, - b.device_data_.get(), 1, beta, device_data_.get(), 1, - magma_singleton.queue_); -} - + magmablas_dgemv(magma_trans, dim_, dim_, alpha, a.data_.get(), a.ld_, + b.data_.get(), 1, beta, data_.get(), 1, magma_singleton.queue_); +#else + int ione = 1; + int lda = a.ld_; + DGEMV(&trans, &dim_, &dim_, &alpha, a.data_.get(), &lda, b.data_.get(), + &ione, &beta, data_.get(), &ione); #endif +} diff --git a/src/ReplicatedVector.h b/src/ReplicatedVector.h index 4140a73b..0b437944 100644 --- a/src/ReplicatedVector.h +++ b/src/ReplicatedVector.h @@ -9,8 +9,6 @@ #ifndef MGMOL_REPLICATEDVECTOR_H #define MGMOL_REPLICATEDVECTOR_H -#ifdef HAVE_MAGMA - #include "ReplicatedMatrix.h" #include @@ -19,14 +17,14 @@ class ReplicatedVector { int dim_; - std::unique_ptr device_data_; + std::unique_ptr data_; public: ReplicatedVector(const std::string name, const int n); ReplicatedVector(const ReplicatedVector&); ReplicatedVector(const std::vector&); ReplicatedVector& operator=(const ReplicatedVector&); - double* data() { return device_data_.get(); } + double* data() { return data_.get(); } void clear(); double dot(const ReplicatedVector& v); double nrm2(); @@ -38,6 +36,5 @@ class ReplicatedVector void gemm(const char transa, const char transb, const double alpha, const ReplicatedMatrix&, const ReplicatedVector&, const double beta); }; -#endif #endif diff --git a/src/ReplicatedWorkSpace.cc b/src/ReplicatedWorkSpace.cc index b6b5cb83..da76e2f7 100644 --- a/src/ReplicatedWorkSpace.cc +++ b/src/ReplicatedWorkSpace.cc @@ -12,6 +12,7 @@ #include "MGmol_MPI.h" #include "MGmol_blas1.h" +#include #include template @@ -54,13 +55,14 @@ void ReplicatedWorkSpace::initSquareMatrix( distmat.allgather(square_matrix_, ndim_); } -#ifdef HAVE_MAGMA template void ReplicatedWorkSpace::initSquareMatrix( const ReplicatedMatrix& mat) { - mat.get(square_matrix_, ndim_ * ndim_); + assert(square_matrix_ != nullptr); + assert(ndim_ > 0); + + mat.get(square_matrix_, ndim_); } -#endif 
template class ReplicatedWorkSpace; diff --git a/src/ReplicatedWorkSpace.h b/src/ReplicatedWorkSpace.h index 918481f9..c4d66f59 100644 --- a/src/ReplicatedWorkSpace.h +++ b/src/ReplicatedWorkSpace.h @@ -40,7 +40,11 @@ class ReplicatedWorkSpace } ReplicatedWorkSpace(const ReplicatedWorkSpace&); - ~ReplicatedWorkSpace() { delete[] square_matrix_; } + ~ReplicatedWorkSpace() + { + delete[] square_matrix_; + square_matrix_ = nullptr; + } public: Timer mpisum_tm() { return mpisum_tm_; } @@ -70,9 +74,7 @@ class ReplicatedWorkSpace void setUpperTriangularSquareMatrixToZero(); void initSquareMatrix(const dist_matrix::DistMatrix& tmat); -#ifdef HAVE_MAGMA void initSquareMatrix(const ReplicatedMatrix& mat); -#endif int getDim() { return ndim_; } }; diff --git a/src/Rho.cc b/src/Rho.cc index e80e74fe..bd89b6a2 100644 --- a/src/Rho.cc +++ b/src/Rho.cc @@ -592,10 +592,10 @@ template void Rho::computeRho>( template double Rho::dotWithRho( const float* const func) const; #endif -#ifdef HAVE_MAGMA template void Rho::computeRho( ExtendedGridOrbitals&, const ReplicatedMatrix&); template void Rho::computeRho( ExtendedGridOrbitals&, ExtendedGridOrbitals&, const ReplicatedMatrix&, const ReplicatedMatrix&, const ReplicatedMatrix&, const ReplicatedMatrix&); -#endif +template void Rho::computeRho( + LocGridOrbitals&, const ReplicatedMatrix&); diff --git a/src/SP2.cc b/src/SP2.cc index fa007212..6ff7625a 100644 --- a/src/SP2.cc +++ b/src/SP2.cc @@ -231,7 +231,6 @@ void SP2::getDM(dist_matrix::DistMatrix& submatM, // output getdm_tm_.stop(); } -#ifdef HAVE_MAGMA template <> void SP2::getDM(ReplicatedMatrix& submatM, // output const ReplicatedMatrix& invS) @@ -247,4 +246,3 @@ void SP2::getDM(ReplicatedMatrix& submatM, // output getdm_tm_.stop(); } -#endif diff --git a/src/computeHij.cc b/src/computeHij.cc index 16f74653..b93f05a2 100644 --- a/src/computeHij.cc +++ b/src/computeHij.cc @@ -34,8 +34,8 @@ void MGmol::addHlocal2matrix(LocGridOrbitals& orbitalsi, { computeHij_tm_.start(); -#if 
DEBUG - os_ << " addHlocal2matrix()" << endl; +#ifdef PRINT_OPERATIONS + os_ << " addHlocal2matrix() at line " << __LINE__ << std::endl; #endif hamiltonian_->addHlocal2matrix(orbitalsi, orbitalsj, mat, true); @@ -43,6 +43,15 @@ void MGmol::addHlocal2matrix(LocGridOrbitals& orbitalsi, computeHij_tm_.stop(); } +template <> +template <> +void MGmol::addHlocal2matrix( + LocGridOrbitals& orbitalsi, LocGridOrbitals& orbitalsj, ReplicatedMatrix& H) +{ + std::cerr << "Not implemented!" << std::endl; + MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); +} + template <> template <> void MGmol::computeHij(LocGridOrbitals& orbitals_i, @@ -52,7 +61,7 @@ void MGmol::computeHij(LocGridOrbitals& orbitals_i, const bool consolidate) { #ifdef PRINT_OPERATIONS - if (onpe0) os_ << "computeHij()" << endl; + if (onpe0) os_ << "computeHij() at line " << __LINE__ << std::endl; #endif // compute phi_i^T*Hnl*Phi_j @@ -93,7 +102,7 @@ void MGmol::computeHij(LocGridOrbitals& orbitals_i, const bool consolidate) { #ifdef PRINT_OPERATIONS - if (onpe0) os_ << "computeHij()" << endl; + if (onpe0) os_ << "computeHij() at line " << __LINE__ << std::endl; #endif kbpsi->computeHvnlMatrix(ions, mat); @@ -146,7 +155,7 @@ void MGmol::computeHij_private(OrbitalsType& orbitals_i, dist_matrix::DistMatrix& hij) { #ifdef PRINT_OPERATIONS - if (onpe0) os_ << "computeHij()" << endl; + if (onpe0) os_ << "computeHij() at line " << __LINE__ << std::endl; #endif hij.clear(); @@ -203,7 +212,7 @@ void MGmol::computeHij_private(OrbitalsType& orbitals_i, const KBPsiMatrixSparse* const kbpsi, dist_matrix::DistMatrix& hij) { #ifdef PRINT_OPERATIONS - if (onpe0) os_ << "computeHij()" << endl; + if (onpe0) os_ << "computeHij() at line " << __LINE__ << std::endl; #endif SquareSubMatrix submat(kbpsi->computeHvnlMatrix(ions)); @@ -222,7 +231,7 @@ void MGmol::computeHij(OrbitalsType& orbitals_i, ProjectedMatricesInterface* projmatrices) { #ifdef PRINT_OPERATIONS - if (onpe0) os_ << "computeHij()" << endl; + if (onpe0) os_ << "computeHij()
at line " << __LINE__ << std::endl; #endif kbpsi->computeHvnlMatrix(ions, projmatrices); @@ -259,7 +268,7 @@ void MGmol::computeHnlPhiAndAdd2HPhi(Ions& ions, { // H_nl #ifdef PRINT_OPERATIONS - if (onpe0) os_ << "computeHnlPhiAndAdd2HPhi()" << endl; + if (onpe0) os_ << "computeHnlPhiAndAdd2HPhi()" << std::endl; #endif Control& ct = *(Control::instance()); @@ -346,8 +355,8 @@ void MGmol::addHlocal2matrix( { computeHij_tm_.start(); -#if DEBUG - os_ << " addHlocal2matrix()" << endl; +#ifdef PRINT_OPERATIONS + os_ << " addHlocal2matrix()" << std::endl; #endif // add local H to mat @@ -373,8 +382,8 @@ void MGmol::getHpsiAndTheta(Ions& ions, OrbitalsType& phi, const int phi_it_index = phi.getIterativeIndex(); -#if DEBUG - os_ << " getHpsiAndTheta" << endl; +#ifdef PRINT_OPERATIONS + os_ << " getHpsiAndTheta" << std::endl; #endif hphi.assign(hamiltonian_->applyLocal(phi)); @@ -386,13 +395,14 @@ void MGmol::getHpsiAndTheta(Ions& ions, OrbitalsType& phi, { #ifdef PRINT_OPERATIONS if (onpe0) - os_ << "Hij matrix up to date, no computation necessary" << endl; + os_ << "Hij matrix up to date, no computation necessary" + << std::endl; #endif } else { #ifdef PRINT_OPERATIONS - if (onpe0) os_ << "build matrix Hij = Phi**T * H * Phi" << endl; + if (onpe0) os_ << "build matrix Hij = Phi**T * H * Phi" << std::endl; #endif proj_matrices_->clearSparseH(); @@ -426,8 +436,6 @@ template void MGmol::addHlocal2matrix( template void MGmol::addHlocal2matrix( LocGridOrbitals& orbitalsi, LocGridOrbitals& orbitalsj, dist_matrix::DistMatrix&); -#ifdef HAVE_MAGMA template void MGmol::addHlocal2matrix( ExtendedGridOrbitals& orbitalsi, ExtendedGridOrbitals& orbitalsj, ReplicatedMatrix& mat); -#endif diff --git a/src/linear_algebra/lapack_c.h b/src/linear_algebra/lapack_c.h index 4a72f48f..3f275c84 100644 --- a/src/linear_algebra/lapack_c.h +++ b/src/linear_algebra/lapack_c.h @@ -13,6 +13,8 @@ #include "fc_mangle.h" typedef const char* const Pchar; +typedef const int* const Pint; +typedef const 
double* const Pdouble; #define dsygst DSYGST #define dtrtrs DTRTRS @@ -24,15 +26,15 @@ extern "C" { void DSYEV(Pchar, Pchar, const int* const, double*, const int* const, double*, double*, const int* const, int*); - void dsygv(const int* const, Pchar, Pchar, const int* const, double*, + void DSYGV(const int* const, Pchar, Pchar, const int* const, double*, const int* const, double*, const int* const, double*, double*, const int* const, int*); void DPOTRI(Pchar, const int* const, double*, const int* const, int*); void DPOTRF(Pchar, const int* const, double*, const int* const, int*); void DPOTRS(Pchar, const int* const, const int* const, double*, const int* const, double*, const int* const, int*); - void dgetrf(int*, int*, double*, int*, int*, int*); - void dgetrs(Pchar, int*, int*, double*, int*, int*, double*, int*, int*); + void DGETRF(int*, int*, double*, int*, int*, int*); + void DGETRS(Pchar, int*, int*, double*, int*, int*, double*, int*, int*); void dpocon(Pchar, const int* const, double*, const int* const, double*, double*, double*, const int* const, int*); void dtrtrs(Pchar, Pchar, Pchar, const int* const, const int* const, @@ -43,6 +45,7 @@ extern "C" void dgesvd(Pchar, Pchar, int*, int*, double*, int*, double*, double*, int*, double*, int*, double*, int*, int*); double dlange(Pchar, int*, int*, double*, int*, double*); + void DLACPY(Pchar, Pint, Pint, Pdouble, Pint, Pdouble, Pint); } #endif diff --git a/src/local_matrices/LocalMatrices.h b/src/local_matrices/LocalMatrices.h index d2aacf8f..615cfab1 100644 --- a/src/local_matrices/LocalMatrices.h +++ b/src/local_matrices/LocalMatrices.h @@ -71,7 +71,7 @@ class LocalMatrices int m() const { return m_; } - const DataType* getSubMatrix(const int iloc = 0) const + DataType* getSubMatrix(const int iloc = 0) const { assert(iloc < (int)ptr_matrices_.size()); assert(ptr_matrices_[iloc] != NULL); diff --git a/src/quench.cc b/src/quench.cc index 6eed74bb..561ea263 100644 --- a/src/quench.cc +++ b/src/quench.cc @@ 
-497,16 +497,27 @@ int MGmol::outerSolve(OrbitalsType& orbitals, MGmol_MPI& mmpi = *(MGmol_MPI::instance()); const bool with_spin = (mmpi.nspin() > 1); -#ifdef HAVE_MAGMA - DavidsonSolver -#else - DavidsonSolver> -#endif - solver(os_, *ions_, hamiltonian_.get(), rho_.get(), - energy_.get(), electrostat_.get(), this, gids, ct.dm_mix, - with_spin); + if (ct.rmatrices) + { + DavidsonSolver + + solver(os_, *ions_, hamiltonian_.get(), rho_.get(), + energy_.get(), electrostat_.get(), this, gids, + ct.dm_mix, with_spin); + + retval = solver.solve(orbitals, work_orbitals); + } + else + { + DavidsonSolver> - retval = solver.solve(orbitals, work_orbitals); + solver(os_, *ions_, hamiltonian_.get(), rho_.get(), + energy_.get(), electrostat_.get(), this, gids, + ct.dm_mix, with_spin); + + retval = solver.solve(orbitals, work_orbitals); + } break; } diff --git a/src/read_config.cc b/src/read_config.cc index 3ca9ec69..9e9bad39 100644 --- a/src/read_config.cc +++ b/src/read_config.cc @@ -176,7 +176,9 @@ int read_config(int argc, char** argv, po::variables_map& vm, "solver for projected matrices")("ProjectedMatrices.printMM", po::value()->default_value(false), "print projected matrices in MM format")( - "LocalizationRegions.radius", + "ProjectedMatrices.replicated", + po::value()->default_value(false), + "use replicated projected matrices")("LocalizationRegions.radius", po::value()->default_value(1000.), "Localization regions radius")("LocalizationRegions.adaptive", po::value()->default_value(true), diff --git a/src/setup.cc b/src/setup.cc index 2ff6d79e..8ad59f9e 100644 --- a/src/setup.cc +++ b/src/setup.cc @@ -82,25 +82,26 @@ int MGmol::setupFromInput(const std::string filename) // data if (!ct.short_sighted) { - MatricesBlacsContext::instance().setup(mmpi.commSpin(), ct.numst); + ReplicatedWorkSpace::instance().setup(ct.numst); - dist_matrix::DistMatrix::setBlockSize(64); + if (!ct.rmatrices) + { + MatricesBlacsContext::instance().setup(mmpi.commSpin(), ct.numst); - 
dist_matrix::DistMatrix::setDefaultBlacsContext( - MatricesBlacsContext::instance().bcxt()); + dist_matrix::DistMatrix::setBlockSize(64); - ReplicatedWorkSpace::instance().setup(ct.numst); + dist_matrix::DistMatrix::setDefaultBlacsContext( + MatricesBlacsContext::instance().bcxt()); - dist_matrix::SparseDistMatrix::setNumTasksPerPartitioning( - 128); + dist_matrix::SparseDistMatrix< + DISTMATDTYPE>::setNumTasksPerPartitioning(128); - int npes = mmpi.size(); - setSparseDistMatriConsolidationNumber(npes); + int npes = mmpi.size(); + setSparseDistMatriConsolidationNumber(npes); + } } -#ifdef HAVE_MAGMA - ReplicatedMatrix::setMPIcomm(mmpi.commSpin()); -#endif + if (ct.rmatrices) ReplicatedMatrix::setMPIcomm(mmpi.commSpin()); LocGridOrbitals::setDotProduct(ct.dot_product_type); diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 4e4a365d..9fa70160 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -242,6 +242,7 @@ add_executable(testGramMatrix ${CMAKE_SOURCE_DIR}/src/GramMatrix.cc ${CMAKE_SOURCE_DIR}/src/Power.cc ${CMAKE_SOURCE_DIR}/src/magma_singleton.cc + ${CMAKE_SOURCE_DIR}/src/LocalMatrices2ReplicatedMatrix.cc ${CMAKE_SOURCE_DIR}/src/local_matrices/LocalMatrices.cc ${CMAKE_SOURCE_DIR}/src/local_matrices/SquareLocalMatrices.cc ${CMAKE_SOURCE_DIR}/src/ReplicatedMatrix.cc @@ -265,6 +266,7 @@ add_executable(testDensityMatrix ${CMAKE_SOURCE_DIR}/src/magma_singleton.cc ${CMAKE_SOURCE_DIR}/src/local_matrices/LocalMatrices.cc ${CMAKE_SOURCE_DIR}/src/local_matrices/SquareLocalMatrices.cc + ${CMAKE_SOURCE_DIR}/src/LocalMatrices2ReplicatedMatrix.cc ${CMAKE_SOURCE_DIR}/src/ReplicatedMatrix.cc ${CMAKE_SOURCE_DIR}/src/DistMatrix/DistMatrix.cc ${CMAKE_SOURCE_DIR}/src/DistMatrix/BlacsContext.cc @@ -487,6 +489,13 @@ add_test(NAME Davidson ${CMAKE_CURRENT_SOURCE_DIR}/Davidson/davidson.cfg ${CMAKE_CURRENT_SOURCE_DIR}/Davidson/coords.in ${CMAKE_CURRENT_SOURCE_DIR}/../potentials) +add_test(NAME DavidsonReplicated + COMMAND ${PYTHON_EXECUTABLE} 
${CMAKE_CURRENT_SOURCE_DIR}/Davidson/test.py + ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 4 ${MPIEXEC_PREFLAGS} + ${CMAKE_CURRENT_BINARY_DIR}/../src/mgmol-opt + ${CMAKE_CURRENT_SOURCE_DIR}/DavidsonReplicated/davidson.cfg + ${CMAKE_CURRENT_SOURCE_DIR}/Davidson/coords.in + ${CMAKE_CURRENT_SOURCE_DIR}/../potentials) add_test(NAME testSpinO2 COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/SpinO2/test.py ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 4 ${MPIEXEC_PREFLAGS} diff --git a/tests/DavidsonReplicated/davidson.cfg b/tests/DavidsonReplicated/davidson.cfg new file mode 100644 index 00000000..dc093a07 --- /dev/null +++ b/tests/DavidsonReplicated/davidson.cfg @@ -0,0 +1,35 @@ +verbosity=2 +xcFunctional=LDA +FDtype=4th +[Mesh] +nx=32 +ny=32 +nz=32 +[Domain] +ox=0. +oy=0. +oz=0. +lx=15.3 +ly=15.3 +lz=15.3 +[Potentials] +pseudopotential=pseudo.Al_LDA_FHI +[Poisson] +solver=CG +[Run] +type=QUENCH +[Quench] +solver=Davidson +max_steps=200 +atol=1.e-8 +[Orbitals] +nempty=10 +initial_type=random +temperature=300. 
+[ProjectedMatrices] +solver=exact +replicated=true +[DensityMatrix] +nb_inner_it=2 +[Restart] +output_level=0 From 9654f734496ce0a6dc9339ab2e3c2b55bcd5c135 Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Fri, 12 Sep 2025 08:43:45 -0400 Subject: [PATCH 69/99] Add timers for handwritten linear algebra loops --- src/MGmol.cc | 9 +++++++-- src/linear_algebra/mputils.cc | 22 +++++++++++++++++++--- 2 files changed, 26 insertions(+), 5 deletions(-) diff --git a/src/MGmol.cc b/src/MGmol.cc index 1de0ef8d..ded126b9 100644 --- a/src/MGmol.cc +++ b/src/MGmol.cc @@ -95,9 +95,11 @@ extern Timer dsyrk_tm; extern Timer ssyrk_tm; extern Timer mpsyrk_tm; extern Timer tttsyrk_tm; -extern Timer mpdot_tm; extern Timer ttdot_tm; +extern Timer loopdot_tm; extern Timer loopaxpy_tm; +extern Timer loopscal_tm; +extern Timer loopcp_tm; extern Timer get_NOLMO_tm; extern Timer get_MLWF_tm; extern Timer md_iterations_tm; @@ -860,9 +862,12 @@ void MGmol::printTimers() dsyrk_tm.print(os_); mpsyrk_tm.print(os_); tttsyrk_tm.print(os_); - mpdot_tm.print(os_); ttdot_tm.print(os_); + + loopcp_tm.print(os_); loopaxpy_tm.print(os_); + loopscal_tm.print(os_); + loopdot_tm.print(os_); dist_matrix::SubMatrices::printTimers(os_); diff --git a/src/linear_algebra/mputils.cc b/src/linear_algebra/mputils.cc index 53daee46..b49ba366 100644 --- a/src/linear_algebra/mputils.cc +++ b/src/linear_algebra/mputils.cc @@ -38,9 +38,13 @@ Timer bligemm_tm("bligemm"); Timer dsyrk_tm("dsyrk"); Timer ssyrk_tm("ssyrk"); -Timer mpdot_tm("mpdot"); Timer ttdot_tm("ttdot"); + +// Timers for hand written loops +Timer loopdot_tm("loopdot"); Timer loopaxpy_tm("loopaxpy"); +Timer loopscal_tm("loopscal"); +Timer loopcp_tm("loopcp"); /* Function definitions. 
See mputils.h for comments */ @@ -70,6 +74,8 @@ void LAU_H::MPscal(const int len, const double scal, double* dptr) template <> void LAU_H::MPscal(const int len, const double scal, float* dptr) { + loopscal_tm.start(); + MemorySpace::assert_is_host_ptr(dptr); if (scal == 1.) @@ -86,6 +92,8 @@ void LAU_H::MPscal(const int len, const double scal, float* dptr) dptr[k] = static_cast(scal * val); } } + + loopscal_tm.stop(); } // MemorySpace::Device @@ -159,7 +167,7 @@ double LAU_H::MPdot( MemorySpace::assert_is_host_ptr(xptr); MemorySpace::assert_is_host_ptr(yptr); - mpdot_tm.start(); + loopdot_tm.start(); double dot = 0.; for (int k = 0; k < len; k++) @@ -169,7 +177,7 @@ double LAU_H::MPdot( dot += val1 * val2; } - mpdot_tm.stop(); + loopdot_tm.stop(); return dot; } @@ -811,14 +819,22 @@ void MPcpy(float* const dest, const float* const src, const int n) void MPcpy( double* __restrict__ dest, const float* __restrict__ src, const int n) { + loopcp_tm.start(); + for (int i = 0; i < n; i++) dest[i] = src[i]; + + loopcp_tm.stop(); } void MPcpy( float* __restrict__ dest, const double* __restrict__ src, const int n) { + loopcp_tm.start(); + for (int i = 0; i < n; i++) dest[i] = src[i]; + + loopcp_tm.stop(); } template void LAU_H::MPgemm(const char transa, From c91738fb0684107b8ce13c9f484bdc20d3487fa9 Mon Sep 17 00:00:00 2001 From: "Fattebert J.-L." 
Date: Fri, 24 Oct 2025 10:29:20 -0400 Subject: [PATCH 70/99] Skip data reset before assigning arrays w ghosts * costly, and not necessary since data is assigned right after --- src/BlockVector.cc | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/BlockVector.cc b/src/BlockVector.cc index 671ba90d..941fa614 100644 --- a/src/BlockVector.cc +++ b/src/BlockVector.cc @@ -493,8 +493,6 @@ void BlockVector::setDataWithGhosts( set_data_tm_.start(); - data_wghosts->resetData(); - data_wghosts->set_updated_boundaries(false); // get number of mesh points From e1fed75961e69827491b96008d883da670d458bb Mon Sep 17 00:00:00 2001 From: "Fattebert J.-L." Date: Fri, 24 Oct 2025 15:17:22 -0400 Subject: [PATCH 71/99] Add timers in BlockVector class --- src/BlockVector.cc | 35 +++++++++++++++++++++++++++++++---- src/BlockVector.h | 20 ++++++++++++++++++++ 2 files changed, 51 insertions(+), 4 deletions(-) diff --git a/src/BlockVector.cc b/src/BlockVector.cc index 941fa614..386b19f8 100644 --- a/src/BlockVector.cc +++ b/src/BlockVector.cc @@ -262,9 +262,7 @@ BlockVector::BlockVector( setup(bv); - if (copy_data) - MemorySpace::Memory::copy( - bv.storage_, size_storage_, storage_); + if (copy_data) copyFrom(bv); } template @@ -278,10 +276,21 @@ BlockVector::operator=( setup(bv); + copyFrom(bv); + + return *this; +} + +template +void BlockVector::copyFrom( + const BlockVector& bv) +{ + copy_tm_.start(); + MemorySpace::Memory::copy( bv.storage_, size_storage_, storage_); - return *this; + copy_tm_.stop(); } template @@ -289,12 +298,17 @@ BlockVector& BlockVector::operator-=( const BlockVector& src) { + opminus_tm_.start(); + for (unsigned int i = 0; i < vect_.size(); i++) { ScalarType* vi = vect_[i]; ScalarType const* const si = src.vect_[i]; BV::subtract(numel_, si, vi); } + + opminus_tm_.stop(); + return *this; } @@ -303,11 +317,15 @@ template void BlockVector::assign( const pb::GridFuncVector& src) { + assign_tm_.start(); + for (unsigned int i = 0; i < vect_.size(); i++) { ScalarType* 
dest = vect_[i]; src.template getValues(i, dest); } + + assign_tm_.stop(); } template @@ -396,7 +414,11 @@ void BlockVector::scal(const double alpha) { assert(storage_ != nullptr); + scal_tm_.start(); + LinearAlgebraUtils::MPscal(size_storage_, alpha, storage_); + + scal_tm_.stop(); } template @@ -453,6 +475,7 @@ void BlockVector::axpy( LinearAlgebraUtils::MPaxpy( locnumel_, alpha, vect_[ix] + shift, vect_[iy] + shift); } + template void BlockVector::axpy(const double alpha, BlockVector& bv, const int ix, const int iy, @@ -519,6 +542,10 @@ void BlockVector::printTimers(std::ostream& os) { set_data_tm_.print(os); trade_data_tm_.print(os); + assign_tm_.print(os); + scal_tm_.print(os); + opminus_tm_.print(os); + copy_tm_.print(os); } template diff --git a/src/BlockVector.h b/src/BlockVector.h index 47cbef2f..e28b02ce 100644 --- a/src/BlockVector.h +++ b/src/BlockVector.h @@ -30,6 +30,10 @@ class BlockVector { static Timer set_data_tm_; static Timer trade_data_tm_; + static Timer assign_tm_; + static Timer scal_tm_; + static Timer opminus_tm_; + static Timer copy_tm_; static short n_instances_; static short subdivx_; @@ -88,6 +92,8 @@ class BlockVector ~BlockVector(); + void copyFrom(const BlockVector& bv); + const pb::GridFuncVector& getDataWGhosts() { assert(data_wghosts_ != 0); @@ -307,4 +313,18 @@ Timer BlockVector::set_data_tm_( template Timer BlockVector::trade_data_tm_( "BlockVector::trade_data"); + +template +Timer BlockVector::assign_tm_( + "BlockVector::assign"); + +template +Timer BlockVector::scal_tm_("BlockVector::scal"); + +template +Timer BlockVector::opminus_tm_( + "BlockVector::opminus"); + +template +Timer BlockVector::copy_tm_("BlockVector::copy"); #endif From dce67c636cb77f6447b822c1a00172bbcb450a9a Mon Sep 17 00:00:00 2001 From: "Fattebert J.-L." 
Date: Mon, 27 Oct 2025 11:02:42 -0400 Subject: [PATCH 72/99] Optimize preconditioner implementation * conversion between data type and storage type was taking a substantial amount of time --- src/OrbitalsPreconditioning.cc | 69 ++++++++++++++++++++++------------ src/OrbitalsPreconditioning.h | 16 +++++--- src/pb/GridFuncVector.cc | 21 ++++++++++- src/pb/GridFuncVector.h | 12 +++++- 4 files changed, 87 insertions(+), 31 deletions(-) diff --git a/src/OrbitalsPreconditioning.cc b/src/OrbitalsPreconditioning.cc index 3a5061df..79aa5ee4 100644 --- a/src/OrbitalsPreconditioning.cc +++ b/src/OrbitalsPreconditioning.cc @@ -27,17 +27,6 @@ OrbitalsPreconditioning::~OrbitalsPreconditioning() delete precond_; delete map2masks_; - - if (gfv_work_ != nullptr) - { - delete gfv_work_; - gfv_work_ = nullptr; - } - if (gfv_work2_ != nullptr) - { - delete gfv_work2_; - gfv_work2_ = nullptr; - } } template @@ -71,12 +60,24 @@ void OrbitalsPreconditioning::setup(T& orbitals, const short mg_levels, assert(orbitals.chromatic_number() == static_cast(orbitals.getOverlappingGids()[0].size())); - gfv_work_ = new pb::GridFuncVector(mygrid, - ct.bcWF[0], ct.bcWF[1], ct.bcWF[2], orbitals.getOverlappingGids()); + gfv_work1_ + = std::shared_ptr>( + new pb::GridFuncVector(mygrid, + ct.bcWF[0], ct.bcWF[1], ct.bcWF[2], + orbitals.getOverlappingGids())); gfv_work2_ - = new pb::GridFuncVector(mygrid, - ct.bcWF[0], ct.bcWF[1], ct.bcWF[2], orbitals.getOverlappingGids()); + = std::shared_ptr>( + new pb::GridFuncVector(mygrid, + ct.bcWF[0], ct.bcWF[1], ct.bcWF[2], + orbitals.getOverlappingGids())); + + if (!std::is_same::value) + gfv_work3_ + = std::shared_ptr>( + new pb::GridFuncVector(mygrid, + ct.bcWF[0], ct.bcWF[1], ct.bcWF[2], + orbitals.getOverlappingGids())); is_set_ = true; @@ -89,24 +90,46 @@ void OrbitalsPreconditioning::precond_mg(T& orbitals) assert(is_set_); assert(precond_ != nullptr); assert(gamma_ > 0.); - assert(gfv_work_ != nullptr); + assert(gfv_work1_); #ifdef PRINT_OPERATIONS if 
(onpe0) (*MPIdata::sout) << "T::precond_mg()..." << endl; #endif precond_tm_.start(); - gfv_work_->resetData(); + // initialize gfv_work2_ with data from orbitals + if (std::is_same::value) + { + orbitals.setDataWithGhosts(gfv_work2_.get()); + } + else + { + // Convert to data with ghosts first, then convert to different + // precision. This is more efficient in practice than doing precision + // conversion in setDataWithGhosts + orbitals.setDataWithGhosts(gfv_work3_.get()); - // store residual in GridFuncVector container - // used for ghost values (no ghost values needed) - orbitals.setDataWithGhosts(gfv_work2_); - gfv_work_->axpy((MGPRECONDTYPE)gamma_, *gfv_work2_); + gfv_work2_->copyFrom(*gfv_work3_); + } + + gfv_work1_->resetData(); + gfv_work1_->axpy((MGPRECONDTYPE)gamma_, *gfv_work2_); // block-implemented preconditioner - precond_->mg(*gfv_work_, *gfv_work2_, lap_type_, 0); + precond_->mg(*gfv_work1_, *gfv_work2_, lap_type_, 0); + + if (std::is_same::value) + { + orbitals.setPsi(*gfv_work1_); + } + else + { + // Convert to orbitals precision first + gfv_work3_->copyFrom(*gfv_work1_); - orbitals.setPsi(*gfv_work_); + // set orbitals to GridFuncVector second + orbitals.setPsi(*gfv_work3_); + } #ifdef PRINT_OPERATIONS if (onpe0) diff --git a/src/OrbitalsPreconditioning.h b/src/OrbitalsPreconditioning.h index 7a26261e..6937d4f9 100644 --- a/src/OrbitalsPreconditioning.h +++ b/src/OrbitalsPreconditioning.h @@ -34,9 +34,15 @@ class OrbitalsPreconditioning #endif Preconditioning* precond_; - pb::GridFuncVector* gfv_work_; - pb::GridFuncVector* gfv_work2_; + // work arrays with preconditioner precision + std::shared_ptr> + gfv_work1_; + std::shared_ptr> + gfv_work2_; + + // tmp work array for case ORBDTYPE!=MGPRECONDTYPE + std::shared_ptr> gfv_work3_; short lap_type_; @@ -53,10 +59,8 @@ class OrbitalsPreconditioning public: OrbitalsPreconditioning() { - is_set_ = false; - precond_ = nullptr; - gfv_work_ = nullptr; - gfv_work2_ = nullptr; + is_set_ = false; + 
precond_ = nullptr; }; ~OrbitalsPreconditioning(); diff --git a/src/pb/GridFuncVector.cc b/src/pb/GridFuncVector.cc index 7d9e375e..5c3fbda5 100644 --- a/src/pb/GridFuncVector.cc +++ b/src/pb/GridFuncVector.cc @@ -1643,7 +1643,7 @@ void GridFuncVector::extend3D( template GridFuncVector& GridFuncVector::operator-=( - const GridFuncVector& func) + const GridFuncVector& func) { assert(func.grid_.sizeg() == grid_.sizeg()); assert(func.grid_.ghost_pt() == grid_.ghost_pt()); @@ -1657,6 +1657,20 @@ GridFuncVector::operator-=( return *this; } +template +template +void GridFuncVector::copyFrom( + const GridFuncVector& src) +{ + copy_tm_.start(); + + MPcpy(memory_.get(), src.getDataPtr(0), nfunc_ * grid_.sizeg()); + + updated_boundaries_ = src.getUpdatedBoundariesFlag(); + + copy_tm_.stop(); +} + template template void GridFuncVector::axpy(const ScalarType2 alpha, @@ -2465,6 +2479,11 @@ template void GridFuncVector::axpy( const float alpha, const GridFuncVector& func); template void GridFuncVector::axpy( const double alpha, const GridFuncVector& func); +template void GridFuncVector::copyFrom( + const GridFuncVector& src); +template void GridFuncVector::copyFrom( + const GridFuncVector& src); + #ifdef HAVE_MAGMA template class GridFuncVector; template class GridFuncVector; diff --git a/src/pb/GridFuncVector.h b/src/pb/GridFuncVector.h index ed69b3b0..391466eb 100644 --- a/src/pb/GridFuncVector.h +++ b/src/pb/GridFuncVector.h @@ -38,6 +38,7 @@ class GridFuncVector static Timer wait_north_south_tm_; static Timer wait_up_down_tm_; static Timer wait_east_west_tm_; + static Timer copy_tm_; static Map2Masks* map2masks_; @@ -219,11 +220,13 @@ class GridFuncVector ScalarType* data() { return memory_.get(); } - ScalarType* getDataPtr(const int ifunc, const int index = 0) + ScalarType* getDataPtr(const int ifunc, const int index = 0) const { return memory_.get() + ifunc * grid_.sizeg() + index; } + bool getUpdatedBoundariesFlag() const { return updated_boundaries_; } + // assign values 
to one GridFunc from values in array src // (without ghosts) template @@ -467,6 +470,9 @@ class GridFuncVector void axpy(const ScalarType2 alpha, const GridFuncVector& func); + template + void copyFrom(const GridFuncVector& src); + template void getValues(const int k, InputScalarType* vv) const; @@ -491,6 +497,7 @@ class GridFuncVector finishExchangeNorthSouth_tm_.print(os); finishExchangeUpDown_tm_.print(os); finishExchangeEastWest_tm_.print(os); + copy_tm_.print(os); } }; @@ -521,6 +528,9 @@ Timer GridFuncVector::wait_up_down_tm_( template Timer GridFuncVector::wait_east_west_tm_( "GridFuncVector::waitEW"); +template +Timer GridFuncVector::copy_tm_( + "GridFuncVector::copy"); template Map2Masks* GridFuncVector::map2masks_(nullptr); From 0178b753d9daeba9d369f4d3492ef03017938d2d Mon Sep 17 00:00:00 2001 From: "Fattebert J.-L." Date: Tue, 28 Oct 2025 22:43:15 -0400 Subject: [PATCH 73/99] Differentiate timers in GridFuncVector based on datatype --- src/pb/GridFuncVector.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/pb/GridFuncVector.h b/src/pb/GridFuncVector.h index 391466eb..88f115d8 100644 --- a/src/pb/GridFuncVector.h +++ b/src/pb/GridFuncVector.h @@ -503,7 +503,7 @@ class GridFuncVector template Timer GridFuncVector::trade_bc_tm_( - "GridFuncVector::trade_bc"); + "GridFuncVector::trade_bc_" + std::to_string(sizeof(ScalarType))); template Timer GridFuncVector::trade_bc_colors_tm_( "GridFuncVector::trade_bc_colors"); @@ -512,25 +512,25 @@ Timer GridFuncVector::prod_tm_( "GridFuncVector::prod"); template Timer GridFuncVector::finishExchangeNorthSouth_tm_( - "GridFuncVector::finishExNorthSouth"); + "GridFuncVector::finishExNorthSouth_" + std::to_string(sizeof(ScalarType))); template Timer GridFuncVector::finishExchangeUpDown_tm_( - "GridFuncVector::finishExUpDown"); + "GridFuncVector::finishExUpDown_" + std::to_string(sizeof(ScalarType))); template Timer GridFuncVector::finishExchangeEastWest_tm_( - 
"GridFuncVector::finishExEastWest"); + "GridFuncVector::finishExEastWest_" + std::to_string(sizeof(ScalarType))); template Timer GridFuncVector::wait_north_south_tm_( - "GridFuncVector::waitNS"); + "GridFuncVector::waitNS_" + std::to_string(sizeof(ScalarType))); template Timer GridFuncVector::wait_up_down_tm_( - "GridFuncVector::waitUD"); + "GridFuncVector::waitUD_" + std::to_string(sizeof(ScalarType))); template Timer GridFuncVector::wait_east_west_tm_( - "GridFuncVector::waitEW"); + "GridFuncVector::waitEW_" + std::to_string(sizeof(ScalarType))); template Timer GridFuncVector::copy_tm_( - "GridFuncVector::copy"); + "GridFuncVector::copy_" + std::to_string(sizeof(ScalarType))); template Map2Masks* GridFuncVector::map2masks_(nullptr); From 6b2c0b53518a61a5d2d4c351431e3132fa09ae18 Mon Sep 17 00:00:00 2001 From: "Fattebert J.-L." Date: Wed, 29 Oct 2025 08:38:12 -0400 Subject: [PATCH 74/99] Rm redundant function --- src/BlockVector.cc | 16 ++-------------- src/BlockVector.h | 8 ++++++-- 2 files changed, 8 insertions(+), 16 deletions(-) diff --git a/src/BlockVector.cc b/src/BlockVector.cc index 386b19f8..21e2854c 100644 --- a/src/BlockVector.cc +++ b/src/BlockVector.cc @@ -262,7 +262,7 @@ BlockVector::BlockVector( setup(bv); - if (copy_data) copyFrom(bv); + if (copy_data) copyDataFrom(bv); } template @@ -276,23 +276,11 @@ BlockVector::operator=( setup(bv); - copyFrom(bv); + copyDataFrom(bv); return *this; } -template -void BlockVector::copyFrom( - const BlockVector& bv) -{ - copy_tm_.start(); - - MemorySpace::Memory::copy( - bv.storage_, size_storage_, storage_); - - copy_tm_.stop(); -} - template BlockVector& BlockVector::operator-=( diff --git a/src/BlockVector.h b/src/BlockVector.h index e28b02ce..f424d78f 100644 --- a/src/BlockVector.h +++ b/src/BlockVector.h @@ -92,8 +92,6 @@ class BlockVector ~BlockVector(); - void copyFrom(const BlockVector& bv); - const pb::GridFuncVector& getDataWGhosts() { assert(data_wghosts_ != 0); @@ -184,13 +182,19 @@ class BlockVector } 
void setToDataWithGhosts() { assign(*data_wghosts_); } + void copyDataFrom(const BlockVector& src) { + copy_tm_.start(); + assert(src.size_storage_ == size_storage_); assert(storage_ != nullptr); assert(src.storage_ != nullptr); + MemorySpace::Memory::copy( src.storage_, size_storage_, storage_); + + copy_tm_.stop(); } pb::GridFunc& getVectorWithGhosts(const int i) From 185c4c050d09917c41bc6f360058807ebe0861f0 Mon Sep 17 00:00:00 2001 From: "Fattebert J.-L." Date: Wed, 29 Oct 2025 08:59:24 -0400 Subject: [PATCH 75/99] Do not use syrk anymore * make double precision consistent with mixed-precision --- src/ExtendedGridOrbitals.cc | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/src/ExtendedGridOrbitals.cc b/src/ExtendedGridOrbitals.cc index e8c49896..a6ff6be2 100644 --- a/src/ExtendedGridOrbitals.cc +++ b/src/ExtendedGridOrbitals.cc @@ -980,20 +980,7 @@ void ExtendedGridOrbitals::getLocalOverlap( if (numst_ != 0) { -#ifdef MGMOL_USE_MIXEDP getLocalOverlap(*this, ss); -#else - ORBDTYPE* psi = block_vector_.vect(0); - for (short iloc = 0; iloc < subdivx_; iloc++) - { - ss.syrk(iloc, loc_numpt_, psi + iloc * loc_numpt_, lda_); - } - - // We may need the full matrix - ss.fillUpperWithLower(); - - ss.scal(grid_.vel()); -#endif } } @@ -1054,11 +1041,9 @@ void ExtendedGridOrbitals::computeLocalProduct(const ORBDTYPE* const array, for (short iloc = 0; iloc < subdivx_; iloc++) { LinearAlgebraUtils::MPgemmTN(numst_, numst_, - loc_numpt_, 1., a + iloc * loc_numpt_, lda, b + +iloc * loc_numpt_, - ldb, 0., ss.getRawPtr(iloc), ss.m()); + loc_numpt_, grid_.vel(), a + iloc * loc_numpt_, lda, + b + +iloc * loc_numpt_, ldb, 0., ss.getRawPtr(iloc), ss.m()); } - - ss.scal(grid_.vel()); } void ExtendedGridOrbitals::computeDiagonalElementsDotProduct( From 8e9902496efee68dee56cb98da4598147ac67f47 Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Wed, 29 Oct 2025 11:27:17 -0400 Subject: [PATCH 76/99] Nosubdivx extended (#376) * Assume subdivx=1 in 
ExtendedGridOrbitals --- src/ExtendedGridOrbitals.cc | 486 +++++++++++++++--------------------- src/ExtendedGridOrbitals.h | 13 +- src/SinCosOps.cc | 33 ++- 3 files changed, 226 insertions(+), 306 deletions(-) diff --git a/src/ExtendedGridOrbitals.cc b/src/ExtendedGridOrbitals.cc index a6ff6be2..54784e04 100644 --- a/src/ExtendedGridOrbitals.cc +++ b/src/ExtendedGridOrbitals.cc @@ -36,10 +36,8 @@ #define ORBITAL_OCCUPATION 2. std::string getDatasetName(const std::string& name, const int color); -short ExtendedGridOrbitals::subdivx_ = 0; -int ExtendedGridOrbitals::lda_ = 0; -int ExtendedGridOrbitals::numpt_ = 0; -int ExtendedGridOrbitals::loc_numpt_ = 0; +int ExtendedGridOrbitals::lda_ = 0; +int ExtendedGridOrbitals::numpt_ = 0; ExtendedGridOrbitalsPtrFunc ExtendedGridOrbitals::dotProduct_ = &ExtendedGridOrbitals::dotProductDiagonal; int ExtendedGridOrbitals::data_wghosts_index_ = -1; @@ -65,7 +63,7 @@ ExtendedGridOrbitals::ExtendedGridOrbitals(std::string name, MasksSet* corrmasks, ClusterOrbitals* local_cluster, const bool setup_flag) : name_(std::move(name)), proj_matrices_(proj_matrices), - block_vector_(my_grid, subdivx, bc), + block_vector_(my_grid, 1, bc), grid_(my_grid) { (void)lrs; @@ -74,18 +72,16 @@ ExtendedGridOrbitals::ExtendedGridOrbitals(std::string name, (void)local_cluster; // preconditions - assert(subdivx > 0); + assert(subdivx == 1); assert(proj_matrices != nullptr); for (short i = 0; i < 3; i++) assert(bc[i] == 0 || bc[i] == 1); assert(grid_.size() > 0); - subdivx_ = subdivx; - numst_ = numst; - numpt_ = grid_.size(); - lda_ = block_vector_.getld(); - loc_numpt_ = numpt_ / subdivx_; + numst_ = numst; + numpt_ = grid_.size(); + lda_ = block_vector_.getld(); assert(numst_ >= 0); @@ -216,7 +212,6 @@ void ExtendedGridOrbitals::initGauss( const double rc, const std::shared_ptr lrs) { assert(numst_ >= 0); - assert(subdivx_ > 0); MGmol_MPI& mmpi = *(MGmol_MPI::instance()); Control& ct = *(Control::instance()); @@ -230,7 +225,7 @@ void 
ExtendedGridOrbitals::initGauss( const double start1 = grid_.start(1); const double start2 = grid_.start(2); - const int dim0 = grid_.dim(0) / subdivx_; + const int dim0 = grid_.dim(0); const int dim1 = grid_.dim(1); const int dim2 = grid_.dim(2); @@ -255,34 +250,31 @@ void ExtendedGridOrbitals::initGauss( MemorySpace::Memory::set( ipsi_host_view, ipsi_size, 0); - for (short iloc = 0; iloc < subdivx_; iloc++) + const Vector3D& center(lrs->getCenter(icolor)); + Vector3D xc; + + xc[0] = start0; + for (int ix = 0; ix < dim0; ix++) { - const Vector3D& center(lrs->getCenter(icolor)); - Vector3D xc; + xc[1] = start1; - xc[0] = start0 + iloc * dim0 * hgrid[0]; - for (int ix = iloc * dim0; ix < (iloc + 1) * dim0; ix++) + for (int iy = 0; iy < dim1; iy++) { - xc[1] = start1; - - for (int iy = 0; iy < dim1; iy++) + xc[2] = start2; + for (int iz = 0; iz < dim2; iz++) { - xc[2] = start2; - for (int iz = 0; iz < dim2; iz++) - { - const double r = xc.minimage(center, ll, ct.bcWF); - if (r < rmax) - ipsi_host_view[ix * incx + iy * incy + iz] - = static_cast(exp(-r * r * invrc2)); - else - ipsi_host_view[ix * incx + iy * incy + iz] = 0.; - - xc[2] += hgrid[2]; - } - xc[1] += hgrid[1]; + const double r = xc.minimage(center, ll, ct.bcWF); + if (r < rmax) + ipsi_host_view[ix * incx + iy * incy + iz] + = static_cast(exp(-r * r * invrc2)); + else + ipsi_host_view[ix * incx + iy * incy + iz] = 0.; + + xc[2] += hgrid[2]; } - xc[0] += hgrid[0]; + xc[1] += hgrid[1]; } + xc[0] += hgrid[0]; } MemorySpace::Memory::copy_view_to_dev( @@ -303,7 +295,7 @@ void ExtendedGridOrbitals::initFourier() const double start1 = grid_.start(1) - grid_.origin(1); const double start2 = grid_.start(2) - grid_.origin(2); - const int dim0 = grid_.dim(0) / subdivx_; + const int dim0 = grid_.dim(0); const int dim1 = grid_.dim(1); const int dim2 = grid_.dim(2); @@ -340,30 +332,27 @@ void ExtendedGridOrbitals::initFourier() ipsi_host_view, numpt_, 0); // TODO this can be done on the GPU with OpenMP - for (short iloc 
= 0; iloc < subdivx_; iloc++) + double x = start0; + for (int ix = 0; ix < dim0; ix++) { - double x = start0 + iloc * dim0 * hgrid[0]; - for (int ix = iloc * dim0; ix < (iloc + 1) * dim0; ix++) - { - double y = start1; + double y = start1; - for (int iy = 0; iy < dim1; iy++) + for (int iy = 0; iy < dim1; iy++) + { + double z = start2; + for (int iz = 0; iz < dim2; iz++) { - double z = start2; - for (int iz = 0; iz < dim2; iz++) - { - ipsi_host_view[ix * incx + iy * incy + iz] - = 1. - - static_cast(std::cos(kk[0] * x) - * std::cos(kk[1] * y) - * std::cos(kk[2] * z)); - - z += hgrid[2]; - } - y += hgrid[1]; + ipsi_host_view[ix * incx + iy * incy + iz] + = 1. + - static_cast(std::cos(kk[0] * x) + * std::cos(kk[1] * y) + * std::cos(kk[2] * z)); + + z += hgrid[2]; } - x += hgrid[0]; + y += hgrid[1]; } + x += hgrid[0]; } MemorySpace::Memory::copy_view_to_dev( @@ -397,8 +386,6 @@ void ExtendedGridOrbitals::multiply_by_matrix( { prod_matrix_tm_.start(); - assert(subdivx_ > 0); - unsigned int const product_size = numst_ * ldp; ORBDTYPE* product_host_view = MemorySpace::Memory::allocate_host_view( @@ -407,24 +394,20 @@ void ExtendedGridOrbitals::multiply_by_matrix( product, product_size, product_host_view); memset(product_host_view, 0, ldp * numst_ * sizeof(ORBDTYPE)); - // loop over subdomains - for (short iloc = 0; iloc < subdivx_; iloc++) - { - unsigned int const phi_size = loc_numpt_ * numst_; - ORBDTYPE* phi_host_view = MemorySpace::Memory::allocate_host_view(phi_size); - MemorySpace::Memory::copy_view_to_host( - getPsi(0, iloc), phi_size, phi_host_view); + unsigned int const phi_size = numpt_ * numst_; + ORBDTYPE* phi_host_view + = MemorySpace::Memory::allocate_host_view( + phi_size); + MemorySpace::Memory::copy_view_to_host( + getPsi(0), phi_size, phi_host_view); - // TODO this can be done on the GPU - // Compute product for subdomain iloc - LinearAlgebraUtils::MPgemmNN(loc_numpt_, numst_, - numst_, 1., phi_host_view, lda_, matrix, numst_, 0., - product_host_view + 
iloc * loc_numpt_, ldp); + // TODO this can be done on the GPU + LinearAlgebraUtils::MPgemmNN(numpt_, numst_, numst_, 1., + phi_host_view, lda_, matrix, numst_, 0., product_host_view, ldp); + + MemorySpace::Memory::free_host_view( + phi_host_view); - MemorySpace::Memory::free_host_view( - phi_host_view); - } MemorySpace::Memory::copy_view_to_dev( product_host_view, product_size, product); MemorySpace::Memory::free_host_view( @@ -510,8 +493,8 @@ void ExtendedGridOrbitals::multiply_by_matrix( { prod_matrix_tm_.start(); - ORBDTYPE* product = new ORBDTYPE[loc_numpt_ * numst_]; - memset(product, 0, loc_numpt_ * numst_ * sizeof(ORBDTYPE)); + ORBDTYPE* product = new ORBDTYPE[numpt_ * numst_]; + memset(product, 0, numpt_ * numst_ * sizeof(ORBDTYPE)); ReplicatedWorkSpace& wspace( ReplicatedWorkSpace::instance()); @@ -519,32 +502,26 @@ void ExtendedGridOrbitals::multiply_by_matrix( matrix.allgather(work_matrix, numst_); - const size_t slnumpt = loc_numpt_ * sizeof(ORBDTYPE); + const size_t slnumpt = numpt_ * sizeof(ORBDTYPE); - // loop over subdomains - for (short iloc = 0; iloc < subdivx_; iloc++) - { - unsigned int const phi_size = loc_numpt_ * numst_; - ORBDTYPE* phi_host_view = MemorySpace::Memory::allocate_host_view(phi_size); - MemorySpace::Memory::copy_view_to_host( - getPsi(0, iloc), phi_size, phi_host_view); + unsigned int const phi_size = numpt_ * numst_; + ORBDTYPE* phi_host_view + = MemorySpace::Memory::allocate_host_view( + phi_size); + MemorySpace::Memory::copy_view_to_host( + getPsi(0), phi_size, phi_host_view); - // TODO this can be done on the GPU - // Compute loc_numpt_ rows (for subdomain iloc) - LinearAlgebraUtils::MPgemmNN(loc_numpt_, numst_, - numst_, 1., phi_host_view, lda_, work_matrix, numst_, 0., product, - loc_numpt_); + // TODO this can be done on the GPU + LinearAlgebraUtils::MPgemmNN(numpt_, numst_, numst_, 1., + phi_host_view, lda_, work_matrix, numst_, 0., product, numpt_); - for (int color = 0; color < numst_; color++) - memcpy(phi_host_view 
+ color * lda_, product + color * loc_numpt_, - slnumpt); + for (int color = 0; color < numst_; color++) + memcpy(phi_host_view + color * lda_, product + color * numpt_, slnumpt); - MemorySpace::Memory::copy_view_to_dev( - phi_host_view, phi_size, getPsi(0, iloc)); - MemorySpace::Memory::free_host_view( - phi_host_view); - } + MemorySpace::Memory::copy_view_to_dev( + phi_host_view, phi_size, getPsi(0)); + MemorySpace::Memory::free_host_view( + phi_host_view); delete[] product; @@ -858,11 +835,7 @@ int ExtendedGridOrbitals::read_func_hdf5( #else ORBDTYPE* buffer_dev = buffer; #endif - for (short iloc = 0; iloc < subdivx_; iloc++) - { - const int shift = iloc * loc_numpt_; - block_vector_.assignLocal(icolor, iloc, buffer_dev + shift); - } + block_vector_.assignLocal(icolor, 0, buffer_dev); #ifdef HAVE_MAGMA MemorySpace::Memory::free(buffer_dev); #endif @@ -901,7 +874,7 @@ void ExtendedGridOrbitals::computeMatB( const short bcolor = 32; - SquareLocalMatrices ss(subdivx_, numst_); + SquareLocalMatrices ss(1, numst_); ORBDTYPE* work = new ORBDTYPE[lda_ * bcolor]; memset(work, 0, lda_ * bcolor * sizeof(ORBDTYPE)); @@ -930,25 +903,18 @@ void ExtendedGridOrbitals::computeMatB( LapOper.rhs(getFuncWithGhosts(icolor + i), work + i * lda_); } - for (short iloc = 0; iloc < subdivx_; iloc++) - { - - MATDTYPE* ssiloc = ss.getRawPtr(iloc); + MATDTYPE* ss0 = ss.getRawPtr(0); - // calculate nf columns of ssiloc - LinearAlgebraUtils::MPgemmTN(numst_, nf, - loc_numpt_, 1., orbitals_psi_host_view + iloc * loc_numpt_, - lda_, work + iloc * loc_numpt_, lda_, 0., - ssiloc + icolor * numst_, numst_); - } + // calculate nf columns of ss0 + LinearAlgebraUtils::MPgemmTN(numst_, nf, numpt_, + grid_.vel(), orbitals_psi_host_view, lda_, work, lda_, 0., + ss0 + icolor * numst_, numst_); } MemorySpace::Memory::free_host_view( orbitals_psi_host_view); delete[] work; - const double vel = grid_.vel(); - ss.scal(vel); proj_matrices_->initializeMatB(ss); matB_tm_.stop(); @@ -974,9 +940,8 @@ void 
ExtendedGridOrbitals::getLocalOverlap( SquareLocalMatrices& ss) { assert(numst_ >= 0); - assert(loc_numpt_ > 0); + assert(numpt_ > 0); assert(grid_.vel() > 1.e-8); - assert(subdivx_ > 0); if (numst_ != 0) { @@ -1025,12 +990,11 @@ void ExtendedGridOrbitals::computeLocalProduct(const ORBDTYPE* const array, const int ld, LocalMatrices& ss, const bool transpose) { - assert(loc_numpt_ > 0); - assert(loc_numpt_ <= ld); + assert(numpt_ > 0); + assert(numpt_ <= ld); assert(array != nullptr); assert(numst_ != 0); assert(grid_.vel() > 0.); - assert(subdivx_ > 0); const ORBDTYPE* const a = transpose ? array : block_vector_.vect(0); const ORBDTYPE* const b = transpose ? block_vector_.vect(0) : array; @@ -1038,12 +1002,8 @@ void ExtendedGridOrbitals::computeLocalProduct(const ORBDTYPE* const array, const int lda = transpose ? ld : lda_; const int ldb = transpose ? lda_ : ld; - for (short iloc = 0; iloc < subdivx_; iloc++) - { - LinearAlgebraUtils::MPgemmTN(numst_, numst_, - loc_numpt_, grid_.vel(), a + iloc * loc_numpt_, lda, - b + +iloc * loc_numpt_, ldb, 0., ss.getRawPtr(iloc), ss.m()); - } + LinearAlgebraUtils::MPgemmTN(numst_, numst_, numpt_, + grid_.vel(), a, lda, b, ldb, 0., ss.getRawPtr(0), ss.m()); } void ExtendedGridOrbitals::computeDiagonalElementsDotProduct( @@ -1054,15 +1014,11 @@ void ExtendedGridOrbitals::computeDiagonalElementsDotProduct( for (int icolor = 0; icolor < numst_; icolor++) { - ss[icolor] = 0.; - for (short iloc = 0; iloc < subdivx_; iloc++) - { - double alpha - = LinearAlgebraUtils::MPdot(loc_numpt_, - orbitals.getPsi(icolor, iloc), getPsi(icolor, iloc)); + ss[icolor] = 0.; + double alpha = LinearAlgebraUtils::MPdot( + numpt_, orbitals.getPsi(icolor), getPsi(icolor)); - ss[icolor] += (DISTMATDTYPE)(alpha * grid_.vel()); - } + ss[icolor] += (DISTMATDTYPE)(alpha * grid_.vel()); } std::vector tmp(ss); MGmol_MPI& mmpi = *(MGmol_MPI::instance()); @@ -1072,7 +1028,7 @@ void ExtendedGridOrbitals::computeDiagonalElementsDotProduct( void 
ExtendedGridOrbitals::computeGram( dist_matrix::DistMatrix& gram_mat) { - SquareLocalMatrices ss(subdivx_, numst_); + SquareLocalMatrices ss(1, numst_); getLocalOverlap(ss); @@ -1086,7 +1042,7 @@ void ExtendedGridOrbitals::computeGram( void ExtendedGridOrbitals::computeGram(const ExtendedGridOrbitals& orbitals, dist_matrix::DistMatrix& gram_mat) { - SquareLocalMatrices ss(subdivx_, numst_); + SquareLocalMatrices ss(1, numst_); getLocalOverlap(orbitals, ss); @@ -1110,11 +1066,11 @@ void ExtendedGridOrbitals::computeGram(const int verbosity) (*MPIdata::sout) << "ExtendedGridOrbitals::computeGram()" << std::endl; #endif - assert(subdivx_ > 0); - assert(subdivx_ < 1000); + assert(1 > 0); + assert(1 < 1000); assert(numst_ >= 0); - SquareLocalMatrices ss(subdivx_, numst_); + SquareLocalMatrices ss(1, numst_); getLocalOverlap(ss); @@ -1147,7 +1103,7 @@ double ExtendedGridOrbitals::dotProductWithDM( { assert(proj_matrices_ != nullptr); - SquareLocalMatrices ss(subdivx_, numst_); + SquareLocalMatrices ss(1, numst_); computeLocalProduct(orbitals, ss); @@ -1159,7 +1115,7 @@ double ExtendedGridOrbitals::dotProductWithInvS( { assert(proj_matrices_ != nullptr); - SquareLocalMatrices ss(subdivx_, numst_); + SquareLocalMatrices ss(1, numst_); computeLocalProduct(orbitals, ss); @@ -1181,7 +1137,7 @@ double ExtendedGridOrbitals::dotProductSimple( { assert(proj_matrices_ != nullptr); - SquareLocalMatrices ss(subdivx_, numst_); + SquareLocalMatrices ss(1, numst_); computeLocalProduct(orbitals, ss); @@ -1199,8 +1155,8 @@ double ExtendedGridOrbitals::dotProduct( dot_product_tm_.start(); assert(numst_ >= 0); - assert(subdivx_ > 0); - assert(subdivx_ < 1000); + assert(1 > 0); + assert(1 < 1000); double dot = 0.; if (dot_type == 0) @@ -1246,8 +1202,8 @@ void ExtendedGridOrbitals::orthonormalizeLoewdin(const bool overlap_uptodate, SquareLocalMatrices* localP = matrixTransform; if (matrixTransform == nullptr) - localP = new SquareLocalMatrices( - subdivx_, numst_); + localP + = new 
SquareLocalMatrices(1, numst_); incrementIterativeIndex(); @@ -1303,12 +1259,10 @@ double ExtendedGridOrbitals::normState(const int gid) const assert(gid >= 0); double tmp = 0.; - for (short iloc = 0; iloc < subdivx_; iloc++) - { - // diagonal element - tmp += block_vector_.dot(gid, gid, iloc); - // cout<<"gid="< 1.e-15); diagS[color] = 1. / sqrt(diagS[color]); - for (short iloc = 0; iloc < subdivx_; iloc++) - { - block_vector_.scal(diagS[color], color, iloc); - } + block_vector_.scal(diagS[color], color, 0); } incrementIterativeIndex(); @@ -1498,11 +1432,11 @@ void ExtendedGridOrbitals::projectOut(ExtendedGridOrbitals& orbitals) void ExtendedGridOrbitals::projectOut(ORBDTYPE* const array, const int lda) { assert(lda > 1); - assert(loc_numpt_ > 0); + assert(numpt_ > 0); assert(numst_ >= 0); - assert(lda_ >= loc_numpt_); + assert(lda_ >= numpt_); - SquareLocalMatrices lmatrix(subdivx_, numst_); + SquareLocalMatrices lmatrix(1, numst_); if (numst_ != 0) computeLocalProduct(array, lda, lmatrix, false); @@ -1513,47 +1447,44 @@ void ExtendedGridOrbitals::projectOut(ORBDTYPE* const array, const int lda) #endif proj_matrices_->applyInvS(lmatrix); - ORBDTYPE* tproduct = new ORBDTYPE[loc_numpt_ * numst_]; - memset(tproduct, 0, loc_numpt_ * numst_ * sizeof(ORBDTYPE)); + ORBDTYPE* tproduct = new ORBDTYPE[numpt_ * numst_]; + memset(tproduct, 0, numpt_ * numst_ * sizeof(ORBDTYPE)); - // loop over subdomains - for (short iloc = 0; iloc < subdivx_; iloc++) - { - unsigned int const phi_size = loc_numpt_ * numst_; - ORBDTYPE* phi_host_view = MemorySpace::Memory::allocate_host_view(phi_size); - MemorySpace::Memory::copy_view_to_host( - getPsi(0, iloc), phi_size, phi_host_view); + unsigned int const phi_size = numpt_ * numst_; + ORBDTYPE* phi_host_view + = MemorySpace::Memory::allocate_host_view( + phi_size); + MemorySpace::Memory::copy_view_to_host( + getPsi(0), phi_size, phi_host_view); - MATDTYPE* localMat_iloc = lmatrix.getRawPtr(iloc); + MATDTYPE* localMat = lmatrix.getRawPtr(); 
- // TODO this can be done on the GPU - // Compute loc_numpt_ rows (for subdomain iloc) - LinearAlgebraUtils::MPgemmNN(loc_numpt_, numst_, - numst_, 1., phi_host_view, lda_, localMat_iloc, numst_, 0., - tproduct, loc_numpt_); + // TODO this can be done on the GPU + // Compute numpt_ rows (for subdomain 0) + LinearAlgebraUtils::MPgemmNN(numpt_, numst_, numst_, 1., + phi_host_view, lda_, localMat, numst_, 0., tproduct, numpt_); - MemorySpace::Memory::free_host_view( - phi_host_view); + MemorySpace::Memory::free_host_view( + phi_host_view); - ORBDTYPE* parray = array + iloc * loc_numpt_; - unsigned int const parray_size = numst_ * lda; - ORBDTYPE* parray_host_view = MemorySpace::Memory::allocate_host_view(parray_size); - MemorySpace::Memory::copy_view_to_host( - parray, parray_size, parray_host_view); + ORBDTYPE* parray = array + 0 * numpt_; + unsigned int const parray_size = numst_ * lda; + ORBDTYPE* parray_host_view + = MemorySpace::Memory::allocate_host_view( + parray_size); + MemorySpace::Memory::copy_view_to_host( + parray, parray_size, parray_host_view); - ORBDTYPE minus = -1.; - for (int j = 0; j < numst_; j++) - LinearAlgebraUtils::MPaxpy(loc_numpt_, minus, - tproduct + j * loc_numpt_, parray_host_view + j * lda); + ORBDTYPE minus = -1.; + for (int j = 0; j < numst_; j++) + LinearAlgebraUtils::MPaxpy( + numpt_, minus, tproduct + j * numpt_, parray_host_view + j * lda); - MemorySpace::Memory::copy_view_to_dev( - parray_host_view, parray_size, parray); + MemorySpace::Memory::copy_view_to_dev( + parray_host_view, parray_size, parray); - MemorySpace::Memory::free_host_view( - parray_host_view); - } + MemorySpace::Memory::free_host_view( + parray_host_view); delete[] tproduct; } @@ -1568,7 +1499,7 @@ void ExtendedGridOrbitals::initRand() std::vector yrand(grid_.gdim(1)); std::vector zrand(grid_.gdim(2)); - const int loc_length = dim[0] / subdivx_; + const int loc_length = dim[0] / 1; assert(loc_length > 0); assert(static_cast(loc_length) <= dim[0]); @@ -1597,28 
+1528,25 @@ void ExtendedGridOrbitals::initRand() for (unsigned int idx = 0; idx < grid_.gdim(2); idx++) zrand[idx] = ran0() - 0.5; - unsigned int const size = loc_numpt_; + unsigned int const size = numpt_; ORBDTYPE* psi_state_view = MemorySpace::Memory::allocate_host_view(size); MemorySpace::Memory::copy_view_to_host( psi(istate), size, psi_state_view); - for (short iloc = 0; iloc < subdivx_; iloc++) - { - for (int ix = loc_length * iloc; ix < loc_length * (iloc + 1); ix++) - for (unsigned int iy = 0; iy < dim[1]; iy++) - for (unsigned int iz = 0; iz < dim[2]; iz++) - { - const double alpha = xrand[xoff + ix] * yrand[yoff + iy] - * zrand[zoff + iz]; - - psi_state_view[ix * incx + iy * incy + iz] - = alpha * alpha; - - assert((ix * incx + iy * incy + iz) - < static_cast(lda_)); - } - } + for (int ix = loc_length * 0; ix < loc_length; ix++) + for (unsigned int iy = 0; iy < dim[1]; iy++) + for (unsigned int iz = 0; iz < dim[2]; iz++) + { + const double alpha = xrand[xoff + ix] * yrand[yoff + iy] + * zrand[zoff + iz]; + + psi_state_view[ix * incx + iy * incy + iz] = alpha * alpha; + + assert((ix * incx + iy * incy + iz) + < static_cast(lda_)); + } + MemorySpace::Memory::copy_view_to_dev( psi_state_view, size, psi(istate)); MemorySpace::Memory::free_host_view( @@ -1650,22 +1578,21 @@ void ExtendedGridOrbitals::addDotWithNcol2Matrix( MemorySpace::Memory::copy_view_to_host( block_vector_.vect(0), block_vector_size, block_vector_host_view); - for (short iloc = 0; iloc < subdivx_; iloc++) - { - unsigned int const phi_size = loc_numpt_ * numst_; - ORBDTYPE* phi_host_view = MemorySpace::Memory::allocate_host_view(phi_size); - MemorySpace::Memory::copy_view_to_host( - Apsi.getPsi(0, iloc), phi_size, phi_host_view); + unsigned int const phi_size = numpt_ * numst_; + ORBDTYPE* phi_host_view + = MemorySpace::Memory::allocate_host_view( + phi_size); + MemorySpace::Memory::copy_view_to_host( + Apsi.getPsi(0), phi_size, phi_host_view); - // TODO this can be done on the GPU - 
LinearAlgebraUtils::MPgemmTN(numst_, numst_, - loc_numpt_, vel, block_vector_host_view + iloc * loc_numpt_, lda_, - phi_host_view, lda_, 1., work.data(), numst_); + // TODO this can be done on the GPU + LinearAlgebraUtils::MPgemmTN(numst_, numst_, numpt_, vel, + block_vector_host_view + 0 * numpt_, lda_, phi_host_view, lda_, 1., + work.data(), numst_); + + MemorySpace::Memory::free_host_view( + phi_host_view); - MemorySpace::Memory::free_host_view( - phi_host_view); - } MemorySpace::Memory::free_host_view( block_vector_host_view); @@ -1713,14 +1640,11 @@ void ExtendedGridOrbitals::addDotWithNcol2Matrix( void ExtendedGridOrbitals::computeGlobalIndexes() { overlapping_gids_.clear(); - overlapping_gids_.resize(subdivx_); - for (short iloc = 0; iloc < subdivx_; iloc++) + overlapping_gids_.resize(1); + overlapping_gids_[0].resize(numst_, -1); + for (int gid = 0; gid < numst_; gid++) { - overlapping_gids_[iloc].resize(numst_, -1); - for (int gid = 0; gid < numst_; gid++) - { - overlapping_gids_[iloc][gid] = gid; - } + overlapping_gids_[0][gid] = gid; } } diff --git a/src/ExtendedGridOrbitals.h b/src/ExtendedGridOrbitals.h index 244a150f..92809507 100644 --- a/src/ExtendedGridOrbitals.h +++ b/src/ExtendedGridOrbitals.h @@ -60,7 +60,6 @@ class ExtendedGridOrbitals : public Orbitals static int lda_; // leading dimension for storage static int numpt_; - static int loc_numpt_; // static double (ExtendedGridOrbitals::*dotProduct_)(const // ExtendedGridOrbitals&); @@ -140,8 +139,6 @@ class ExtendedGridOrbitals : public Orbitals protected: const pb::Grid& grid_; - static short subdivx_; - // indexes corresponding to valid function in each subdomain static std::vector> overlapping_gids_; @@ -195,7 +192,7 @@ class ExtendedGridOrbitals : public Orbitals int numst(void) const { return numst_; } int getLda() const { return lda_; } - int getLocNumpt() const { return loc_numpt_; } + int getLocNumpt() const { return numpt_; } int getNumpt() const { return numpt_; } bool 
isCompatibleWith(const ExtendedGridOrbitals&) const { return true; } @@ -262,10 +259,10 @@ class ExtendedGridOrbitals : public Orbitals assert(new_storage != 0); block_vector_.setStorage(new_storage); } - ORBDTYPE* getPsi(const int i, const short iloc = 0) const + ORBDTYPE* getPsi(const int i, const int iloc = 0) const { - assert(iloc < subdivx_); - return block_vector_.vect(i) + iloc * loc_numpt_; + assert(iloc == 0); + return block_vector_.vect(i); } template void setPsi(const pb::GridFunc& gf_work, const int ist) @@ -283,7 +280,7 @@ class ExtendedGridOrbitals : public Orbitals assert(numst_ < 10000); return numst_; } - short subdivx(void) const { return subdivx_; } + short subdivx(void) const { return 1; } void printChromaticNumber(std::ostream& os) const { if (onpe0) os << " Max. chromatic_number: " << numst_ << std::endl; diff --git a/src/SinCosOps.cc b/src/SinCosOps.cc index 85f4ea79..e1c3fdd6 100644 --- a/src/SinCosOps.cc +++ b/src/SinCosOps.cc @@ -39,7 +39,7 @@ void SinCosOps::compute(const T& orbitals, vector>& a) int n2 = numst * numst; - int loc_length = dim0 / orbitals.subdivx_; + int loc_length = dim0 / orbitals.subdivx(); assert(loc_length > 0); assert(loc_length <= dim0); @@ -64,7 +64,7 @@ void SinCosOps::compute(const T& orbitals, vector>& a) MemorySpace::Memory::copy_view_to_host( orbitals.psi(0), size_psi, psi_view); - for (short iloc = 0; iloc < orbitals.subdivx_; iloc++) + for (short iloc = 0; iloc < orbitals.subdivx(); iloc++) { for (int icolor = 0; icolor < size; icolor++) @@ -153,7 +153,7 @@ void SinCosOps::computeSquare(const T& orbitals, vector>& a) const int dim1 = grid.dim(1); const int dim2 = grid.dim(2); - int loc_length = dim0 / orbitals.subdivx_; + int loc_length = dim0 / orbitals.subdivx(); assert(loc_length > 0); assert(loc_length <= dim0); @@ -200,7 +200,7 @@ void SinCosOps::computeSquare(const T& orbitals, vector>& a) } const int size = orbitals.chromatic_number(); - for (short iloc = 0; iloc < orbitals.subdivx_; iloc++) + for 
(short iloc = 0; iloc < orbitals.subdivx(); iloc++) { for (int icolor = 0; icolor < size; icolor++) @@ -274,7 +274,7 @@ void SinCosOps::computeSquare1D( int n2 = numst * numst; - int loc_length = dim0 / orbitals.subdivx_; + int loc_length = dim0 / orbitals.subdivx(); assert(loc_length > 0); assert(loc_length <= dim0); @@ -298,7 +298,7 @@ void SinCosOps::computeSquare1D( } const int size = orbitals.chromatic_number(); - for (short iloc = 0; iloc < orbitals.subdivx_; iloc++) + for (short iloc = 0; iloc < orbitals.subdivx(); iloc++) { for (int icolor = 0; icolor < size; icolor++) @@ -366,7 +366,7 @@ void SinCosOps::compute1D( int n2 = numst * numst; - int loc_length = dim0 / orbitals.subdivx_; + int loc_length = dim0 / orbitals.subdivx(); assert(loc_length > 0); assert(loc_length <= dim0); @@ -389,7 +389,7 @@ void SinCosOps::compute1D( const int size = orbitals.chromatic_number(); - for (short iloc = 0; iloc < orbitals.subdivx_; iloc++) + for (short iloc = 0; iloc < orbitals.subdivx(); iloc++) { for (int icolor = 0; icolor < size; icolor++) @@ -466,7 +466,7 @@ void SinCosOps::computeDiag2states( color_st[ic] = orbitals.getColor(st[ic]); } - int loc_length = dim0 / orbitals.subdivx_; + int loc_length = dim0 / orbitals.subdivx(); assert(loc_length > 0); assert(loc_length <= dim0); @@ -487,7 +487,7 @@ void SinCosOps::computeDiag2states( const short mycolor = color_st[ic]; if (mycolor >= 0) - for (short iloc = 0; iloc < orbitals.subdivx_; iloc++) + for (short iloc = 0; iloc < orbitals.subdivx(); iloc++) { if (orbitals.overlapping_gids_[iloc][mycolor] == st[ic]) @@ -558,7 +558,7 @@ void SinCosOps::compute2states( int n2 = 4; - int loc_length = dim0 / orbitals.subdivx_; + int loc_length = dim0 / orbitals.subdivx(); assert(loc_length > 0); assert(loc_length <= dim0); @@ -578,7 +578,7 @@ void SinCosOps::compute2states( const int mycolor = color_st[ic]; if (mycolor >= 0) - for (short iloc = 0; iloc < orbitals.subdivx_; iloc++) + for (short iloc = 0; iloc < orbitals.subdivx(); 
iloc++) { if (orbitals.overlapping_gids_[iloc][mycolor] == st[ic]) @@ -656,7 +656,7 @@ void SinCosOps::compute( const int dim1 = grid.dim(1); const int dim2 = grid.dim(2); - int loc_length = dim0 / orbitals1.subdivx_; + int loc_length = dim0 / orbitals1.subdivx(); assert(loc_length > 0); assert(loc_length <= dim0); @@ -671,9 +671,8 @@ void SinCosOps::compute( vector cosz; grid.getSinCosFunctions(sinx, siny, sinz, cosx, cosy, cosz); - for (short iloc = 0; iloc < orbitals1.subdivx_; iloc++) + for (short iloc = 0; iloc < orbitals1.subdivx(); iloc++) { - for (int color = 0; color < orbitals1.chromatic_number(); color++) { int i = orbitals1.overlapping_gids_[iloc][color]; @@ -741,7 +740,7 @@ void SinCosOps::computeDiag(const T& orbitals, const int dim1 = grid.dim(1); const int dim2 = grid.dim(2); - int loc_length = dim0 / orbitals.subdivx_; + int loc_length = dim0 / orbitals.subdivx(); assert(loc_length > 0); assert(loc_length <= dim0); @@ -768,7 +767,7 @@ void SinCosOps::computeDiag(const T& orbitals, const int size = orbitals.chromatic_number(); - for (short iloc = 0; iloc < orbitals.subdivx_; iloc++) + for (short iloc = 0; iloc < orbitals.subdivx(); iloc++) { for (short icolor = 0; icolor < size; icolor++) { From fc91b00392fbaca44004f7eb520e2e7ac4deb08e Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Fri, 31 Oct 2025 14:07:47 -0400 Subject: [PATCH 77/99] Fix several compiler warnings (#377) * Fix several compiler warnings --- src/DMStrategyFactory.cc | 17 ++++++++++------- src/DensityMatrix.cc | 4 ++-- src/DistMatrix/DistMatrix.cc | 4 ++-- src/DistMatrix/DistMatrix.h | 3 +-- src/DistMatrix/DistMatrixTools.cc | 2 +- src/Energy.cc | 11 +++++++++-- src/ExtendedGridOrbitals.cc | 4 ++++ src/ExtendedGridOrbitals.h | 2 +- src/HDFrestart.cc | 4 ++-- src/Ions.cc | 2 +- src/NonOrthoDMStrategy.cc | 4 ++-- src/PowerGen.cc | 4 ++-- src/ProjectedMatrices.cc | 11 ++++------- src/ProjectedMatrices.h | 2 +- src/ReplicatedMatrix.cc | 13 +++++++++---- src/ReplicatedMatrix.h | 4 
++-- src/ReplicatedVector.cc | 2 +- src/ReplicatedVector.h | 2 +- src/computeHij.cc | 4 ++++ 19 files changed, 59 insertions(+), 40 deletions(-) diff --git a/src/DMStrategyFactory.cc b/src/DMStrategyFactory.cc index d7b1274d..7d2132e4 100644 --- a/src/DMStrategyFactory.cc +++ b/src/DMStrategyFactory.cc @@ -36,14 +36,17 @@ DMStrategy* DMStrategyFactory DMStrategy* DMStrategyFactory::createHamiltonianMVP_DMStrategy(MPI_Comm comm, - std::ostream& os, Ions& ions, Rho* rho, - Energy* energy, Electrostatic* electrostat, - MGmol* mgmol_strategy, - ProjectedMatricesInterface* /*proj_matrices*/, LocGridOrbitals* orbitals, - const bool short_sighted) + std::ostream& /*os*/, Ions& /*ions*/, Rho* /*rho*/, + Energy* /*energy*/, Electrostatic* /*electrostat*/, + MGmol* /*mgmol_strategy*/, + ProjectedMatricesInterface* /*proj_matrices*/, + LocGridOrbitals* /*orbitals*/, const bool /*short_sighted*/) { - std::cerr << "Not implemented" << std::endl; - assert(0 == 1); + + std::cerr << "DMStrategy not implemented" << std::endl; + MPI_Abort(comm, EXIT_FAILURE); + + return nullptr; } template <> diff --git a/src/DensityMatrix.cc b/src/DensityMatrix.cc index 0214f0c4..a82ec288 100644 --- a/src/DensityMatrix.cc +++ b/src/DensityMatrix.cc @@ -78,7 +78,7 @@ void DensityMatrix::build( #endif // diagonal matrix with occ values in diagonal - MatrixType gamma("Gamma", &occ[0], dim_, dim_); + MatrixType gamma("Gamma", &occ[0], dim_); gamma.scal(orbital_occupation_); // rescale for spin // work_ = zmat*gamma with gamma symmetric @@ -132,7 +132,7 @@ void DensityMatrix::build() std::cout << "Warning: occupations not up to date to build DM!!!" 
<< std::endl; - MatrixType gamma("Gamma", &occupation_[0], dim_, dim_); + MatrixType gamma("Gamma", &occupation_[0], dim_); gamma.scal(orbital_occupation_); // rescale for spin *dm_ = gamma; diff --git a/src/DistMatrix/DistMatrix.cc b/src/DistMatrix/DistMatrix.cc index a8064fcb..89f19400 100644 --- a/src/DistMatrix/DistMatrix.cc +++ b/src/DistMatrix/DistMatrix.cc @@ -2036,12 +2036,12 @@ DistMatrix::DistMatrix(const std::string& name, const BlacsContext& bc, template DistMatrix::DistMatrix( - const std::string& name, const T* const dmat, const int m, const int n) + const std::string& name, const T* const dmat, const int m) : object_name_(name), bc_(*default_bc_), comm_global_(default_bc_->comm_global()) { - resize(m, n, distmatrix_def_block_size_, distmatrix_def_block_size_); + resize(m, m, distmatrix_def_block_size_, distmatrix_def_block_size_); setDiagonalValues(dmat); } diff --git a/src/DistMatrix/DistMatrix.h b/src/DistMatrix/DistMatrix.h index 175fbd9b..e07c8d56 100644 --- a/src/DistMatrix/DistMatrix.h +++ b/src/DistMatrix/DistMatrix.h @@ -243,8 +243,7 @@ class DistMatrix // Construct a diagonal DistMatrix from a vector dmat of diagonal elements DistMatrix(const std::string& name, const BlacsContext&, const T* const dmat, const int m, const int n); - DistMatrix( - const std::string& name, const T* const dmat, const int m, const int n); + DistMatrix(const std::string& name, const T* const dmat, const int m); DistMatrix& operator=(const DistMatrix& a); DistMatrix& assign(const DistMatrix&, const int, const int); diff --git a/src/DistMatrix/DistMatrixTools.cc b/src/DistMatrix/DistMatrixTools.cc index 4c3ed2ce..a3262bc4 100644 --- a/src/DistMatrix/DistMatrixTools.cc +++ b/src/DistMatrix/DistMatrixTools.cc @@ -120,7 +120,7 @@ void sqrtDistMatrix(dist_matrix::DistMatrix& u) { eigenvalues[i] = 1. 
/ sqrt(eigenvalues[i]); } - dist_matrix::DistMatrix g("g", &eigenvalues[0], nst, nst); + dist_matrix::DistMatrix g("g", &eigenvalues[0], nst); // u = z * g * z**T w.symm('r', 'l', 1., g, z, 0.); diff --git a/src/Energy.cc b/src/Energy.cc index f53dbc93..f2e2607f 100644 --- a/src/Energy.cc +++ b/src/Energy.cc @@ -60,9 +60,9 @@ double Energy::getEVrhoRho() const template double Energy::evaluateEnergyIonsInVext(Ions& ions) { +#ifdef HAVE_TRICUBIC double energy = 0.; -#ifdef HAVE_TRICUBIC if (!pot_.withVext()) return energy; //(*MPIdata::sout)<<"Energy::evaluateEnergyIonsInVext()"<::evaluateEnergyIonsInVext(Ions& ions) std::vector val(nions); pot_.getValVext(positions, val); + double energy = 0.; + // loop over ions again ion = ions.local_ions().begin(); int ion_index = 0; @@ -105,8 +107,13 @@ double Energy::evaluateEnergyIonsInVext(Ions& ions) MGmol_MPI& mmpi = *(MGmol_MPI::instance()); mmpi.allreduce(&energy, &tmp, 1, MPI_SUM); energy = tmp; -#endif + return energy; +#else + (void)ions; + + return 0.; +#endif } template diff --git a/src/ExtendedGridOrbitals.cc b/src/ExtendedGridOrbitals.cc index 54784e04..de4c8a87 100644 --- a/src/ExtendedGridOrbitals.cc +++ b/src/ExtendedGridOrbitals.cc @@ -72,7 +72,11 @@ ExtendedGridOrbitals::ExtendedGridOrbitals(std::string name, (void)local_cluster; // preconditions +#ifndef NDEBUG assert(subdivx == 1); +#else + (void)subdivx; +#endif assert(proj_matrices != nullptr); for (short i = 0; i < 3; i++) diff --git a/src/ExtendedGridOrbitals.h b/src/ExtendedGridOrbitals.h index 92809507..baa114d3 100644 --- a/src/ExtendedGridOrbitals.h +++ b/src/ExtendedGridOrbitals.h @@ -261,7 +261,7 @@ class ExtendedGridOrbitals : public Orbitals } ORBDTYPE* getPsi(const int i, const int iloc = 0) const { - assert(iloc == 0); + (void)iloc; return block_vector_.vect(i); } template diff --git a/src/HDFrestart.cc b/src/HDFrestart.cc index 34efde9a..2c9674d0 100644 --- a/src/HDFrestart.cc +++ b/src/HDFrestart.cc @@ -1655,13 +1655,13 @@ int 
HDFrestart::writeData(const T* const data, hid_t space_id, hid_t memspace, { if (precision == 1) { - assert(work_space_float_.size() == bsize_); + assert((int)work_space_float_.size() == bsize_); for (int i = 0; i < bsize_; i++) work_space_float_[i] = (float)data[i]; } else { - assert(work_space_double_.size() == bsize_); + assert((int)work_space_double_.size() == bsize_); for (int i = 0; i < bsize_; i++) work_space_double_[i] = (double)data[i]; } diff --git a/src/Ions.cc b/src/Ions.cc index 70ca5c36..7dd39229 100644 --- a/src/Ions.cc +++ b/src/Ions.cc @@ -2766,7 +2766,7 @@ void Ions::gatherForces(std::vector& forces, const int root) const const int index = ion->index(); // std::cout << "index = " << index << std::endl; assert(index < num_ions_); - assert(forces.size() >= 3 * index); + assert((int)forces.size() >= 3 * index); assert(index < num_ions_); ion->getForce(&forces[3 * index]); } diff --git a/src/NonOrthoDMStrategy.cc b/src/NonOrthoDMStrategy.cc index 28a8f1d5..a4c09afd 100644 --- a/src/NonOrthoDMStrategy.cc +++ b/src/NonOrthoDMStrategy.cc @@ -21,7 +21,7 @@ NonOrthoDMStrategy::NonOrthoDMStrategy( } template -void NonOrthoDMStrategy::initialize(OrbitalsType& orbitals) +void NonOrthoDMStrategy::initialize(OrbitalsType& /*orbitals*/) { Control& ct = *(Control::instance()); MGmol_MPI& mmpi = *(MGmol_MPI::instance()); @@ -35,7 +35,7 @@ void NonOrthoDMStrategy::initialize(OrbitalsType& orbitals) } template -int NonOrthoDMStrategy::update(OrbitalsType& orbitals) +int NonOrthoDMStrategy::update(OrbitalsType& /*orbitals*/) { assert(proj_matrices_ != nullptr); diff --git a/src/PowerGen.cc b/src/PowerGen.cc index 89482ca0..3bd9dc74 100644 --- a/src/PowerGen.cc +++ b/src/PowerGen.cc @@ -46,10 +46,10 @@ void PowerGen::computeGenEigenInterval(MatrixType& mat, // initialize solution data // initial guess - VectorType sol("sol", m); + VectorType sol(m); sol = vec1_; // initialize local solution data // new solution - VectorType new_sol("new_sol", m); + VectorType 
new_sol(m); // get norm of initial sol double alpha = sol.nrm2(); diff --git a/src/ProjectedMatrices.cc b/src/ProjectedMatrices.cc index 29964c33..8a3b773a 100644 --- a/src/ProjectedMatrices.cc +++ b/src/ProjectedMatrices.cc @@ -167,8 +167,8 @@ void ProjectedMatrices::convert( } template <> -void ProjectedMatrices>::setupMPI( - const std::vector>& global_indexes) +void ProjectedMatrices>:: + setupGlobalIndexes(const std::vector>& global_indexes) { MGmol_MPI& mmpi = *(MGmol_MPI::instance()); MPI_Comm comm = mmpi.commSpin(); @@ -179,12 +179,9 @@ void ProjectedMatrices>::setupMPI( } template <> -void ProjectedMatrices::setupMPI( +void ProjectedMatrices::setupGlobalIndexes( const std::vector>& global_indexes) { - MGmol_MPI& mmpi = *(MGmol_MPI::instance()); - MPI_Comm comm = mmpi.commSpin(); - LocalMatrices2ReplicatedMatrix::setup(global_indexes); ReplicatedMatrix2SquareLocalMatrices::setup(global_indexes); @@ -200,7 +197,7 @@ void ProjectedMatrices::setup( global_indexes_ = global_indexes; - setupMPI(global_indexes); + setupGlobalIndexes(global_indexes); localX_.reset(new SquareLocalMatrices( subdiv_, chromatic_number_)); diff --git a/src/ProjectedMatrices.h b/src/ProjectedMatrices.h index 142dbf70..fa5add85 100644 --- a/src/ProjectedMatrices.h +++ b/src/ProjectedMatrices.h @@ -132,7 +132,7 @@ class ProjectedMatrices : public ProjectedMatricesInterface void convert(const SquareLocalMatrices& src, MatrixType& dst); - void setupMPI(const std::vector>&); + void setupGlobalIndexes(const std::vector>&); std::string getMatrixType(); diff --git a/src/ReplicatedMatrix.cc b/src/ReplicatedMatrix.cc index 48a679a8..2f266ab2 100644 --- a/src/ReplicatedMatrix.cc +++ b/src/ReplicatedMatrix.cc @@ -65,8 +65,8 @@ ReplicatedMatrix::ReplicatedMatrix(const std::string name, const int n) clear(); } -ReplicatedMatrix::ReplicatedMatrix(const std::string name, - const double* const diagonal, const int m, const int n) +ReplicatedMatrix::ReplicatedMatrix( + const std::string name, const double* 
const diagonal, const int m) : dim_(m), ld_(roundup(dim_)), data_(Memory::allocate(dim_ * ld_), Memory::free), @@ -349,7 +349,7 @@ void ReplicatedMatrix::setRandom(const double minv, const double maxv) #else double* data = data_.get(); for (int j = 0; j < dim_; j++) - for (int i = 0; i < dim_ * ld_; i++) + for (int i = 0; i < dim_ * (int)ld_; i++) data[j * ld_ + i] = mat[j * dim_ + i]; #endif } @@ -855,4 +855,9 @@ void ReplicatedMatrix::shift(const double shift) mat[i + i * dim_] += shift; } -void ReplicatedMatrix::printMM(std::ostream& os) const {} +void ReplicatedMatrix::printMM(std::ostream& os) const +{ + (void)os; + std::cerr << "ReplicatedMatrix::printMM() not implemented" << std::endl; + MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); +} diff --git a/src/ReplicatedMatrix.h b/src/ReplicatedMatrix.h index f34914db..ff72b296 100644 --- a/src/ReplicatedMatrix.h +++ b/src/ReplicatedMatrix.h @@ -52,8 +52,8 @@ class ReplicatedMatrix ReplicatedMatrix(const std::string name, const int n); // construct diagonal matrix from diagonal values - ReplicatedMatrix(const std::string name, const double* const diagonal, - const int m, const int n); + ReplicatedMatrix( + const std::string name, const double* const diagonal, const int m); ReplicatedMatrix(const ReplicatedMatrix&); diff --git a/src/ReplicatedVector.cc b/src/ReplicatedVector.cc index 1d9c495c..f5e07ff0 100644 --- a/src/ReplicatedVector.cc +++ b/src/ReplicatedVector.cc @@ -22,7 +22,7 @@ using MemoryDev = MemorySpace::Memory; using MemoryDev = MemorySpace::Memory; #endif -ReplicatedVector::ReplicatedVector(const std::string name, const int n) +ReplicatedVector::ReplicatedVector(const int n) : dim_(n), data_(MemoryDev::allocate(dim_), MemoryDev::free) { } diff --git a/src/ReplicatedVector.h b/src/ReplicatedVector.h index 0b437944..a9c03e68 100644 --- a/src/ReplicatedVector.h +++ b/src/ReplicatedVector.h @@ -20,7 +20,7 @@ class ReplicatedVector std::unique_ptr data_; public: - ReplicatedVector(const std::string name, const 
int n); + ReplicatedVector(const int n); ReplicatedVector(const ReplicatedVector&); ReplicatedVector(const std::vector&); ReplicatedVector& operator=(const ReplicatedVector&); diff --git a/src/computeHij.cc b/src/computeHij.cc index b93f05a2..d25834a8 100644 --- a/src/computeHij.cc +++ b/src/computeHij.cc @@ -48,6 +48,10 @@ template <> void MGmol::addHlocal2matrix( LocGridOrbitals& orbitalsi, LocGridOrbitals& orbitalsj, ReplicatedMatrix& H) { + (void)orbitalsi; + (void)orbitalsj; + (void)H; + std::cerr << "Not implemented!" << std::endl; MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); } From bc43b775e7265f1946130a8260542b4fc11ed57f Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Mon, 3 Nov 2025 11:56:32 -0500 Subject: [PATCH 78/99] Fix some stdout content (#378) --- src/DavidsonSolver.cc | 1 + src/MGmol.cc | 3 +-- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/DavidsonSolver.cc b/src/DavidsonSolver.cc index b3c22d54..b5cbb5ba 100644 --- a/src/DavidsonSolver.cc +++ b/src/DavidsonSolver.cc @@ -827,6 +827,7 @@ int DavidsonSolver::solve( assert(pmat); pmat->printOccupations(os_); + proj_mat2N_->printEigenvalues(os_); } if (mmpi.PE0() && ct.verbose > 1) diff --git a/src/MGmol.cc b/src/MGmol.cc index ded126b9..b692d7a6 100644 --- a/src/MGmol.cc +++ b/src/MGmol.cc @@ -521,8 +521,7 @@ void MGmol::run() constraints_->projectOutForces(20); - if ((ions_->getNumIons() <= 1024 || ct.verbose > 2) - && ct.verbose > 0) + if ((ions_->getNumIons() <= 1024 || ct.verbose > 1)) ions_->printForcesGlobal(os_); finalEnergy(); From 741af36c4a3f662d9aa95c8bf195dda0d80196b6 Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Mon, 3 Nov 2025 20:42:17 -0500 Subject: [PATCH 79/99] Fix header of asci files generated by read_hdf5 (#379) --- util/read_hdf5.py | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/util/read_hdf5.py b/util/read_hdf5.py index b9bf42bd..2d2ce1a9 100644 --- a/util/read_hdf5.py +++ 
b/util/read_hdf5.py @@ -665,13 +665,10 @@ def writeAtomsXYZ(xyz_filename, filename, origin, lattice): ''' MAIN ''' -# *argv (sys.argv in Python) - Takes an Arbitrary Number of Paramters -# and Stores Them in a List - # USAGE: -# ssh -l user cab.llnl.gov (If Using Cab System) # python read_hdf5.py [ -bov ] file.hdf5 datasetName - +# The '-bov' option should be used to visulaize data with VisIt +# For a plain asci file with data in one column, no '-bov' option should be used def main(): ''' Variables ''' @@ -800,25 +797,30 @@ def main(): else: print('\nWrite data...\n') - + ndec = 4 with open(output_data_filename, 'w') as tfile: - tfile.write('\n' + str( origin[0] ) + '\t' - + str( origin[1] ) + '\t' - + str( origin[2] ) + '\t' - + str( origin[0] + lattice[0] ) + '\t' - + str( origin[1] + lattice[1] ) + '\t' - + str( origin[2] + lattice[2] ) - + ' // cell corners') - - tfile.write(str(dim[0]) + '\t' + str(dim[1]) + '\t' - + str(dim[2]) + ' // mesh') - + tfile.write( str(round(lattice[0],ndec)) + '\t' + + str(round(lattice[1],ndec)) + '\t' + + str(round(lattice[2],ndec)) + + ' // domain dimensions [Bohr]\n') + tfile.write( str(round(origin[0],ndec)) + '\t' + + str(round(origin[1],ndec)) + '\t' + + str(round(origin[2],ndec)) + + ' // lower left corner [Bohr]\n') + tfile.write( str(dim[0]) + '\t' + + str(dim[1]) + '\t' + + str(dim[2]) + ' // mesh') + + count=0 for i in range( dim[0] ): for j in range( dim[1] ): for k in range( dim[2] ): row = (i * incx) + (j * incy) + k tfile.write('\n' + str( data[row] )) + count = count + 1 + + print("Written {} values.".format(count)) # Release Data and Attributes del data From 6aab60df5fcc65873cc734c4598104ccb75f334f Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Thu, 6 Nov 2025 08:20:03 -0500 Subject: [PATCH 80/99] Tune timers (#380) * Move Hloc Timer to not include ghosts value filling * Remove double counted Timer in KBPsiMatrixSparse * Remove unused code in KBPsiMatrixSparse --- src/FIRE.h | 8 ++-- src/Hamiltonian.cc | 
4 +- src/KBPsiMatrixSparse.cc | 87 ++++++++++------------------------------ src/KBPsiMatrixSparse.h | 11 ++--- 4 files changed, 31 insertions(+), 79 deletions(-) diff --git a/src/FIRE.h b/src/FIRE.h index f4570c10..f25e44ce 100644 --- a/src/FIRE.h +++ b/src/FIRE.h @@ -10,19 +10,17 @@ #ifndef MGMOL_FIRE_H #define MGMOL_FIRE_H +#include "ConstraintSet.h" +#include "Electrostatic.h" #include "Energy.h" #include "FIRE_IonicStepper.h" #include "IonicAlgorithm.h" #include "Ions.h" #include "LocalizationRegions.h" #include "MGmol.h" +#include "MasksSet.h" #include "Rho.h" -class MasksSet; -class Electrostatic; -class KBPsiMatrixInterface; -class ConstraintSet; - template class FIRE : public IonicAlgorithm { diff --git a/src/Hamiltonian.cc b/src/Hamiltonian.cc index bdd9b166..7d685e54 100644 --- a/src/Hamiltonian.cc +++ b/src/Hamiltonian.cc @@ -86,7 +86,6 @@ const T& Hamiltonian::applyLocal(T& phi, const bool force) template void Hamiltonian::applyLocal(const int ncolors, T& phi, T& hphi) { - apply_Hloc_tm_.start(); #ifdef PRINT_OPERATIONS if (onpe0) (*MPIdata::sout) << "Hamiltonian::applyLocal() for " << ncolors @@ -102,6 +101,9 @@ void Hamiltonian::applyLocal(const int ncolors, T& phi, T& hphi) phi.setDataWithGhosts(); phi.trade_boundaries(); + // start timer after filling ghost values + apply_Hloc_tm_.start(); + using memory_space_type = typename T::memory_space_type; if (ct.Mehrstellen()) diff --git a/src/KBPsiMatrixSparse.cc b/src/KBPsiMatrixSparse.cc index 70859870..e9ac4544 100644 --- a/src/KBPsiMatrixSparse.cc +++ b/src/KBPsiMatrixSparse.cc @@ -276,8 +276,9 @@ void KBPsiMatrixSparse::scaleWithKBcoeff(const Ions& ions) // potential, and add them into Aij. // Note: neglecting the small matrix elements reduces the size of hnlij and thus // reduces the size of communications later on. 
-void KBPsiMatrixSparse::computeHvnlMatrix(const KBPsiMatrixSparse* const kbpsi2, - const Ion& ion, SquareSubMatrix& hnlij) const +void KBPsiMatrixSparse::computeHvnlElementsIon( + const KBPsiMatrixSparse* const kbpsi2, const Ion& ion, + SquareSubMatrix& hnlij) const { assert(ion.here()); @@ -346,8 +347,9 @@ void KBPsiMatrixSparse::computeHvnlMatrix(const KBPsiMatrixSparse* const kbpsi2, } } -void KBPsiMatrixSparse::computeHvnlMatrix(const KBPsiMatrixSparse* const kbpsi2, - const Ion& ion, VariableSizeMatrix& mat) const +void KBPsiMatrixSparse::computeHvnlElementsIon( + const KBPsiMatrixSparse* const kbpsi2, const Ion& ion, + VariableSizeMatrix& mat) const { assert(ion.here()); @@ -451,7 +453,7 @@ SquareSubMatrix KBPsiMatrixSparse::computeHvnlMatrix( // (distribution of work AND Hvnlij contributions) for (const auto& ion : ions.local_ions()) { - computeHvnlMatrix((KBPsiMatrixSparse*)kbpsi2, *ion, Aij); + computeHvnlElementsIon((KBPsiMatrixSparse*)kbpsi2, *ion, Aij); } computeHvnlMatrix_tm_.stop(); @@ -470,7 +472,7 @@ void KBPsiMatrixSparse::computeHvnlMatrix( // (distribution of work AND Hvnlij contributions) for (const auto& ion : ions.local_ions()) { - computeHvnlMatrix((KBPsiMatrixSparse*)kbpsi2, *ion, mat); + computeHvnlElementsIon((KBPsiMatrixSparse*)kbpsi2, *ion, mat); } computeHvnlMatrix_tm_.stop(); @@ -480,63 +482,8 @@ void KBPsiMatrixSparse::computeHvnlMatrix( const KBPsiMatrixInterface* const kbpsi2, const Ions& ions, ProjectedMatricesInterface* proj_matrices) const { - computeHvnlMatrix_tm_.start(); - SquareSubMatrix hnlij(computeHvnlMatrix(kbpsi2, ions)); proj_matrices->setLocalMatrixElementsHnl(hnlij); - - computeHvnlMatrix_tm_.stop(); -} - -// build elements of matrix (assumed to be symmetric) -// assemble resulting matrix in variable sparse matrix format -void KBPsiMatrixSparse::getPsiKBPsiSym( - const Ion& ion, VariableSizeMatrix& sm) -{ - std::vector gids; - ion.getGidsNLprojs(gids); - std::vector kbsigns; - ion.getKBsigns(kbsigns); - - const short 
nprojs = (short)gids.size(); - for (short i = 0; i < nprojs; i++) - { - const int gid = gids[i]; - const double coeff = (double)kbsigns[i]; - int* rindex = (int*)(kbpsimat_->getTableValue(gid)); - if (rindex == nullptr) continue; - const int lrindex = *rindex; - const int nnzrow1 = kbpsimat_->nnzrow(lrindex); - for (int p1 = 0; p1 < nnzrow1; p1++) - { - double kbpsielement1 = kbpsimat_->getRowEntry(lrindex, p1); - if (fabs(kbpsielement1) <= tolKBpsi) continue; - const int st1 = kbpsimat_->getColumnIndex(lrindex, p1); - for (int p2 = 0; p2 < nnzrow1; p2++) - { - double kbpsielement2 = kbpsimat_->getRowEntry(lrindex, p2); - if (fabs(kbpsielement2) <= tolKBpsi) continue; - const double alpha = coeff * kbpsielement1 * kbpsielement2; - /* set hnlij */ - if (fabs(alpha) > tolKBpsi) - { - const int st2 = kbpsimat_->getColumnIndex(lrindex, p2); - sm.insertMatrixElement(st1, st2, alpha, ADD, true); - } - } - } - } -} - -void KBPsiMatrixSparse::getPsiKBPsiSym( - const Ions& ions, VariableSizeMatrix& sm) -{ - // loop over all the ions - // parallelization over ions by including only those centered in subdomain - for (const auto& ion : ions.local_ions()) - { - getPsiKBPsiSym(*ion, sm); - } } template @@ -654,20 +601,26 @@ double KBPsiMatrixSparse::getEvnl( } double KBPsiMatrixSparse::getTraceDM( - const int gid, const DISTMATDTYPE* const mat_X, const int numst) const + const int gid, const double* const mat_X, const int numst) const { + trace_tm_.start(); + double trace = 0.; int* rindex = (int*)(*kbpsimat_).getTableValue(gid); - if (rindex == nullptr) return trace; + if (rindex == nullptr) + { + trace_tm_.stop(); + return trace; + } const int lrindex = *rindex; const int nnzrow1 = kbpsimat_->nnzrow(lrindex); for (int p1 = 0; p1 < nnzrow1; p1++) { - const int st1 = kbpsimat_->getColumnIndex(lrindex, p1); - const double t1 = (*kbpsimat_).getRowEntry(lrindex, p1); - const DISTMATDTYPE* const pmat = &mat_X[st1 * numst]; + const int st1 = kbpsimat_->getColumnIndex(lrindex, p1); + 
const double t1 = (*kbpsimat_).getRowEntry(lrindex, p1); + const double* const pmat = &mat_X[st1 * numst]; for (int p2 = 0; p2 < nnzrow1; p2++) { @@ -677,6 +630,8 @@ double KBPsiMatrixSparse::getTraceDM( } } + trace_tm_.stop(); + return trace; } diff --git a/src/KBPsiMatrixSparse.h b/src/KBPsiMatrixSparse.h index 073fbddc..82321d1d 100644 --- a/src/KBPsiMatrixSparse.h +++ b/src/KBPsiMatrixSparse.h @@ -66,15 +66,12 @@ class KBPsiMatrixSparse : public KBPsiMatrixInterface return (*kbBpsimat_).get_value(gid, st); } - void computeHvnlMatrix(const KBPsiMatrixSparse* const kbpsi, const Ion&, - SquareSubMatrix& mat) const; - void computeHvnlMatrix(const KBPsiMatrixSparse* const kbpsi2, + // private functions working with single Ion + void computeHvnlElementsIon(const KBPsiMatrixSparse* const kbpsi, + const Ion&, SquareSubMatrix& mat) const; + void computeHvnlElementsIon(const KBPsiMatrixSparse* const kbpsi2, const Ion& ion, VariableSizeMatrix& mat) const; - void computeHvnlMatrix(const KBPsiMatrixSparse* const kbpsi2, const Ion&, - ProjectedMatricesInterface*) const; - void getPsiKBPsiSym(const Ions& ions, VariableSizeMatrix& sm); - void getPsiKBPsiSym(const Ion& ion, VariableSizeMatrix& sm); template void computeKBpsi(const Ions& ions, OrbitalsType& orbitals, const int first_color, const int nb_colors, const bool flag); From 15def8cb8a608c1b6395faf556990dfc704c6c52 Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Thu, 6 Nov 2025 21:15:09 -0500 Subject: [PATCH 81/99] Pass in Hamiltonian object to functions that need it (#381) --- src/DMStrategyFactory.cc | 17 ++++++--- src/DMStrategyFactory.h | 10 +++-- src/Hamiltonian.cc | 17 +++++++++ src/HamiltonianMVPSolver.cc | 14 ++++--- src/HamiltonianMVPSolver.h | 3 ++ src/HamiltonianMVP_DMStrategy.cc | 10 +++-- src/HamiltonianMVP_DMStrategy.h | 9 +++-- src/MGmol.cc | 15 ++++---- src/MGmol.h | 6 --- src/MVPSolver.cc | 10 ++--- src/MVPSolver.h | 9 +++-- src/MVP_DMStrategy.cc | 7 ++-- src/MVP_DMStrategy.h | 12 +++--- 
src/computeHij.cc | 63 ++------------------------------ 14 files changed, 89 insertions(+), 113 deletions(-) diff --git a/src/DMStrategyFactory.cc b/src/DMStrategyFactory.cc index 7d2132e4..61aec57c 100644 --- a/src/DMStrategyFactory.cc +++ b/src/DMStrategyFactory.cc @@ -7,6 +7,7 @@ DMStrategy* DMStrategyFactory* rho, Energy* energy, Electrostatic* electrostat, + Hamiltonian* hamiltonian, MGmol* mgmol_strategy, ProjectedMatricesInterface* /*proj_matrices*/, LocGridOrbitals* orbitals, const bool short_sighted) @@ -16,7 +17,7 @@ DMStrategy* DMStrategyFactory* dm_strategy = new HamiltonianMVP_DMStrategy, ProjectedMatricesSparse, LocGridOrbitals>(comm, os, ions, rho, - energy, electrostat, mgmol_strategy, orbitals); + energy, electrostat, hamiltonian, mgmol_strategy, orbitals); return dm_strategy; } @@ -27,7 +28,7 @@ DMStrategy* DMStrategyFactory, ProjectedMatrices>, LocGridOrbitals>(comm, os, ions, rho, energy, electrostat, - mgmol_strategy, orbitals); + hamiltonian, mgmol_strategy, orbitals); return dm_strategy; } @@ -38,6 +39,7 @@ DMStrategy* DMStrategyFactory::createHamiltonianMVP_DMStrategy(MPI_Comm comm, std::ostream& /*os*/, Ions& /*ions*/, Rho* /*rho*/, Energy* /*energy*/, Electrostatic* /*electrostat*/, + Hamiltonian* /*hamiltonian*/, MGmol* /*mgmol_strategy*/, ProjectedMatricesInterface* /*proj_matrices*/, LocGridOrbitals* /*orbitals*/, const bool /*short_sighted*/) @@ -55,6 +57,7 @@ DMStrategy* DMStrategyFactory* rho, Energy* energy, Electrostatic* electrostat, + Hamiltonian* hamiltonian, MGmol* mgmol_strategy, ProjectedMatricesInterface* /*proj_matrices*/, ExtendedGridOrbitals* orbitals, const bool short_sighted) @@ -64,8 +67,8 @@ DMStrategy* DMStrategyFactory* dm_strategy = new HamiltonianMVP_DMStrategy, ProjectedMatrices>, - ExtendedGridOrbitals>( - comm, os, ions, rho, energy, electrostat, mgmol_strategy, orbitals); + ExtendedGridOrbitals>(comm, os, ions, rho, energy, electrostat, + hamiltonian, mgmol_strategy, orbitals); return dm_strategy; } @@ -75,6 
+78,7 @@ DMStrategy* DMStrategyFactory::createHamiltonianMVP_DMStrategy(MPI_Comm comm, std::ostream& os, Ions& ions, Rho* rho, Energy* energy, Electrostatic* electrostat, + Hamiltonian* hamiltonian, MGmol* mgmol_strategy, ProjectedMatricesInterface* /*proj_matrices*/, ExtendedGridOrbitals* orbitals, const bool short_sighted) @@ -83,8 +87,9 @@ DMStrategy* DMStrategyFactory* dm_strategy = new HamiltonianMVP_DMStrategy, ExtendedGridOrbitals>( - comm, os, ions, rho, energy, electrostat, mgmol_strategy, orbitals); + ProjectedMatrices, ExtendedGridOrbitals>(comm, os, + ions, rho, energy, electrostat, hamiltonian, mgmol_strategy, + orbitals); return dm_strategy; } diff --git a/src/DMStrategyFactory.h b/src/DMStrategyFactory.h index 24f4f272..11a42c3d 100644 --- a/src/DMStrategyFactory.h +++ b/src/DMStrategyFactory.h @@ -26,7 +26,8 @@ class DMStrategyFactory public: static DMStrategy* create(MPI_Comm comm, std::ostream& os, Ions& ions, Rho* rho, Energy* energy, - Electrostatic* electrostat, MGmol* mgmol_strategy, + Electrostatic* electrostat, Hamiltonian* hamiltonian, + MGmol* mgmol_strategy, ProjectedMatricesInterface* proj_matrices, OrbitalsType* orbitals) { Control& ct = *(Control::instance()); @@ -36,14 +37,14 @@ class DMStrategyFactory if (ct.DM_solver() == DMNonLinearSolverType::MVP) { dm_strategy = new MVP_DMStrategy(comm, os, - ions, rho, energy, electrostat, mgmol_strategy, + ions, rho, energy, electrostat, hamiltonian, mgmol_strategy, orbitals->getOverlappingGids(), proj_matrices, ct.use_old_dm()); } else if (ct.DM_solver() == DMNonLinearSolverType::HMVP) { dm_strategy = createHamiltonianMVP_DMStrategy(comm, os, ions, rho, - energy, electrostat, mgmol_strategy, proj_matrices, orbitals, - ct.short_sighted); + energy, electrostat, hamiltonian, mgmol_strategy, proj_matrices, + orbitals, ct.short_sighted); } else { @@ -84,6 +85,7 @@ class DMStrategyFactory static DMStrategy* createHamiltonianMVP_DMStrategy( MPI_Comm comm, std::ostream& os, Ions& ions, Rho* rho, 
Energy* energy, Electrostatic* electrostat, + Hamiltonian* hamiltonian, MGmol* mgmol_strategy, ProjectedMatricesInterface* proj_matrices, OrbitalsType* orbitals, const bool); diff --git a/src/Hamiltonian.cc b/src/Hamiltonian.cc index 7d685e54..5d3683ec 100644 --- a/src/Hamiltonian.cc +++ b/src/Hamiltonian.cc @@ -216,6 +216,18 @@ void Hamiltonian::addHlocal2matrix( phi1.addDotWithNcol2Matrix(*hlphi_, hij); } +template <> +template <> +void Hamiltonian::addHlocal2matrix(LocGridOrbitals& phi1, + LocGridOrbitals& phi2, ReplicatedMatrix& hij, const bool force) +{ + applyLocal(phi2, force); + + // phi1.addDotWithNcol2Matrix(*hlphi_, hij); + std::cerr << "Not implemented!" << std::endl; + MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); +} + template void Hamiltonian::addHlocalij( T& phi1, T& phi2, ProjectedMatricesInterface* proj_matrices) @@ -293,3 +305,8 @@ template void Hamiltonian::addHlocalij( ExtendedGridOrbitals&, ProjectedMatricesInterface* proj_matrices); template void Hamiltonian::addHlocal2matrix(LocGridOrbitals&, LocGridOrbitals&, VariableSizeMatrix& mat, const bool force); +template void Hamiltonian::addHlocal2matrix(LocGridOrbitals&, + LocGridOrbitals&, dist_matrix::DistMatrix& hij, + const bool force); +template void Hamiltonian::addHlocal2matrix( + LocGridOrbitals&, LocGridOrbitals&, ReplicatedMatrix&, const bool force); diff --git a/src/HamiltonianMVPSolver.cc b/src/HamiltonianMVPSolver.cc index 891767d1..715f52ba 100644 --- a/src/HamiltonianMVPSolver.cc +++ b/src/HamiltonianMVPSolver.cc @@ -37,8 +37,9 @@ template HamiltonianMVPSolver::HamiltonianMVPSolver(std::ostream& os, Ions& ions, Rho* rho, Energy* energy, - Electrostatic* electrostat, MGmol* mgmol_strategy, - const int numst, const short n_inner_steps, const MatrixType& hinit, + Electrostatic* electrostat, Hamiltonian* hamiltonian, + MGmol* mgmol_strategy, const int numst, + const short n_inner_steps, const MatrixType& hinit, const bool try_shorter_intervals) : os_(os), n_inner_steps_(n_inner_steps), @@ 
-50,6 +51,7 @@ HamiltonianMVPSolver::solve( // compute new h11 for the current potential by adding local part to // nonlocal components h11 = h11nl; - mgmol_strategy_->addHlocal2matrix(orbitals, orbitals, h11); + hamiltonian_->addHlocal2matrix(orbitals, orbitals, h11); projmatrices->assignH(h11); projmatrices->setHB2H(); @@ -177,7 +179,7 @@ int HamiltonianMVPSolver::solve( // update H and compute energy at midpoint h11 = h11nl; - mgmol_strategy_->addHlocal2matrix(orbitals, orbitals, h11); + hamiltonian_->addHlocal2matrix(orbitals, orbitals, h11); projmatrices->assignH(h11); projmatrices->setHB2H(); @@ -212,7 +214,7 @@ int HamiltonianMVPSolver::solve( // update H with new potential h11 = h11nl; - mgmol_strategy_->addHlocal2matrix(orbitals, orbitals, h11); + hamiltonian_->addHlocal2matrix(orbitals, orbitals, h11); projmatrices->assignH(h11); projmatrices->setHB2H(); @@ -268,7 +270,7 @@ int HamiltonianMVPSolver::solve( // update H h11 = h11nl; - mgmol_strategy_->addHlocal2matrix(orbitals, orbitals, h11); + hamiltonian_->addHlocal2matrix(orbitals, orbitals, h11); projmatrices->assignH(h11); projmatrices->setHB2H(); diff --git a/src/HamiltonianMVPSolver.h b/src/HamiltonianMVPSolver.h index 3d917b93..06388034 100644 --- a/src/HamiltonianMVPSolver.h +++ b/src/HamiltonianMVPSolver.h @@ -11,6 +11,7 @@ #define MGMOL_HAMILTONIANMVP_SOLVER_H_ #include "Energy.h" +#include "Hamiltonian.h" #include "MGmol.h" #include "Rho.h" #include "Timer.h" @@ -38,6 +39,7 @@ class HamiltonianMVPSolver Rho* rho_; Energy* energy_; Electrostatic* electrostat_; + Hamiltonian* hamiltonian_; MGmol* mgmol_strategy_; int numst_; @@ -66,6 +68,7 @@ class HamiltonianMVPSolver public: HamiltonianMVPSolver(std::ostream& os, Ions& ions, Rho* rho, Energy* energy, Electrostatic* electrostat, + Hamiltonian* hamiltonian, MGmol* mgmol_strategy, const int numst, const short n_inner_steps, const MatrixType& hinit, const bool try_shorter_intervals = false); diff --git a/src/HamiltonianMVP_DMStrategy.cc 
b/src/HamiltonianMVP_DMStrategy.cc index 681568d5..c0ef609e 100644 --- a/src/HamiltonianMVP_DMStrategy.cc +++ b/src/HamiltonianMVP_DMStrategy.cc @@ -22,14 +22,15 @@ template HamiltonianMVP_DMStrategy::HamiltonianMVP_DMStrategy(MPI_Comm comm, std::ostream& os, Ions& ions, Rho* rho, Energy* energy, - Electrostatic* electrostat, MGmol* mgmol_strategy, - OrbitalsType* orbitals) + Electrostatic* electrostat, Hamiltonian* hamiltonian, + MGmol* mgmol_strategy, OrbitalsType* orbitals) : comm_(comm), os_(os), ions_(ions), rho_(rho), energy_(energy), electrostat_(electrostat), + hamiltonian_(hamiltonian), global_indexes_(orbitals->getOverlappingGids()), mgmol_strategy_(mgmol_strategy) { @@ -44,8 +45,9 @@ HamiltonianMVP_DMStrategy( - os_, ions_, rho_, energy_, electrostat_, mgmol_strategy_, ct.numst, - ct.dm_inner_steps, projmatrices->getH(), true); + os_, ions_, rho_, energy_, electrostat_, hamiltonian_, + mgmol_strategy_, ct.numst, ct.dm_inner_steps, projmatrices->getH(), + true); } template diff --git a/src/HamiltonianMVP_DMStrategy.h b/src/HamiltonianMVP_DMStrategy.h index da0e58b1..5f174903 100644 --- a/src/HamiltonianMVP_DMStrategy.h +++ b/src/HamiltonianMVP_DMStrategy.h @@ -11,13 +11,13 @@ #define MGMOL_HamiltonianMVP_DMStrategy_H #include "DMStrategy.h" +#include "Electrostatic.h" #include "Energy.h" #include "HamiltonianMVPSolver.h" +#include "Ions.h" #include "MGmol.h" #include "Rho.h" -class Ions; -class Electrostatic; template class MGmol; @@ -32,6 +32,7 @@ class HamiltonianMVP_DMStrategy : public DMStrategy Rho* rho_; Energy* energy_; Electrostatic* electrostat_; + Hamiltonian* hamiltonian_; const std::vector>& global_indexes_; MGmol* mgmol_strategy_; @@ -40,8 +41,8 @@ class HamiltonianMVP_DMStrategy : public DMStrategy public: HamiltonianMVP_DMStrategy(MPI_Comm comm, std::ostream& os, Ions& ions, Rho* rho, Energy* energy, - Electrostatic* electrostat, MGmol* mgmol_strategy, - OrbitalsType* orbitals); + Electrostatic* electrostat, Hamiltonian* hamiltonian, + 
MGmol* mgmol_strategy, OrbitalsType* orbitals); ~HamiltonianMVP_DMStrategy() override; diff --git a/src/MGmol.cc b/src/MGmol.cc index b692d7a6..13ca51e6 100644 --- a/src/MGmol.cc +++ b/src/MGmol.cc @@ -461,12 +461,13 @@ int MGmol::initial() dm_strategy_.reset( DMStrategyFactory::create(comm_, os_, *ions_, rho_.get(), energy_.get(), electrostat_.get(), - this, proj_matrices_.get(), current_orbitals_)); + hamiltonian_.get(), this, proj_matrices_.get(), + current_orbitals_)); else dm_strategy_.reset(DMStrategyFactory>::create(comm_, os_, *ions_, - rho_.get(), energy_.get(), electrostat_.get(), this, - proj_matrices_.get(), current_orbitals_)); + rho_.get(), energy_.get(), electrostat_.get(), hamiltonian_.get(), + this, proj_matrices_.get(), current_orbitals_)); // theta = invB * Hij proj_matrices_->updateThetaAndHB(); @@ -1502,8 +1503,8 @@ double MGmol::evaluateDMandEnergyAndForces(Orbitals* orbitals, { std::shared_ptr> dm_strategy( DMStrategyFactory::create(comm_, - os_, ions, rho_.get(), energy_.get(), electrostat_.get(), this, - proj_matrices_.get(), dorbitals)); + os_, ions, rho_.get(), energy_.get(), electrostat_.get(), + hamiltonian_.get(), this, proj_matrices_.get(), dorbitals)); dm_strategy->update(*dorbitals); } @@ -1512,8 +1513,8 @@ double MGmol::evaluateDMandEnergyAndForces(Orbitals* orbitals, std::shared_ptr> dm_strategy( DMStrategyFactory>::create(comm_, os_, ions, - rho_.get(), energy_.get(), electrostat_.get(), this, - proj_matrices_.get(), dorbitals)); + rho_.get(), energy_.get(), electrostat_.get(), + hamiltonian_.get(), this, proj_matrices_.get(), dorbitals)); dm_strategy->update(*dorbitals); } diff --git a/src/MGmol.h b/src/MGmol.h index 6eb1c8c0..7bd26236 100644 --- a/src/MGmol.h +++ b/src/MGmol.h @@ -254,12 +254,6 @@ class MGmol : public MGmolInterface const Ions& ions, const KBPsiMatrixSparse* const kbpsi, ProjectedMatricesInterface*); - template - void addHlocal2matrix( - OrbitalsType& orbitalsi, OrbitalsType& orbitalsj, MatrixType& mat); - void 
addHlocal2matrix(OrbitalsType& orbitalsi, OrbitalsType& orbitalsj, - VariableSizeMatrix& mat); - void update_pot(const pb::GridFunc& vh_init, const Ions& ions); void update_pot(const Ions& ions); int quench(OrbitalsType& orbitals, Ions& ions, const int max_steps, diff --git a/src/MVPSolver.cc b/src/MVPSolver.cc index 8cd4fc8a..29021a95 100644 --- a/src/MVPSolver.cc +++ b/src/MVPSolver.cc @@ -44,8 +44,8 @@ double evalEntropyMVP(ProjectedMatricesInterface* projmatrices, template MVPSolver::MVPSolver(MPI_Comm comm, std::ostream& os, Ions& ions, Rho* rho, Energy* energy, - Electrostatic* electrostat, MGmol* mgmol_strategy, - const int numst, const double kbT, + Electrostatic* electrostat, Hamiltonian* hamiltonian, + MGmol* mgmol_strategy, const int numst, const double kbT, const std::vector>& global_indexes, const short n_inner_steps, const double mixing, const double tol_de0, const bool use_old_dm) @@ -69,6 +69,7 @@ MVPSolver::MVPSolver(MPI_Comm comm, std::ostream& os, rho_ = rho; energy_ = energy; electrostat_ = electrostat; + hamiltonian_ = hamiltonian; mgmol_strategy_ = mgmol_strategy; work_ = new MatrixType("workMVP", numst_, numst_); @@ -238,7 +239,7 @@ int MVPSolver::solve(OrbitalsType& orbitals) // compute h11 for the current potential by adding local part to // nonlocal components MatrixType h11(h11_nl); - mgmol_strategy_->addHlocal2matrix(orbitals, orbitals, h11); + hamiltonian_->addHlocal2matrix(orbitals, orbitals, h11); current_proj_mat->assignH(h11); current_proj_mat->setHB2H(); @@ -318,8 +319,7 @@ int MVPSolver::solve(OrbitalsType& orbitals) // update h11 { h11 = h11_nl; - mgmol_strategy_->addHlocal2matrix( - orbitals, orbitals, h11); + hamiltonian_->addHlocal2matrix(orbitals, orbitals, h11); } proj_mat_work_->assignH(h11); diff --git a/src/MVPSolver.h b/src/MVPSolver.h index 2e0c36b6..ac5022cc 100644 --- a/src/MVPSolver.h +++ b/src/MVPSolver.h @@ -10,6 +10,7 @@ #define MGMOL_MVPSOLVER_H #include "Energy.h" +#include "Hamiltonian.h" #include "MGmol.h" 
#include "Rho.h" #include "Timer.h" @@ -42,6 +43,7 @@ class MVPSolver Rho* rho_; Energy* energy_; Electrostatic* electrostat_; + Hamiltonian* hamiltonian_; MGmol* mgmol_strategy_; @@ -56,10 +58,9 @@ class MVPSolver void buildTarget_MVP(MatrixType& h11, MatrixType& s11, MatrixType& target); public: - MVPSolver(MPI_Comm comm, std::ostream& os, Ions& ions, - Rho* rho, Energy* energy, - Electrostatic* electrostat, MGmol* mgmol_strategy, - const int numst, const double kbT, + MVPSolver(MPI_Comm comm, std::ostream& os, Ions& ions, Rho*, + Energy*, Electrostatic*, Hamiltonian*, + MGmol* mgmol_strategy, const int numst, const double kbT, const std::vector>& global_indexes, const short n_inner_steps, const double mixing, const double tol_de0, const bool use_old_dm); diff --git a/src/MVP_DMStrategy.cc b/src/MVP_DMStrategy.cc index e8737028..1a5cf95b 100644 --- a/src/MVP_DMStrategy.cc +++ b/src/MVP_DMStrategy.cc @@ -24,7 +24,7 @@ template MVP_DMStrategy::MVP_DMStrategy(MPI_Comm comm, ostream& os, Ions& ions, Rho* rho, Energy* energy, Electrostatic* electrostat, - MGmol* mgmol_strategy, + Hamiltonian* hamiltonian, MGmol* mgmol_strategy, const std::vector>& overlappingGids, ProjectedMatricesInterface* proj_matrices, const bool use_old_dm) : proj_matrices_(proj_matrices), @@ -34,6 +34,7 @@ MVP_DMStrategy::MVP_DMStrategy(MPI_Comm comm, rho_(rho), energy_(energy), electrostat_(electrostat), + hamiltonian_(hamiltonian), global_indexes_(overlappingGids), mgmol_strategy_(mgmol_strategy), use_old_dm_(use_old_dm) @@ -53,8 +54,8 @@ int MVP_DMStrategy::update(OrbitalsType& orbitals) } MVPSolver solver(comm_, os_, ions_, rho_, energy_, - electrostat_, mgmol_strategy_, ct.numst, ct.occ_width, global_indexes_, - ct.dm_inner_steps, ct.dm_mix, ct.dm_tol, use_old_dm_); + electrostat_, hamiltonian_, mgmol_strategy_, ct.numst, ct.occ_width, + global_indexes_, ct.dm_inner_steps, ct.dm_mix, ct.dm_tol, use_old_dm_); return solver.solve(orbitals); } diff --git a/src/MVP_DMStrategy.h 
b/src/MVP_DMStrategy.h index 2c16fbcc..04dbcad8 100644 --- a/src/MVP_DMStrategy.h +++ b/src/MVP_DMStrategy.h @@ -11,18 +11,18 @@ #define MGMOL_MVP_DMStrategy_H #include "DMStrategy.h" +#include "Electrostatic.h" #include "Energy.h" +#include "Hamiltonian.h" +#include "Ions.h" #include "MGmol.h" +#include "ProjectedMatricesInterface.h" #include "Rho.h" #include #include #include -class ProjectedMatricesInterface; -class Ions; -class Electrostatic; - template class MVP_DMStrategy : public DMStrategy { @@ -36,6 +36,7 @@ class MVP_DMStrategy : public DMStrategy Rho* rho_; Energy* energy_; Electrostatic* electrostat_; + Hamiltonian* hamiltonian_; const std::vector>& global_indexes_; MGmol* mgmol_strategy_; @@ -44,7 +45,8 @@ class MVP_DMStrategy : public DMStrategy public: MVP_DMStrategy(MPI_Comm comm, std::ostream& os, Ions& ions, Rho* rho, Energy* energy, - Electrostatic* electrostat, MGmol* mgmol_strategy, + Electrostatic* electrostat, Hamiltonian*, + MGmol* mgmol_strategy, const std::vector>& overlappingGids, ProjectedMatricesInterface* proj_matrices, const bool use_old_dm); diff --git a/src/computeHij.cc b/src/computeHij.cc index d25834a8..d229c89d 100644 --- a/src/computeHij.cc +++ b/src/computeHij.cc @@ -28,34 +28,6 @@ #include "ReplicatedMatrix.h" #include "SquareSubMatrix2DistMatrix.h" -template <> -void MGmol::addHlocal2matrix(LocGridOrbitals& orbitalsi, - LocGridOrbitals& orbitalsj, VariableSizeMatrix& mat) -{ - computeHij_tm_.start(); - -#ifdef PRINT_OPERATIONS - os_ << " addHlocal2matrix() at line " << __LINE__ << std::endl; -#endif - - hamiltonian_->addHlocal2matrix(orbitalsi, orbitalsj, mat, true); - - computeHij_tm_.stop(); -} - -template <> -template <> -void MGmol::addHlocal2matrix( - LocGridOrbitals& orbitalsi, LocGridOrbitals& orbitalsj, ReplicatedMatrix& H) -{ - (void)orbitalsi; - (void)orbitalsj; - (void)H; - - std::cerr << "Not implemented!" 
<< std::endl; - MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); -} - template <> template <> void MGmol::computeHij(LocGridOrbitals& orbitals_i, @@ -72,7 +44,7 @@ void MGmol::computeHij(LocGridOrbitals& orbitals_i, kbpsi_i->computeHvnlMatrix(kbpsi_j, ions, mat); // add local Hamiltonian part to phi_i^T*H*phi_j - addHlocal2matrix(orbitals_i, orbitals_j, mat); + hamiltonian_->addHlocal2matrix(orbitals_i, orbitals_j, mat, true); // sum matrix elements among processors if (consolidate) @@ -112,7 +84,7 @@ void MGmol::computeHij(LocGridOrbitals& orbitals_i, kbpsi->computeHvnlMatrix(ions, mat); // add local Hamiltonian part to phi^T*H*phi - addHlocal2matrix(orbitals_i, orbitals_j, mat); + hamiltonian_->addHlocal2matrix(orbitals_i, orbitals_j, mat, true); // sum matrix elements among processors if (consolidate) @@ -170,7 +142,7 @@ void MGmol::computeHij_private(OrbitalsType& orbitals_i, ss2dm->accumulate(submat, hij, 0.); // add local Hamiltonian part to phi^T*H*phi - addHlocal2matrix(orbitals_i, orbitals_j, hij); + hamiltonian_->addHlocal2matrix(orbitals_i, orbitals_j, hij, true); } template <> @@ -225,7 +197,7 @@ void MGmol::computeHij_private(OrbitalsType& orbitals_i, ss2dm->accumulate(submat, hij, 0.); // add local Hamiltonian part to phi^T*H*phi - addHlocal2matrix(orbitals_i, orbitals_j, hij); + hamiltonian_->addHlocal2matrix(orbitals_i, orbitals_j, hij); } template @@ -352,23 +324,6 @@ void MGmol::computeHnlPhiAndAdd2HPhi(Ions& ions, hphi.setIterativeIndex(phi.getIterativeIndex()); } -template -template -void MGmol::addHlocal2matrix( - OrbitalsType& orbitalsi, OrbitalsType& orbitalsj, MatrixType& mat) -{ - computeHij_tm_.start(); - -#ifdef PRINT_OPERATIONS - os_ << " addHlocal2matrix()" << std::endl; -#endif - - // add local H to mat - hamiltonian_->addHlocal2matrix(orbitalsi, orbitalsj, mat); - - computeHij_tm_.stop(); -} - template void MGmol::getHpsiAndTheta( Ions& ions, OrbitalsType& phi, OrbitalsType& hphi) @@ -433,13 +388,3 @@ void MGmol::getHpsiAndTheta(Ions& ions, 
OrbitalsType& phi, template class MGmol; template class MGmol; - -template void MGmol::addHlocal2matrix( - ExtendedGridOrbitals& orbitalsi, ExtendedGridOrbitals& orbitalsj, - dist_matrix::DistMatrix&); -template void MGmol::addHlocal2matrix( - LocGridOrbitals& orbitalsi, LocGridOrbitals& orbitalsj, - dist_matrix::DistMatrix&); -template void MGmol::addHlocal2matrix( - ExtendedGridOrbitals& orbitalsi, ExtendedGridOrbitals& orbitalsj, - ReplicatedMatrix& mat); From 04764af2eea48e134e5a8ff51a1ab0a61d810f19 Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Wed, 12 Nov 2025 08:38:07 -0500 Subject: [PATCH 82/99] Template preconditioner (#382) * Use more shared_ptr in OrbitalsPreconditioning * Template OrbitalsPreconditioning on P datatype --- src/MGmol.cc | 2 +- src/MGmol.h | 3 +- src/OrbitalsPreconditioning.cc | 86 ++++++++++++++++------------------ src/OrbitalsPreconditioning.h | 29 +++++------- src/quench.cc | 3 +- 5 files changed, 59 insertions(+), 64 deletions(-) diff --git a/src/MGmol.cc b/src/MGmol.cc index 13ca51e6..c20fef32 100644 --- a/src/MGmol.cc +++ b/src/MGmol.cc @@ -960,7 +960,7 @@ void MGmol::printTimers() ChebyshevApproximation< dist_matrix::DistMatrix>::printTimers(os_); } - OrbitalsPreconditioning::printTimers(os_); + OrbitalsPreconditioning::printTimers(os_); MDfiles::printTimers(os_); ChebyshevApproximationInterface::printTimers(os_); } diff --git a/src/MGmol.h b/src/MGmol.h index 7bd26236..9c400b54 100644 --- a/src/MGmol.h +++ b/src/MGmol.h @@ -100,7 +100,8 @@ class MGmol : public MGmolInterface std::shared_ptr h5f_file_; - std::shared_ptr> orbitals_precond_; + std::shared_ptr> + orbitals_precond_; double total_energy_; std::shared_ptr constraints_; diff --git a/src/OrbitalsPreconditioning.cc b/src/OrbitalsPreconditioning.cc index 79aa5ee4..7b63cbed 100644 --- a/src/OrbitalsPreconditioning.cc +++ b/src/OrbitalsPreconditioning.cc @@ -19,20 +19,17 @@ #include "Preconditioning.h" #include "ProjectedMatricesInterface.h" -template 
-OrbitalsPreconditioning::~OrbitalsPreconditioning() +template +OrbitalsPreconditioning::~OrbitalsPreconditioning() { assert(is_set_); - assert(precond_ != nullptr); - - delete precond_; - delete map2masks_; + assert(precond_); } -template -void OrbitalsPreconditioning::setup(T& orbitals, const short mg_levels, - const short lap_type, MasksSet* currentMasks, - const std::shared_ptr& lrs) +template +void OrbitalsPreconditioning::setup( + OrbitalsType& orbitals, const short mg_levels, const short lap_type, + MasksSet* currentMasks, const std::shared_ptr& lrs) { assert(!is_set_); @@ -42,18 +39,17 @@ void OrbitalsPreconditioning::setup(T& orbitals, const short mg_levels, Mesh* mymesh = Mesh::instance(); const pb::Grid& mygrid(mymesh->grid()); - precond_ = new Preconditioning( + precond_ = std::make_shared>( lap_type, mg_levels, mygrid, ct.bcWF); if (currentMasks != nullptr) { // set masks in GridFuncVector class - map2masks_ = new Map2Masks(currentMasks, lrs->getOverlapGids()); - pb::GridFuncVector::setMasks( - map2masks_); + map2masks_ + = std::make_shared(currentMasks, lrs->getOverlapGids()); + pb::GridFuncVector::setMasks( + map2masks_.get()); } - else - map2masks_ = nullptr; precond_->setup(orbitals.getOverlappingGids()); @@ -61,34 +57,32 @@ void OrbitalsPreconditioning::setup(T& orbitals, const short mg_levels, == static_cast(orbitals.getOverlappingGids()[0].size())); gfv_work1_ - = std::shared_ptr>( - new pb::GridFuncVector(mygrid, - ct.bcWF[0], ct.bcWF[1], ct.bcWF[2], - orbitals.getOverlappingGids())); + = std::make_shared>( + mygrid, ct.bcWF[0], ct.bcWF[1], ct.bcWF[2], + orbitals.getOverlappingGids()); gfv_work2_ - = std::shared_ptr>( - new pb::GridFuncVector(mygrid, - ct.bcWF[0], ct.bcWF[1], ct.bcWF[2], - orbitals.getOverlappingGids())); + = std::make_shared>( + mygrid, ct.bcWF[0], ct.bcWF[1], ct.bcWF[2], + orbitals.getOverlappingGids()); - if (!std::is_same::value) + if (sizeof(ORBDTYPE) != sizeof(PDataType)) gfv_work3_ - = std::shared_ptr>( - new 
pb::GridFuncVector(mygrid, - ct.bcWF[0], ct.bcWF[1], ct.bcWF[2], - orbitals.getOverlappingGids())); + = std::make_shared>( + mygrid, ct.bcWF[0], ct.bcWF[1], ct.bcWF[2], + orbitals.getOverlappingGids()); is_set_ = true; assert(gfv_work2_); } -template -void OrbitalsPreconditioning::precond_mg(T& orbitals) +template +void OrbitalsPreconditioning::precond_mg( + OrbitalsType& orbitals) { assert(is_set_); - assert(precond_ != nullptr); + assert(precond_); assert(gamma_ > 0.); assert(gfv_work1_); @@ -98,7 +92,7 @@ void OrbitalsPreconditioning::precond_mg(T& orbitals) precond_tm_.start(); // initialize gfv_work2_ with data from orbitals - if (std::is_same::value) + if (sizeof(ORBDTYPE) == sizeof(PDataType)) { orbitals.setDataWithGhosts(gfv_work2_.get()); } @@ -113,12 +107,12 @@ void OrbitalsPreconditioning::precond_mg(T& orbitals) } gfv_work1_->resetData(); - gfv_work1_->axpy((MGPRECONDTYPE)gamma_, *gfv_work2_); + gfv_work1_->axpy((PDataType)gamma_, *gfv_work2_); // block-implemented preconditioner precond_->mg(*gfv_work1_, *gfv_work2_, lap_type_, 0); - if (std::is_same::value) + if (sizeof(ORBDTYPE) == sizeof(PDataType)) { orbitals.setPsi(*gfv_work1_); } @@ -133,18 +127,19 @@ void OrbitalsPreconditioning::precond_mg(T& orbitals) #ifdef PRINT_OPERATIONS if (onpe0) - (*MPIdata::sout) << "OrbitalsPreconditioning::precond_mg() done" + (*MPIdata::sout) << "OrbitalsPreconditioning::" + "precond_mg() done" << endl; #endif precond_tm_.stop(); } -template -void OrbitalsPreconditioning::setGamma(const pb::Lap& lapOper, - const Potentials& pot, const short mg_levels, - ProjectedMatricesInterface* proj_matrices) +template +void OrbitalsPreconditioning::setGamma( + const pb::Lap& lapOper, const Potentials& pot, + const short mg_levels, ProjectedMatricesInterface* proj_matrices) { - assert(precond_ != nullptr); + assert(precond_); assert(is_set_); const double small_eig = proj_matrices->getLowestEigenvalue(); @@ -167,11 +162,12 @@ void OrbitalsPreconditioning::setGamma(const pb::Lap& 
lapOper, #endif } -template -void OrbitalsPreconditioning::printTimers(std::ostream& os) +template +void OrbitalsPreconditioning::printTimers( + std::ostream& os) { precond_tm_.print(os); } -template class OrbitalsPreconditioning; -template class OrbitalsPreconditioning; +template class OrbitalsPreconditioning; +template class OrbitalsPreconditioning; diff --git a/src/OrbitalsPreconditioning.h b/src/OrbitalsPreconditioning.h index 6937d4f9..0f85e11d 100644 --- a/src/OrbitalsPreconditioning.h +++ b/src/OrbitalsPreconditioning.h @@ -23,7 +23,7 @@ class ProjectedMatricesInterface; class Potentials; class LocalizationRegions; -template +template class OrbitalsPreconditioning { private: @@ -33,15 +33,15 @@ class OrbitalsPreconditioning using memory_space_type = MemorySpace::Host; #endif - Preconditioning* precond_; + std::shared_ptr> precond_; // work arrays with preconditioner precision - std::shared_ptr> + std::shared_ptr> gfv_work1_; - std::shared_ptr> + std::shared_ptr> gfv_work2_; - // tmp work array for case ORBDTYPE!=MGPRECONDTYPE + // tmp work array for case ORBDTYPE!=PDataType std::shared_ptr> gfv_work3_; short lap_type_; @@ -54,27 +54,24 @@ class OrbitalsPreconditioning // timers static Timer precond_tm_; - Map2Masks* map2masks_; + std::shared_ptr map2masks_; public: - OrbitalsPreconditioning() - { - is_set_ = false; - precond_ = nullptr; - }; + OrbitalsPreconditioning() { is_set_ = false; }; ~OrbitalsPreconditioning(); - void setup(T& orbitals, const short mg_levels, const short lap_type, - MasksSet*, const std::shared_ptr&); - void precond_mg(T& orbitals); + void setup(OrbitalsType& orbitals, const short mg_levels, + const short lap_type, MasksSet*, + const std::shared_ptr&); + void precond_mg(OrbitalsType& orbitals); void setGamma(const pb::Lap& lapOper, const Potentials& pot, const short mg_levels, ProjectedMatricesInterface* proj_matrices); static void printTimers(std::ostream& os); }; -template -Timer OrbitalsPreconditioning::precond_tm_( +template 
+Timer OrbitalsPreconditioning::precond_tm_( "OrbitalsPreconditioning::precond"); #endif diff --git a/src/quench.cc b/src/quench.cc index 561ea263..87b3239e 100644 --- a/src/quench.cc +++ b/src/quench.cc @@ -572,7 +572,8 @@ int MGmol::quench(OrbitalsType& orbitals, Ions& ions, applyAOMMprojection(orbitals); } - orbitals_precond_.reset(new OrbitalsPreconditioning()); + orbitals_precond_.reset( + new OrbitalsPreconditioning()); orbitals_precond_->setup( orbitals, ct.getMGlevels(), ct.lap_type, currentMasks_.get(), lrs_); From 1cb7a49d05baa04b1ba6d0ecf77ea6da4cd3d61c Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Mon, 17 Nov 2025 19:31:48 -0500 Subject: [PATCH 83/99] Use local Hphi in Davidson (#383) --- src/DavidsonSolver.cc | 31 +++++++++++++++++++------------ src/Electrostatic.cc | 3 ++- src/Hamiltonian.h | 3 +-- 3 files changed, 22 insertions(+), 15 deletions(-) diff --git a/src/DavidsonSolver.cc b/src/DavidsonSolver.cc index b5cbb5ba..59c303e6 100644 --- a/src/DavidsonSolver.cc +++ b/src/DavidsonSolver.cc @@ -424,7 +424,7 @@ int DavidsonSolver::solve( os_ << "DavidsonSolver -> Iteration " << outer_it << std::endl; os_ << "###########################" << std::endl; } - OrbitalsType tmp_orbitals("Davidson_tmp", orbitals); + OrbitalsType hphi("Davidson_hphi", orbitals); MatrixType dm2Ninit("dm2N", 2 * numst_, 2 * numst_); std::vector eval(2 * numst_); MatrixType evect("EigVect", 2 * numst_, 2 * numst_); @@ -485,9 +485,9 @@ int DavidsonSolver::solve( orbitals.getProjMatrices()); assert(projmatrices != nullptr); - // get H*psi stored in work_orbitals + // get H*phi stored in hphi // h11 computed at the same time - mgmol_strategy_->computePrecondResidual(orbitals, tmp_orbitals, + mgmol_strategy_->computePrecondResidual(orbitals, hphi, work_orbitals, ions_, &kbpsi_1, false, false); projmatrices->setHB2H(); @@ -525,15 +525,19 @@ int DavidsonSolver::solve( else { h11 = h11nl; - hamiltonian_->addHlocal2matrix(orbitals, orbitals, h11); + 
hamiltonian_->applyLocal(numst_, orbitals, hphi); + orbitals.addDotWithNcol2Matrix(hphi, h11); } - // update h22, h12 and h21 - h22 = h22nl; - hamiltonian_->addHlocal2matrix(work_orbitals, work_orbitals, h22); + // compute H*P and store in hphi + hamiltonian_->applyLocal(numst_, work_orbitals, hphi); + // update h22, h12 and h21 h12 = h12nl; - hamiltonian_->addHlocal2matrix(orbitals, work_orbitals, h12); + orbitals.addDotWithNcol2Matrix(hphi, h12); + + h22 = h22nl; + work_orbitals.addDotWithNcol2Matrix(hphi, h22); h21.transpose(1., h12, 0.); @@ -606,17 +610,20 @@ int DavidsonSolver::solve( // update h11, h22, h12, and h21 h11 = h11nl; - hamiltonian_->addHlocal2matrix(orbitals, orbitals, h11); + hamiltonian_->applyLocal(numst_, orbitals, hphi); + orbitals.addDotWithNcol2Matrix(hphi, h11); + + hamiltonian_->applyLocal(numst_, work_orbitals, hphi); h22 = h22nl; - hamiltonian_->addHlocal2matrix( - work_orbitals, work_orbitals, h22); + work_orbitals.addDotWithNcol2Matrix(hphi, h22); h12 = h12nl; - hamiltonian_->addHlocal2matrix(orbitals, work_orbitals, h12); + orbitals.addDotWithNcol2Matrix(hphi, h12); h21.transpose(1., h12, 0.); + // assemble 2N x 2N Hamiltonian proj_mat2N_->assignBlocksH(h11, h12, h21, h22); proj_mat2N_->setHB2H(); diff --git a/src/Electrostatic.cc b/src/Electrostatic.cc index aba09d07..869bfc70 100644 --- a/src/Electrostatic.cc +++ b/src/Electrostatic.cc @@ -319,7 +319,8 @@ void Electrostatic::computeVh(const Ions& ions, Rho& rho, Potentials& pot) eepsilon_ = 0.; } - iterative_index_ = rho.getIterativeIndex(); + iterative_index_ = pot.getIterativeIndex(); + iterative_index_++; pot.setVh(poisson_solver_->vh(), iterative_index_); if (diel_flag_) diff --git a/src/Hamiltonian.h b/src/Hamiltonian.h index 31b84a2e..08194f87 100644 --- a/src/Hamiltonian.h +++ b/src/Hamiltonian.h @@ -27,8 +27,6 @@ class Hamiltonian static Timer apply_Hloc_tm_; - void applyLocal(const int nstates, OrbitalsType& phi, OrbitalsType& hphi); - public: static Timer apply_Hloc_tm() 
{ return apply_Hloc_tm_; } @@ -42,6 +40,7 @@ class Hamiltonian pb::Lap* lapOper() { return lapOper_; } const OrbitalsType& applyLocal(OrbitalsType& phi, const bool force = false); + void applyLocal(const int nstates, OrbitalsType& phi, OrbitalsType& hphi); template void addHlocal2matrix(OrbitalsType& orbitals1, OrbitalsType& orbitals2, From e24340ae200a1bbc83dfcdf94da6179549057023 Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Wed, 19 Nov 2025 21:14:22 -0500 Subject: [PATCH 84/99] Residual compute (#384) * Refactor H application and R computation * Rm optional args to Hamiltonian::addHlocal2matrix() --- src/Hamiltonian.cc | 4 ++- src/Hamiltonian.h | 2 +- src/HamiltonianMVPSolver.cc | 8 +++--- src/MGmol.cc | 55 +++++++++++-------------------------- src/MGmol.h | 11 ++++++-- src/MVPSolver.cc | 9 +++--- src/computeHij.cc | 19 ++++++------- 7 files changed, 46 insertions(+), 62 deletions(-) diff --git a/src/Hamiltonian.cc b/src/Hamiltonian.cc index 5d3683ec..c7efa2f6 100644 --- a/src/Hamiltonian.cc +++ b/src/Hamiltonian.cc @@ -141,7 +141,7 @@ void Hamiltonian::applyLocal(const int ncolors, T& phi, T& hphi) for (int i = 0; i < ncolors; i++) { using memory_space_type = typename T::memory_space_type; - ORBDTYPE* ihphi = hphi.getPsi(i); + auto ihphi = hphi.getPsi(i); unsigned int const size = hphi.getNumpt(); ORBDTYPE* ihphi_host_view = MemorySpace::Memory::allocate_host_view(size); @@ -221,6 +221,8 @@ template <> void Hamiltonian::addHlocal2matrix(LocGridOrbitals& phi1, LocGridOrbitals& phi2, ReplicatedMatrix& hij, const bool force) { + (void)hij; + applyLocal(phi2, force); // phi1.addDotWithNcol2Matrix(*hlphi_, hij); diff --git a/src/Hamiltonian.h b/src/Hamiltonian.h index 08194f87..0c87ceda 100644 --- a/src/Hamiltonian.h +++ b/src/Hamiltonian.h @@ -44,7 +44,7 @@ class Hamiltonian template void addHlocal2matrix(OrbitalsType& orbitals1, OrbitalsType& orbitals2, - MatrixType& mat, const bool force = false); + MatrixType& mat, const bool force); void 
addHlocalij(OrbitalsType& orbitals1, OrbitalsType& orbitals2, ProjectedMatricesInterface*); void addHlocalij(OrbitalsType& orbitals1, ProjectedMatricesInterface*); diff --git a/src/HamiltonianMVPSolver.cc b/src/HamiltonianMVPSolver.cc index 715f52ba..17f1f66b 100644 --- a/src/HamiltonianMVPSolver.cc +++ b/src/HamiltonianMVPSolver.cc @@ -151,7 +151,7 @@ int HamiltonianMVPSolver::solve( // compute new h11 for the current potential by adding local part to // nonlocal components h11 = h11nl; - hamiltonian_->addHlocal2matrix(orbitals, orbitals, h11); + hamiltonian_->addHlocal2matrix(orbitals, orbitals, h11, false); projmatrices->assignH(h11); projmatrices->setHB2H(); @@ -179,7 +179,7 @@ int HamiltonianMVPSolver::solve( // update H and compute energy at midpoint h11 = h11nl; - hamiltonian_->addHlocal2matrix(orbitals, orbitals, h11); + hamiltonian_->addHlocal2matrix(orbitals, orbitals, h11, false); projmatrices->assignH(h11); projmatrices->setHB2H(); @@ -214,7 +214,7 @@ int HamiltonianMVPSolver::solve( // update H with new potential h11 = h11nl; - hamiltonian_->addHlocal2matrix(orbitals, orbitals, h11); + hamiltonian_->addHlocal2matrix(orbitals, orbitals, h11, false); projmatrices->assignH(h11); projmatrices->setHB2H(); @@ -270,7 +270,7 @@ int HamiltonianMVPSolver::solve( // update H h11 = h11nl; - hamiltonian_->addHlocal2matrix(orbitals, orbitals, h11); + hamiltonian_->addHlocal2matrix(orbitals, orbitals, h11, false); projmatrices->assignH(h11); projmatrices->setHB2H(); diff --git a/src/MGmol.cc b/src/MGmol.cc index c20fef32..2c61447d 100644 --- a/src/MGmol.cc +++ b/src/MGmol.cc @@ -1171,33 +1171,28 @@ void MGmol::projectOutKernel(OrbitalsType& phi) } template -void MGmol::setGamma( - const pb::Lap& lapOper, const Potentials& pot) +void MGmol::precond_mg(OrbitalsType& phi) { assert(orbitals_precond_); Control& ct = *(Control::instance()); - orbitals_precond_->setGamma( - lapOper, pot, ct.getMGlevels(), proj_matrices_.get()); -} + Potentials& pot = 
hamiltonian_->potential(); + pb::Lap* lapOper = hamiltonian_->lapOper(); -template -void MGmol::precond_mg(OrbitalsType& phi) -{ - assert(orbitals_precond_); + orbitals_precond_->setGamma( + *lapOper, pot, ct.getMGlevels(), proj_matrices_.get()); orbitals_precond_->precond_mg(phi); } template -double MGmol::computeResidual(OrbitalsType& orbitals, - OrbitalsType& work_orbitals, Ions& ions, OrbitalsType& res, - const bool print_residual, const bool norm_res) +double MGmol::computeResidual(OrbitalsType& phi, + OrbitalsType& hphi, Ions& ions, OrbitalsType& res, + const KBPsiMatrixSparse* const kbpsi, const bool print_residual, + const bool norm_res) { - assert(orbitals.getIterativeIndex() >= 0); - comp_res_tm_.start(); // os_<<"computeResidual()"<::computeResidual(OrbitalsType& orbitals, proj_matrices_->computeInvB(); - Potentials& pot = hamiltonian_->potential(); - pb::Lap* lapop = hamiltonian_->lapOper(); - - setGamma(*lapop, pot); - - // get H*psi stored in work_orbitals.psi + // get H*phi stored in hphi // and psi^T H psi in Hij - getHpsiAndTheta(ions, orbitals, work_orbitals); + getHpsiAndTheta(ions, phi, hphi, kbpsi); - double norm2Res = computeConstraintResidual( - orbitals, work_orbitals, res, print_residual, norm_res); + double norm2Res + = computeConstraintResidual(phi, hphi, res, print_residual, norm_res); - if (ct.isSpreadFunctionalEnergy()) addResidualSpreadPenalty(orbitals, res); + if (ct.isSpreadFunctionalEnergy()) addResidualSpreadPenalty(phi, res); comp_res_tm_.stop(); @@ -1344,19 +1334,8 @@ double MGmol::computePrecondResidual(OrbitalsType& phi, { Control& ct = *(Control::instance()); - proj_matrices_->computeInvB(); - - Potentials& pot = hamiltonian_->potential(); - pb::Lap* lapop = hamiltonian_->lapOper(); - - setGamma(*lapop, pot); - - // get H*psi stored in hphi - // and psi^T H psi in Hij - getHpsiAndTheta(ions, phi, hphi, kbpsi); - - double norm2Res - = computeConstraintResidual(phi, hphi, res, print_residual, norm_res); + double norm2Res = 
computeResidual( + phi, hphi, ions, res, kbpsi, print_residual, norm_res); if (ct.withPreconditioner()) { @@ -1366,8 +1345,6 @@ double MGmol::computePrecondResidual(OrbitalsType& phi, orbitals_precond_->precond_mg(res); } - // if( ct.isSpreadFunctionalActive() )addResidualSpreadPenalty(phi,res); - return norm2Res; } diff --git a/src/MGmol.h b/src/MGmol.h index 9c400b54..9cdc345d 100644 --- a/src/MGmol.h +++ b/src/MGmol.h @@ -321,10 +321,17 @@ class MGmol : public MGmolInterface void projectOutKernel(OrbitalsType& phi); void precond_mg(OrbitalsType& orbitals); - void setGamma(const pb::Lap& lapOper, const Potentials& pot); + double computeResidual(OrbitalsType& orbitals, OrbitalsType& work_orbitals, + Ions& ions, OrbitalsType& res, const KBPsiMatrixSparse* const kbpsi, + const bool print_residual, const bool norm_res); double computeResidual(OrbitalsType& orbitals, OrbitalsType& work_orbitals, Ions& ions, OrbitalsType& res, const bool print_residual, - const bool norm_res); + const bool norm_res) + { + return computeResidual(orbitals, work_orbitals, ions, res, + g_kbpsi_.get(), print_residual, norm_res); + } + void applyAOMMprojection(OrbitalsType&); void force(OrbitalsType& orbitals, Ions& ions) { diff --git a/src/MVPSolver.cc b/src/MVPSolver.cc index 29021a95..787c9bdb 100644 --- a/src/MVPSolver.cc +++ b/src/MVPSolver.cc @@ -239,7 +239,7 @@ int MVPSolver::solve(OrbitalsType& orbitals) // compute h11 for the current potential by adding local part to // nonlocal components MatrixType h11(h11_nl); - hamiltonian_->addHlocal2matrix(orbitals, orbitals, h11); + hamiltonian_->addHlocal2matrix(orbitals, orbitals, h11, false); current_proj_mat->assignH(h11); current_proj_mat->setHB2H(); @@ -317,10 +317,9 @@ int MVPSolver::solve(OrbitalsType& orbitals) energy_->saveVofRho(); // update h11 - { - h11 = h11_nl; - hamiltonian_->addHlocal2matrix(orbitals, orbitals, h11); - } + h11 = h11_nl; + hamiltonian_->addHlocal2matrix( + orbitals, orbitals, h11, false); 
proj_mat_work_->assignH(h11); proj_mat_work_->setHB2H(); diff --git a/src/computeHij.cc b/src/computeHij.cc index d229c89d..2786d4c6 100644 --- a/src/computeHij.cc +++ b/src/computeHij.cc @@ -197,7 +197,7 @@ void MGmol::computeHij_private(OrbitalsType& orbitals_i, ss2dm->accumulate(submat, hij, 0.); // add local Hamiltonian part to phi^T*H*phi - hamiltonian_->addHlocal2matrix(orbitals_i, orbitals_j, hij); + hamiltonian_->addHlocal2matrix(orbitals_i, orbitals_j, hij, false); } template @@ -324,13 +324,6 @@ void MGmol::computeHnlPhiAndAdd2HPhi(Ions& ions, hphi.setIterativeIndex(phi.getIterativeIndex()); } -template -void MGmol::getHpsiAndTheta( - Ions& ions, OrbitalsType& phi, OrbitalsType& hphi) -{ - getHpsiAndTheta(ions, phi, hphi, g_kbpsi_.get()); -} - template void MGmol::getHpsiAndTheta(Ions& ions, OrbitalsType& phi, OrbitalsType& hphi, const KBPsiMatrixSparse* const kbpsi) @@ -345,7 +338,7 @@ void MGmol::getHpsiAndTheta(Ions& ions, OrbitalsType& phi, os_ << " getHpsiAndTheta" << std::endl; #endif - hphi.assign(hamiltonian_->applyLocal(phi)); + hamiltonian_->applyLocal(phi.chromatic_number(), phi, hphi); // Compute "nstates" columns of matrix // Hij = phi**T * H_loc * phi and save in sh @@ -370,7 +363,13 @@ void MGmol::getHpsiAndTheta(Ions& ions, OrbitalsType& phi, kbpsi->computeHvnlMatrix(ions, proj_matrices_.get()); // add local part of H to sh - hamiltonian_->addHlocalij(phi, proj_matrices_.get()); + SquareLocalMatrices slm( + phi.subdivx(), phi.chromatic_number()); + + phi.computeLocalProduct(hphi, slm); + proj_matrices_->setLocalMatrixElementsHl(slm); + + proj_matrices_->consolidateH(); energy_->saveVofRho(); From 5bf70e361a7a46ea66e373e533872b162d24dfa5 Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Tue, 25 Nov 2025 13:00:01 -0500 Subject: [PATCH 85/99] Fix bug in preconditioner setup (#385) * affecting Davidson only --- src/MGmol.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/MGmol.cc b/src/MGmol.cc index 
2c61447d..44ee6bf2 100644 --- a/src/MGmol.cc +++ b/src/MGmol.cc @@ -1342,7 +1342,7 @@ double MGmol::computePrecondResidual(OrbitalsType& phi, // PRECONDITIONING // compute the preconditioned steepest descent direction // -> res - orbitals_precond_->precond_mg(res); + precond_mg(res); } return norm2Res; From deeae86252cabe6d36a30bf196b4646f084830d8 Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Tue, 25 Nov 2025 13:12:54 -0500 Subject: [PATCH 86/99] Fix potential iterative index (#386) * was failing in some assert --- src/Electrostatic.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Electrostatic.cc b/src/Electrostatic.cc index 869bfc70..aba09d07 100644 --- a/src/Electrostatic.cc +++ b/src/Electrostatic.cc @@ -319,8 +319,7 @@ void Electrostatic::computeVh(const Ions& ions, Rho& rho, Potentials& pot) eepsilon_ = 0.; } - iterative_index_ = pot.getIterativeIndex(); - iterative_index_++; + iterative_index_ = rho.getIterativeIndex(); pot.setVh(poisson_solver_->vh(), iterative_index_); if (diel_flag_) From 0e0f0ea607724be62d529451b55ecd67fff2ae74 Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Wed, 26 Nov 2025 13:22:36 -0500 Subject: [PATCH 87/99] Refactor Hartree_CG (#387) * Template Hartree_CG on preconditioner data type * Enable runtime precision selection for Hartree_CG preconditioner --- src/Control.cc | 22 +++-- src/Control.h | 5 ++ src/Hartree_CG.cc | 75 +++++++--------- src/Hartree_CG.h | 23 +++-- src/PCGSolver.cc | 125 ++++++++++++++------------- src/PCGSolver.h | 51 ++++++----- src/Poisson.h | 9 +- src/PoissonSolverFactory.h | 173 ++++++++++++++++++++++--------------- src/global.h | 2 - src/read_config.cc | 4 +- 10 files changed, 263 insertions(+), 226 deletions(-) diff --git a/src/Control.cc b/src/Control.cc index d6d26107..59858e67 100644 --- a/src/Control.cc +++ b/src/Control.cc @@ -44,6 +44,7 @@ Control::Control() poisson_pc_nu1 = 2; poisson_pc_nu2 = 2; poisson_pc_nlev = 10; + poisson_pc_data_ = 32; coloring_algo_ =
0; maxDistanceAtomicInfo_ = 8.; spread_factor = 2.; @@ -331,7 +332,7 @@ void Control::sync(void) if (onpe0 && verbose > 0) (*MPIdata::sout) << "Control::sync()" << std::endl; // pack - const short size_short_buffer = 91; + const short size_short_buffer = 92; short* short_buffer = new short[size_short_buffer]; if (mype_ == 0) { @@ -421,6 +422,7 @@ void Control::sync(void) short_buffer[88] = hartree_reset_; short_buffer[89] = MD_last_step_; short_buffer[90] = (short)static_cast(poisson_lap_type_); + short_buffer[91] = poisson_pc_data_; } else { @@ -634,6 +636,7 @@ void Control::sync(void) hartree_reset_ = short_buffer[88]; MD_last_step_ = short_buffer[89]; poisson_lap_type_ = static_cast(short_buffer[90]); + poisson_pc_data_ = short_buffer[91]; numst = int_buffer[0]; nel_ = int_buffer[1]; @@ -1406,14 +1409,15 @@ void Control::setOptions(const boost::program_options::variables_map& vm) bool poisson_reset = vm["Poisson.reset"].as(); hartree_reset_ = poisson_reset ? 1 : 0; - poisson_pc_nu1 = vm["Poisson.nu1"].as(); - poisson_pc_nu2 = vm["Poisson.nu2"].as(); - vh_init = vm["Poisson.max_steps_initial"].as(); - vh_its = vm["Poisson.max_steps"].as(); - poisson_pc_nlev = vm["Poisson.max_levels"].as(); - rho0_ = vm["Poisson.rho0"].as(); - drho0_ = vm["Poisson.beta"].as(); - e0_ = vm["Poisson.e0"].as(); + poisson_pc_nu1 = vm["Poisson.nu1"].as(); + poisson_pc_nu2 = vm["Poisson.nu2"].as(); + vh_init = vm["Poisson.max_steps_initial"].as(); + vh_its = vm["Poisson.max_steps"].as(); + poisson_pc_nlev = vm["Poisson.max_levels"].as(); + rho0_ = vm["Poisson.rho0"].as(); + drho0_ = vm["Poisson.beta"].as(); + e0_ = vm["Poisson.e0"].as(); + poisson_pc_data_ = vm["Poisson.precond_precision"].as(); str = vm["ProjectedMatrices.solver"].as(); if (str.compare("short_sighted") == 0) short_sighted = 1; diff --git a/src/Control.h b/src/Control.h index 7b1bca65..9d83a984 100644 --- a/src/Control.h +++ b/src/Control.h @@ -403,6 +403,11 @@ class Control short poisson_pc_nu2; short poisson_pc_nlev; + 
/*! + * Poisson preconditioner precision (32 or 64) + */ + short poisson_pc_data_; + PoissonFDtype poisson_lap_type_; short lap_type; diff --git a/src/Hartree_CG.cc b/src/Hartree_CG.cc index 375237f8..2418015c 100644 --- a/src/Hartree_CG.cc +++ b/src/Hartree_CG.cc @@ -6,13 +6,8 @@ // All rights reserved. // This file is part of MGmol. For details, see https://github.com/llnl/mgmol. // Please also read this link https://github.com/llnl/mgmol/LICENSE - -#include -#include -using namespace std; - -#include "Control.h" #include "Hartree_CG.h" +#include "Control.h" #include "MultipoleExpansion.h" #include "Laph2.h" @@ -22,15 +17,16 @@ using namespace std; #include "Laph6.h" #include "Laph8.h" -// Timer Poisson::poisson_tm_("Poisson::poisson"); +#include +#include -template -void Hartree_CG::solve( - const pb::GridFunc& rho, const pb::GridFunc& rhoc) +template +void Hartree_CG::solve( + const pb::GridFunc& rho, const pb::GridFunc& rhoc) { PoissonInterface::poisson_tm_.start(); - pb::GridFunc work_rho(rho); + pb::GridFunc work_rho(rho); Control& ct = *(Control::instance()); // Keep in memory vh*rho before updating vh @@ -48,7 +44,7 @@ void Hartree_CG::solve( if (Poisson::bc_[i] == 2) dim_mpol++; //(*MPIdata::sout)<<"dim_mpol="< bc_func( + pb::GridFunc bc_func( Poisson::grid_, Poisson::bc_[0], Poisson::bc_[1], Poisson::bc_[2]); if (dim_mpol > 0) { @@ -70,53 +66,40 @@ void Hartree_CG::solve( } } - /* Check for uniform precision before calling poisson_solver. - * Downgrade or upgrade rhs (work_rho) to have precision of solution (vh_). - * Note that this could be done at the beginning of this function, but - * several operations involving rho might be done in lower precision - * (depending on POTDTYPE), which could affect accuracy. For now, we delay - * the switch until just before the solve call. - */ - // if(sizeof(POTDTYPE) != sizeof(RHODTYPE)) - // { - /* solve with POTDTYPE precision */ - pb::GridFunc rhs(work_rho); + pb::GridFunc rhs(work_rho); rhs *= (4. 
* M_PI); poisson_solver_->solve(*Poisson::vh_, rhs); - // } - // else - // { - // poisson_solver_->solve(*Poisson::vh_, work_rho); - // } - - double residual_reduction = poisson_solver_->getResidualReduction(); - double final_residual = poisson_solver_->getFinalResidual(); + + const double residual_reduction = poisson_solver_->getResidualReduction(); + const double final_residual = poisson_solver_->getFinalResidual(); const bool large_residual = (residual_reduction > 1.e-3 || final_residual > 1.e-3); if (onpe0 && (large_residual || ct.verbose > 1)) - (*MPIdata::sout) << setprecision(2) << scientific + (*MPIdata::sout) << std::setprecision(2) << std::scientific << "Hartree_CG: residual reduction = " << residual_reduction - << ", final residual = " << final_residual << endl; + << ", final residual = " << final_residual + << std::endl; Poisson::Int_vhrho_ = vel * Poisson::vh_->gdot(rho); Poisson::Int_vhrhoc_ = vel * Poisson::vh_->gdot(rhoc); PoissonInterface::poisson_tm_.stop(); - assert(residual_reduction == residual_reduction); + assert(!std::isnan(residual_reduction)); } -template class Hartree_CG>; -// template class Hartree_CG >; -template class Hartree_CG>; -// template class Hartree_CG >; -template class Hartree_CG>; -// template class Hartree_CG >; -template class Hartree_CG>; -// template class Hartree_CG >; -template class Hartree_CG>; -// template class Hartree_CG >; -template class Hartree_CG>; -// template class Hartree_CG >; +template class Hartree_CG, double, float>; +template class Hartree_CG, double, float>; +template class Hartree_CG, double, float>; +template class Hartree_CG, double, float>; +template class Hartree_CG, double, float>; +template class Hartree_CG, double, float>; + +template class Hartree_CG, double, double>; +template class Hartree_CG, double, double>; +template class Hartree_CG, double, double>; +template class Hartree_CG, double, double>; +template class Hartree_CG, double, double>; +template class Hartree_CG, double, double>; 
diff --git a/src/Hartree_CG.h b/src/Hartree_CG.h index 78e9da65..e1a59657 100644 --- a/src/Hartree_CG.h +++ b/src/Hartree_CG.h @@ -7,28 +7,33 @@ // This file is part of MGmol. For details, see https://github.com/llnl/mgmol. // Please also read this link https://github.com/llnl/mgmol/LICENSE -#ifndef _HARTREE_CG_H_ -#define _HARTREE_CG_H_ +#ifndef MGMOL_HARTREE_CG_H +#define MGMOL_HARTREE_CG_H #include "PCGSolver.h" #include "Poisson.h" -template +#include + +template class Hartree_CG : public Poisson { private: - PCGSolver* poisson_solver_; + std::shared_ptr> + poisson_solver_; public: // Constructor Hartree_CG(const pb::Grid& grid, const short bc[3]) : Poisson(grid, bc) { - T oper(Poisson::grid_); - poisson_solver_ = new PCGSolver(oper, bc[0], bc[1], bc[2]); + OperatorType oper(Poisson::grid_); + poisson_solver_ + = std::make_shared>( + oper, bc[0], bc[1], bc[2]); }; // Destructor - ~Hartree_CG() override { delete poisson_solver_; } + ~Hartree_CG() override {} void setup(const short nu1, const short nu2, const short max_sweeps, const double tol, const short max_nlevels, @@ -38,8 +43,8 @@ class Hartree_CG : public Poisson poisson_solver_->setup(nu1, nu2, max_sweeps, tol, max_nlevels); } - void solve(const pb::GridFunc& rho, - const pb::GridFunc& rhoc) override; + void solve(const pb::GridFunc& rho, + const pb::GridFunc& rhoc) override; }; #endif diff --git a/src/PCGSolver.cc b/src/PCGSolver.cc index b3bd7ec3..7a5fd157 100644 --- a/src/PCGSolver.cc +++ b/src/PCGSolver.cc @@ -12,8 +12,8 @@ #include #include -template -void PCGSolver::clear() +template +void PCGSolver::clear() { for (short i = 0; i < (short)precond_oper_.size(); i++) { @@ -34,7 +34,7 @@ void PCGSolver::clear() assert(gf_newv_[i] != nullptr); delete gf_newv_[i]; } - // delete grids after pb::GridFunc objects since those + // delete grids after pb::GridFunc objects since those // have data members references to grids for (short i = 0; i < (short)grid_.size(); i++) { @@ -47,10 +47,10 @@ void 
PCGSolver::clear() gf_newv_.clear(); } -template -void PCGSolver::setupPrecon() +template +void PCGSolver::setupPrecon() { - // check if precon is already setup + // check if preconditioner is already setup // Assumes operator does not change, hence // a single setup is sufficient if (is_precond_setup_) return; @@ -60,13 +60,12 @@ void PCGSolver::setupPrecon() grid_.push_back(mygrid); const short nghosts = mygrid->ghost_pt(); - pb::Lap* myoper - = LapFactory::createLap(*grid_[0], lap_type_); + pb::Lap* myoper + = LapFactory::createLap(*grid_[0], precond_lap_type_); precond_oper_.push_back(myoper); - pb::GridFunc* gf_work - = new pb::GridFunc( - *grid_[0], bc_[0], bc_[1], bc_[2]); + pb::GridFunc* gf_work + = new pb::GridFunc(*grid_[0], bc_[0], bc_[1], bc_[2]); gf_work_.push_back(gf_work); // coarse levels @@ -89,20 +88,20 @@ void PCGSolver::setupPrecon() pb::Grid* coarse_grid = new pb::Grid(mygrid->coarse_grid()); grid_.push_back(coarse_grid); - pb::Lap* myoper - = LapFactory::createLap(*coarse_grid, 1); + pb::Lap* myoper + = LapFactory::createLap(*coarse_grid, 1); precond_oper_.push_back(myoper); - gf_work = new pb::GridFunc( + gf_work = new pb::GridFunc( *coarse_grid, bc_[0], bc_[1], bc_[2]); gf_work_.push_back(gf_work); - pb::GridFunc* gf_rcoarse - = new pb::GridFunc( + pb::GridFunc* gf_rcoarse + = new pb::GridFunc( *coarse_grid, bc_[0], bc_[1], bc_[2]); gf_rcoarse_.push_back(gf_rcoarse); - pb::GridFunc* gf_newv - = new pb::GridFunc( + pb::GridFunc* gf_newv + = new pb::GridFunc( *coarse_grid, bc_[0], bc_[1], bc_[2]); gf_newv_.push_back(gf_newv); @@ -112,10 +111,10 @@ void PCGSolver::setupPrecon() } // MG V-cycle with no mask -template -void PCGSolver::preconSolve( - pb::GridFunc& gf_v, - const pb::GridFunc& gf_f, const short level) +template +void PCGSolver::preconSolve( + pb::GridFunc& gf_v, + const pb::GridFunc& gf_f, const short level) { //(*MPIdata::sout)<<"Preconditioning::mg() at level "<::preconSolve( ncycl = 4 > (nu1_ + nu2_) ? 
4 : (nu1_ + nu2_); } - pb::Lap* myoper = precond_oper_[level]; + pb::Lap* myoper = precond_oper_[level]; - // SMOOTHING + // pre-smoothing for (short it = 0; it < ncycl; it++) { myoper->jacobi(gf_v, gf_f, *gf_work_[level]); @@ -137,11 +136,11 @@ void PCGSolver::preconSolve( // COARSE GRID CORRECTION // restrictions - pb::GridFunc* rcoarse = gf_rcoarse_[level]; + pb::GridFunc* rcoarse = gf_rcoarse_[level]; gf_work_[level]->restrict3D(*rcoarse); // storage functions for coarse grid - pb::GridFunc* newv = gf_newv_[level]; + pb::GridFunc* newv = gf_newv_[level]; // call mgrid solver on a coarser level newv->resetData(); @@ -161,28 +160,29 @@ void PCGSolver::preconSolve( } // Left Preconditioned CG -template -bool PCGSolver::solve( - pb::GridFunc& gf_phi, const pb::GridFunc& gf_rhs) +template +bool PCGSolver::solve( + pb::GridFunc& gf_phi, + const pb::GridFunc& gf_rhs) { bool converged = false; const pb::Grid& finegrid = gf_phi.grid(); // initial data and residual - We assume a nonzero initial guess - pb::GridFunc lhs(finegrid, bc_[0], bc_[1], bc_[2]); + pb::GridFunc lhs(finegrid, bc_[0], bc_[1], bc_[2]); // scale initial guess with epsilon oper_.inv_transform(gf_phi); // compute initial residual: r := b - Ax /* compute Ax */ oper_.apply(gf_phi, lhs); - /* set r = b */ - pb::GridFunc res(gf_rhs); + // set r = b + pb::GridFunc res(gf_rhs); oper_.transform(res); - /* compute r = r - Ax */ + // compute r = r - Ax res -= lhs; - double init_rnorm = res.norm2(); - assert(init_rnorm == init_rnorm); + const double init_rnorm = res.norm2(); + assert(!std::isnan(init_rnorm)); // cout<<"init_rnorm="<::solve( double rnorm = init_rnorm; - /* preconditioned residual as type POISSONPRECONDTYPE */ - pb::GridFunc prec_z(finegrid, bc_[0], bc_[1], bc_[2]); - pb::GridFunc prec_res(res); + /* preconditioned residual as type PrecondDataType */ + pb::GridFunc prec_z(finegrid, bc_[0], bc_[1], bc_[2]); + pb::GridFunc prec_res(res); /* preconditioning step */ prec_z.setValues(0.); 
preconSolve(prec_z, prec_res, 0); - pb::GridFunc z(prec_z); + pb::GridFunc z(prec_z); // conjugate vectors - pb::GridFunc p(prec_z); - pb::GridFunc ap(p.grid(), bc_[0], bc_[1], bc_[2]); + pb::GridFunc p(prec_z); + pb::GridFunc ap(p.grid(), bc_[0], bc_[1], bc_[2]); double rtz = res.gdot(z); @@ -213,7 +213,7 @@ bool PCGSolver::solve( double ptap = p.gdot(ap); double alp = rtz / ptap; - assert(alp == alp); + assert(!std::isnan(alp)); // update solution gf_phi.axpy(alp, p); @@ -248,15 +248,14 @@ bool PCGSolver::solve( return converged; } -// Left Preconditioned CG -template -bool PCGSolver::solve( - ScalarType* phi, ScalarType* rhs, const char dis) +template +bool PCGSolver::solve( + ScalarDataType* phi, ScalarDataType* rhs, const char dis) { - pb::GridFunc gf_phi(oper_.grid(), bc_[0], bc_[1], bc_[2]); + pb::GridFunc gf_phi(oper_.grid(), bc_[0], bc_[1], bc_[2]); gf_phi.assign(phi, dis); - pb::GridFunc gf_work(oper_.grid(), bc_[0], bc_[1], bc_[2]); + pb::GridFunc gf_work(oper_.grid(), bc_[0], bc_[1], bc_[2]); gf_work.assign(rhs, dis); bool converged = solve(gf_phi, gf_work); @@ -266,15 +265,21 @@ bool PCGSolver::solve( return converged; } -template class PCGSolver, double>; -template class PCGSolver, float>; -template class PCGSolver, double>; -template class PCGSolver, float>; -template class PCGSolver, double>; -template class PCGSolver, float>; -template class PCGSolver, double>; -template class PCGSolver, float>; -template class PCGSolver, double>; -template class PCGSolver, float>; -template class PCGSolver, double>; -template class PCGSolver, float>; +template class PCGSolver, double, double>; +template class PCGSolver, double, float>; +template class PCGSolver, float, float>; +template class PCGSolver, double, double>; +template class PCGSolver, double, float>; +template class PCGSolver, float, float>; +template class PCGSolver, double, double>; +template class PCGSolver, double, float>; +template class PCGSolver, float, float>; +template class PCGSolver, double, 
double>; +template class PCGSolver, double, float>; +template class PCGSolver, float, float>; +template class PCGSolver, double, double>; +template class PCGSolver, double, float>; +template class PCGSolver, float, float>; +template class PCGSolver, double, double>; +template class PCGSolver, double, float>; +template class PCGSolver, float, float>; diff --git a/src/PCGSolver.h b/src/PCGSolver.h index 04e1028f..0782775a 100644 --- a/src/PCGSolver.h +++ b/src/PCGSolver.h @@ -17,23 +17,23 @@ #include -template +template class PCGSolver { private: std::vector grid_; - short lap_type_; + short precond_lap_type_; short bc_[3]; bool fully_periodic_; // operator to solve for - T oper_; + OperatorType oper_; // preconditioner operator for each MG level - std::vector*> precond_oper_; - std::vector*> gf_work_; - std::vector*> gf_rcoarse_; - std::vector*> gf_newv_; + std::vector*> precond_oper_; + std::vector*> gf_work_; + std::vector*> gf_rcoarse_; + std::vector*> gf_newv_; // solver parameters int maxiters_; @@ -48,23 +48,24 @@ class PCGSolver short nlevels_; bool is_precond_setup_; - void preconSolve(pb::GridFunc& gf_v, - const pb::GridFunc& gf_f, const short level = 0); + void preconSolve(pb::GridFunc& gf_v, + const pb::GridFunc& gf_f, const short level = 0); void setupPrecon(); void clear(); public: - PCGSolver(T& oper, const short px, const short py, const short pz) - : oper_(oper) + PCGSolver( + OperatorType& oper, const short px, const short py, const short pz) + : oper_(oper), + maxiters_(10), + tol_(1.e-16), + final_residual_(-1.), + residual_reduction_(-1.), + nu1_(2), + nu2_(2), + max_nlevels_(10), + is_precond_setup_(false) { - maxiters_ = 10; // default - nu1_ = 2; // default - nu2_ = 2; // default - tol_ = 1.e-16; - max_nlevels_ = 10; - final_residual_ = -1.; - residual_reduction_ = -1.; - // boundary conditions bc_[0] = px; bc_[1] = py; @@ -72,8 +73,7 @@ class PCGSolver fully_periodic_ = ((bc_[0] == 1) && (bc_[1] == 1) && (bc_[2] == 1)); Control& ct = 
*(Control::instance()); - lap_type_ = ct.lap_type; - is_precond_setup_ = false; + precond_lap_type_ = ct.lap_type; }; void setup(const short nu1, const short nu2, const short max_sweeps, @@ -87,10 +87,13 @@ class PCGSolver setupPrecon(); } - bool solve(pb::GridFunc& gf_phi, - const pb::GridFunc& gf_rhs); + bool solve(pb::GridFunc& gf_phi, + const pb::GridFunc& gf_rhs); - bool solve(ScalarType* phi, ScalarType* rhs, const char dis); + /*! + * Interface for raw pointers + */ + bool solve(ScalarDataType* phi, ScalarDataType* rhs, const char dis); double getFinalResidual() const { return final_residual_; } double getResidualReduction() const { return residual_reduction_; } diff --git a/src/Poisson.h b/src/Poisson.h index ba5daa39..b64c0301 100644 --- a/src/Poisson.h +++ b/src/Poisson.h @@ -7,9 +7,8 @@ // This file is part of MGmol. For details, see https://github.com/llnl/mgmol. // Please also read this link https://github.com/llnl/mgmol/LICENSE -// $Id$ -#ifndef included_Poisson -#define included_Poisson +#ifndef MGMOL_included_Poisson +#define MGMOL_included_Poisson #include "PoissonInterface.h" @@ -24,8 +23,6 @@ class Poisson : public PoissonInterface { protected: - // static Timer poisson_tm_; - const pb::Grid& grid_; pb::GridFunc* vh_; @@ -50,7 +47,7 @@ class Poisson : public PoissonInterface }; // Destructor - ~Poisson() override { delete vh_; }; + virtual ~Poisson() override { delete vh_; }; virtual void setup(const short nu1, const short nu2, const short max_sweeps, const double tol, const short max_nlevels, diff --git a/src/PoissonSolverFactory.h b/src/PoissonSolverFactory.h index 91b99bf2..879dfc8a 100644 --- a/src/PoissonSolverFactory.h +++ b/src/PoissonSolverFactory.h @@ -12,24 +12,79 @@ #include "Control.h" #include "Hartree.h" #include "Hartree_CG.h" -#include "Mesh.h" -#include "PBdiel.h" -#include "PBdiel_CG.h" -#include "ShiftedHartree.h" -#include "mputils.h" - -#include "GridFunc.h" #include "Laph2.h" #include "Laph4.h" #include "Laph4M.h" #include 
"Laph4MP.h" #include "Laph6.h" #include "Laph8.h" +#include "MGmol_MPI.h" +#include "Mesh.h" +#include "PBdiel.h" +#include "PBdiel_CG.h" +#include "ShiftedHartree.h" #include "ShiftedLaph4M.h" -class PoissonSolverFactory +/*! + * Create Hartree_CG solver templated on data type and preconditioner data type + */ +template +Poisson* createHartreeCG( + PoissonFDtype lap_type, const pb::Grid& myGrid, const short bc[3]) { + MGmol_MPI& mmpi = *(MGmol_MPI::instance()); + if (mmpi.instancePE0()) + { + std::cout << "create HartreeCG with precision " + << 8 * sizeof(ScalarType) << std::endl; + std::cout << "HartreeCG with preconditioner in precision " + << 8 * sizeof(PDataType) << std::endl; + } + Poisson* poisson_solver = nullptr; + + switch (lap_type) + { + case PoissonFDtype::h4M: + poisson_solver + = new Hartree_CG, ScalarType, PDataType>( + myGrid, bc); + break; + case PoissonFDtype::h2: + poisson_solver + = new Hartree_CG, ScalarType, PDataType>( + myGrid, bc); + break; + case PoissonFDtype::h4: + poisson_solver + = new Hartree_CG, ScalarType, PDataType>( + myGrid, bc); + break; + case PoissonFDtype::h6: + poisson_solver + = new Hartree_CG, ScalarType, PDataType>( + myGrid, bc); + break; + case PoissonFDtype::h8: + poisson_solver + = new Hartree_CG, ScalarType, PDataType>( + myGrid, bc); + break; + case PoissonFDtype::h4MP: + poisson_solver = new Hartree_CG, ScalarType, + PDataType>(myGrid, bc); + break; + default: + std::cerr << "createHartreeCG(), Undefined option: " + << static_cast(lap_type) << std::endl; + } + return poisson_solver; +} +/*! + * Main factory + */ +class PoissonSolverFactory +{ public: /*! 
* return specific Poisson solver needed to solve Hartree problem @@ -48,13 +103,13 @@ class PoissonSolverFactory { case PoissonFDtype::h4M: poisson_solver - = new ShiftedHartree>( + = new ShiftedHartree>( myGrid, bc, screening_const); break; default: - (*MPIdata::sout) - << "Electrostatic, shifted, Undefined option: " - << static_cast(lap_type) << std::endl; + std::cerr << "PoissonSolverFactory, shifted, Undefined " + "option: " + << static_cast(lap_type) << std::endl; } } else @@ -63,32 +118,31 @@ class PoissonSolverFactory { case PoissonFDtype::h4M: poisson_solver - = new Hartree>(myGrid, bc); + = new Hartree>(myGrid, bc); break; case PoissonFDtype::h2: poisson_solver - = new Hartree>(myGrid, bc); + = new Hartree>(myGrid, bc); break; case PoissonFDtype::h4: poisson_solver - = new Hartree>(myGrid, bc); + = new Hartree>(myGrid, bc); break; case PoissonFDtype::h6: poisson_solver - = new Hartree>(myGrid, bc); + = new Hartree>(myGrid, bc); break; case PoissonFDtype::h8: poisson_solver - = new Hartree>(myGrid, bc); + = new Hartree>(myGrid, bc); break; case PoissonFDtype::h4MP: poisson_solver - = new Hartree>(myGrid, bc); + = new Hartree>(myGrid, bc); break; default: - (*MPIdata::sout) - << "Electrostatic, Undefined option: " - << static_cast(lap_type) << std::endl; + std::cerr << "PoissonSolverFactory, Undefined option: " + << static_cast(lap_type) << std::endl; } } } @@ -100,47 +154,30 @@ class PoissonSolverFactory { case PoissonFDtype::h4M: poisson_solver - = new ShiftedHartree>( + = new ShiftedHartree>( myGrid, bc, screening_const); break; default: - (*MPIdata::sout) - << "PCG Electrostatic, shifted, Undefined option: " + std::cerr + << "PoissonSolverFactory, with screening_const, " + "Undefined option: " << static_cast(lap_type) << std::endl; } } else { - switch (lap_type) + const short precision = ct.poisson_pc_data_; + if (precision == 32) + poisson_solver + = createHartreeCG(lap_type, myGrid, bc); + else if (precision == 64) + poisson_solver + = 
createHartreeCG(lap_type, myGrid, bc); + else { - case PoissonFDtype::h4M: - poisson_solver - = new Hartree_CG>(myGrid, bc); - break; - case PoissonFDtype::h2: - poisson_solver - = new Hartree_CG>(myGrid, bc); - break; - case PoissonFDtype::h4: - poisson_solver - = new Hartree_CG>(myGrid, bc); - break; - case PoissonFDtype::h6: - poisson_solver - = new Hartree_CG>(myGrid, bc); - break; - case PoissonFDtype::h8: - poisson_solver - = new Hartree_CG>(myGrid, bc); - break; - case PoissonFDtype::h4MP: - poisson_solver - = new Hartree_CG>(myGrid, bc); - break; - default: - (*MPIdata::sout) - << "PCG Electrostatic, Undefined option: " - << static_cast(lap_type) << std::endl; + std::cerr + << "PoissonSolverFactory: Unknown precision option " + << precision << std::endl; } } } @@ -160,32 +197,31 @@ class PoissonSolverFactory switch (lap_type) { case PoissonFDtype::h4M: - poisson_solver = new PBdiel>( + poisson_solver = new PBdiel>( pbGrid, bc, e0, rho0, drho0); break; case PoissonFDtype::h2: - poisson_solver = new PBdiel>( + poisson_solver = new PBdiel>( pbGrid, bc, e0, rho0, drho0); break; case PoissonFDtype::h4: - poisson_solver = new PBdiel>( + poisson_solver = new PBdiel>( pbGrid, bc, e0, rho0, drho0); break; case PoissonFDtype::h6: - poisson_solver = new PBdiel>( + poisson_solver = new PBdiel>( pbGrid, bc, e0, rho0, drho0); break; case PoissonFDtype::h8: - poisson_solver = new PBdiel>( + poisson_solver = new PBdiel>( pbGrid, bc, e0, rho0, drho0); break; case PoissonFDtype::h4MP: - poisson_solver = new PBdiel>( + poisson_solver = new PBdiel>( pbGrid, bc, e0, rho0, drho0); break; default: - (*MPIdata::sout) - << "Electrostatic, Undefined option" << std::endl; + std::cerr << "createDiel(), Undefined option" << std::endl; } } else // use PCG for Poisson Solver @@ -193,32 +229,31 @@ class PoissonSolverFactory switch (lap_type) { case PoissonFDtype::h4M: - poisson_solver = new PBdiel_CG>( + poisson_solver = new PBdiel_CG>( pbGrid, bc, e0, rho0, drho0); break; case 
PoissonFDtype::h2: - poisson_solver = new PBdiel_CG>( + poisson_solver = new PBdiel_CG>( pbGrid, bc, e0, rho0, drho0); break; case PoissonFDtype::h4: - poisson_solver = new PBdiel_CG>( + poisson_solver = new PBdiel_CG>( pbGrid, bc, e0, rho0, drho0); break; case PoissonFDtype::h6: - poisson_solver = new PBdiel_CG>( + poisson_solver = new PBdiel_CG>( pbGrid, bc, e0, rho0, drho0); break; case PoissonFDtype::h8: - poisson_solver = new PBdiel_CG>( + poisson_solver = new PBdiel_CG>( pbGrid, bc, e0, rho0, drho0); break; case PoissonFDtype::h4MP: - poisson_solver = new PBdiel_CG>( + poisson_solver = new PBdiel_CG>( pbGrid, bc, e0, rho0, drho0); break; default: - (*MPIdata::sout) - << "Electrostatic, Undefined option" << std::endl; + std::cerr << "createDiel(), Undefined option" << std::endl; } } return poisson_solver; diff --git a/src/global.h b/src/global.h index 54353f24..8af7da35 100644 --- a/src/global.h +++ b/src/global.h @@ -34,6 +34,4 @@ typedef double POTDTYPE; typedef double KBPROJDTYPE; -typedef float POISSONPRECONDTYPE; - #endif diff --git a/src/read_config.cc b/src/read_config.cc index 9e9bad39..4d520b74 100644 --- a/src/read_config.cc +++ b/src/read_config.cc @@ -96,7 +96,9 @@ int read_config(int argc, char** argv, po::variables_map& vm, po::value()->default_value("periodic"), "boundary condition z")("Poisson.diel", po::value()->default_value("off"), - "continuum solvent: on/off")("Run.type", + "continuum solvent: on/off")("Poisson.precond_precision", + po::value()->default_value(32), + "Precision for Poisson Preconditioner")("Run.type", po::value()->default_value("QUENCH"), "Run type")( "Quench.solver", po::value()->default_value("ABPG"), "Iterative solver for quench")("Quench.max_steps", From 86fdec69887cbb86f9810010dbfcfea250086c8e Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Thu, 27 Nov 2025 19:59:23 -0500 Subject: [PATCH 88/99] Preconditioning precision (#388) * Redesign OrbitalsPreconditioning * Make orbitals preconditioner precision a runtime 
option --- src/CMakeLists.txt | 2 +- src/Control.cc | 16 ++-- src/Control.h | 2 + ...ioning.cc => MGOrbitalsPreconditioning.cc} | 38 +++++---- src/MGOrbitalsPreconditioning.h | 79 +++++++++++++++++++ src/MGmol.cc | 36 +++++++-- src/MGmol.h | 5 +- src/OrbitalsPreconditioning.h | 66 +++------------- src/Preconditioning.cc | 1 + src/global.h | 2 - src/md.cc | 1 - src/quench.cc | 28 +++++-- src/read_config.cc | 4 +- tests/SiH4/mgmol.cfg | 1 + 14 files changed, 185 insertions(+), 96 deletions(-) rename src/{OrbitalsPreconditioning.cc => MGOrbitalsPreconditioning.cc} (78%) create mode 100644 src/MGOrbitalsPreconditioning.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 27caa790..21f02177 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -44,7 +44,7 @@ set(SOURCES HamiltonianMVP_DMStrategy.cc MVPSolver.cc HamiltonianMVPSolver.cc - OrbitalsPreconditioning.cc + MGOrbitalsPreconditioning.cc DFTsolver.cc NonOrthoDMStrategy.cc FullyOccupiedNonOrthoDMStrategy.cc diff --git a/src/Control.cc b/src/Control.cc index 59858e67..a76abb03 100644 --- a/src/Control.cc +++ b/src/Control.cc @@ -263,7 +263,8 @@ void Control::print(std::ostream& os) os << " Localization radius = " << cut_radius << std::endl; os << std::endl; - os << " preconditioner factor:" << precond_factor << std::endl; + os << " preconditioner factor: " << precond_factor << std::endl; + os << " preconditioner precision: " << precond_precision_ << std::endl; if (precond_type_ == 10) { os << " Multigrid preconditioning for wave functions:" << std::endl; @@ -332,7 +333,7 @@ void Control::sync(void) if (onpe0 && verbose > 0) (*MPIdata::sout) << "Control::sync()" << std::endl; // pack - const short size_short_buffer = 92; + const short size_short_buffer = 93; short* short_buffer = new short[size_short_buffer]; if (mype_ == 0) { @@ -423,6 +424,7 @@ void Control::sync(void) short_buffer[89] = MD_last_step_; short_buffer[90] = (short)static_cast(poisson_lap_type_); short_buffer[91] = poisson_pc_data_; + 
short_buffer[92] = precond_precision_; } else { @@ -635,8 +637,9 @@ void Control::sync(void) max_electronic_steps_tight_ = short_buffer[86]; hartree_reset_ = short_buffer[88]; MD_last_step_ = short_buffer[89]; - poisson_lap_type_ = static_cast(short_buffer[90]); - poisson_pc_data_ = short_buffer[91]; + poisson_lap_type_ = static_cast(short_buffer[90]); + poisson_pc_data_ = short_buffer[91]; + precond_precision_ = short_buffer[92]; numst = int_buffer[0]; nel_ = int_buffer[1]; @@ -1479,8 +1482,9 @@ void Control::setOptions(const boost::program_options::variables_map& vm) std::cout << "Outer solver type: " << str << std::endl; assert(it_algo_type_ >= 0); - mg_levels_ = vm["Quench.preconditioner_num_levels"].as() - 1; - precond_factor = vm["Quench.step_length"].as(); + mg_levels_ = vm["Quench.preconditioner_num_levels"].as() - 1; + precond_precision_ = vm["Quench.preconditioner_precision"].as(); + precond_factor = vm["Quench.step_length"].as(); if (precond_factor < 0.) { switch (lap_type) diff --git a/src/Control.h b/src/Control.h index 9d83a984..c0b28d31 100644 --- a/src/Control.h +++ b/src/Control.h @@ -414,6 +414,8 @@ class Control short orthof; // orthogonalization frequency + short precond_precision_; + // screening constant for potential mixing float screening_const; diff --git a/src/OrbitalsPreconditioning.cc b/src/MGOrbitalsPreconditioning.cc similarity index 78% rename from src/OrbitalsPreconditioning.cc rename to src/MGOrbitalsPreconditioning.cc index 7b63cbed..a12df271 100644 --- a/src/OrbitalsPreconditioning.cc +++ b/src/MGOrbitalsPreconditioning.cc @@ -7,7 +7,7 @@ // This file is part of MGmol. For details, see https://github.com/llnl/mgmol. 
// Please also read this link https://github.com/llnl/mgmol/LICENSE -#include "OrbitalsPreconditioning.h" +#include "MGOrbitalsPreconditioning.h" #include "Control.h" #include "ExtendedGridOrbitals.h" @@ -20,27 +20,30 @@ #include "ProjectedMatricesInterface.h" template -OrbitalsPreconditioning::~OrbitalsPreconditioning() +MGOrbitalsPreconditioning::MGOrbitalsPreconditioning( + const short mg_levels, const short lap_type) + : mg_levels_(mg_levels), lap_type_(lap_type), is_set_(false){}; + +template +MGOrbitalsPreconditioning::~MGOrbitalsPreconditioning() { assert(is_set_); assert(precond_); } template -void OrbitalsPreconditioning::setup( - OrbitalsType& orbitals, const short mg_levels, const short lap_type, - MasksSet* currentMasks, const std::shared_ptr& lrs) +void MGOrbitalsPreconditioning::setup( + OrbitalsType& orbitals, MasksSet* currentMasks, + const std::shared_ptr& lrs) { assert(!is_set_); - lap_type_ = lap_type; - Control& ct(*(Control::instance())); Mesh* mymesh = Mesh::instance(); const pb::Grid& mygrid(mymesh->grid()); precond_ = std::make_shared>( - lap_type, mg_levels, mygrid, ct.bcWF); + lap_type_, mg_levels_, mygrid, ct.bcWF); if (currentMasks != nullptr) { @@ -78,7 +81,7 @@ void OrbitalsPreconditioning::setup( } template -void OrbitalsPreconditioning::precond_mg( +void MGOrbitalsPreconditioning::precond( OrbitalsType& orbitals) { assert(is_set_); @@ -127,15 +130,16 @@ void OrbitalsPreconditioning::precond_mg( #ifdef PRINT_OPERATIONS if (onpe0) - (*MPIdata::sout) << "OrbitalsPreconditioning::" - "precond_mg() done" - << endl; + (*MPIdata::sout) + << "MGOrbitalsPreconditioning::" + "precond_mg() done" + << endl; #endif precond_tm_.stop(); } template -void OrbitalsPreconditioning::setGamma( +void MGOrbitalsPreconditioning::setGamma( const pb::Lap& lapOper, const Potentials& pot, const short mg_levels, ProjectedMatricesInterface* proj_matrices) { @@ -163,11 +167,13 @@ void OrbitalsPreconditioning::setGamma( } template -void 
OrbitalsPreconditioning::printTimers( +void MGOrbitalsPreconditioning::printTimers( std::ostream& os) { precond_tm_.print(os); } -template class OrbitalsPreconditioning; -template class OrbitalsPreconditioning; +template class MGOrbitalsPreconditioning; +template class MGOrbitalsPreconditioning; +template class MGOrbitalsPreconditioning; +template class MGOrbitalsPreconditioning; diff --git a/src/MGOrbitalsPreconditioning.h b/src/MGOrbitalsPreconditioning.h new file mode 100644 index 00000000..87ffa3a9 --- /dev/null +++ b/src/MGOrbitalsPreconditioning.h @@ -0,0 +1,79 @@ +// Copyright (c) 2017, Lawrence Livermore National Security, LLC and +// UT-Battelle, LLC. +// Produced at the Lawrence Livermore National Laboratory and the Oak Ridge +// National Laboratory. +// LLNL-CODE-743438 +// All rights reserved. +// This file is part of MGmol. For details, see https://github.com/llnl/mgmol. +// Please also read this link https://github.com/llnl/mgmol/LICENSE + +#ifndef MGMOL_MGOrbitalsPreconditioning_H +#define MGMOL_MGOrbitalsPreconditioning_H + +#include "GridFuncVector.h" +#include "Lap.h" +#include "Map2Masks.h" +#include "OrbitalsPreconditioning.h" +#include "Preconditioning.h" + +#include + +// class Masks4Orbitals; +// class MasksSet; +class ProjectedMatricesInterface; +class Potentials; +// class LocalizationRegions; + +template +class MGOrbitalsPreconditioning : public OrbitalsPreconditioning +{ +private: +#ifdef HAVE_MAGMA + using memory_space_type = MemorySpace::Device; +#else + using memory_space_type = MemorySpace::Host; +#endif + + std::shared_ptr> precond_; + + // work arrays with preconditioner precision + std::shared_ptr> + gfv_work1_; + std::shared_ptr> + gfv_work2_; + + // tmp work array for case ORBDTYPE!=PDataType + std::shared_ptr> gfv_work3_; + + short mg_levels_; + + short lap_type_; + + bool is_set_; + + // coefficient for preconditioning + double gamma_; + + // timers + static Timer precond_tm_; + + std::shared_ptr map2masks_; + +public: + 
MGOrbitalsPreconditioning(const short mg_levels, const short lap_type); + + ~MGOrbitalsPreconditioning(); + + void setup(OrbitalsType& orbitals, MasksSet*, + const std::shared_ptr&) override; + void precond(OrbitalsType& orbitals) override; + void setGamma(const pb::Lap& lapOper, const Potentials& pot, + const short mg_levels, ProjectedMatricesInterface* proj_matrices); + static void printTimers(std::ostream& os); +}; + +template +Timer MGOrbitalsPreconditioning::precond_tm_( + "MGOrbitalsPreconditioning::precond"); + +#endif diff --git a/src/MGmol.cc b/src/MGmol.cc index 44ee6bf2..4275b290 100644 --- a/src/MGmol.cc +++ b/src/MGmol.cc @@ -42,6 +42,7 @@ #include "LocalMatrices2ReplicatedMatrix.h" #include "LocalizationRegions.h" #include "MDfiles.h" +#include "MGOrbitalsPreconditioning.h" #include "MGkernels.h" #include "MGmol.h" #include "MLWFTransform.h" @@ -49,7 +50,6 @@ #include "MVPSolver.h" #include "MasksSet.h" #include "Mesh.h" -#include "OrbitalsPreconditioning.h" #include "PackedCommunicationBuffer.h" #include "PoissonInterface.h" #include "Potentials.h" @@ -960,7 +960,8 @@ void MGmol::printTimers() ChebyshevApproximation< dist_matrix::DistMatrix>::printTimers(os_); } - OrbitalsPreconditioning::printTimers(os_); + MGOrbitalsPreconditioning::printTimers(os_); + MGOrbitalsPreconditioning::printTimers(os_); MDfiles::printTimers(os_); ChebyshevApproximationInterface::printTimers(os_); } @@ -1180,10 +1181,35 @@ void MGmol::precond_mg(OrbitalsType& phi) Potentials& pot = hamiltonian_->potential(); pb::Lap* lapOper = hamiltonian_->lapOper(); - orbitals_precond_->setGamma( - *lapOper, pot, ct.getMGlevels(), proj_matrices_.get()); + const short precision = ct.precond_precision_; + if (precision == 32) + { + using OrbitalsPrecond = MGOrbitalsPreconditioning; + + std::shared_ptr orbitals_precond + = std::dynamic_pointer_cast(orbitals_precond_); + + orbitals_precond->setGamma( + *lapOper, pot, ct.getMGlevels(), proj_matrices_.get()); + } + else if (precision == 64) + 
{ + using OrbitalsPrecond = MGOrbitalsPreconditioning; + + std::shared_ptr orbitals_precond + = std::dynamic_pointer_cast(orbitals_precond_); + + orbitals_precond->setGamma( + *lapOper, pot, ct.getMGlevels(), proj_matrices_.get()); + } + else + { + std::cerr << "Precision " << precision + << " not supported for orbitals preconditioner!!!" + << std::endl; + } - orbitals_precond_->precond_mg(phi); + orbitals_precond_->precond(phi); } template diff --git a/src/MGmol.h b/src/MGmol.h index 9cdc345d..2975fe42 100644 --- a/src/MGmol.h +++ b/src/MGmol.h @@ -46,9 +46,9 @@ class IonicAlgorithm; #include "Forces.h" #include "Ions.h" #include "LocGridOrbitals.h" +#include "MGOrbitalsPreconditioning.h" #include "MGmolInterface.h" #include "OrbitalsExtrapolation.h" -#include "OrbitalsPreconditioning.h" #include "Rho.h" #include "SpreadPenaltyInterface.h" #include "SpreadsAndCenters.h" @@ -100,8 +100,7 @@ class MGmol : public MGmolInterface std::shared_ptr h5f_file_; - std::shared_ptr> - orbitals_precond_; + std::shared_ptr> orbitals_precond_; double total_energy_; std::shared_ptr constraints_; diff --git a/src/OrbitalsPreconditioning.h b/src/OrbitalsPreconditioning.h index 0f85e11d..0501388c 100644 --- a/src/OrbitalsPreconditioning.h +++ b/src/OrbitalsPreconditioning.h @@ -10,68 +10,22 @@ #ifndef MGMOL_OrbitalsPreconditioning_H #define MGMOL_OrbitalsPreconditioning_H -#include "GridFuncVector.h" -#include "Lap.h" -#include "Map2Masks.h" -#include "Preconditioning.h" +#include "LocalizationRegions.h" +#include "MasksSet.h" -#include - -class Masks4Orbitals; -class MasksSet; -class ProjectedMatricesInterface; -class Potentials; -class LocalizationRegions; - -template +template class OrbitalsPreconditioning { -private: -#ifdef HAVE_MAGMA - using memory_space_type = MemorySpace::Device; -#else - using memory_space_type = MemorySpace::Host; -#endif - - std::shared_ptr> precond_; - - // work arrays with preconditioner precision - std::shared_ptr> - gfv_work1_; - std::shared_ptr> - 
gfv_work2_; - - // tmp work array for case ORBDTYPE!=PDataType - std::shared_ptr> gfv_work3_; - - short lap_type_; - - // coefficient for preconditioning - double gamma_; - - bool is_set_; - - // timers - static Timer precond_tm_; - - std::shared_ptr map2masks_; - public: - OrbitalsPreconditioning() { is_set_ = false; }; + OrbitalsPreconditioning(){}; - ~OrbitalsPreconditioning(); + virtual ~OrbitalsPreconditioning(){}; - void setup(OrbitalsType& orbitals, const short mg_levels, - const short lap_type, MasksSet*, - const std::shared_ptr&); - void precond_mg(OrbitalsType& orbitals); - void setGamma(const pb::Lap& lapOper, const Potentials& pot, - const short mg_levels, ProjectedMatricesInterface* proj_matrices); - static void printTimers(std::ostream& os); -}; + virtual void setup(OrbitalsType& orbitals, MasksSet*, + const std::shared_ptr&) + = 0; -template -Timer OrbitalsPreconditioning::precond_tm_( - "OrbitalsPreconditioning::precond"); + virtual void precond(OrbitalsType& orbitals) = 0; +}; #endif diff --git a/src/Preconditioning.cc b/src/Preconditioning.cc index e92b1e24..495d9703 100644 --- a/src/Preconditioning.cc +++ b/src/Preconditioning.cc @@ -216,3 +216,4 @@ void Preconditioning::mg(pb::GridFuncVector& gfv_v, } template class Preconditioning; +template class Preconditioning; diff --git a/src/global.h b/src/global.h index 8af7da35..a9176b60 100644 --- a/src/global.h +++ b/src/global.h @@ -28,8 +28,6 @@ typedef double RHODTYPE; typedef double MATDTYPE; -typedef float MGPRECONDTYPE; - typedef double POTDTYPE; typedef double KBPROJDTYPE; diff --git a/src/md.cc b/src/md.cc index b34ca8c8..027991c6 100644 --- a/src/md.cc +++ b/src/md.cc @@ -26,7 +26,6 @@ #include "Mesh.h" #include "OrbitalsExtrapolation.h" #include "OrbitalsExtrapolationFactory.h" -#include "OrbitalsPreconditioning.h" #include "Potentials.h" #include "ProjectedMatricesMehrstellen.h" #include "ProjectedMatricesSparse.h" diff --git a/src/quench.cc b/src/quench.cc index 87b3239e..c4b45d2a 100644 
--- a/src/quench.cc +++ b/src/quench.cc @@ -28,11 +28,11 @@ #include "Ions.h" #include "KBPsiMatrixSparse.h" #include "LocalizationRegions.h" +#include "MGOrbitalsPreconditioning.h" #include "MGmol.h" #include "MPIdata.h" #include "MasksSet.h" #include "Mesh.h" -#include "OrbitalsPreconditioning.h" #include "OrbitalsTransform.h" #include "PolakRibiereSolver.h" #include "Potentials.h" @@ -572,10 +572,28 @@ int MGmol::quench(OrbitalsType& orbitals, Ions& ions, applyAOMMprojection(orbitals); } - orbitals_precond_.reset( - new OrbitalsPreconditioning()); - orbitals_precond_->setup( - orbitals, ct.getMGlevels(), ct.lap_type, currentMasks_.get(), lrs_); + const short precision = ct.precond_precision_; + if (precision == 32) + { + orbitals_precond_.reset( + new MGOrbitalsPreconditioning( + ct.getMGlevels(), ct.lap_type)); + } + else if (precision == 64) + { + orbitals_precond_.reset( + new MGOrbitalsPreconditioning( + ct.getMGlevels(), ct.lap_type)); + } + else + { + std::cerr << "Unknown precision option for orbitals preconditioner!!!" 
+ << std::endl; + MGmol_MPI& mmpi = *(MGmol_MPI::instance()); + mmpi.abort(); + } + + orbitals_precond_->setup(orbitals, currentMasks_.get(), lrs_); // solve electronic structure problem // (inner iterations) diff --git a/src/read_config.cc b/src/read_config.cc index 4d520b74..de11dff9 100644 --- a/src/read_config.cc +++ b/src/read_config.cc @@ -120,7 +120,9 @@ int read_config(int argc, char** argv, po::variables_map& vm, "Quench.preconditioner_num_levels", po::value()->default_value(2), "Number of levels for MG preconditioner")( - "Quench.spread_penalty_damping", + "Quench.preconditioner_precision", + po::value()->default_value(32), + "Precision for MG preconditioner")("Quench.spread_penalty_damping", po::value()->default_value(0.), "Spread penalty damping factor")("Quench.spread_penalty_target", po::value()->default_value(2.), diff --git a/tests/SiH4/mgmol.cfg b/tests/SiH4/mgmol.cfg index c6a72cdb..aea52881 100644 --- a/tests/SiH4/mgmol.cfg +++ b/tests/SiH4/mgmol.cfg @@ -20,6 +20,7 @@ type=QUENCH max_steps=45 atol=1.e-9 num_lin_iterations=2 +preconditioner_precision=64 [Orbitals] initial_type=Gaussian initial_width=2. 
From 09a613c91a79eebeb49237e1bff395795340a38d Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Sun, 30 Nov 2025 12:05:47 -0500 Subject: [PATCH 89/99] Template manage_memory on datatype (#389) --- src/MGmol.cc | 2 +- src/manage_memory.cc | 24 ++++++++++++------------ src/manage_memory.h | 2 +- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/MGmol.cc b/src/MGmol.cc index 4275b290..8a7eaf76 100644 --- a/src/MGmol.cc +++ b/src/MGmol.cc @@ -283,7 +283,7 @@ int MGmol::initial() ct.numst, ct.bcWF, proj_matrices_.get(), lrs_, currentMasks_.get(), corrMasks_.get(), local_cluster_.get(), true); - increaseMemorySlotsForOrbitals(); + increaseMemorySlotsForOrbitals(); Potentials& pot = hamiltonian_->potential(); pb::Lap* lapOper = hamiltonian_->lapOper(); diff --git a/src/manage_memory.cc b/src/manage_memory.cc index 94e0b278..f118fd6d 100644 --- a/src/manage_memory.cc +++ b/src/manage_memory.cc @@ -12,7 +12,7 @@ // Increase memory slots in BlockVector as needed based on runtime // options -template +template void increaseMemorySlotsForOrbitals() { Control& ct = *(Control::instance()); @@ -22,18 +22,18 @@ void increaseMemorySlotsForOrbitals() case OuterSolverType::ABPG: { // r_k-1, phi_k-1 - BlockVector::incMaxAllocInstances(2); + BlockVector::incMaxAllocInstances(2); break; } case OuterSolverType::PolakRibiere: { // r_k-1, z_k, z_k-1, p_k - BlockVector::incMaxAllocInstances(4); + BlockVector::incMaxAllocInstances(4); break; } case OuterSolverType::Davidson: { - BlockVector::incMaxAllocInstances(2); + BlockVector::incMaxAllocInstances(2); break; } default: @@ -44,17 +44,17 @@ void increaseMemorySlotsForOrbitals() { case WFExtrapolationType::Reversible: { - BlockVector::incMaxAllocInstances(2); + BlockVector::incMaxAllocInstances(2); break; } case WFExtrapolationType::Order2: { - BlockVector::incMaxAllocInstances(1); + BlockVector::incMaxAllocInstances(1); break; } case WFExtrapolationType::Order3: { - BlockVector::incMaxAllocInstances(2); + 
BlockVector::incMaxAllocInstances(2); break; } default: @@ -62,21 +62,21 @@ void increaseMemorySlotsForOrbitals() } for (short i = 1; i < ct.wf_m; i++) - BlockVector::incMaxAllocInstances(2); + BlockVector::incMaxAllocInstances(2); if (ct.use_kernel_functions) - BlockVector::incMaxAllocInstances(1); + BlockVector::incMaxAllocInstances(1); switch (ct.AtomsDynamic()) { case AtomsDynamicType::LBFGS: - BlockVector::incMaxAllocInstances(1); + BlockVector::incMaxAllocInstances(1); break; default: break; } } -template void increaseMemorySlotsForOrbitals(); +template void increaseMemorySlotsForOrbitals(); #ifdef HAVE_MAGMA -template void increaseMemorySlotsForOrbitals(); +template void increaseMemorySlotsForOrbitals(); #endif diff --git a/src/manage_memory.h b/src/manage_memory.h index 4e4c584c..864fed55 100644 --- a/src/manage_memory.h +++ b/src/manage_memory.h @@ -6,5 +6,5 @@ // All rights reserved. // This file is part of MGmol. For details, see https://github.com/llnl/mgmol. // Please also read this link https://github.com/llnl/mgmol/LICENSE -template +template void increaseMemorySlotsForOrbitals(); From 9c29c4c5bab3d50ba83b24a263214cc1e92e6148 Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Sun, 30 Nov 2025 12:06:19 -0500 Subject: [PATCH 90/99] New DotProduct family of classes (#390) --- src/CMakeLists.txt | 4 ++ src/DotProductDiagonal.cc | 52 ++++++++++++++ src/DotProductDiagonal.h | 23 +++++++ src/DotProductManager.h | 23 +++++++ src/DotProductManagerFactory.h | 46 +++++++++++++ src/DotProductSimple.cc | 35 ++++++++++ src/DotProductSimple.h | 21 ++++++ src/DotProductWithDM.cc | 35 ++++++++++ src/DotProductWithDM.h | 21 ++++++ src/DotProductWithInvS.cc | 35 ++++++++++ src/DotProductWithInvS.h | 21 ++++++ src/ExtendedGridOrbitals.cc | 113 +++++++----------------------- src/ExtendedGridOrbitals.h | 13 +--- src/LocGridOrbitals.cc | 122 +++++++-------------------------- src/LocGridOrbitals.h | 11 +-- src/setup.cc | 2 +- 16 files changed, 367 insertions(+), 210 
deletions(-) create mode 100644 src/DotProductDiagonal.cc create mode 100644 src/DotProductDiagonal.h create mode 100644 src/DotProductManager.h create mode 100644 src/DotProductManagerFactory.h create mode 100644 src/DotProductSimple.cc create mode 100644 src/DotProductSimple.h create mode 100644 src/DotProductWithDM.cc create mode 100644 src/DotProductWithDM.h create mode 100644 src/DotProductWithInvS.cc create mode 100644 src/DotProductWithInvS.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 21f02177..9a1a02bd 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -22,6 +22,10 @@ set(SOURCES mgmol_run.cc read_config.cc jade.cc + DotProductSimple.cc + DotProductDiagonal.cc + DotProductWithInvS.cc + DotProductWithDM.cc LocalMatrices2ReplicatedMatrix.cc ReplicatedMatrix2SquareLocalMatrices.cc DielectricControl.cc diff --git a/src/DotProductDiagonal.cc b/src/DotProductDiagonal.cc new file mode 100644 index 00000000..ad8a7cda --- /dev/null +++ b/src/DotProductDiagonal.cc @@ -0,0 +1,52 @@ +// Copyright (c) 2017, Lawrence Livermore National Security, LLC and +// UT-Battelle, LLC. +// Produced at the Lawrence Livermore National Laboratory and the Oak Ridge +// National Laboratory. +// LLNL-CODE-743438 +// All rights reserved. +// This file is part of MGmol. For details, see https://github.com/llnl/mgmol. 
+// Please also read this link https://github.com/llnl/mgmol/LICENSE +#include "DotProductDiagonal.h" + +#include "Control.h" +#include "ExtendedGridOrbitals.h" +#include "LocGridOrbitals.h" +#include "Mesh.h" +#include "ProjectedMatricesInterface.h" +#include "SquareLocalMatrices.h" + +template <> +double DotProductDiagonal::dotProduct( + ExtendedGridOrbitals& phi0, const ExtendedGridOrbitals& phi1) +{ + const int chromatic_number = phi0.chromatic_number(); + std::vector ss(chromatic_number); + phi0.computeDiagonalElementsDotProduct(phi1, ss); + + ProjectedMatricesInterface* proj_matrices = phi0.getProjMatrices(); + + return proj_matrices->getTraceDiagProductWithInvS(ss); +} + +template <> +double DotProductDiagonal::dotProduct( + LocGridOrbitals& phi0, const LocGridOrbitals& phi1) +{ + const int numst = phi0.numst(); + ProjectedMatricesInterface* proj_matrices = phi0.getProjMatrices(); + assert(proj_matrices != nullptr); + + std::vector ss; + Control& ct = *(Control::instance()); + if (ct.short_sighted) + { + phi0.computeDiagonalElementsDotProductLocal(phi1, ss); + } + else + { + ss.resize(numst); + phi0.computeDiagonalElementsDotProduct(phi1, ss); + } + + return proj_matrices->getTraceDiagProductWithInvS(ss); +} diff --git a/src/DotProductDiagonal.h b/src/DotProductDiagonal.h new file mode 100644 index 00000000..a5d5dec2 --- /dev/null +++ b/src/DotProductDiagonal.h @@ -0,0 +1,23 @@ +// Copyright (c) 2017, Lawrence Livermore National Security, LLC and +// UT-Battelle, LLC. +// Produced at the Lawrence Livermore National Laboratory and the Oak Ridge +// National Laboratory. +// LLNL-CODE-743438 +// All rights reserved. +// This file is part of MGmol. For details, see https://github.com/llnl/mgmol. 
+// Please also read this link https://github.com/llnl/mgmol/LICENSE +#ifndef MGMOL_DotProductManagerDiagonal_H +#define MGMOL_DotProductManagerDiagonal_H + +#include "DotProductManager.h" + +template +class DotProductDiagonal : public DotProductManager +{ +public: + DotProductDiagonal(){}; + + double dotProduct(T& phi0, const T& phi1) override; +}; + +#endif diff --git a/src/DotProductManager.h b/src/DotProductManager.h new file mode 100644 index 00000000..6491d696 --- /dev/null +++ b/src/DotProductManager.h @@ -0,0 +1,23 @@ +// Copyright (c) 2017, Lawrence Livermore National Security, LLC and +// UT-Battelle, LLC. +// Produced at the Lawrence Livermore National Laboratory and the Oak Ridge +// National Laboratory. +// LLNL-CODE-743438 +// All rights reserved. +// This file is part of MGmol. For details, see https://github.com/llnl/mgmol. +// Please also read this link https://github.com/llnl/mgmol/LICENSE +#ifndef MGMOL_DotProductManager_H +#define MGMOL_DotProductManager_H + +template +class DotProductManager +{ +public: + DotProductManager(){}; + + virtual ~DotProductManager(){}; + + virtual double dotProduct(T& a, const T& b) = 0; +}; + +#endif diff --git a/src/DotProductManagerFactory.h b/src/DotProductManagerFactory.h new file mode 100644 index 00000000..ebd783a4 --- /dev/null +++ b/src/DotProductManagerFactory.h @@ -0,0 +1,46 @@ +// Copyright (c) 2017, Lawrence Livermore National Security, LLC and +// UT-Battelle, LLC. +// Produced at the Lawrence Livermore National Laboratory and the Oak Ridge +// National Laboratory. +// LLNL-CODE-743438 +// All rights reserved. +// This file is part of MGmol. For details, see https://github.com/llnl/mgmol. 
+// Please also read this link https://github.com/llnl/mgmol/LICENSE + +#ifndef MGMOL_DotProductManagerFACTORY_H +#define MGMOL_DotProductManagerFACTORY_H + +#include "DotProductDiagonal.h" +#include "DotProductSimple.h" +#include "DotProductWithDM.h" +#include "DotProductWithInvS.h" + +template +class DotProductManagerFactory +{ +public: + static DotProductManager* create(const short type) + { + DotProductManager* dot_product_manager = nullptr; + switch (type) + { + case 0: + dot_product_manager = new DotProductDiagonal(); + break; + case 1: + dot_product_manager = new DotProductWithInvS(); + break; + case 2: + dot_product_manager = new DotProductWithDM(); + break; + case 3: + dot_product_manager = new DotProductSimple(); + break; + default: + std::cerr << "DotProductManager* create() --- option invalid\n"; + } + return dot_product_manager; + } +}; + +#endif diff --git a/src/DotProductSimple.cc b/src/DotProductSimple.cc new file mode 100644 index 00000000..d0a16bdf --- /dev/null +++ b/src/DotProductSimple.cc @@ -0,0 +1,35 @@ +// Copyright (c) 2017, Lawrence Livermore National Security, LLC and +// UT-Battelle, LLC. +// Produced at the Lawrence Livermore National Laboratory and the Oak Ridge +// National Laboratory. +// LLNL-CODE-743438 +// All rights reserved. +// This file is part of MGmol. For details, see https://github.com/llnl/mgmol. 
+// Please also read this link https://github.com/llnl/mgmol/LICENSE +#include "DotProductSimple.h" + +#include "ExtendedGridOrbitals.h" +#include "LocGridOrbitals.h" +#include "Mesh.h" +#include "ProjectedMatricesInterface.h" +#include "SquareLocalMatrices.h" + +template +double DotProductSimple::dotProduct(T& phi0, const T& phi1) +{ + Mesh* mymesh = Mesh::instance(); + const int subdivx = mymesh->subdivx(); + const int chromatic_number = phi0.chromatic_number(); + + SquareLocalMatrices ss( + subdivx, chromatic_number); + + phi0.computeLocalProduct(phi1, ss); + + ProjectedMatricesInterface* proj_matrices = phi0.getProjMatrices(); + + return proj_matrices->dotProductSimple(ss); +} + +template class DotProductSimple; +template class DotProductSimple; diff --git a/src/DotProductSimple.h b/src/DotProductSimple.h new file mode 100644 index 00000000..38aa18b5 --- /dev/null +++ b/src/DotProductSimple.h @@ -0,0 +1,21 @@ +// Copyright (c) 2017, Lawrence Livermore National Security, LLC and +// UT-Battelle, LLC. +// Produced at the Lawrence Livermore National Laboratory and the Oak Ridge +// National Laboratory. +// LLNL-CODE-743438 +// All rights reserved. +// This file is part of MGmol. For details, see https://github.com/llnl/mgmol. +// Please also read this link https://github.com/llnl/mgmol/LICENSE +#ifndef MGMOL_DotProductManagerSimple_H +#define MGMOL_DotProductManagerSimple_H + +#include "DotProductManager.h" + +template +class DotProductSimple : public DotProductManager +{ +public: + double dotProduct(T& phi0, const T& phi1) override; +}; + +#endif diff --git a/src/DotProductWithDM.cc b/src/DotProductWithDM.cc new file mode 100644 index 00000000..aeabe515 --- /dev/null +++ b/src/DotProductWithDM.cc @@ -0,0 +1,35 @@ +// Copyright (c) 2017, Lawrence Livermore National Security, LLC and +// UT-Battelle, LLC. +// Produced at the Lawrence Livermore National Laboratory and the Oak Ridge +// National Laboratory. +// LLNL-CODE-743438 +// All rights reserved. 
+// This file is part of MGmol. For details, see https://github.com/llnl/mgmol. +// Please also read this link https://github.com/llnl/mgmol/LICENSE +#include "DotProductWithDM.h" + +#include "ExtendedGridOrbitals.h" +#include "LocGridOrbitals.h" +#include "Mesh.h" +#include "ProjectedMatricesInterface.h" +#include "SquareLocalMatrices.h" + +template +double DotProductWithDM::dotProduct(T& phi0, const T& phi1) +{ + Mesh* mymesh = Mesh::instance(); + const int subdivx = mymesh->subdivx(); + const int chromatic_number = phi0.chromatic_number(); + + SquareLocalMatrices ss( + subdivx, chromatic_number); + + phi0.computeLocalProduct(phi1, ss); + + ProjectedMatricesInterface* proj_matrices = phi0.getProjMatrices(); + + return proj_matrices->dotProductWithDM(ss); +} + +template class DotProductWithDM; +template class DotProductWithDM; diff --git a/src/DotProductWithDM.h b/src/DotProductWithDM.h new file mode 100644 index 00000000..96c713d6 --- /dev/null +++ b/src/DotProductWithDM.h @@ -0,0 +1,21 @@ +// Copyright (c) 2017, Lawrence Livermore National Security, LLC and +// UT-Battelle, LLC. +// Produced at the Lawrence Livermore National Laboratory and the Oak Ridge +// National Laboratory. +// LLNL-CODE-743438 +// All rights reserved. +// This file is part of MGmol. For details, see https://github.com/llnl/mgmol. +// Please also read this link https://github.com/llnl/mgmol/LICENSE +#ifndef MGMOL_DotProductManagerWithDM_H +#define MGMOL_DotProductManagerWithDM_H + +#include "DotProductManager.h" + +template +class DotProductWithDM : public DotProductManager +{ +public: + double dotProduct(T& phi0, const T& phi1) override; +}; + +#endif diff --git a/src/DotProductWithInvS.cc b/src/DotProductWithInvS.cc new file mode 100644 index 00000000..44991406 --- /dev/null +++ b/src/DotProductWithInvS.cc @@ -0,0 +1,35 @@ +// Copyright (c) 2017, Lawrence Livermore National Security, LLC and +// UT-Battelle, LLC. 
+// Produced at the Lawrence Livermore National Laboratory and the Oak Ridge +// National Laboratory. +// LLNL-CODE-743438 +// All rights reserved. +// This file is part of MGmol. For details, see https://github.com/llnl/mgmol. +// Please also read this link https://github.com/llnl/mgmol/LICENSE +#include "DotProductWithInvS.h" + +#include "ExtendedGridOrbitals.h" +#include "LocGridOrbitals.h" +#include "Mesh.h" +#include "ProjectedMatricesInterface.h" +#include "SquareLocalMatrices.h" + +template +double DotProductWithInvS::dotProduct(T& phi0, const T& phi1) +{ + Mesh* mymesh = Mesh::instance(); + const int subdivx = mymesh->subdivx(); + const int chromatic_number = phi0.chromatic_number(); + + SquareLocalMatrices ss( + subdivx, chromatic_number); + + phi0.computeLocalProduct(phi1, ss); + + ProjectedMatricesInterface* proj_matrices = phi0.getProjMatrices(); + + return proj_matrices->dotProductWithInvS(ss); +} + +template class DotProductWithInvS; +template class DotProductWithInvS; diff --git a/src/DotProductWithInvS.h b/src/DotProductWithInvS.h new file mode 100644 index 00000000..868d8f9d --- /dev/null +++ b/src/DotProductWithInvS.h @@ -0,0 +1,21 @@ +// Copyright (c) 2017, Lawrence Livermore National Security, LLC and +// UT-Battelle, LLC. +// Produced at the Lawrence Livermore National Laboratory and the Oak Ridge +// National Laboratory. +// LLNL-CODE-743438 +// All rights reserved. +// This file is part of MGmol. For details, see https://github.com/llnl/mgmol. 
+// Please also read this link https://github.com/llnl/mgmol/LICENSE +#ifndef MGMOL_DotProductManagerWithInvS_H +#define MGMOL_DotProductManagerWithInvS_H + +#include "DotProductManager.h" + +template +class DotProductWithInvS : public DotProductManager +{ +public: + double dotProduct(T& phi0, const T& phi1) override; +}; + +#endif diff --git a/src/ExtendedGridOrbitals.cc b/src/ExtendedGridOrbitals.cc index de4c8a87..d16ef798 100644 --- a/src/ExtendedGridOrbitals.cc +++ b/src/ExtendedGridOrbitals.cc @@ -6,17 +6,13 @@ // All rights reserved. // This file is part of MGmol. For details, see https://github.com/llnl/mgmol. // Please also read this link https://github.com/llnl/mgmol/LICENSE +#include "ExtendedGridOrbitals.h" #include "global.h" -#include - #include "Control.h" -#include "DistMatrix.h" -#include "ExtendedGridOrbitals.h" +#include "DotProductManagerFactory.h" #include "GridFunc.h" -#include "HDFrestart.h" -#include "Laph2.h" #include "Laph4M.h" #include "LocalMatrices2DistMatrix.h" #include "LocalizationRegions.h" @@ -31,19 +27,22 @@ #include #include +#include #include #define ORBITAL_OCCUPATION 2. 
std::string getDatasetName(const std::string& name, const int color); -int ExtendedGridOrbitals::lda_ = 0; -int ExtendedGridOrbitals::numpt_ = 0; -ExtendedGridOrbitalsPtrFunc ExtendedGridOrbitals::dotProduct_ - = &ExtendedGridOrbitals::dotProductDiagonal; +int ExtendedGridOrbitals::lda_ = 0; +int ExtendedGridOrbitals::numpt_ = 0; int ExtendedGridOrbitals::data_wghosts_index_ = -1; int ExtendedGridOrbitals::numst_ = -1; std::vector> ExtendedGridOrbitals::overlapping_gids_; +DotProductManager* + ExtendedGridOrbitals::dotProductManager_ + = nullptr; + Timer ExtendedGridOrbitals::matB_tm_("ExtendedGridOrbitals::matB"); Timer ExtendedGridOrbitals::invBmat_tm_("ExtendedGridOrbitals::invBmat"); Timer ExtendedGridOrbitals::overlap_tm_("ExtendedGridOrbitals::overlap"); @@ -137,14 +136,11 @@ void ExtendedGridOrbitals::copyDataFrom(const ExtendedGridOrbitals& src) void ExtendedGridOrbitals::setDotProduct(const short dot_type) { - if (dot_type == 0) - dotProduct_ = &ExtendedGridOrbitals::dotProductDiagonal; - else if (dot_type == 1) - dotProduct_ = &ExtendedGridOrbitals::dotProductWithInvS; - else if (dot_type == 2) - dotProduct_ = &ExtendedGridOrbitals::dotProductWithDM; - else if (dot_type == 3) - dotProduct_ = &ExtendedGridOrbitals::dotProductSimple; + DotProductManagerFactory factory; + + dotProductManager_ = factory.create(dot_type); + + assert(dotProductManager_ != nullptr); } void ExtendedGridOrbitals::setup() @@ -1102,55 +1098,10 @@ void ExtendedGridOrbitals::checkCond(const double tol, const bool flag_stop) proj_matrices_->checkCond(tol, flag_stop); } -double ExtendedGridOrbitals::dotProductWithDM( - const ExtendedGridOrbitals& orbitals) -{ - assert(proj_matrices_ != nullptr); - - SquareLocalMatrices ss(1, numst_); - - computeLocalProduct(orbitals, ss); - - return proj_matrices_->dotProductWithDM(ss); -} - -double ExtendedGridOrbitals::dotProductWithInvS( - const ExtendedGridOrbitals& orbitals) -{ - assert(proj_matrices_ != nullptr); - - SquareLocalMatrices ss(1, 
numst_); - - computeLocalProduct(orbitals, ss); - - return proj_matrices_->dotProductWithInvS(ss); -} - -double ExtendedGridOrbitals::dotProductDiagonal( - const ExtendedGridOrbitals& orbitals) -{ - assert(proj_matrices_ != nullptr); - - std::vector ss(numst_); - computeDiagonalElementsDotProduct(orbitals, ss); - return proj_matrices_->getTraceDiagProductWithInvS(ss); -} - -double ExtendedGridOrbitals::dotProductSimple( - const ExtendedGridOrbitals& orbitals) -{ - assert(proj_matrices_ != nullptr); - - SquareLocalMatrices ss(1, numst_); - - computeLocalProduct(orbitals, ss); - - return proj_matrices_->dotProductSimple(ss); -} - double ExtendedGridOrbitals::dotProduct(const ExtendedGridOrbitals& orbitals) { - return (this->*dotProduct_)(orbitals); // call through pointer member + assert(dotProductManager_ != nullptr); + return dotProductManager_->dotProduct(*this, orbitals); } double ExtendedGridOrbitals::dotProduct( @@ -1162,31 +1113,13 @@ double ExtendedGridOrbitals::dotProduct( assert(1 > 0); assert(1 < 1000); - double dot = 0.; - if (dot_type == 0) - { - dot = dotProductDiagonal(orbitals); - } - else if (dot_type == 1) - { - dot = dotProductWithInvS(orbitals); - } - else if (dot_type == 2) - { - dot = dotProductWithDM(orbitals); - } - else if (dot_type == 3) - { - dot = dotProductSimple(orbitals); - } - else - { - (*MPIdata::serr) << "ExtendedGridOrbitals::dot_product() --- unknown " - "dot product type" - << std::endl; - Control& ct = *(Control::instance()); - ct.global_exit(); - } + DotProductManagerFactory factory; + DotProductManager* manager = factory.create(dot_type); + assert(manager != nullptr); + + double dot = manager->dotProduct(*this, orbitals); + + delete manager; dot_product_tm_.stop(); diff --git a/src/ExtendedGridOrbitals.h b/src/ExtendedGridOrbitals.h index baa114d3..9bb23b9c 100644 --- a/src/ExtendedGridOrbitals.h +++ b/src/ExtendedGridOrbitals.h @@ -12,6 +12,7 @@ #include "BlockVector.h" #include "DistMatrix.h" +#include "DotProductManager.h" 
#include "GridFunc.h" #include "HDFrestart.h" #include "Lap.h" @@ -37,9 +38,6 @@ class ExtendedGridOrbitals; class MasksSet; class ClusterOrbitals; -typedef double (ExtendedGridOrbitals::*ExtendedGridOrbitalsPtrFunc)( - const ExtendedGridOrbitals&); - class ExtendedGridOrbitals : public Orbitals { private: @@ -61,9 +59,7 @@ class ExtendedGridOrbitals : public Orbitals static int lda_; // leading dimension for storage static int numpt_; - // static double (ExtendedGridOrbitals::*dotProduct_)(const - // ExtendedGridOrbitals&); - static ExtendedGridOrbitalsPtrFunc dotProduct_; + static DotProductManager* dotProductManager_; static int data_wghosts_index_; @@ -101,11 +97,6 @@ class ExtendedGridOrbitals : public Orbitals void computeMatB(const ExtendedGridOrbitals&, const pb::Lap&); - double dotProductDiagonal(const ExtendedGridOrbitals& orbitals); - double dotProductWithDM(const ExtendedGridOrbitals& orbitals); - double dotProductWithInvS(const ExtendedGridOrbitals& orbitals); - double dotProductSimple(const ExtendedGridOrbitals& orbitals); - void computeLocalProduct(const ORBDTYPE* const, const int, LocalMatrices&, const bool transpose = false); diff --git a/src/LocGridOrbitals.cc b/src/LocGridOrbitals.cc index 8db9c1e0..1822c3ff 100644 --- a/src/LocGridOrbitals.cc +++ b/src/LocGridOrbitals.cc @@ -14,11 +14,11 @@ #include "ColoredRegions.h" #include "Control.h" #include "DistMatrix.h" +#include "DotProductManagerFactory.h" #include "FunctionsPacking.h" #include "GridFunc.h" #include "GridMask.h" #include "HDFrestart.h" -#include "Laph2.h" #include "Laph4M.h" #include "LocGridOrbitals.h" #include "LocalMatrices2DistMatrix.h" @@ -27,8 +27,6 @@ #include "Masks4Orbitals.h" #include "MasksSet.h" #include "Mesh.h" -#include "Potentials.h" -#include "Preconditioning.h" #include "ProjectedMatrices.h" #include "ReplicatedWorkSpace.h" #include "SquareLocalMatrices.h" @@ -46,11 +44,14 @@ #define ORBITAL_OCCUPATION 2. 
std::string getDatasetName(const std::string& name, const int color); -short LocGridOrbitals::subdivx_ = 0; -int LocGridOrbitals::lda_ = 0; -int LocGridOrbitals::numpt_ = 0; -int LocGridOrbitals::loc_numpt_ = 0; -PtrFunc LocGridOrbitals::dotProduct_ = &LocGridOrbitals::dotProductDiagonal; +short LocGridOrbitals::subdivx_ = 0; +int LocGridOrbitals::lda_ = 0; +int LocGridOrbitals::numpt_ = 0; +int LocGridOrbitals::loc_numpt_ = 0; + +DotProductManager* LocGridOrbitals::dotProductManager_ + = nullptr; + int LocGridOrbitals::data_wghosts_index_ = -1; Timer LocGridOrbitals::get_dm_tm_("LocGridOrbitals::get_dm"); @@ -202,14 +203,11 @@ void LocGridOrbitals::copyDataFrom(const LocGridOrbitals& src) void LocGridOrbitals::setDotProduct(const short dot_type) { - if (dot_type == 0) - dotProduct_ = &LocGridOrbitals::dotProductDiagonal; - else if (dot_type == 1) - dotProduct_ = &LocGridOrbitals::dotProductWithInvS; - else if (dot_type == 2) - dotProduct_ = &LocGridOrbitals::dotProductWithDM; - else if (dot_type == 3) - dotProduct_ = &LocGridOrbitals::dotProductSimple; + DotProductManagerFactory factory; + + dotProductManager_ = factory.create(dot_type); + + assert(dotProductManager_ != nullptr); } void LocGridOrbitals::setGids2Storage() @@ -1723,66 +1721,10 @@ void LocGridOrbitals::checkCond(const double tol, const bool flag_stop) proj_matrices_->checkCond(tol, flag_stop); } -double LocGridOrbitals::dotProductWithDM(const LocGridOrbitals& orbitals) -{ - assert(proj_matrices_ != nullptr); - assert(chromatic_number_ == orbitals.chromatic_number_); - - SquareLocalMatrices ss( - subdivx_, chromatic_number_); - - computeLocalProduct(orbitals, ss); - - return proj_matrices_->dotProductWithDM(ss); -} - -double LocGridOrbitals::dotProductWithInvS(const LocGridOrbitals& orbitals) -{ - assert(proj_matrices_ != nullptr); - assert(chromatic_number_ == orbitals.chromatic_number_); - - SquareLocalMatrices ss( - subdivx_, chromatic_number_); - - computeLocalProduct(orbitals, ss); - - return 
proj_matrices_->dotProductWithInvS(ss); -} - -double LocGridOrbitals::dotProductDiagonal(const LocGridOrbitals& orbitals) -{ - assert(proj_matrices_ != nullptr); - - std::vector ss; - Control& ct = *(Control::instance()); - if (ct.short_sighted) - { - computeDiagonalElementsDotProductLocal(orbitals, ss); - } - else - { - ss.resize(numst_); - computeDiagonalElementsDotProduct(orbitals, ss); - } - return proj_matrices_->getTraceDiagProductWithInvS(ss); -} - -double LocGridOrbitals::dotProductSimple(const LocGridOrbitals& orbitals) -{ - assert(proj_matrices_ != nullptr); - assert(chromatic_number_ == orbitals.chromatic_number_); - - SquareLocalMatrices ss( - subdivx_, chromatic_number_); - - computeLocalProduct(orbitals, ss); - - return proj_matrices_->dotProductSimple(ss); -} - double LocGridOrbitals::dotProduct(const LocGridOrbitals& orbitals) { - return (this->*dotProduct_)(orbitals); // call through pointer member + assert(dotProductManager_ != nullptr); + return dotProductManager_->dotProduct(*this, orbitals); } double LocGridOrbitals::dotProduct( @@ -1794,31 +1736,13 @@ double LocGridOrbitals::dotProduct( assert(subdivx_ > 0); assert(subdivx_ < 1000); - double dot = 0.; - if (dot_type == 0) - { - dot = dotProductDiagonal(orbitals); - } - else if (dot_type == 1) - { - dot = dotProductWithInvS(orbitals); - } - else if (dot_type == 2) - { - dot = dotProductWithDM(orbitals); - } - else if (dot_type == 3) - { - dot = dotProductSimple(orbitals); - } - else - { - MGmol_MPI& mmpi = *(MGmol_MPI::instance()); - (*MPIdata::serr) - << "LocGridOrbitals::dot_product() --- unknown dot product type" - << std::endl; - mmpi.abort(); - } + DotProductManagerFactory factory; + DotProductManager* manager = factory.create(dot_type); + assert(manager != nullptr); + + double dot = manager->dotProduct(*this, orbitals); + + delete manager; dot_product_tm_.stop(); diff --git a/src/LocGridOrbitals.h b/src/LocGridOrbitals.h index 68d865f3..2e47e7d9 100644 --- a/src/LocGridOrbitals.h +++ 
b/src/LocGridOrbitals.h @@ -13,6 +13,7 @@ #include "BlockVector.h" #include "ClusterOrbitals.h" #include "DataDistribution.h" +#include "DotProductManager.h" #include "FunctionsPacking.h" #include "GridFunc.h" #include "HDFrestart.h" @@ -40,8 +41,6 @@ class MasksSet; class LocGridOrbitals; class Masks4Orbitals; -typedef double (LocGridOrbitals::*PtrFunc)(const LocGridOrbitals&); - class LocGridOrbitals : public Orbitals { private: @@ -66,8 +65,7 @@ class LocGridOrbitals : public Orbitals static int numpt_; static int loc_numpt_; - // static double (LocGridOrbitals::*dotProduct_)(const LocGridOrbitals&); - static PtrFunc dotProduct_; + static DotProductManager* dotProductManager_; static int data_wghosts_index_; @@ -126,11 +124,6 @@ class LocGridOrbitals : public Orbitals void matrixToLocalMatrix( const short, const DISTMATDTYPE* const, DISTMATDTYPE* const) const; - double dotProductDiagonal(const LocGridOrbitals& orbitals); - double dotProductWithDM(const LocGridOrbitals& orbitals); - double dotProductWithInvS(const LocGridOrbitals& orbitals); - double dotProductSimple(const LocGridOrbitals& orbitals); - void computeLocalProduct(const ORBDTYPE* const, const int, LocalMatrices&, const bool transpose = false); diff --git a/src/setup.cc b/src/setup.cc index 8ad59f9e..9b4c3d22 100644 --- a/src/setup.cc +++ b/src/setup.cc @@ -103,7 +103,7 @@ int MGmol::setupFromInput(const std::string filename) if (ct.rmatrices) ReplicatedMatrix::setMPIcomm(mmpi.commSpin()); - LocGridOrbitals::setDotProduct(ct.dot_product_type); + OrbitalsType::setDotProduct(ct.dot_product_type); mgmol_check(); From 1d7943785e4ad34ddb1f05f4f7bbbe3be4558c29 Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Mon, 1 Dec 2025 15:45:38 -0500 Subject: [PATCH 91/99] Template GridOrbitals classes (#391) --- drivers/check_input.cc | 10 +- drivers/example1.cc | 10 +- src/ABPG.cc | 4 +- src/AOMMprojector.cc | 16 +- src/AOMMprojector.h | 12 +- src/AndersonMix.cc | 4 +- src/DFTsolver.cc | 4 +- 
src/DMStrategyFactory.cc | 83 +-- src/DavidsonSolver.cc | 4 +- src/DotProductDiagonal.cc | 9 +- src/DotProductSimple.cc | 4 +- src/DotProductWithDM.cc | 4 +- src/DotProductWithInvS.cc | 4 +- src/EigenDMStrategy.cc | 4 +- src/Electrostatic.cc | 16 +- src/Energy.cc | 4 +- src/EnergySpreadPenalty.cc | 4 +- src/ExtendedGridOrbitals.cc | 606 ++++++++++-------- src/ExtendedGridOrbitals.h | 62 +- src/FIRE.cc | 4 +- src/Forces.cc | 4 +- src/FullyOccupiedNonOrthoDMStrategy.cc | 4 +- src/GrassmanCG.cc | 6 +- src/GrassmanCGFactory.cc | 29 +- src/GrassmanCGSparse.cc | 2 +- src/GrassmanLineMinimization.cc | 8 +- src/Hamiltonian.cc | 86 +-- src/HamiltonianMVPSolver.cc | 9 +- src/HamiltonianMVP_DMStrategy.cc | 9 +- src/IonicAlgorithm.cc | 4 +- src/KBPsiMatrixSparse.cc | 14 +- src/LBFGS.cc | 8 +- src/LDAonGrid.cc | 4 +- src/LDAonGridSpin.cc | 4 +- src/LocGridOrbitals.cc | 534 ++++++++------- src/LocGridOrbitals.h | 47 +- src/LocalizationRegions.cc | 6 +- src/MGOrbitalsPreconditioning.cc | 9 +- src/MGmol.cc | 34 +- src/MVPSolver.cc | 8 +- src/MVP_DMStrategy.cc | 9 +- src/NonOrthoDMStrategy.cc | 4 +- src/OrbitalsExtrapolation.cc | 4 +- src/OrbitalsExtrapolationOrder2.cc | 8 +- src/OrbitalsExtrapolationOrder3.cc | 18 +- src/OrthoAndersonMix.cc | 4 +- src/PBEonGrid.cc | 4 +- src/PBEonGridSpin.cc | 4 +- src/PolakRibiereSolver.cc | 12 +- src/Rho.cc | 43 +- src/SinCosOps.cc | 4 +- src/SpreadPenalty.cc | 4 +- src/SpreadPenaltyVolume.cc | 4 +- src/SpreadsAndCenters.cc | 4 +- src/SubspaceProjector.cc | 4 +- src/computeHij.cc | 24 +- src/lbfgsrlx.cc | 8 +- src/main.cc | 10 +- src/md.cc | 4 +- src/mlwf.cc | 4 +- src/quench.cc | 25 +- src/readInput.cc | 4 +- src/restart.cc | 4 +- src/runfire.cc | 8 +- src/setup.cc | 4 +- .../testDMandEnergyAndForces.cc | 16 +- tests/EnergyAndForces/testEnergyAndForces.cc | 10 +- .../testRestartEnergyAndForces.cc | 11 +- tests/RhoVhRestart/testRhoVhRestart.cc | 13 +- .../testWFEnergyAndForces.cc | 16 +- 70 files changed, 1128 insertions(+), 860 deletions(-) 
diff --git a/drivers/check_input.cc b/drivers/check_input.cc index 14dbaa09..56a9d5d8 100644 --- a/drivers/check_input.cc +++ b/drivers/check_input.cc @@ -70,11 +70,13 @@ int main(int argc, char** argv) { MGmolInterface* mgmol; if (ct.isLocMode()) - mgmol = new MGmol(global_comm, *MPIdata::sout, - input_filename, lrs_filename, constraints_filename); + mgmol = new MGmol>(global_comm, + *MPIdata::sout, input_filename, lrs_filename, + constraints_filename); else - mgmol = new MGmol(global_comm, *MPIdata::sout, - input_filename, lrs_filename, constraints_filename); + mgmol = new MGmol>(global_comm, + *MPIdata::sout, input_filename, lrs_filename, + constraints_filename); *MPIdata::sout << " Input parameters OK\n"; diff --git a/drivers/example1.cc b/drivers/example1.cc index 561f5e08..60acb123 100644 --- a/drivers/example1.cc +++ b/drivers/example1.cc @@ -87,11 +87,13 @@ int main(int argc, char** argv) MGmolInterface* mgmol; if (ct.isLocMode()) - mgmol = new MGmol(global_comm, *MPIdata::sout, - input_filename, lrs_filename, constraints_filename); + mgmol = new MGmol>(global_comm, + *MPIdata::sout, input_filename, lrs_filename, + constraints_filename); else - mgmol = new MGmol(global_comm, *MPIdata::sout, - input_filename, lrs_filename, constraints_filename); + mgmol = new MGmol>(global_comm, + *MPIdata::sout, input_filename, lrs_filename, + constraints_filename); if (MPIdata::onpe0) { diff --git a/src/ABPG.cc b/src/ABPG.cc index de501c44..a8a7708e 100644 --- a/src/ABPG.cc +++ b/src/ABPG.cc @@ -148,5 +148,5 @@ void ABPG::printTimers(std::ostream& os) update_states_tm_.print(os); } -template class ABPG; -template class ABPG; +template class ABPG>; +template class ABPG>; diff --git a/src/AOMMprojector.cc b/src/AOMMprojector.cc index 3e670e45..a35acf13 100644 --- a/src/AOMMprojector.cc +++ b/src/AOMMprojector.cc @@ -14,8 +14,8 @@ #include "ProjectedMatricesSparse.h" #include "SubspaceProjector.h" -AOMMprojector::AOMMprojector( - LocGridOrbitals& phi, const std::shared_ptr& 
lrs) +AOMMprojector::AOMMprojector(LocGridOrbitals& phi, + const std::shared_ptr& lrs) { Control& ct = *(Control::instance()); Mesh* mymesh = Mesh::instance(); @@ -48,7 +48,7 @@ AOMMprojector::AOMMprojector( ct.numst, with_spin, ct.occ_width); // kernel functions use their own projected matrices and masks - kernel_phi_ = new LocGridOrbitals( + kernel_phi_ = new LocGridOrbitals( "AOMM", phi, kernel_proj_matrices_, kernelMasks_, nullptr); kernel_phi_->initGauss(0.5 * radius, lrs); @@ -59,7 +59,8 @@ AOMMprojector::AOMMprojector( kernel_phi_->computeGramAndInvS(ct.verbose); - kernelprojector_ = new SubspaceProjector(*kernel_phi_); + kernelprojector_ + = new SubspaceProjector>(*kernel_phi_); matrix_mask_ = new SquareLocalMatrices( subdivx, kernel_phi_->chromatic_number()); @@ -77,7 +78,7 @@ AOMMprojector::AOMMprojector( // matrix_mask_->setMaskThreshold(threshold, 10000.); } -void AOMMprojector::resetProjectors(LocGridOrbitals& phi) +void AOMMprojector::resetProjectors(LocGridOrbitals& phi) { if (onpe0) std::cout << "AOMM: reset projectors..." 
<< std::endl; @@ -89,10 +90,11 @@ void AOMMprojector::resetProjectors(LocGridOrbitals& phi) kernel_phi_->computeGramAndInvS(0); delete kernelprojector_; - kernelprojector_ = new SubspaceProjector(*kernel_phi_); + kernelprojector_ + = new SubspaceProjector>(*kernel_phi_); } -void AOMMprojector::projectOut(LocGridOrbitals& phi) +void AOMMprojector::projectOut(LocGridOrbitals& phi) { assert(kernelprojector_ != nullptr); assert(matrix_mask_ != nullptr); diff --git a/src/AOMMprojector.h b/src/AOMMprojector.h index 54888cec..c9089d13 100644 --- a/src/AOMMprojector.h +++ b/src/AOMMprojector.h @@ -22,9 +22,9 @@ class MasksSet; class AOMMprojector { private: - LocGridOrbitals* kernel_phi_; + LocGridOrbitals* kernel_phi_; - SubspaceProjector* kernelprojector_; + SubspaceProjector>* kernelprojector_; MasksSet* kernelMasks_; @@ -35,13 +35,13 @@ class AOMMprojector short counter_; public: - AOMMprojector( - LocGridOrbitals& phi, const std::shared_ptr& lrs); + AOMMprojector(LocGridOrbitals& phi, + const std::shared_ptr& lrs); ~AOMMprojector(); - void projectOut(LocGridOrbitals& phi); + void projectOut(LocGridOrbitals& phi); - void resetProjectors(LocGridOrbitals& phi); + void resetProjectors(LocGridOrbitals& phi); }; #endif diff --git a/src/AndersonMix.cc b/src/AndersonMix.cc index 7edae7d3..2892c9f9 100644 --- a/src/AndersonMix.cc +++ b/src/AndersonMix.cc @@ -321,6 +321,6 @@ void AndersonMix::update(T& f, T& work, ostream& os, const bool verbose) #ifdef TESTING template class AndersonMix; #else -template class AndersonMix; -template class AndersonMix; +template class AndersonMix>; +template class AndersonMix>; #endif diff --git a/src/DFTsolver.cc b/src/DFTsolver.cc index 206fba91..55e6df26 100644 --- a/src/DFTsolver.cc +++ b/src/DFTsolver.cc @@ -425,5 +425,5 @@ void DFTsolver::printTimers(std::ostream& os) solve_tm_.print(os); } -template class DFTsolver; -template class DFTsolver; +template class DFTsolver>; +template class DFTsolver>; diff --git a/src/DMStrategyFactory.cc 
b/src/DMStrategyFactory.cc index 61aec57c..115c8cbe 100644 --- a/src/DMStrategyFactory.cc +++ b/src/DMStrategyFactory.cc @@ -2,47 +2,52 @@ #include "ReplicatedMatrix.h" template <> -DMStrategy* DMStrategyFactory>* +DMStrategyFactory, dist_matrix::DistMatrix>::createHamiltonianMVP_DMStrategy(MPI_Comm comm, - std::ostream& os, Ions& ions, Rho* rho, - Energy* energy, Electrostatic* electrostat, - Hamiltonian* hamiltonian, - MGmol* mgmol_strategy, - ProjectedMatricesInterface* /*proj_matrices*/, LocGridOrbitals* orbitals, - const bool short_sighted) + std::ostream& os, Ions& ions, Rho>* rho, + Energy>* energy, Electrostatic* electrostat, + Hamiltonian>* hamiltonian, + MGmol>* mgmol_strategy, + ProjectedMatricesInterface* /*proj_matrices*/, + LocGridOrbitals* orbitals, const bool short_sighted) { if (short_sighted) { - DMStrategy* dm_strategy + DMStrategy>* dm_strategy = new HamiltonianMVP_DMStrategy, - ProjectedMatricesSparse, LocGridOrbitals>(comm, os, ions, rho, - energy, electrostat, hamiltonian, mgmol_strategy, orbitals); + ProjectedMatricesSparse, LocGridOrbitals>(comm, os, + ions, rho, energy, electrostat, hamiltonian, mgmol_strategy, + orbitals); return dm_strategy; } else { - DMStrategy* dm_strategy + DMStrategy>* dm_strategy = new HamiltonianMVP_DMStrategy< dist_matrix::DistMatrix, ProjectedMatrices>, - LocGridOrbitals>(comm, os, ions, rho, energy, electrostat, - hamiltonian, mgmol_strategy, orbitals); + LocGridOrbitals>(comm, os, ions, rho, energy, + electrostat, hamiltonian, mgmol_strategy, orbitals); return dm_strategy; } } template <> -DMStrategy* DMStrategyFactory>* +DMStrategyFactory, ReplicatedMatrix>::createHamiltonianMVP_DMStrategy(MPI_Comm comm, - std::ostream& /*os*/, Ions& /*ions*/, Rho* /*rho*/, - Energy* /*energy*/, Electrostatic* /*electrostat*/, - Hamiltonian* /*hamiltonian*/, - MGmol* /*mgmol_strategy*/, + std::ostream& /*os*/, Ions& /*ions*/, + Rho>* /*rho*/, + Energy>* /*energy*/, + Electrostatic* /*electrostat*/, + Hamiltonian>* 
/*hamiltonian*/, + MGmol>* /*mgmol_strategy*/, ProjectedMatricesInterface* /*proj_matrices*/, - LocGridOrbitals* /*orbitals*/, const bool /*short_sighted*/) + LocGridOrbitals* /*orbitals*/, const bool /*short_sighted*/) { std::cerr << "DMStrategy not implemented" << std::endl; @@ -52,44 +57,46 @@ DMStrategy* DMStrategyFactory -DMStrategy* DMStrategyFactory>* +DMStrategyFactory, dist_matrix::DistMatrix>::createHamiltonianMVP_DMStrategy(MPI_Comm comm, - std::ostream& os, Ions& ions, Rho* rho, - Energy* energy, Electrostatic* electrostat, - Hamiltonian* hamiltonian, - MGmol* mgmol_strategy, + std::ostream& os, Ions& ions, Rho>* rho, + Energy>* energy, Electrostatic* electrostat, + Hamiltonian>* hamiltonian, + MGmol>* mgmol_strategy, ProjectedMatricesInterface* /*proj_matrices*/, - ExtendedGridOrbitals* orbitals, const bool short_sighted) + ExtendedGridOrbitals* orbitals, const bool short_sighted) { (void)short_sighted; - DMStrategy* dm_strategy + DMStrategy>* dm_strategy = new HamiltonianMVP_DMStrategy, ProjectedMatrices>, - ExtendedGridOrbitals>(comm, os, ions, rho, energy, electrostat, - hamiltonian, mgmol_strategy, orbitals); + ExtendedGridOrbitals>(comm, os, ions, rho, energy, + electrostat, hamiltonian, mgmol_strategy, orbitals); return dm_strategy; } template <> -DMStrategy* DMStrategyFactory>* +DMStrategyFactory, ReplicatedMatrix>::createHamiltonianMVP_DMStrategy(MPI_Comm comm, - std::ostream& os, Ions& ions, Rho* rho, - Energy* energy, Electrostatic* electrostat, - Hamiltonian* hamiltonian, - MGmol* mgmol_strategy, + std::ostream& os, Ions& ions, Rho>* rho, + Energy>* energy, Electrostatic* electrostat, + Hamiltonian>* hamiltonian, + MGmol>* mgmol_strategy, ProjectedMatricesInterface* /*proj_matrices*/, - ExtendedGridOrbitals* orbitals, const bool short_sighted) + ExtendedGridOrbitals* orbitals, const bool short_sighted) { (void)short_sighted; - DMStrategy* dm_strategy + DMStrategy>* dm_strategy = new HamiltonianMVP_DMStrategy, ExtendedGridOrbitals>(comm, os, 
- ions, rho, energy, electrostat, hamiltonian, mgmol_strategy, - orbitals); + ProjectedMatrices, + ExtendedGridOrbitals>(comm, os, ions, rho, energy, + electrostat, hamiltonian, mgmol_strategy, orbitals); return dm_strategy; } diff --git a/src/DavidsonSolver.cc b/src/DavidsonSolver.cc index 59c303e6..85f6fc9f 100644 --- a/src/DavidsonSolver.cc +++ b/src/DavidsonSolver.cc @@ -857,6 +857,6 @@ void DavidsonSolver::printTimers(std::ostream& os) target_tm_.print(os); } -template class DavidsonSolver, dist_matrix::DistMatrix>; -template class DavidsonSolver; +template class DavidsonSolver, ReplicatedMatrix>; diff --git a/src/DotProductDiagonal.cc b/src/DotProductDiagonal.cc index ad8a7cda..764e7446 100644 --- a/src/DotProductDiagonal.cc +++ b/src/DotProductDiagonal.cc @@ -16,8 +16,9 @@ #include "SquareLocalMatrices.h" template <> -double DotProductDiagonal::dotProduct( - ExtendedGridOrbitals& phi0, const ExtendedGridOrbitals& phi1) +double DotProductDiagonal>::dotProduct( + ExtendedGridOrbitals& phi0, + const ExtendedGridOrbitals& phi1) { const int chromatic_number = phi0.chromatic_number(); std::vector ss(chromatic_number); @@ -29,8 +30,8 @@ double DotProductDiagonal::dotProduct( } template <> -double DotProductDiagonal::dotProduct( - LocGridOrbitals& phi0, const LocGridOrbitals& phi1) +double DotProductDiagonal>::dotProduct( + LocGridOrbitals& phi0, const LocGridOrbitals& phi1) { const int numst = phi0.numst(); ProjectedMatricesInterface* proj_matrices = phi0.getProjMatrices(); diff --git a/src/DotProductSimple.cc b/src/DotProductSimple.cc index d0a16bdf..44e62b5a 100644 --- a/src/DotProductSimple.cc +++ b/src/DotProductSimple.cc @@ -31,5 +31,5 @@ double DotProductSimple::dotProduct(T& phi0, const T& phi1) return proj_matrices->dotProductSimple(ss); } -template class DotProductSimple; -template class DotProductSimple; +template class DotProductSimple>; +template class DotProductSimple>; diff --git a/src/DotProductWithDM.cc b/src/DotProductWithDM.cc index 
aeabe515..652a6683 100644 --- a/src/DotProductWithDM.cc +++ b/src/DotProductWithDM.cc @@ -31,5 +31,5 @@ double DotProductWithDM::dotProduct(T& phi0, const T& phi1) return proj_matrices->dotProductWithDM(ss); } -template class DotProductWithDM; -template class DotProductWithDM; +template class DotProductWithDM>; +template class DotProductWithDM>; diff --git a/src/DotProductWithInvS.cc b/src/DotProductWithInvS.cc index 44991406..08b079ce 100644 --- a/src/DotProductWithInvS.cc +++ b/src/DotProductWithInvS.cc @@ -31,5 +31,5 @@ double DotProductWithInvS::dotProduct(T& phi0, const T& phi1) return proj_matrices->dotProductWithInvS(ss); } -template class DotProductWithInvS; -template class DotProductWithInvS; +template class DotProductWithInvS>; +template class DotProductWithInvS>; diff --git a/src/EigenDMStrategy.cc b/src/EigenDMStrategy.cc index 358ed5f9..cf803e0f 100644 --- a/src/EigenDMStrategy.cc +++ b/src/EigenDMStrategy.cc @@ -48,5 +48,5 @@ int EigenDMStrategy::update(OrbitalsType& orbitals) return 0; } -template class EigenDMStrategy; -template class EigenDMStrategy; +template class EigenDMStrategy>; +template class EigenDMStrategy>; diff --git a/src/Electrostatic.cc b/src/Electrostatic.cc index aba09d07..d912f5f3 100644 --- a/src/Electrostatic.cc +++ b/src/Electrostatic.cc @@ -336,14 +336,16 @@ void Electrostatic::computeVh(const Ions& ions, Rho& rho, Potentials& pot) solve_tm_.stop(); } -template void Electrostatic::computeVhRho(Rho& rho); +template void Electrostatic::computeVhRho(Rho>& rho); template void Electrostatic::computeVh( - const Ions& ions, Rho& rho, Potentials& pot); + const Ions& ions, Rho>& rho, Potentials& pot); template void Electrostatic::computeVh(const pb::GridFunc& vhinit, - const Ions& ions, Rho& rho, Potentials& pot); + const Ions& ions, Rho>& rho, Potentials& pot); -template void Electrostatic::computeVhRho(Rho& rho); -template void Electrostatic::computeVh( - const Ions& ions, Rho& rho, Potentials& pot); +template void 
Electrostatic::computeVhRho( + Rho>& rho); +template void Electrostatic::computeVh(const Ions& ions, + Rho>& rho, Potentials& pot); template void Electrostatic::computeVh(const pb::GridFunc& vhinit, - const Ions& ions, Rho& rho, Potentials& pot); + const Ions& ions, Rho>& rho, + Potentials& pot); diff --git a/src/Energy.cc b/src/Energy.cc index f2e2607f..ad371afc 100644 --- a/src/Energy.cc +++ b/src/Energy.cc @@ -199,5 +199,5 @@ double Energy::evaluateTotal(const double ts, // in [Ha] return energy_sc; } -template class Energy; -template class Energy; +template class Energy>; +template class Energy>; diff --git a/src/EnergySpreadPenalty.cc b/src/EnergySpreadPenalty.cc index 0299fbe8..5d9a1a5d 100644 --- a/src/EnergySpreadPenalty.cc +++ b/src/EnergySpreadPenalty.cc @@ -231,5 +231,5 @@ double EnergySpreadPenalty::evaluateEnergy(const T& phi) return alpha_ * total_energy; } -template class EnergySpreadPenalty; -template class EnergySpreadPenalty; +template class EnergySpreadPenalty>; +template class EnergySpreadPenalty>; diff --git a/src/ExtendedGridOrbitals.cc b/src/ExtendedGridOrbitals.cc index d16ef798..589e1f5b 100644 --- a/src/ExtendedGridOrbitals.cc +++ b/src/ExtendedGridOrbitals.cc @@ -33,29 +33,60 @@ #define ORBITAL_OCCUPATION 2. 
std::string getDatasetName(const std::string& name, const int color); -int ExtendedGridOrbitals::lda_ = 0; -int ExtendedGridOrbitals::numpt_ = 0; -int ExtendedGridOrbitals::data_wghosts_index_ = -1; -int ExtendedGridOrbitals::numst_ = -1; -std::vector> ExtendedGridOrbitals::overlapping_gids_; - -DotProductManager* - ExtendedGridOrbitals::dotProductManager_ +template +DotProductManager>* + ExtendedGridOrbitals::dotProductManager_ = nullptr; -Timer ExtendedGridOrbitals::matB_tm_("ExtendedGridOrbitals::matB"); -Timer ExtendedGridOrbitals::invBmat_tm_("ExtendedGridOrbitals::invBmat"); -Timer ExtendedGridOrbitals::overlap_tm_("ExtendedGridOrbitals::overlap"); -Timer ExtendedGridOrbitals::dot_product_tm_( - "ExtendedGridOrbitals::dot_product"); -Timer ExtendedGridOrbitals::addDot_tm_("ExtendedGridOrbitals::addDot"); -Timer ExtendedGridOrbitals::prod_matrix_tm_( - "ExtendedGridOrbitals::prod_matrix"); -Timer ExtendedGridOrbitals::assign_tm_("ExtendedGridOrbitals::assign"); -Timer ExtendedGridOrbitals::normalize_tm_("ExtendedGridOrbitals::normalize"); -Timer ExtendedGridOrbitals::axpy_tm_("ExtendedGridOrbitals::axpy"); - -ExtendedGridOrbitals::ExtendedGridOrbitals(std::string name, +template +int ExtendedGridOrbitals::lda_ = 0; +template +int ExtendedGridOrbitals::numpt_ = 0; +template +int ExtendedGridOrbitals::data_wghosts_index_ = -1; +template +int ExtendedGridOrbitals::numst_ = -1; +template +std::vector> + ExtendedGridOrbitals::overlapping_gids_; + +template +Timer ExtendedGridOrbitals::matB_tm_( + "ExtendedGridOrbitals" + std::to_string(8 * sizeof(ScalarType)) + "::matB"); +template +Timer ExtendedGridOrbitals::invBmat_tm_( + "ExtendedGridOrbitals" + std::to_string(8 * sizeof(ScalarType)) + + "::invBmat"); +template +Timer ExtendedGridOrbitals::overlap_tm_( + "ExtendedGridOrbitals" + std::to_string(8 * sizeof(ScalarType)) + + "::overlap"); +template +Timer ExtendedGridOrbitals::dot_product_tm_( + "ExtendedGridOrbitals" + std::to_string(8 * sizeof(ScalarType)) + + 
"::dot_product"); +template +Timer ExtendedGridOrbitals::addDot_tm_( + "ExtendedGridOrbitals" + std::to_string(8 * sizeof(ScalarType)) + + "::addDot"); +template +Timer ExtendedGridOrbitals::prod_matrix_tm_( + "ExtendedGridOrbitals" + std::to_string(8 * sizeof(ScalarType)) + + "::prod_matrix"); +template +Timer ExtendedGridOrbitals::assign_tm_( + "ExtendedGridOrbitals" + std::to_string(8 * sizeof(ScalarType)) + + "::assign"); +template +Timer ExtendedGridOrbitals::normalize_tm_( + "ExtendedGridOrbitals" + std::to_string(8 * sizeof(ScalarType)) + + "::normalize"); +template +Timer ExtendedGridOrbitals::axpy_tm_( + "ExtendedGridOrbitals" + std::to_string(8 * sizeof(ScalarType)) + "::axpy"); + +template +ExtendedGridOrbitals::ExtendedGridOrbitals(std::string name, const pb::Grid& my_grid, const short subdivx, const int numst, const short bc[3], ProjectedMatricesInterface* proj_matrices, std::shared_ptr lrs, MasksSet* masks, @@ -91,28 +122,31 @@ ExtendedGridOrbitals::ExtendedGridOrbitals(std::string name, if (setup_flag) setup(); } -ExtendedGridOrbitals::~ExtendedGridOrbitals() +template +ExtendedGridOrbitals::~ExtendedGridOrbitals() { assert(proj_matrices_ != nullptr); } -ExtendedGridOrbitals::ExtendedGridOrbitals(const std::string& name, - const ExtendedGridOrbitals& A, const bool copy_data) +template +ExtendedGridOrbitals::ExtendedGridOrbitals(const std::string& name, + const ExtendedGridOrbitals& A, const bool copy_data) : Orbitals(A, copy_data), name_(name), proj_matrices_(A.proj_matrices_), block_vector_(A.block_vector_, copy_data), grid_(A.grid_) { - // if(onpe0)cout<<"call ExtendedGridOrbitals(const ExtendedGridOrbitals &A, - // const bool copy_data)"< +ExtendedGridOrbitals::ExtendedGridOrbitals(const std::string& name, + const ExtendedGridOrbitals& A, + ProjectedMatricesInterface* proj_matrices, const bool copy_data) : Orbitals(A, copy_data), name_(name), proj_matrices_(proj_matrices), @@ -125,7 +159,9 @@ ExtendedGridOrbitals::ExtendedGridOrbitals(const 
std::string& name, proj_matrices_->setup(overlapping_gids_); } -void ExtendedGridOrbitals::copyDataFrom(const ExtendedGridOrbitals& src) +template +void ExtendedGridOrbitals::copyDataFrom( + const ExtendedGridOrbitals& src) { assert(proj_matrices_ != nullptr); @@ -134,7 +170,8 @@ void ExtendedGridOrbitals::copyDataFrom(const ExtendedGridOrbitals& src) setIterativeIndex(src); } -void ExtendedGridOrbitals::setDotProduct(const short dot_type) +template +void ExtendedGridOrbitals::setDotProduct(const short dot_type) { DotProductManagerFactory factory; @@ -143,7 +180,8 @@ void ExtendedGridOrbitals::setDotProduct(const short dot_type) assert(dotProductManager_ != nullptr); } -void ExtendedGridOrbitals::setup() +template +void ExtendedGridOrbitals::setup() { Control& ct = *(Control::instance()); @@ -167,8 +205,9 @@ void ExtendedGridOrbitals::setup() "ExtendedGridOrbitals::setup() done...", (*MPIdata::sout)); } -void ExtendedGridOrbitals::reset(MasksSet* masks, MasksSet* corrmasks, - std::shared_ptr lrs) +template +void ExtendedGridOrbitals::reset(MasksSet* masks, + MasksSet* corrmasks, std::shared_ptr lrs) { (void)masks; (void)corrmasks; @@ -182,7 +221,9 @@ void ExtendedGridOrbitals::reset(MasksSet* masks, MasksSet* corrmasks, setup(); } -void ExtendedGridOrbitals::assign(const ExtendedGridOrbitals& orbitals) +template +void ExtendedGridOrbitals::assign( + const ExtendedGridOrbitals& orbitals) { assert(proj_matrices_ != nullptr); @@ -195,9 +236,10 @@ void ExtendedGridOrbitals::assign(const ExtendedGridOrbitals& orbitals) assign_tm_.stop(); } +template template -void ExtendedGridOrbitals::axpy( - const CoeffType alpha, const ExtendedGridOrbitals& orbitals) +void ExtendedGridOrbitals::axpy( + const CoeffType alpha, const ExtendedGridOrbitals& orbitals) { axpy_tm_.start(); @@ -208,7 +250,8 @@ void ExtendedGridOrbitals::axpy( axpy_tm_.stop(); } -void ExtendedGridOrbitals::initGauss( +template +void ExtendedGridOrbitals::initGauss( const double rc, const std::shared_ptr lrs) { 
assert(numst_ >= 0); @@ -241,13 +284,13 @@ void ExtendedGridOrbitals::initGauss( const double rmax = 6. * rc; for (int icolor = 0; icolor < numst_; icolor++) { - ORBDTYPE* ipsi = psi(icolor); + ScalarType* ipsi = psi(icolor); unsigned int const ipsi_size = numpt_; - ORBDTYPE* ipsi_host_view = MemorySpace::Memory::allocate_host_view(ipsi_size); - MemorySpace::Memory::copy_view_to_host( + MemorySpace::Memory::copy_view_to_host( ipsi, ipsi_size, ipsi_host_view); - MemorySpace::Memory::set( + MemorySpace::Memory::set( ipsi_host_view, ipsi_size, 0); const Vector3D& center(lrs->getCenter(icolor)); @@ -266,7 +309,7 @@ void ExtendedGridOrbitals::initGauss( const double r = xc.minimage(center, ll, ct.bcWF); if (r < rmax) ipsi_host_view[ix * incx + iy * incy + iz] - = static_cast(exp(-r * r * invrc2)); + = static_cast(exp(-r * r * invrc2)); else ipsi_host_view[ix * incx + iy * incy + iz] = 0.; @@ -277,15 +320,16 @@ void ExtendedGridOrbitals::initGauss( xc[0] += hgrid[0]; } - MemorySpace::Memory::copy_view_to_dev( + MemorySpace::Memory::copy_view_to_dev( ipsi_host_view, ipsi_size, ipsi); - MemorySpace::Memory::free_host_view( + MemorySpace::Memory::free_host_view( ipsi_host_view); } resetIterativeIndex(); } -void ExtendedGridOrbitals::initFourier() +template +void ExtendedGridOrbitals::initFourier() { Control& ct = *(Control::instance()); if (onpe0 && ct.verbose > 2) @@ -322,13 +366,13 @@ void ExtendedGridOrbitals::initFourier() const double kk[3] = { dk[0] * (double)kvector[0], dk[1] * (double)kvector[1], dk[2] * (double)kvector[2] }; - ORBDTYPE* ipsi = psi(icolor); + ScalarType* ipsi = psi(icolor); unsigned int const ipsi_size = numpt_; - ORBDTYPE* ipsi_host_view = MemorySpace::Memory::allocate_host_view(ipsi_size); - MemorySpace::Memory::copy_view_to_host( + MemorySpace::Memory::copy_view_to_host( ipsi, ipsi_size, ipsi_host_view); - MemorySpace::Memory::set( + MemorySpace::Memory::set( ipsi_host_view, numpt_, 0); // TODO this can be done on the GPU with OpenMP @@ -344,9 
+388,9 @@ void ExtendedGridOrbitals::initFourier() { ipsi_host_view[ix * incx + iy * incy + iz] = 1. - - static_cast(std::cos(kk[0] * x) - * std::cos(kk[1] * y) - * std::cos(kk[2] * z)); + - static_cast(std::cos(kk[0] * x) + * std::cos(kk[1] * y) + * std::cos(kk[2] * z)); z += hgrid[2]; } @@ -355,17 +399,18 @@ void ExtendedGridOrbitals::initFourier() x += hgrid[0]; } - MemorySpace::Memory::copy_view_to_dev( + MemorySpace::Memory::copy_view_to_dev( ipsi_host_view, ipsi_size, ipsi); - MemorySpace::Memory::free_host_view( + MemorySpace::Memory::free_host_view( ipsi_host_view); } resetIterativeIndex(); } -void ExtendedGridOrbitals::multiply_by_matrix( +template +void ExtendedGridOrbitals::multiply_by_matrix( const dist_matrix::DistMatrix& dmatrix, - ORBDTYPE* const product, const int ldp) + ScalarType* const product, const int ldp) { #if 0 (*MPIdata::sout)<<"self multiply_by_matrix"< +void ExtendedGridOrbitals::multiply_by_matrix( + const DISTMATDTYPE* const matrix, ScalarType* product, const int ldp) const { prod_matrix_tm_.start(); unsigned int const product_size = numst_ * ldp; - ORBDTYPE* product_host_view - = MemorySpace::Memory::allocate_host_view( - product_size); - MemorySpace::Memory::copy_view_to_host( + ScalarType* product_host_view = MemorySpace::Memory::allocate_host_view(product_size); + MemorySpace::Memory::copy_view_to_host( product, product_size, product_host_view); - memset(product_host_view, 0, ldp * numst_ * sizeof(ORBDTYPE)); + memset(product_host_view, 0, ldp * numst_ * sizeof(ScalarType)); unsigned int const phi_size = numpt_ * numst_; - ORBDTYPE* phi_host_view - = MemorySpace::Memory::allocate_host_view( - phi_size); - MemorySpace::Memory::copy_view_to_host( + ScalarType* phi_host_view = MemorySpace::Memory::allocate_host_view(phi_size); + MemorySpace::Memory::copy_view_to_host( getPsi(0), phi_size, phi_host_view); // TODO this can be done on the GPU LinearAlgebraUtils::MPgemmNN(numpt_, numst_, numst_, 1., phi_host_view, lda_, matrix, numst_, 
0., product_host_view, ldp); - MemorySpace::Memory::free_host_view( + MemorySpace::Memory::free_host_view( phi_host_view); - MemorySpace::Memory::copy_view_to_dev( + MemorySpace::Memory::copy_view_to_dev( product_host_view, product_size, product); - MemorySpace::Memory::free_host_view( + MemorySpace::Memory::free_host_view( product_host_view); prod_matrix_tm_.stop(); } #ifdef HAVE_MAGMA -void ExtendedGridOrbitals::multiplyByMatrix( +template +void ExtendedGridOrbitals::multiplyByMatrix( const SquareLocalMatrices& matrix, - ORBDTYPE* product, const int ldp) const + ScalarType* product, const int ldp) const { - SquareLocalMatrices matdev( + SquareLocalMatrices matdev( matrix.nmat(), matrix.m()); matdev.assign(matrix); @@ -429,9 +474,10 @@ void ExtendedGridOrbitals::multiplyByMatrix( } #endif -void ExtendedGridOrbitals::multiplyByMatrix( +template +void ExtendedGridOrbitals::multiplyByMatrix( const SquareLocalMatrices& matrix, - ORBDTYPE* product, const int ldp) const + ScalarType* product, const int ldp) const { assert(matrix.nmat() == 1); @@ -448,10 +494,11 @@ void ExtendedGridOrbitals::multiplyByMatrix( // Here the result is stored in one of the matrices used in the multiplication, // so a temporary arry is necessary #ifdef HAVE_MAGMA -void ExtendedGridOrbitals::multiplyByMatrix( +template +void ExtendedGridOrbitals::multiplyByMatrix( const SquareLocalMatrices& matrix) { - SquareLocalMatrices matdev( + SquareLocalMatrices matdev( matrix.nmat(), matrix.m()); matdev.assign(matrix); @@ -459,42 +506,62 @@ void ExtendedGridOrbitals::multiplyByMatrix( } #endif -void ExtendedGridOrbitals::multiplyByMatrix( +template +void ExtendedGridOrbitals::multiplyByMatrix( const SquareLocalMatrices& matrix) { - ORBDTYPE* product - = MemorySpace::Memory::allocate( + ScalarType* product + = MemorySpace::Memory::allocate( numpt_ * numst_); multiplyByMatrix(matrix, product, numpt_); - MemorySpace::Memory::copy( + MemorySpace::Memory::copy( product, numpt_ * numst_, getPsi(0)); - 
MemorySpace::Memory::free(product); + MemorySpace::Memory::free(product); } -void ExtendedGridOrbitals::multiplyByMatrix( +template +void ExtendedGridOrbitals::multiplyByMatrix( const SquareLocalMatrices& matrix, - ExtendedGridOrbitals& product) const + ExtendedGridOrbitals& product) const { multiplyByMatrix(matrix, product.psi(0), product.lda_); } -void ExtendedGridOrbitals::multiply_by_matrix( - const DISTMATDTYPE* const matrix, ExtendedGridOrbitals& product) const +template +void ExtendedGridOrbitals::multiply_by_matrix( + const DISTMATDTYPE* const matrix, + ExtendedGridOrbitals& product) const { multiply_by_matrix(matrix, product.psi(0), product.lda_); } template <> -void ExtendedGridOrbitals::multiply_by_matrix( +template <> +void ExtendedGridOrbitals::multiply_by_matrix( + const dist_matrix::DistMatrix& matrix) +{ + multiply_by_DistMatrix(matrix); +} + +template <> +template <> +void ExtendedGridOrbitals::multiply_by_matrix( + const ReplicatedMatrix& matrix) +{ + multiply_by_ReplicatedMatrix(matrix); +} + +template +void ExtendedGridOrbitals::multiply_by_DistMatrix( const dist_matrix::DistMatrix& matrix) { prod_matrix_tm_.start(); - ORBDTYPE* product = new ORBDTYPE[numpt_ * numst_]; - memset(product, 0, numpt_ * numst_ * sizeof(ORBDTYPE)); + ScalarType* product = new ScalarType[numpt_ * numst_]; + memset(product, 0, numpt_ * numst_ * sizeof(ScalarType)); ReplicatedWorkSpace& wspace( ReplicatedWorkSpace::instance()); @@ -502,13 +569,12 @@ void ExtendedGridOrbitals::multiply_by_matrix( matrix.allgather(work_matrix, numst_); - const size_t slnumpt = numpt_ * sizeof(ORBDTYPE); + const size_t slnumpt = numpt_ * sizeof(ScalarType); unsigned int const phi_size = numpt_ * numst_; - ORBDTYPE* phi_host_view - = MemorySpace::Memory::allocate_host_view( - phi_size); - MemorySpace::Memory::copy_view_to_host( + ScalarType* phi_host_view = MemorySpace::Memory::allocate_host_view(phi_size); + MemorySpace::Memory::copy_view_to_host( getPsi(0), phi_size, phi_host_view); // 
TODO this can be done on the GPU @@ -518,9 +584,9 @@ void ExtendedGridOrbitals::multiply_by_matrix( for (int color = 0; color < numst_; color++) memcpy(phi_host_view + color * lda_, product + color * numpt_, slnumpt); - MemorySpace::Memory::copy_view_to_dev( + MemorySpace::Memory::copy_view_to_dev( phi_host_view, phi_size, getPsi(0)); - MemorySpace::Memory::free_host_view( + MemorySpace::Memory::free_host_view( phi_host_view); delete[] product; @@ -528,8 +594,9 @@ void ExtendedGridOrbitals::multiply_by_matrix( prod_matrix_tm_.stop(); } -template <> -void ExtendedGridOrbitals::multiply_by_matrix(const ReplicatedMatrix& matrix) +template +void ExtendedGridOrbitals::multiply_by_ReplicatedMatrix( + const ReplicatedMatrix& matrix) { prod_matrix_tm_.start(); @@ -539,33 +606,35 @@ void ExtendedGridOrbitals::multiply_by_matrix(const ReplicatedMatrix& matrix) auto& magma_singleton = MagmaSingleton::get_magma_singleton(); - ORBDTYPE* tmp - = MemorySpace::Memory::allocate( + ScalarType* tmp + = MemorySpace::Memory::allocate( numst_ * lda_); magmablas_dgemm(magma_transa, magma_transb, numpt_, numst_, numst_, 1., block_vector_.vect(0), lda_, matrix.data(), matrix.ld(), 0., tmp, lda_, magma_singleton.queue_); - MemorySpace::Memory::copy( + MemorySpace::Memory::copy( tmp, numst_ * lda_, block_vector_.vect(0)); - MemorySpace::Memory::free(tmp); + MemorySpace::Memory::free(tmp); #else - ORBDTYPE* tmp = MemorySpace::Memory::allocate( - numst_ * lda_); + ScalarType* tmp + = MemorySpace::Memory::allocate( + numst_ * lda_); LinearAlgebraUtils::MPgemmNN(numpt_, numst_, numst_, 1., block_vector_.vect(0), lda_, matrix.data(), matrix.ld(), 0., tmp, lda_); - memcpy(block_vector_.vect(0), tmp, numst_ * lda_ * sizeof(ORBDTYPE)); + memcpy(block_vector_.vect(0), tmp, numst_ * lda_ * sizeof(ScalarType)); - MemorySpace::Memory::free(tmp); + MemorySpace::Memory::free(tmp); #endif prod_matrix_tm_.stop(); } -int ExtendedGridOrbitals::read_hdf5(HDFrestart& h5f_file) +template +int 
ExtendedGridOrbitals::read_hdf5(HDFrestart& h5f_file) { assert(proj_matrices_ != nullptr); @@ -575,9 +644,9 @@ int ExtendedGridOrbitals::read_hdf5(HDFrestart& h5f_file) int ierr = read_func_hdf5(h5f_file, name); if (ierr < 0) { - (*MPIdata::serr) - << "ExtendedGridOrbitals::read_hdf5(): error in reading " << name - << ", size=" << name.size() << std::endl; + (*MPIdata::serr) << "ExtendedGridOrbitals::read_hdf5(): " + "error in reading " + << name << ", size=" << name.size() << std::endl; return ierr; } else if (onpe0 && ct.verbose > 2) @@ -592,9 +661,9 @@ int ExtendedGridOrbitals::read_hdf5(HDFrestart& h5f_file) ierr = proj_matrices_->readDM(h5f_file); if (ierr < 0) { - (*MPIdata::serr) - << "ExtendedGridOrbitals::read_hdf5(): error in reading DM" - << std::endl; + (*MPIdata::serr) << "ExtendedGridOrbitals::read_hdf5():" + " error in reading DM" + << std::endl; return ierr; } } @@ -604,7 +673,9 @@ int ExtendedGridOrbitals::read_hdf5(HDFrestart& h5f_file) return ierr; } -int ExtendedGridOrbitals::write(HDFrestart& h5f_file, const std::string& name) +template +int ExtendedGridOrbitals::write( + HDFrestart& h5f_file, const std::string& name) { if (onpe0) (*MPIdata::sout) << "ExtendedGridOrbitals::write_func_hdf5()...\n"; @@ -697,9 +768,9 @@ int ExtendedGridOrbitals::write(HDFrestart& h5f_file, const std::string& name) } // iwrite unsigned int const psi_size = numpt_; - ORBDTYPE* psi_host_view = MemorySpace::Memory::allocate_host_view(psi_size); - MemorySpace::Memory::copy_view_to_host( + MemorySpace::Memory::copy_view_to_host( psi(color), psi_size, psi_host_view); int ierr = h5f_file.writeData( @@ -712,9 +783,10 @@ int ExtendedGridOrbitals::write(HDFrestart& h5f_file, const std::string& name) herr_t status = H5Dclose(dset_id); if (status < 0) { - (*MPIdata::serr) << "ExtendedGridOrbitals::write_func_hdf5:" - "H5Dclose failed!!!" - << std::endl; + (*MPIdata::serr) + << "ExtendedGridOrbitals::write_func_hdf5:" + "H5Dclose failed!!!" 
+ << std::endl; return -1; } } @@ -745,7 +817,8 @@ int ExtendedGridOrbitals::write(HDFrestart& h5f_file, const std::string& name) } // read all the data sets with names starting with "name" -int ExtendedGridOrbitals::read_func_hdf5( +template +int ExtendedGridOrbitals::read_func_hdf5( HDFrestart& h5f_file, const std::string& name) { assert(numst_ >= 0); @@ -766,18 +839,18 @@ int ExtendedGridOrbitals::read_func_hdf5( // memory dataspace identifier hid_t memspace = (h5f_file.active()) ? h5f_file.createMemspace() : 0; - ORBDTYPE* buffer = new ORBDTYPE[block[0] * block[1] * block[2]]; + ScalarType* buffer = new ScalarType[block[0] * block[1] * block[2]]; if (onpe0 && ct.verbose > 2) { if (h5f_file.gatherDataX()) { - (*MPIdata::sout) - << "ExtendedGridOrbitals::read_func_hdf5(): Read wave " - "functions from " - << grid_.mype_env().n_mpi_task(1) - * grid_.mype_env().n_mpi_task(2) - << " PEs" << std::endl; + (*MPIdata::sout) << "ExtendedGridOrbitals::read_func_" + "hdf5(): Read wave " + "functions from " + << grid_.mype_env().n_mpi_task(1) + * grid_.mype_env().n_mpi_task(2) + << " PEs" << std::endl; } else { @@ -806,18 +879,19 @@ int ExtendedGridOrbitals::read_func_hdf5( hid_t dset_id = h5f_file.open_dset(datasetname); if (dset_id < 0) { - (*MPIdata::serr) - << "ExtendedGridOrbitals::read_func_hdf5() --- cannot open " - << datasetname << std::endl; + (*MPIdata::serr) << "ExtendedGridOrbitals::read_func_" + "hdf5() --- cannot open " + << datasetname << std::endl; return dset_id; } herr_t status = h5f_file.readData(buffer, memspace, dset_id, precision); if (status < 0) { - (*MPIdata::serr) << "ExtendedGridOrbitals::read_func_hdf5() --- " - "H5Dread failed!!!" - << std::endl; + (*MPIdata::serr) + << "ExtendedGridOrbitals::read_func_hdf5() --- " + "H5Dread failed!!!" 
+ << std::endl; return -1; } @@ -828,16 +902,16 @@ int ExtendedGridOrbitals::read_func_hdf5( } #ifdef HAVE_MAGMA - ORBDTYPE* buffer_dev - = MemorySpace::Memory::allocate( + ScalarType* buffer_dev + = MemorySpace::Memory::allocate( numpt_); MemorySpace::copy_to_dev(buffer, numpt_, buffer_dev); #else - ORBDTYPE* buffer_dev = buffer; + ScalarType* buffer_dev = buffer; #endif block_vector_.assignLocal(icolor, 0, buffer_dev); #ifdef HAVE_MAGMA - MemorySpace::Memory::free(buffer_dev); + MemorySpace::Memory::free(buffer_dev); #endif } @@ -859,8 +933,10 @@ int ExtendedGridOrbitals::read_func_hdf5( // compute the matrix // output: matB -void ExtendedGridOrbitals::computeMatB( - const ExtendedGridOrbitals& orbitals, const pb::Lap& LapOper) +template +void ExtendedGridOrbitals::computeMatB( + const ExtendedGridOrbitals& orbitals, + const pb::Lap& LapOper) { if (numst_ == 0) return; @@ -876,17 +952,16 @@ void ExtendedGridOrbitals::computeMatB( SquareLocalMatrices ss(1, numst_); - ORBDTYPE* work = new ORBDTYPE[lda_ * bcolor]; - memset(work, 0, lda_ * bcolor * sizeof(ORBDTYPE)); + ScalarType* work = new ScalarType[lda_ * bcolor]; + memset(work, 0, lda_ * bcolor * sizeof(ScalarType)); - ORBDTYPE* const orbitals_psi + ScalarType* const orbitals_psi = (numst_ > 0) ? 
orbitals.block_vector_.vect(0) : nullptr; const unsigned int orbitals_psi_size = orbitals.block_vector_.get_allocated_size_storage(); - ORBDTYPE* orbitals_psi_host_view - = MemorySpace::Memory::allocate_host_view( - orbitals_psi_size); - MemorySpace::Memory::copy_view_to_host( + ScalarType* orbitals_psi_host_view = MemorySpace::Memory::allocate_host_view(orbitals_psi_size); + MemorySpace::Memory::copy_view_to_host( orbitals_psi, orbitals_psi_size, orbitals_psi_host_view); setDataWithGhosts(); @@ -911,7 +986,7 @@ void ExtendedGridOrbitals::computeMatB( ss0 + icolor * numst_, numst_); } - MemorySpace::Memory::free_host_view( + MemorySpace::Memory::free_host_view( orbitals_psi_host_view); delete[] work; @@ -921,7 +996,9 @@ void ExtendedGridOrbitals::computeMatB( } // compute and its inverse -void ExtendedGridOrbitals::computeBAndInvB(const pb::Lap& LapOper) +template +void ExtendedGridOrbitals::computeBAndInvB( + const pb::Lap& LapOper) { assert(proj_matrices_ != nullptr); @@ -936,7 +1013,8 @@ void ExtendedGridOrbitals::computeBAndInvB(const pb::Lap& LapOper) invBmat_tm_.stop(); } -void ExtendedGridOrbitals::getLocalOverlap( +template +void ExtendedGridOrbitals::getLocalOverlap( SquareLocalMatrices& ss) { assert(numst_ >= 0); @@ -949,7 +1027,9 @@ void ExtendedGridOrbitals::getLocalOverlap( } } -void ExtendedGridOrbitals::getLocalOverlap(const ExtendedGridOrbitals& orbitals, +template +void ExtendedGridOrbitals::getLocalOverlap( + const ExtendedGridOrbitals& orbitals, SquareLocalMatrices& ss) { assert(numst_ >= 0); @@ -961,8 +1041,9 @@ void ExtendedGridOrbitals::getLocalOverlap(const ExtendedGridOrbitals& orbitals, } } -void ExtendedGridOrbitals::computeLocalProduct( - const ExtendedGridOrbitals& orbitals, +template +void ExtendedGridOrbitals::computeLocalProduct( + const ExtendedGridOrbitals& orbitals, LocalMatrices& ss, const bool transpose) { // assert( orbitals.numst_>=0 ); @@ -973,11 +1054,12 @@ void ExtendedGridOrbitals::computeLocalProduct( } #ifdef HAVE_MAGMA 
-void ExtendedGridOrbitals::computeLocalProduct(const ORBDTYPE* const array, - const int ld, LocalMatrices& ss, - const bool transpose) +template +void ExtendedGridOrbitals::computeLocalProduct( + const ScalarType* const array, const int ld, + LocalMatrices& ss, const bool transpose) { - LocalMatrices sdev( + LocalMatrices sdev( ss.nmat(), ss.m(), ss.n()); computeLocalProduct(array, ld, sdev, transpose); @@ -986,9 +1068,10 @@ void ExtendedGridOrbitals::computeLocalProduct(const ORBDTYPE* const array, } #endif -void ExtendedGridOrbitals::computeLocalProduct(const ORBDTYPE* const array, - const int ld, LocalMatrices& ss, - const bool transpose) +template +void ExtendedGridOrbitals::computeLocalProduct( + const ScalarType* const array, const int ld, + LocalMatrices& ss, const bool transpose) { assert(numpt_ > 0); assert(numpt_ <= ld); @@ -996,8 +1079,8 @@ void ExtendedGridOrbitals::computeLocalProduct(const ORBDTYPE* const array, assert(numst_ != 0); assert(grid_.vel() > 0.); - const ORBDTYPE* const a = transpose ? array : block_vector_.vect(0); - const ORBDTYPE* const b = transpose ? block_vector_.vect(0) : array; + const ScalarType* const a = transpose ? array : block_vector_.vect(0); + const ScalarType* const b = transpose ? block_vector_.vect(0) : array; const int lda = transpose ? ld : lda_; const int ldb = transpose ? 
lda_ : ld; @@ -1006,8 +1089,10 @@ void ExtendedGridOrbitals::computeLocalProduct(const ORBDTYPE* const array, grid_.vel(), a, lda, b, ldb, 0., ss.getRawPtr(0), ss.m()); } -void ExtendedGridOrbitals::computeDiagonalElementsDotProduct( - const ExtendedGridOrbitals& orbitals, std::vector& ss) const +template +void ExtendedGridOrbitals::computeDiagonalElementsDotProduct( + const ExtendedGridOrbitals& orbitals, + std::vector& ss) const { assert(numst_ > 0); assert(grid_.vel() > 0.); @@ -1025,7 +1110,8 @@ void ExtendedGridOrbitals::computeDiagonalElementsDotProduct( mmpi.allreduce(&tmp[0], &ss[0], numst_, MPI_SUM); } -void ExtendedGridOrbitals::computeGram( +template +void ExtendedGridOrbitals::computeGram( dist_matrix::DistMatrix& gram_mat) { SquareLocalMatrices ss(1, numst_); @@ -1039,7 +1125,9 @@ void ExtendedGridOrbitals::computeGram( sl2dm->accumulate(ss, gram_mat); } -void ExtendedGridOrbitals::computeGram(const ExtendedGridOrbitals& orbitals, +template +void ExtendedGridOrbitals::computeGram( + const ExtendedGridOrbitals& orbitals, dist_matrix::DistMatrix& gram_mat) { SquareLocalMatrices ss(1, numst_); @@ -1055,7 +1143,8 @@ void ExtendedGridOrbitals::computeGram(const ExtendedGridOrbitals& orbitals, } // compute the lower-triangular part of the overlap matrix -void ExtendedGridOrbitals::computeGram(const int verbosity) +template +void ExtendedGridOrbitals::computeGram(const int verbosity) { assert(proj_matrices_ != nullptr); @@ -1081,7 +1170,8 @@ void ExtendedGridOrbitals::computeGram(const int verbosity) overlap_tm_.stop(); } -void ExtendedGridOrbitals::computeGramAndInvS(const int verbosity) +template +void ExtendedGridOrbitals::computeGramAndInvS(const int verbosity) { assert(proj_matrices_ != nullptr); @@ -1091,28 +1181,29 @@ void ExtendedGridOrbitals::computeGramAndInvS(const int verbosity) proj_matrices_->computeInvS(); } -void ExtendedGridOrbitals::checkCond(const double tol, const bool flag_stop) +template +void ExtendedGridOrbitals::checkCond( + const 
double tol, const bool flag_stop) { assert(proj_matrices_ != nullptr); proj_matrices_->checkCond(tol, flag_stop); } -double ExtendedGridOrbitals::dotProduct(const ExtendedGridOrbitals& orbitals) +template +double ExtendedGridOrbitals::dotProduct( + const ExtendedGridOrbitals& orbitals) { assert(dotProductManager_ != nullptr); return dotProductManager_->dotProduct(*this, orbitals); } -double ExtendedGridOrbitals::dotProduct( - const ExtendedGridOrbitals& orbitals, const short dot_type) +template +double ExtendedGridOrbitals::dotProduct( + const ExtendedGridOrbitals& orbitals, const short dot_type) { dot_product_tm_.start(); - assert(numst_ >= 0); - assert(1 > 0); - assert(1 < 1000); - DotProductManagerFactory factory; DotProductManager* manager = factory.create(dot_type); assert(manager != nullptr); @@ -1126,7 +1217,9 @@ double ExtendedGridOrbitals::dotProduct( return dot; } -void ExtendedGridOrbitals::orthonormalizeLoewdin(const bool overlap_uptodate, +template +void ExtendedGridOrbitals::orthonormalizeLoewdin( + const bool overlap_uptodate, SquareLocalMatrices* matrixTransform, const bool update_matrices) { @@ -1180,7 +1273,8 @@ void ExtendedGridOrbitals::orthonormalizeLoewdin(const bool overlap_uptodate, if (matrixTransform == nullptr) delete localP; } -double ExtendedGridOrbitals::norm() const +template +double ExtendedGridOrbitals::norm() const { double norm = 0; @@ -1191,7 +1285,8 @@ double ExtendedGridOrbitals::norm() const return norm; } -double ExtendedGridOrbitals::normState(const int gid) const +template +double ExtendedGridOrbitals::normState(const int gid) const { assert(gid >= 0); @@ -1208,7 +1303,9 @@ double ExtendedGridOrbitals::normState(const int gid) const return grid_.vel() * norm; } -void ExtendedGridOrbitals::orthonormalize2states(const int st1, const int st2) +template +void ExtendedGridOrbitals::orthonormalize2states( + const int st1, const int st2) { assert(st1 >= 0); assert(st2 >= 0); @@ -1278,15 +1375,16 @@ void 
ExtendedGridOrbitals::orthonormalize2states(const int st1, const int st2) #endif } -void ExtendedGridOrbitals::multiplyByMatrix2states(const int st1, const int st2, - const double* mat, ExtendedGridOrbitals& product) +template +void ExtendedGridOrbitals::multiplyByMatrix2states(const int st1, + const int st2, const double* mat, ExtendedGridOrbitals& product) { assert(st1 >= 0); assert(st2 >= 0); assert(1 == 1); // if( onpe0 && ct.verbose>2 ) - // (*MPIdata::sout)<<"ExtendedGridOrbitals::multiplyByMatrix2states()"<::multiplyByMatrix2states()"< +void ExtendedGridOrbitals::computeInvNorms2( std::vector>& inv_norms2) const { std::vector diagS(numst_); @@ -1315,7 +1414,8 @@ void ExtendedGridOrbitals::computeInvNorms2( } } -void ExtendedGridOrbitals::normalize() +template +void ExtendedGridOrbitals::normalize() { normalize_tm_.start(); @@ -1323,7 +1423,8 @@ void ExtendedGridOrbitals::normalize() assert(numst_ >= 0); // if( onpe0 && ct.verbose>2 ) - // (*MPIdata::sout)<<"Normalize ExtendedGridOrbitals"<"< diagS(numst_); @@ -1350,8 +1451,10 @@ void ExtendedGridOrbitals::normalize() } // modify argument orbitals, by projecting out its component -// along ExtendedGridOrbitals -void ExtendedGridOrbitals::projectOut(ExtendedGridOrbitals& orbitals) +// along ExtendedGridOrbitals +template +void ExtendedGridOrbitals::projectOut( + ExtendedGridOrbitals& orbitals) { projectOut(orbitals.psi(0), lda_); @@ -1366,7 +1469,9 @@ void ExtendedGridOrbitals::projectOut(ExtendedGridOrbitals& orbitals) orbitals.incrementIterativeIndex(); } -void ExtendedGridOrbitals::projectOut(ORBDTYPE* const array, const int lda) +template +void ExtendedGridOrbitals::projectOut( + ScalarType* const array, const int lda) { assert(lda > 1); assert(numpt_ > 0); @@ -1384,14 +1489,13 @@ void ExtendedGridOrbitals::projectOut(ORBDTYPE* const array, const int lda) #endif proj_matrices_->applyInvS(lmatrix); - ORBDTYPE* tproduct = new ORBDTYPE[numpt_ * numst_]; - memset(tproduct, 0, numpt_ * numst_ * 
sizeof(ORBDTYPE)); + ScalarType* tproduct = new ScalarType[numpt_ * numst_]; + memset(tproduct, 0, numpt_ * numst_ * sizeof(ScalarType)); unsigned int const phi_size = numpt_ * numst_; - ORBDTYPE* phi_host_view - = MemorySpace::Memory::allocate_host_view( - phi_size); - MemorySpace::Memory::copy_view_to_host( + ScalarType* phi_host_view = MemorySpace::Memory::allocate_host_view(phi_size); + MemorySpace::Memory::copy_view_to_host( getPsi(0), phi_size, phi_host_view); MATDTYPE* localMat = lmatrix.getRawPtr(); @@ -1401,32 +1505,32 @@ void ExtendedGridOrbitals::projectOut(ORBDTYPE* const array, const int lda) LinearAlgebraUtils::MPgemmNN(numpt_, numst_, numst_, 1., phi_host_view, lda_, localMat, numst_, 0., tproduct, numpt_); - MemorySpace::Memory::free_host_view( + MemorySpace::Memory::free_host_view( phi_host_view); - ORBDTYPE* parray = array + 0 * numpt_; + ScalarType* parray = array + 0 * numpt_; unsigned int const parray_size = numst_ * lda; - ORBDTYPE* parray_host_view - = MemorySpace::Memory::allocate_host_view( - parray_size); - MemorySpace::Memory::copy_view_to_host( + ScalarType* parray_host_view = MemorySpace::Memory::allocate_host_view(parray_size); + MemorySpace::Memory::copy_view_to_host( parray, parray_size, parray_host_view); - ORBDTYPE minus = -1.; + ScalarType minus = -1.; for (int j = 0; j < numst_; j++) LinearAlgebraUtils::MPaxpy( numpt_, minus, tproduct + j * numpt_, parray_host_view + j * lda); - MemorySpace::Memory::copy_view_to_dev( + MemorySpace::Memory::copy_view_to_dev( parray_host_view, parray_size, parray); - MemorySpace::Memory::free_host_view( + MemorySpace::Memory::free_host_view( parray_host_view); delete[] tproduct; } -void ExtendedGridOrbitals::initRand() +template +void ExtendedGridOrbitals::initRand() { Control& ct = *(Control::instance()); @@ -1465,10 +1569,10 @@ void ExtendedGridOrbitals::initRand() for (unsigned int idx = 0; idx < grid_.gdim(2); idx++) zrand[idx] = ran0() - 0.5; - unsigned int const size = numpt_; - ORBDTYPE* 
psi_state_view = MemorySpace::Memory::allocate_host_view(size); - MemorySpace::Memory::copy_view_to_host( + MemorySpace::Memory::copy_view_to_host( psi(istate), size, psi_state_view); for (int ix = loc_length * 0; ix < loc_length; ix++) @@ -1484,9 +1588,9 @@ void ExtendedGridOrbitals::initRand() < static_cast(lda_)); } - MemorySpace::Memory::copy_view_to_dev( + MemorySpace::Memory::copy_view_to_dev( psi_state_view, size, psi(istate)); - MemorySpace::Memory::free_host_view( + MemorySpace::Memory::free_host_view( psi_state_view); } @@ -1494,8 +1598,18 @@ void ExtendedGridOrbitals::initRand() } template <> -void ExtendedGridOrbitals::addDotWithNcol2Matrix( - ExtendedGridOrbitals& Apsi, dist_matrix::DistMatrix& matrix) const +template <> +void ExtendedGridOrbitals::addDotWithNcol2Matrix( + ExtendedGridOrbitals& Apsi, + dist_matrix::DistMatrix& matrix) const +{ + addDotWithNcol2DistMatrix(Apsi, matrix); +} + +template +void ExtendedGridOrbitals::addDotWithNcol2DistMatrix( + ExtendedGridOrbitals& Apsi, + dist_matrix::DistMatrix& matrix) const { addDot_tm_.start(); @@ -1509,17 +1623,15 @@ void ExtendedGridOrbitals::addDotWithNcol2Matrix( memset(work.data(), 0, size_work * sizeof(double)); unsigned int const block_vector_size = numpt_ * numst_; - ORBDTYPE* block_vector_host_view - = MemorySpace::Memory::allocate_host_view( - block_vector_size); - MemorySpace::Memory::copy_view_to_host( + ScalarType* block_vector_host_view = MemorySpace::Memory::allocate_host_view(block_vector_size); + MemorySpace::Memory::copy_view_to_host( block_vector_.vect(0), block_vector_size, block_vector_host_view); unsigned int const phi_size = numpt_ * numst_; - ORBDTYPE* phi_host_view - = MemorySpace::Memory::allocate_host_view( - phi_size); - MemorySpace::Memory::copy_view_to_host( + ScalarType* phi_host_view = MemorySpace::Memory::allocate_host_view(phi_size); + MemorySpace::Memory::copy_view_to_host( Apsi.getPsi(0), phi_size, phi_host_view); // TODO this can be done on the GPU @@ -1527,10 
+1639,10 @@ void ExtendedGridOrbitals::addDotWithNcol2Matrix( block_vector_host_view + 0 * numpt_, lda_, phi_host_view, lda_, 1., work.data(), numst_); - MemorySpace::Memory::free_host_view( + MemorySpace::Memory::free_host_view( phi_host_view); - MemorySpace::Memory::free_host_view( + MemorySpace::Memory::free_host_view( block_vector_host_view); std::vector work2(size_work); @@ -1544,8 +1656,16 @@ void ExtendedGridOrbitals::addDotWithNcol2Matrix( } template <> -void ExtendedGridOrbitals::addDotWithNcol2Matrix( - ExtendedGridOrbitals& Apsi, ReplicatedMatrix& matrix) const +template <> +void ExtendedGridOrbitals::addDotWithNcol2Matrix( + ExtendedGridOrbitals& Apsi, ReplicatedMatrix& matrix) const +{ + addDotWithNcol2ReplicatedMatrix(Apsi, matrix); +} + +template +void ExtendedGridOrbitals::addDotWithNcol2ReplicatedMatrix( + ExtendedGridOrbitals& Apsi, ReplicatedMatrix& matrix) const { addDot_tm_.start(); @@ -1574,7 +1694,8 @@ void ExtendedGridOrbitals::addDotWithNcol2Matrix( addDot_tm_.stop(); } -void ExtendedGridOrbitals::computeGlobalIndexes() +template +void ExtendedGridOrbitals::computeGlobalIndexes() { overlapping_gids_.clear(); overlapping_gids_.resize(1); @@ -1585,7 +1706,8 @@ void ExtendedGridOrbitals::computeGlobalIndexes() } } -void ExtendedGridOrbitals::printTimers(std::ostream& os) +template +void ExtendedGridOrbitals::printTimers(std::ostream& os) { matB_tm_.print(os); invBmat_tm_.print(os); @@ -1598,7 +1720,8 @@ void ExtendedGridOrbitals::printTimers(std::ostream& os) axpy_tm_.print(os); } -void ExtendedGridOrbitals::initWF( +template +void ExtendedGridOrbitals::initWF( const std::shared_ptr lrs) { Control& ct = *(Control::instance()); @@ -1633,10 +1756,10 @@ void ExtendedGridOrbitals::initWF( if (ct.globalColoring()) { // smooth out random functions - pb::Laph4M myoper(grid_); - pb::GridFunc gf_work( + pb::Laph4M myoper(grid_); + pb::GridFunc gf_work( grid_, ct.bcWF[0], ct.bcWF[1], ct.bcWF[2]); - pb::GridFunc gf_psi( + pb::GridFunc gf_psi( grid_, 
ct.bcWF[0], ct.bcWF[1], ct.bcWF[2]); if (onpe0 && ct.verbose > 2) @@ -1687,24 +1810,7 @@ void ExtendedGridOrbitals::initWF( #endif } -template void ExtendedGridOrbitals::axpy( - const double alpha, const ExtendedGridOrbitals&); -#ifdef MGMOL_USE_MIXEDP -template void ExtendedGridOrbitals::axpy( - const float alpha, const ExtendedGridOrbitals&); -#endif - -template void ExtendedGridOrbitals::setDataWithGhosts( - pb::GridFuncVector* data_wghosts); -template void ExtendedGridOrbitals::setDataWithGhosts( - pb::GridFuncVector* data_wghosts); - -template void ExtendedGridOrbitals::setPsi( - const pb::GridFunc& gf_work, const int ist); -template void ExtendedGridOrbitals::setPsi( - const pb::GridFunc& gf_work, const int ist); +template void ExtendedGridOrbitals::axpy( + const ORBDTYPE alpha, const ExtendedGridOrbitals&); -template void ExtendedGridOrbitals::setPsi( - const pb::GridFuncVector& gf_work); -template void ExtendedGridOrbitals::setPsi( - const pb::GridFuncVector& gf_work); +template class ExtendedGridOrbitals; diff --git a/src/ExtendedGridOrbitals.h b/src/ExtendedGridOrbitals.h index 9bb23b9c..52ece157 100644 --- a/src/ExtendedGridOrbitals.h +++ b/src/ExtendedGridOrbitals.h @@ -17,11 +17,12 @@ #include "HDFrestart.h" #include "Lap.h" #include "MPIdata.h" +#include "MasksSet.h" #include "Mesh.h" #include "Orbitals.h" +#include "ReplicatedMatrix.h" #include "SinCosOps.h" #include "SquareLocalMatrices.h" -#include "global.h" #include "hdf5.h" #include @@ -29,15 +30,11 @@ #include #include -class Potentials; -template -class ProjectedMatrices; class ProjectedMatricesInterface; class LocalizationRegions; -class ExtendedGridOrbitals; -class MasksSet; class ClusterOrbitals; +template class ExtendedGridOrbitals : public Orbitals { private: @@ -59,7 +56,8 @@ class ExtendedGridOrbitals : public Orbitals static int lda_; // leading dimension for storage static int numpt_; - static DotProductManager* dotProductManager_; + static DotProductManager>* + dotProductManager_; 
static int data_wghosts_index_; @@ -75,33 +73,37 @@ class ExtendedGridOrbitals : public Orbitals //////////////////////////////////////////////////////// // instance specific data //////////////////////////////////////////////////////// - BlockVector block_vector_; + BlockVector block_vector_; //////////////////////////////////////////////////////// // // private functions // - void projectOut(ORBDTYPE* const, const int); + void projectOut(ScalarType* const, const int); + + void multiply_by_ReplicatedMatrix(const ReplicatedMatrix& matrix); + void multiply_by_DistMatrix( + const dist_matrix::DistMatrix& matrix); void multiply_by_matrix( - const DISTMATDTYPE* const, ORBDTYPE*, const int) const; + const DISTMATDTYPE* const, ScalarType*, const int) const; void multiply_by_matrix(const dist_matrix::DistMatrix& matrix, - ORBDTYPE* const product, const int ldp); + ScalarType* const product, const int ldp); void scal(const int i, const double alpha) { block_vector_.scal(i, alpha); } - virtual void assign(const int i, const ORBDTYPE* const v, const int n = 1) + virtual void assign(const int i, const ScalarType* const v, const int n = 1) { block_vector_.assign(i, v, n); } ExtendedGridOrbitals& operator=(const ExtendedGridOrbitals& orbitals); ExtendedGridOrbitals(); - void computeMatB(const ExtendedGridOrbitals&, const pb::Lap&); + void computeMatB(const ExtendedGridOrbitals&, const pb::Lap&); - void computeLocalProduct(const ORBDTYPE* const, const int, + void computeLocalProduct(const ScalarType* const, const int, LocalMatrices&, const bool transpose = false); #ifdef HAVE_MAGMA - void computeLocalProduct(const ORBDTYPE* const, const int, + void computeLocalProduct(const ScalarType* const, const int, LocalMatrices&, const bool transpose = false); #endif @@ -110,20 +112,28 @@ class ExtendedGridOrbitals : public Orbitals void computeInvNorms2(std::vector>& inv_norms2) const; void computeDiagonalGram(VariableSizeMatrix& diagS) const; + /*! 
+ * Specialized functions + */ + void addDotWithNcol2DistMatrix( + ExtendedGridOrbitals&, dist_matrix::DistMatrix&) const; + void addDotWithNcol2ReplicatedMatrix( + ExtendedGridOrbitals&, ReplicatedMatrix&) const; + void initFourier(); void initRand(); - ORBDTYPE* psi(const int i) const { return block_vector_.vect(i); } + ScalarType* psi(const int i) const { return block_vector_.vect(i); } - void app_mask(const int, ORBDTYPE*, const short) const {}; + void app_mask(const int, ScalarType*, const short) const {}; #ifdef HAVE_MAGMA void multiplyByMatrix( const SquareLocalMatrices& matrix, - ORBDTYPE* product, const int ldp) const; + ScalarType* product, const int ldp) const; #endif void multiplyByMatrix( const SquareLocalMatrices& matrix, - ORBDTYPE* product, const int ldp) const; + ScalarType* product, const int ldp) const; void setup(); @@ -210,7 +220,7 @@ class ExtendedGridOrbitals : public Orbitals block_vector_.setDataWithGhosts(data_wghosts); } - pb::GridFunc& getFuncWithGhosts(const int i) + pb::GridFunc& getFuncWithGhosts(const int i) { //(*MPIdata::sout)<<" data_wghosts_index_="< -OrbitalsStepper* GrassmanCGFactory::create( - Hamiltonian* hamiltonian, +OrbitalsStepper>* +GrassmanCGFactory>::create( + Hamiltonian>* hamiltonian, ProjectedMatricesInterface* proj_matrices, - MGmol* mgmol_strategy, Ions& ions, std::ostream& os, - const bool short_sighted) + MGmol>* mgmol_strategy, Ions& ions, + std::ostream& os, const bool short_sighted) { - OrbitalsStepper* stepper; + OrbitalsStepper>* stepper; if (short_sighted) { - stepper = new GrassmanCGSparse( + stepper = new GrassmanCGSparse>( hamiltonian, proj_matrices, mgmol_strategy, ions, os); } else { - stepper = new GrassmanCG( + stepper = new GrassmanCG>( hamiltonian, proj_matrices, mgmol_strategy, ions, os); } @@ -27,15 +28,15 @@ OrbitalsStepper* GrassmanCGFactory::create( } template <> -OrbitalsStepper* -GrassmanCGFactory::create( - Hamiltonian* hamiltonian, +OrbitalsStepper>* +GrassmanCGFactory>::create( + 
Hamiltonian>* hamiltonian, ProjectedMatricesInterface* proj_matrices, - MGmol* mgmol_strategy, Ions& ions, std::ostream& os, - const bool /*short_sighted*/) + MGmol>* mgmol_strategy, Ions& ions, + std::ostream& os, const bool /*short_sighted*/) { - OrbitalsStepper* stepper - = new GrassmanCG( + OrbitalsStepper>* stepper + = new GrassmanCG>( hamiltonian, proj_matrices, mgmol_strategy, ions, os); return stepper; diff --git a/src/GrassmanCGSparse.cc b/src/GrassmanCGSparse.cc index 00f6fd77..936b0850 100644 --- a/src/GrassmanCGSparse.cc +++ b/src/GrassmanCGSparse.cc @@ -328,4 +328,4 @@ void GrassmanCGSparse::parallelTransportUpdate( // proj_matrices_->applyInvS(ss); } -template class GrassmanCGSparse; +template class GrassmanCGSparse>; diff --git a/src/GrassmanLineMinimization.cc b/src/GrassmanLineMinimization.cc index f642e0f4..186e8c82 100644 --- a/src/GrassmanLineMinimization.cc +++ b/src/GrassmanLineMinimization.cc @@ -112,7 +112,7 @@ void GrassmanLineMinimization::update_states( // Grassman line minimization method double lambda = computeStepSize(orbitals); // orbitals.projectOut(*sdir_); - orbitals.axpy(lambda, *sdir_); + orbitals.axpy((ORBDTYPE)lambda, *sdir_); // recompute overlap and inverse for new wavefunctions orbitals.computeGramAndInvS(); if (onpe0 && ct.verbose > 1) @@ -124,7 +124,7 @@ void GrassmanLineMinimization::update_states( { // Preconditioned Power Method // orbitals.projectOut(*sdir_); - orbitals.axpy(alpha, *sdir_); + orbitals.axpy((ORBDTYPE)alpha, *sdir_); // if(onpe0)cout<<"alpha = "<::printTimers(std::ostream& os) update_states_tm_.print(os); } -template class GrassmanLineMinimization; -template class GrassmanLineMinimization; +template class GrassmanLineMinimization>; +template class GrassmanLineMinimization>; diff --git a/src/Hamiltonian.cc b/src/Hamiltonian.cc index c7efa2f6..77a27093 100644 --- a/src/Hamiltonian.cc +++ b/src/Hamiltonian.cc @@ -165,9 +165,9 @@ void Hamiltonian::applyLocal(const int ncolors, T& phi, T& hphi) // 
corresponding to the local part of the Hamiltonian template <> template <> -void Hamiltonian::addHlocal2matrix(LocGridOrbitals& phi1, - LocGridOrbitals& phi2, dist_matrix::DistMatrix& hij, - const bool force) +void Hamiltonian>::addHlocal2matrix( + LocGridOrbitals& phi1, LocGridOrbitals& phi2, + dist_matrix::DistMatrix& hij, const bool force) { applyLocal(phi2, force); @@ -181,8 +181,8 @@ void Hamiltonian::addHlocal2matrix(LocGridOrbitals& phi1, template <> template <> -void Hamiltonian::addHlocal2matrix( - ExtendedGridOrbitals& phi1, ExtendedGridOrbitals& phi2, +void Hamiltonian>::addHlocal2matrix( + ExtendedGridOrbitals& phi1, ExtendedGridOrbitals& phi2, dist_matrix::DistMatrix& hij, const bool force) { applyLocal(phi2, force); @@ -201,8 +201,8 @@ void Hamiltonian::addHlocal2matrix( template <> template <> -void Hamiltonian::addHlocal2matrix( - ExtendedGridOrbitals& phi1, ExtendedGridOrbitals& phi2, +void Hamiltonian>::addHlocal2matrix( + ExtendedGridOrbitals& phi1, ExtendedGridOrbitals& phi2, ReplicatedMatrix& hij, const bool force) { applyLocal(phi2, force); @@ -218,8 +218,9 @@ void Hamiltonian::addHlocal2matrix( template <> template <> -void Hamiltonian::addHlocal2matrix(LocGridOrbitals& phi1, - LocGridOrbitals& phi2, ReplicatedMatrix& hij, const bool force) +void Hamiltonian>::addHlocal2matrix( + LocGridOrbitals& phi1, LocGridOrbitals& phi2, + ReplicatedMatrix& hij, const bool force) { (void)hij; @@ -261,8 +262,9 @@ void Hamiltonian::addHlocalij( template <> template <> -void Hamiltonian::addHlocal2matrix(LocGridOrbitals& phi1, - LocGridOrbitals& phi2, VariableSizeMatrix& mat, const bool force) +void Hamiltonian>::addHlocal2matrix( + LocGridOrbitals& phi1, LocGridOrbitals& phi2, + VariableSizeMatrix& mat, const bool force) { Control& ct = *(Control::instance()); @@ -282,33 +284,39 @@ void Hamiltonian::addHlocal2matrix(LocGridOrbitals& phi1, mat.insertMatrixElements(ss, phi1.getOverlappingGids(), ct.numst); } -template Hamiltonian::Hamiltonian(); -template 
Hamiltonian::Hamiltonian(); - -template Hamiltonian::~Hamiltonian(); -template Hamiltonian::~Hamiltonian(); - -template void Hamiltonian::setup(pb::Grid const&, int); -template void Hamiltonian::setup(pb::Grid const&, int); - -template const LocGridOrbitals& Hamiltonian::applyLocal( - LocGridOrbitals&, const bool); -template const ExtendedGridOrbitals& -Hamiltonian::applyLocal( - ExtendedGridOrbitals&, const bool); -template void Hamiltonian::addHlocalij(LocGridOrbitals&, - LocGridOrbitals&, ProjectedMatricesInterface* proj_matrices); -template void Hamiltonian::addHlocalij( - ExtendedGridOrbitals&, ExtendedGridOrbitals&, +template Hamiltonian>::Hamiltonian(); +template Hamiltonian>::Hamiltonian(); + +template Hamiltonian>::~Hamiltonian(); +template Hamiltonian>::~Hamiltonian(); + +template void Hamiltonian>::setup( + pb::Grid const&, int); +template void Hamiltonian>::setup( + pb::Grid const&, int); + +template const LocGridOrbitals& +Hamiltonian>::applyLocal( + LocGridOrbitals&, const bool); +template const ExtendedGridOrbitals& +Hamiltonian>::applyLocal( + ExtendedGridOrbitals&, const bool); +template void Hamiltonian>::addHlocalij( + LocGridOrbitals&, LocGridOrbitals&, + ProjectedMatricesInterface* proj_matrices); +template void Hamiltonian>::addHlocalij( + ExtendedGridOrbitals&, ExtendedGridOrbitals&, ProjectedMatricesInterface* proj_matrices); -template void Hamiltonian::addHlocalij( - LocGridOrbitals&, ProjectedMatricesInterface* proj_matrices); -template void Hamiltonian::addHlocalij( - ExtendedGridOrbitals&, ProjectedMatricesInterface* proj_matrices); -template void Hamiltonian::addHlocal2matrix(LocGridOrbitals&, - LocGridOrbitals&, VariableSizeMatrix& mat, const bool force); -template void Hamiltonian::addHlocal2matrix(LocGridOrbitals&, - LocGridOrbitals&, dist_matrix::DistMatrix& hij, +template void Hamiltonian>::addHlocalij( + LocGridOrbitals&, ProjectedMatricesInterface* proj_matrices); +template void Hamiltonian>::addHlocalij( + ExtendedGridOrbitals&, 
ProjectedMatricesInterface* proj_matrices); +template void Hamiltonian>::addHlocal2matrix( + LocGridOrbitals&, LocGridOrbitals&, + VariableSizeMatrix& mat, const bool force); +template void Hamiltonian>::addHlocal2matrix( + LocGridOrbitals&, LocGridOrbitals&, + dist_matrix::DistMatrix& hij, const bool force); +template void Hamiltonian>::addHlocal2matrix( + LocGridOrbitals&, LocGridOrbitals&, ReplicatedMatrix&, const bool force); -template void Hamiltonian::addHlocal2matrix( - LocGridOrbitals&, LocGridOrbitals&, ReplicatedMatrix&, const bool force); diff --git a/src/HamiltonianMVPSolver.cc b/src/HamiltonianMVPSolver.cc index 17f1f66b..c697b5eb 100644 --- a/src/HamiltonianMVPSolver.cc +++ b/src/HamiltonianMVPSolver.cc @@ -351,13 +351,14 @@ void HamiltonianMVPSolver, - ProjectedMatrices>, LocGridOrbitals>; + ProjectedMatrices>, + LocGridOrbitals>; template class HamiltonianMVPSolver, - ProjectedMatricesSparse, LocGridOrbitals>; + ProjectedMatricesSparse, LocGridOrbitals>; template class HamiltonianMVPSolver, ProjectedMatrices>, - ExtendedGridOrbitals>; + ExtendedGridOrbitals>; template class HamiltonianMVPSolver, ExtendedGridOrbitals>; + ProjectedMatrices, ExtendedGridOrbitals>; diff --git a/src/HamiltonianMVP_DMStrategy.cc b/src/HamiltonianMVP_DMStrategy.cc index c0ef609e..1d03a9b3 100644 --- a/src/HamiltonianMVP_DMStrategy.cc +++ b/src/HamiltonianMVP_DMStrategy.cc @@ -99,12 +99,13 @@ void HamiltonianMVP_DMStrategy, - ProjectedMatrices>, LocGridOrbitals>; + ProjectedMatrices>, + LocGridOrbitals>; template class HamiltonianMVP_DMStrategy, - ProjectedMatricesSparse, LocGridOrbitals>; + ProjectedMatricesSparse, LocGridOrbitals>; template class HamiltonianMVP_DMStrategy, ProjectedMatrices>, - ExtendedGridOrbitals>; + ExtendedGridOrbitals>; template class HamiltonianMVP_DMStrategy, ExtendedGridOrbitals>; + ProjectedMatrices, ExtendedGridOrbitals>; diff --git a/src/IonicAlgorithm.cc b/src/IonicAlgorithm.cc index e804077a..fa8e1690 100644 --- a/src/IonicAlgorithm.cc +++ 
b/src/IonicAlgorithm.cc @@ -235,5 +235,5 @@ void IonicAlgorithm::updatePotAndMasks() mgmol_strategy_.move_orbitals(orbitals_); } -template class IonicAlgorithm; -template class IonicAlgorithm; +template class IonicAlgorithm>; +template class IonicAlgorithm>; diff --git a/src/KBPsiMatrixSparse.cc b/src/KBPsiMatrixSparse.cc index e9ac4544..b12604da 100644 --- a/src/KBPsiMatrixSparse.cc +++ b/src/KBPsiMatrixSparse.cc @@ -685,14 +685,16 @@ double KBPsiMatrixSparse::getTraceDM( } template void KBPsiMatrixSparse::computeKBpsi(const Ions& ions, - LocGridOrbitals& orbitals, const int first_color, const int nb_colors, - const bool flag); -template void KBPsiMatrixSparse::computeAll(const Ions&, LocGridOrbitals&); + LocGridOrbitals& orbitals, const int first_color, + const int nb_colors, const bool flag); +template void KBPsiMatrixSparse::computeAll( + const Ions&, LocGridOrbitals&); template void KBPsiMatrixSparse::computeKBpsi(const Ions& ions, - ExtendedGridOrbitals& orbitals, const int first_color, const int nb_colors, - const bool flag); -template void KBPsiMatrixSparse::computeAll(const Ions&, ExtendedGridOrbitals&); + ExtendedGridOrbitals& orbitals, const int first_color, + const int nb_colors, const bool flag); +template void KBPsiMatrixSparse::computeAll( + const Ions&, ExtendedGridOrbitals&); template double KBPsiMatrixSparse::getEvnl(const Ions& ions, ProjectedMatrices>* proj_matrices); diff --git a/src/LBFGS.cc b/src/LBFGS.cc index 9154b7b3..e8064202 100644 --- a/src/LBFGS.cc +++ b/src/LBFGS.cc @@ -128,7 +128,7 @@ void LBFGS::updateRefs() } template <> -void LBFGS::updateRefMasks() +void LBFGS>::updateRefMasks() { Control& ct = *(Control::instance()); @@ -147,7 +147,7 @@ void LBFGS::updateRefMasks() } template <> -void LBFGS::updateRefMasks() +void LBFGS>::updateRefMasks() { } @@ -177,5 +177,5 @@ bool LBFGS::lbfgsLastStepNotAccepted() const return !stepper_->check_last_step_accepted(); } -template class LBFGS; -template class LBFGS; +template class LBFGS>; 
+template class LBFGS>; diff --git a/src/LDAonGrid.cc b/src/LDAonGrid.cc index 77a4bb30..ed659447 100644 --- a/src/LDAonGrid.cc +++ b/src/LDAonGrid.cc @@ -26,5 +26,5 @@ void LDAonGrid::update() get_xc_tm_.stop(); } -template class LDAonGrid; -template class LDAonGrid; +template class LDAonGrid>; +template class LDAonGrid>; diff --git a/src/LDAonGridSpin.cc b/src/LDAonGridSpin.cc index fa5841e3..2f9ca2d9 100644 --- a/src/LDAonGridSpin.cc +++ b/src/LDAonGridSpin.cc @@ -42,5 +42,5 @@ double LDAonGridSpin::getExc() const // in [Ha] return mygrid.vel() * lda_->computeRhoDotExc(); } -template class LDAonGridSpin; -template class LDAonGridSpin; +template class LDAonGridSpin>; +template class LDAonGridSpin>; diff --git a/src/LocGridOrbitals.cc b/src/LocGridOrbitals.cc index 1822c3ff..92793a46 100644 --- a/src/LocGridOrbitals.cc +++ b/src/LocGridOrbitals.cc @@ -44,31 +44,63 @@ #define ORBITAL_OCCUPATION 2. std::string getDatasetName(const std::string& name, const int color); -short LocGridOrbitals::subdivx_ = 0; -int LocGridOrbitals::lda_ = 0; -int LocGridOrbitals::numpt_ = 0; -int LocGridOrbitals::loc_numpt_ = 0; - -DotProductManager* LocGridOrbitals::dotProductManager_ +template +DotProductManager>* + LocGridOrbitals::dotProductManager_ = nullptr; -int LocGridOrbitals::data_wghosts_index_ = -1; - -Timer LocGridOrbitals::get_dm_tm_("LocGridOrbitals::get_dm"); -Timer LocGridOrbitals::matB_tm_("LocGridOrbitals::matB"); -Timer LocGridOrbitals::invBmat_tm_("LocGridOrbitals::invBmat"); -Timer LocGridOrbitals::overlap_tm_("LocGridOrbitals::overlap"); -Timer LocGridOrbitals::dot_product_tm_("LocGridOrbitals::dot_product"); -Timer LocGridOrbitals::addDot_tm_("LocGridOrbitals::addDot"); -Timer LocGridOrbitals::mask_tm_("LocGridOrbitals::mask"); -Timer LocGridOrbitals::prod_matrix_tm_("LocGridOrbitals::prod_matrix"); -Timer LocGridOrbitals::assign_tm_("LocGridOrbitals::assign"); -Timer LocGridOrbitals::normalize_tm_("LocGridOrbitals::normalize"); -Timer 
LocGridOrbitals::axpy_tm_("LocGridOrbitals::axpy"); - -LocGridOrbitals::LocGridOrbitals(std::string name, const pb::Grid& my_grid, - const short subdivx, const int numst, const short bc[3], - ProjectedMatricesInterface* proj_matrices, +template +short LocGridOrbitals::subdivx_ = 0; +template +int LocGridOrbitals::lda_ = 0; +template +int LocGridOrbitals::numpt_ = 0; +template +int LocGridOrbitals::loc_numpt_ = 0; + +template +int LocGridOrbitals::data_wghosts_index_ = -1; + +template +Timer LocGridOrbitals::get_dm_tm_( + "LocGridOrbitals" + std::to_string(8 * sizeof(ScalarType)) + "::get_dm"); +template +Timer LocGridOrbitals::matB_tm_( + "LocGridOrbitals" + std::to_string(8 * sizeof(ScalarType)) + "::matB"); +template +Timer LocGridOrbitals::invBmat_tm_( + "LocGridOrbitals" + std::to_string(8 * sizeof(ScalarType)) + "::invBmat"); +template +Timer LocGridOrbitals::overlap_tm_( + "LocGridOrbitals" + std::to_string(8 * sizeof(ScalarType)) + "::overlap"); +template +Timer LocGridOrbitals::dot_product_tm_( + "LocGridOrbitals" + std::to_string(8 * sizeof(ScalarType)) + + "::dot_product"); +template +Timer LocGridOrbitals::addDot_tm_( + "LocGridOrbitals" + std::to_string(8 * sizeof(ScalarType)) + "::addDot"); +template +Timer LocGridOrbitals::mask_tm_( + "LocGridOrbitals" + std::to_string(8 * sizeof(ScalarType)) + "::mask"); +template +Timer LocGridOrbitals::prod_matrix_tm_( + "LocGridOrbitals" + std::to_string(8 * sizeof(ScalarType)) + + "::prod_matrix"); +template +Timer LocGridOrbitals::assign_tm_( + "LocGridOrbitals" + std::to_string(8 * sizeof(ScalarType)) + "::assign"); +template +Timer LocGridOrbitals::normalize_tm_( + "LocGridOrbitals" + std::to_string(8 * sizeof(ScalarType)) + "::normalize"); +template +Timer LocGridOrbitals::axpy_tm_( + "LocGridOrbitals" + std::to_string(8 * sizeof(ScalarType)) + "::axpy"); + +template +LocGridOrbitals::LocGridOrbitals(std::string name, + const pb::Grid& my_grid, const short subdivx, const int numst, + const short bc[3], 
ProjectedMatricesInterface* proj_matrices, std::shared_ptr lrs, MasksSet* masks, MasksSet* corrmasks, ClusterOrbitals* local_cluster, const bool setup_flag) : name_(std::move(name)), @@ -108,7 +140,8 @@ LocGridOrbitals::LocGridOrbitals(std::string name, const pb::Grid& my_grid, if (setup_flag) setup(lrs); } -LocGridOrbitals::~LocGridOrbitals() +template +LocGridOrbitals::~LocGridOrbitals() { assert(proj_matrices_ != nullptr); assert(pack_); @@ -120,8 +153,9 @@ LocGridOrbitals::~LocGridOrbitals() gidToStorage_ = nullptr; } -LocGridOrbitals::LocGridOrbitals( - const std::string& name, const LocGridOrbitals& A, const bool copy_data) +template +LocGridOrbitals::LocGridOrbitals(const std::string& name, + const LocGridOrbitals& A, const bool copy_data) : Orbitals(A, copy_data), name_(name), proj_matrices_(A.proj_matrices_), @@ -142,9 +176,11 @@ LocGridOrbitals::LocGridOrbitals( setGids2Storage(); } -LocGridOrbitals::LocGridOrbitals(const std::string& name, - const LocGridOrbitals& A, ProjectedMatricesInterface* proj_matrices, - MasksSet* masks, MasksSet* corrmasks, const bool copy_data) +template +LocGridOrbitals::LocGridOrbitals(const std::string& name, + const LocGridOrbitals& A, + ProjectedMatricesInterface* proj_matrices, MasksSet* masks, + MasksSet* corrmasks, const bool copy_data) : Orbitals(A, copy_data), name_(name), proj_matrices_(proj_matrices), @@ -171,7 +207,9 @@ LocGridOrbitals::LocGridOrbitals(const std::string& name, proj_matrices_->setup(overlapping_gids_); } -void LocGridOrbitals::copySharedData(const LocGridOrbitals& A) +template +void LocGridOrbitals::copySharedData( + const LocGridOrbitals& A) { assert(A.gidToStorage_ != nullptr); assert(A.pack_); @@ -192,7 +230,9 @@ void LocGridOrbitals::copySharedData(const LocGridOrbitals& A) distributor_normalize_ = A.distributor_normalize_; } -void LocGridOrbitals::copyDataFrom(const LocGridOrbitals& src) +template +void LocGridOrbitals::copyDataFrom( + const LocGridOrbitals& src) { assert(proj_matrices_ != 
nullptr); @@ -201,7 +241,8 @@ void LocGridOrbitals::copyDataFrom(const LocGridOrbitals& src) setIterativeIndex(src); } -void LocGridOrbitals::setDotProduct(const short dot_type) +template +void LocGridOrbitals::setDotProduct(const short dot_type) { DotProductManagerFactory factory; @@ -210,7 +251,8 @@ void LocGridOrbitals::setDotProduct(const short dot_type) assert(dotProductManager_ != nullptr); } -void LocGridOrbitals::setGids2Storage() +template +void LocGridOrbitals::setGids2Storage() { assert(chromatic_number_ >= 0); assert(subdivx_ > 0); @@ -218,25 +260,26 @@ void LocGridOrbitals::setGids2Storage() if (gidToStorage_ != nullptr) gidToStorage_->clear(); else - gidToStorage_ = new std::vector>(); + gidToStorage_ = new std::vector>(); gidToStorage_->resize(subdivx_); for (short iloc = 0; iloc < subdivx_; iloc++) { - std::map& gid2st((*gidToStorage_)[iloc]); + std::map& gid2st((*gidToStorage_)[iloc]); for (int color = 0; color < chromatic_number_; color++) { const int gid = overlapping_gids_[iloc][color]; if (gid != -1) { gid2st.insert( - std::pair(gid, getPsi(color, iloc))); + std::pair(gid, getPsi(color, iloc))); } } } } // return pointer to const data -const ORBDTYPE* LocGridOrbitals::getGidStorage( +template +const ScalarType* LocGridOrbitals::getGidStorage( const int gid, const short iloc) const { assert(numst_ >= 0); @@ -245,15 +288,15 @@ const ORBDTYPE* LocGridOrbitals::getGidStorage( assert(gid < numst_); assert(iloc < (short)gidToStorage_->size()); - std::map::const_iterator p - = (*gidToStorage_)[iloc].find(gid); + auto p = (*gidToStorage_)[iloc].find(gid); if (p != (*gidToStorage_)[iloc].end()) return p->second; else return nullptr; } -void LocGridOrbitals::setup(MasksSet* masks, MasksSet* corrmasks, +template +void LocGridOrbitals::setup(MasksSet* masks, MasksSet* corrmasks, std::shared_ptr lrs) { assert(masks != nullptr); @@ -268,7 +311,9 @@ void LocGridOrbitals::setup(MasksSet* masks, MasksSet* corrmasks, setup(lrs); } -void 
LocGridOrbitals::setup(std::shared_ptr lrs) +template +void LocGridOrbitals::setup( + std::shared_ptr lrs) { Control& ct = *(Control::instance()); @@ -311,7 +356,8 @@ void LocGridOrbitals::setup(std::shared_ptr lrs) "LocGridOrbitals::setup() done...", (*MPIdata::sout)); } -void LocGridOrbitals::reset(MasksSet* masks, MasksSet* corrmasks, +template +void LocGridOrbitals::reset(MasksSet* masks, MasksSet* corrmasks, std::shared_ptr lrs) { // free some old data @@ -322,7 +368,9 @@ void LocGridOrbitals::reset(MasksSet* masks, MasksSet* corrmasks, setup(masks, corrmasks, lrs); } -void LocGridOrbitals::assign(const LocGridOrbitals& orbitals) +template +void LocGridOrbitals::assign( + const LocGridOrbitals& orbitals) { assign_tm_.start(); @@ -344,9 +392,9 @@ void LocGridOrbitals::assign(const LocGridOrbitals& orbitals) { Control& ct = *(Control::instance()); if (onpe0 && ct.verbose > 2) - (*MPIdata::sout) - << "LocGridOrbitals::Assign orbitals to different LR" - << std::endl; + (*MPIdata::sout) << "LocGridOrbitals::Assign orbitals " + "to different LR" + << std::endl; for (int color = 0; color < chromatic_number_; color++) { // assign state @@ -356,7 +404,7 @@ void LocGridOrbitals::assign(const LocGridOrbitals& orbitals) if (gid != -1) { // find storage location in orbitals - const ORBDTYPE* const val + const ScalarType* const val = orbitals.getGidStorage(gid, iloc); // copy into new psi_ if (val != nullptr) @@ -371,7 +419,9 @@ void LocGridOrbitals::assign(const LocGridOrbitals& orbitals) assign_tm_.stop(); } -void LocGridOrbitals::axpy(const double alpha, const LocGridOrbitals& orbitals) +template +void LocGridOrbitals::axpy( + const double alpha, const LocGridOrbitals& orbitals) { axpy_tm_.start(); @@ -397,7 +447,7 @@ void LocGridOrbitals::axpy(const double alpha, const LocGridOrbitals& orbitals) if (gid != -1) { // find orbital storage in orbitals - const ORBDTYPE* const val + const ScalarType* const val = orbitals.getGidStorage(gid, iloc); // copy into new psi_ if (val 
!= nullptr) @@ -414,7 +464,8 @@ void LocGridOrbitals::axpy(const double alpha, const LocGridOrbitals& orbitals) axpy_tm_.stop(); } -short LocGridOrbitals::checkOverlap( +template +short LocGridOrbitals::checkOverlap( const int st1, const int st2, const short level) { assert(masks4orbitals_); @@ -422,7 +473,8 @@ short LocGridOrbitals::checkOverlap( return masks4orbitals_->checkOverlap(st1, st2, level); } -void LocGridOrbitals::applyMask(const bool first_time) +template +void LocGridOrbitals::applyMask(const bool first_time) { assert(chromatic_number_ >= 0); assert(subdivx_ > 0); @@ -449,16 +501,17 @@ void LocGridOrbitals::applyMask(const bool first_time) mask_tm_.stop(); } -void LocGridOrbitals::applyCorrMask(const bool first_time) +template +void LocGridOrbitals::applyCorrMask(const bool first_time) { mask_tm_.start(); for (int color = 0; color < chromatic_number_; color++) { - const unsigned int size = block_vector_.get_allocated_size_storage(); - ORBDTYPE* ipsi_host_view = MemorySpace::Memory::allocate_host_view(size); - MemorySpace::Memory::copy_view_to_host( + MemorySpace::Memory::copy_view_to_host( psi(color), size, ipsi_host_view); for (short iloc = 0; iloc < subdivx_; iloc++) @@ -472,9 +525,9 @@ void LocGridOrbitals::applyCorrMask(const bool first_time) else block_vector_.set_zero(color, iloc); } - MemorySpace::Memory::copy_view_to_dev( + MemorySpace::Memory::copy_view_to_dev( ipsi_host_view, size, psi(color)); - MemorySpace::Memory::free_host_view( + MemorySpace::Memory::free_host_view( ipsi_host_view); } incrementIterativeIndex(); @@ -482,8 +535,9 @@ void LocGridOrbitals::applyCorrMask(const bool first_time) mask_tm_.stop(); } -void LocGridOrbitals::app_mask( - const int color, ORBDTYPE* u, const short level) const +template +void LocGridOrbitals::app_mask( + const int color, ScalarType* u, const short level) const { mask_tm_.start(); assert(masks4orbitals_); @@ -501,13 +555,14 @@ void LocGridOrbitals::app_mask( (masks4orbitals_->getMask(gid)).apply(u, 
level, iloc); } else - memset(u + iloc * lnumpt, 0, lnumpt * sizeof(ORBDTYPE)); + memset(u + iloc * lnumpt, 0, lnumpt * sizeof(ScalarType)); } mask_tm_.stop(); } -void LocGridOrbitals::app_mask( - const int color, pb::GridFunc& gu, const short level) const +template +void LocGridOrbitals::app_mask( + const int color, pb::GridFunc& gu, const short level) const { mask_tm_.start(); @@ -532,23 +587,25 @@ void LocGridOrbitals::app_mask( { int offset = (shift + dim0 * iloc) * incx; assert(offset + lnumpt < static_cast(gu.grid().sizeg())); - ORBDTYPE* pu = gu.uu() + offset; - memset(pu, 0, lnumpt * sizeof(ORBDTYPE)); + ScalarType* pu = gu.uu() + offset; + memset(pu, 0, lnumpt * sizeof(ScalarType)); } } mask_tm_.stop(); } -void LocGridOrbitals::init2zero() +template +void LocGridOrbitals::init2zero() { for (int icolor = 0; icolor < chromatic_number_; icolor++) { - ORBDTYPE* ipsi = psi(icolor); - memset(ipsi, 0, numpt_ * sizeof(ORBDTYPE)); + ScalarType* ipsi = psi(icolor); + memset(ipsi, 0, numpt_ * sizeof(ScalarType)); } } -void LocGridOrbitals::initGauss( +template +void LocGridOrbitals::initGauss( const double rc, const std::shared_ptr lrs) { assert(chromatic_number_ >= 0); @@ -582,13 +639,13 @@ void LocGridOrbitals::initGauss( const double rmax = 6. 
* rc; for (int icolor = 0; icolor < chromatic_number_; icolor++) { - const unsigned int size = numpt_; - ORBDTYPE* ipsi_host_view = MemorySpace::Memory::allocate_host_view(size); - MemorySpace::Memory::copy_view_to_host( + MemorySpace::Memory::copy_view_to_host( psi(icolor), size, ipsi_host_view); - memset(ipsi_host_view, 0, numpt_ * sizeof(ORBDTYPE)); + memset(ipsi_host_view, 0, numpt_ * sizeof(ScalarType)); for (short iloc = 0; iloc < subdivx_; iloc++) { @@ -611,7 +668,7 @@ void LocGridOrbitals::initGauss( const double r = xc.minimage(center, ll, ct.bcWF); if (r < rmax) ipsi_host_view[ix * incx + iy * incy + iz] - = (ORBDTYPE)exp(-r * r * invrc2); + = (ScalarType)exp(-r * r * invrc2); else ipsi_host_view[ix * incx + iy * incy + iz] = 0.; @@ -623,15 +680,16 @@ void LocGridOrbitals::initGauss( } } } - MemorySpace::Memory::copy_view_to_dev( + MemorySpace::Memory::copy_view_to_dev( ipsi_host_view, size, psi(icolor)); - MemorySpace::Memory::free_host_view( + MemorySpace::Memory::free_host_view( ipsi_host_view); } resetIterativeIndex(); } -void LocGridOrbitals::initFourier() +template +void LocGridOrbitals::initFourier() { Control& ct = *(Control::instance()); if (onpe0 && ct.verbose > 2) @@ -667,8 +725,8 @@ void LocGridOrbitals::initFourier() const double kk[3] = { dk[0] * (double)kvector[0], dk[1] * (double)kvector[1], dk[2] * (double)kvector[2] }; - ORBDTYPE* ipsi = psi(icolor); - memset(ipsi, 0, numpt_ * sizeof(ORBDTYPE)); + ScalarType* ipsi = psi(icolor); + memset(ipsi, 0, numpt_ * sizeof(ScalarType)); for (short iloc = 0; iloc < subdivx_; iloc++) { @@ -688,8 +746,8 @@ void LocGridOrbitals::initFourier() { ipsi[ix * incx + iy * incy + iz] = 1. 
- - (ORBDTYPE)(cos(kk[0] * x) * cos(kk[1] * y) - * cos(kk[2] * z)); + - (ScalarType)(cos(kk[0] * x) * cos(kk[1] * y) + * cos(kk[2] * z)); z += hgrid[2]; } @@ -703,7 +761,9 @@ void LocGridOrbitals::initFourier() resetIterativeIndex(); } -int LocGridOrbitals::packStates(std::shared_ptr lrs) +template +int LocGridOrbitals::packStates( + std::shared_ptr lrs) { assert(lrs); @@ -730,9 +790,10 @@ int LocGridOrbitals::packStates(std::shared_ptr lrs) return pack_->chromatic_number(); } -void LocGridOrbitals::multiply_by_matrix( +template +void LocGridOrbitals::multiply_by_matrix( const dist_matrix::DistMatrix& dmatrix, - ORBDTYPE* const product, const int ldp) + ScalarType* const product, const int ldp) { ReplicatedWorkSpace& wspace( ReplicatedWorkSpace::instance()); @@ -744,8 +805,9 @@ void LocGridOrbitals::multiply_by_matrix( multiply_by_matrix(0, chromatic_number_, work_matrix, product, ldp); } -void LocGridOrbitals::multiply_by_matrix(const int first_color, - const int ncolors, const DISTMATDTYPE* const matrix, ORBDTYPE* product, +template +void LocGridOrbitals::multiply_by_matrix(const int first_color, + const int ncolors, const DISTMATDTYPE* const matrix, ScalarType* product, const int ldp) const { prod_matrix_tm_.start(); @@ -754,7 +816,7 @@ void LocGridOrbitals::multiply_by_matrix(const int first_color, assert((first_color + ncolors) <= chromatic_number_); assert(subdivx_ > 0); - memset(product, 0, ldp * ncolors * sizeof(ORBDTYPE)); + memset(product, 0, ldp * ncolors * sizeof(ScalarType)); DISTMATDTYPE* matrix_local = new DISTMATDTYPE[chromatic_number_ * ncolors]; @@ -787,9 +849,10 @@ void LocGridOrbitals::multiply_by_matrix(const int first_color, prod_matrix_tm_.stop(); } -void LocGridOrbitals::multiplyByMatrix( +template +void LocGridOrbitals::multiplyByMatrix( const SquareLocalMatrices& matrix, - ORBDTYPE* product, const int ldp) const + ScalarType* product, const int ldp) const { prod_matrix_tm_.start(); @@ -826,7 +889,8 @@ void 
LocGridOrbitals::multiplyByMatrix( // Here the result is stored in one of the matrices used in the multiplication, // so a temporary arry is necessary -void LocGridOrbitals::multiplyByMatrix( +template +void LocGridOrbitals::multiplyByMatrix( const SquareLocalMatrices& matrix) { prod_matrix_tm_.start(); @@ -834,38 +898,38 @@ void LocGridOrbitals::multiplyByMatrix( if (chromatic_number_ > 0) { unsigned int const product_size = loc_numpt_ * chromatic_number_; - std::unique_ptr product( - MemorySpace::Memory::allocate( + std::unique_ptr product( + MemorySpace::Memory::allocate( product_size), - MemorySpace::Memory::free); + MemorySpace::Memory::free); // We want to to use: - // MemorySpace::Memory::set( + // MemorySpace::Memory::set( // product.get(), product_size, 0.); // but we get an error at linking time from nvptx-none-gcc #ifdef HAVE_MAGMA #ifdef HAVE_OPENMP_OFFLOAD - ORBDTYPE* tmp = product.get(); + ScalarType* tmp = product.get(); #pragma omp target teams distribute parallel for is_device_ptr(tmp) for (unsigned int i = 0; i < product_size; ++i) tmp[i] = 0; #else - ORBDTYPE* product_host - = MemorySpace::Memory::allocate( + ScalarType* product_host + = MemorySpace::Memory::allocate( product_size); - std::memset(product_host, 0, product_size * sizeof(ORBDTYPE)); + std::memset(product_host, 0, product_size * sizeof(ScalarType)); MemorySpace::copy_to_dev(product_host, product_size, product.get()); - MemorySpace::Memory::free(product_host); + MemorySpace::Memory::free(product_host); #endif #else - std::memset(product.get(), 0, product_size * sizeof(ORBDTYPE)); + std::memset(product.get(), 0, product_size * sizeof(ScalarType)); #endif - const size_t slnumpt = loc_numpt_ * sizeof(ORBDTYPE); + const size_t slnumpt = loc_numpt_ * sizeof(ScalarType); // loop over subdomains for (short iloc = 0; iloc < subdivx_; iloc++) { - ORBDTYPE* phi = getPsi(0, iloc); + ScalarType* phi = getPsi(0, iloc); const MATDTYPE* const mat = matrix.getSubMatrix(iloc); #ifdef HAVE_MAGMA int 
const mat_size = matrix.m() * matrix.n(); @@ -886,7 +950,7 @@ void LocGridOrbitals::multiplyByMatrix( chromatic_number_, 0., product.get(), loc_numpt_); for (int color = 0; color < chromatic_number_; color++) - MemorySpace::Memory::copy( + MemorySpace::Memory::copy( product.get() + color * loc_numpt_, slnumpt, phi + color); } } @@ -894,35 +958,40 @@ void LocGridOrbitals::multiplyByMatrix( prod_matrix_tm_.stop(); } -void LocGridOrbitals::multiplyByMatrix( +template +void LocGridOrbitals::multiplyByMatrix( const SquareLocalMatrices& matrix, - LocGridOrbitals& product) const + LocGridOrbitals& product) const { multiplyByMatrix(matrix, product.psi(0), product.lda_); } -void LocGridOrbitals::multiply_by_matrix(const int first_color, +template +void LocGridOrbitals::multiply_by_matrix(const int first_color, const int ncolors, const DISTMATDTYPE* const matrix, - LocGridOrbitals& product) const + LocGridOrbitals& product) const { multiply_by_matrix( first_color, ncolors, matrix, product.psi(0), product.lda_); } -void LocGridOrbitals::multiply_by_matrix( - const DISTMATDTYPE* const matrix, LocGridOrbitals& product) const +template +void LocGridOrbitals::multiply_by_matrix( + const DISTMATDTYPE* const matrix, + LocGridOrbitals& product) const { multiply_by_matrix( 0, chromatic_number_, matrix, product.psi(0), product.lda_); } -void LocGridOrbitals::multiply_by_matrix( +template +void LocGridOrbitals::multiply_by_matrix( const dist_matrix::DistMatrix& matrix) { prod_matrix_tm_.start(); - ORBDTYPE* product = new ORBDTYPE[loc_numpt_ * chromatic_number_]; - memset(product, 0, loc_numpt_ * chromatic_number_ * sizeof(ORBDTYPE)); + ScalarType* product = new ScalarType[loc_numpt_ * chromatic_number_]; + memset(product, 0, loc_numpt_ * chromatic_number_ * sizeof(ScalarType)); ReplicatedWorkSpace& wspace( ReplicatedWorkSpace::instance()); @@ -933,12 +1002,12 @@ void LocGridOrbitals::multiply_by_matrix( DISTMATDTYPE* matrix_local = new DISTMATDTYPE[chromatic_number_ * 
chromatic_number_]; - const size_t slnumpt = loc_numpt_ * sizeof(ORBDTYPE); + const size_t slnumpt = loc_numpt_ * sizeof(ScalarType); // loop over subdomains for (short iloc = 0; iloc < subdivx_; iloc++) { - ORBDTYPE* phi = getPsi(0, iloc); + ScalarType* phi = getPsi(0, iloc); matrixToLocalMatrix(iloc, work_matrix, matrix_local); @@ -957,7 +1026,8 @@ void LocGridOrbitals::multiply_by_matrix( prod_matrix_tm_.stop(); } -int LocGridOrbitals::read_hdf5(HDFrestart& h5f_file) +template +int LocGridOrbitals::read_hdf5(HDFrestart& h5f_file) { assert(proj_matrices_ != nullptr); @@ -983,9 +1053,9 @@ int LocGridOrbitals::read_hdf5(HDFrestart& h5f_file) ierr = proj_matrices_->readDM(h5f_file); if (ierr < 0) { - (*MPIdata::serr) - << "LocGridOrbitals::read_hdf5(): error in reading DM" - << std::endl; + (*MPIdata::serr) << "LocGridOrbitals::read_hdf5(): " + "error in reading DM" + << std::endl; return ierr; } } @@ -993,7 +1063,9 @@ int LocGridOrbitals::read_hdf5(HDFrestart& h5f_file) return ierr; } -int LocGridOrbitals::write(HDFrestart& h5f_file, const std::string& name) +template +int LocGridOrbitals::write( + HDFrestart& h5f_file, const std::string& name) { Control& ct = *(Control::instance()); hid_t file_id = h5f_file.file_id(); @@ -1020,7 +1092,7 @@ int LocGridOrbitals::write(HDFrestart& h5f_file, const std::string& name) const short precision = ct.out_restart_info > 3 ? 2 : 1; if (onpe0 && ct.verbose > 2) - (*MPIdata::sout) << "Write LocGridOrbitals " << name + (*MPIdata::sout) << "Write LocGridOrbitals " << name << " with precision " << precision << std::endl; // loop over global (storage) functions for (int color = 0; color < chromatic_number_; color++) @@ -1123,9 +1195,9 @@ int LocGridOrbitals::write(HDFrestart& h5f_file, const std::string& name) herr_t status = H5Dclose(dset_id); if (status < 0) { - (*MPIdata::serr) - << "LocGridOrbitals::write_func_hdf5:H5Dclose failed!!!" - << std::endl; + (*MPIdata::serr) << "LocGridOrbitals::write_func_" + "hdf5:H5Dclose failed!!!" 
+ << std::endl; return -1; } } @@ -1155,7 +1227,8 @@ int LocGridOrbitals::write(HDFrestart& h5f_file, const std::string& name) return 0; } -int LocGridOrbitals::read_func_hdf5( +template +int LocGridOrbitals::read_func_hdf5( HDFrestart& h5f_file, const std::string& name) { assert(chromatic_number_ >= 0); @@ -1181,7 +1254,7 @@ int LocGridOrbitals::read_func_hdf5( hid_t memspace = H5P_DEFAULT; if (h5f_file.active()) memspace = h5f_file.createMemspace(); - ORBDTYPE* buffer = new ORBDTYPE[block[0] * block[1] * block[2]]; + ScalarType* buffer = new ScalarType[block[0] * block[1] * block[2]]; if (onpe0 && ct.verbose > 2) { @@ -1195,9 +1268,9 @@ int LocGridOrbitals::read_func_hdf5( } else { - (*MPIdata::sout) - << "LocGridOrbitals::read_func_hdf5(): Read wave functions " - << name << " from all tasks..." << std::endl; + (*MPIdata::sout) << "LocGridOrbitals::read_func_hdf5():" + " Read wave functions " + << name << " from all tasks..." << std::endl; } } @@ -1237,18 +1310,18 @@ int LocGridOrbitals::read_func_hdf5( hid_t dset_id = h5f_file.open_dset(key); if (dset_id < 0) { - (*MPIdata::serr) - << "LocGridOrbitals::read_func_hdf5() --- cannot open " << key - << std::endl; + (*MPIdata::serr) << "LocGridOrbitals::read_func_hdf5() " + "--- cannot open " + << key << std::endl; return dset_id; } herr_t status = h5f_file.readData(buffer, memspace, dset_id, precision); if (status < 0) { - (*MPIdata::serr) - << "LocGridOrbitals::read_func_hdf5() --- H5Dread failed!!!" - << std::endl; + (*MPIdata::serr) << "LocGridOrbitals::read_func_hdf5() " + "--- H5Dread failed!!!" 
+ << std::endl; return -1; } @@ -1367,13 +1440,15 @@ int LocGridOrbitals::read_func_hdf5( // initialize matrix chromatic_number_ by ncolor (for columns first_color to // first_color+ncolor) -void LocGridOrbitals::matrixToLocalMatrix(const short iloc, +template +void LocGridOrbitals::matrixToLocalMatrix(const short iloc, const DISTMATDTYPE* const matrix, DISTMATDTYPE* const lmatrix) const { matrixToLocalMatrix(iloc, matrix, lmatrix, 0, chromatic_number_); } -void LocGridOrbitals::matrixToLocalMatrix(const short iloc, +template +void LocGridOrbitals::matrixToLocalMatrix(const short iloc, const DISTMATDTYPE* const matrix, DISTMATDTYPE* const lmatrix, const int first_color, const int ncolor) const { @@ -1401,8 +1476,10 @@ void LocGridOrbitals::matrixToLocalMatrix(const short iloc, // compute the matrix // output: matB -void LocGridOrbitals::computeMatB( - const LocGridOrbitals& orbitals, const pb::Lap& LapOper) +template +void LocGridOrbitals::computeMatB( + const LocGridOrbitals& orbitals, + const pb::Lap& LapOper) { if (numst_ == 0) return; @@ -1419,10 +1496,10 @@ void LocGridOrbitals::computeMatB( SquareLocalMatrices ss( subdivx_, chromatic_number_); - ORBDTYPE* work = new ORBDTYPE[lda_ * bcolor]; - memset(work, 0, lda_ * bcolor * sizeof(ORBDTYPE)); + ScalarType* work = new ScalarType[lda_ * bcolor]; + memset(work, 0, lda_ * bcolor * sizeof(ScalarType)); - const ORBDTYPE* const orbitals_psi + const ScalarType* const orbitals_psi = (chromatic_number_ > 0) ? 
orbitals.block_vector_.vect(0) : nullptr; setDataWithGhosts(); @@ -1462,7 +1539,9 @@ void LocGridOrbitals::computeMatB( } // compute and its inverse -void LocGridOrbitals::computeBAndInvB(const pb::Lap& LapOper) +template +void LocGridOrbitals::computeBAndInvB( + const pb::Lap& LapOper) { assert(proj_matrices_ != nullptr); @@ -1477,7 +1556,8 @@ void LocGridOrbitals::computeBAndInvB(const pb::Lap& LapOper) invBmat_tm_.stop(); } -void LocGridOrbitals::getLocalOverlap( +template +void LocGridOrbitals::getLocalOverlap( SquareLocalMatrices& ss) { assert(chromatic_number_ >= 0); @@ -1490,7 +1570,7 @@ void LocGridOrbitals::getLocalOverlap( #ifdef MGMOL_USE_MIXEDP getLocalOverlap(*this, ss); #else - const ORBDTYPE* const psi = block_vector_.vect(0); + const ScalarType* const psi = block_vector_.vect(0); for (short iloc = 0; iloc < subdivx_; iloc++) { @@ -1505,7 +1585,9 @@ void LocGridOrbitals::getLocalOverlap( } } -void LocGridOrbitals::getLocalOverlap(const LocGridOrbitals& orbitals, +template +void LocGridOrbitals::getLocalOverlap( + const LocGridOrbitals& orbitals, SquareLocalMatrices& ss) { assert(chromatic_number_ >= 0); @@ -1517,7 +1599,9 @@ void LocGridOrbitals::getLocalOverlap(const LocGridOrbitals& orbitals, } } -void LocGridOrbitals::computeLocalProduct(const LocGridOrbitals& orbitals, +template +void LocGridOrbitals::computeLocalProduct( + const LocGridOrbitals& orbitals, LocalMatrices& ss, const bool transpose) { // assert( orbitals.chromatic_number_>=0 ); @@ -1527,9 +1611,10 @@ void LocGridOrbitals::computeLocalProduct(const LocGridOrbitals& orbitals, computeLocalProduct(orbitals.psi(0), orbitals.lda_, ss, transpose); } -void LocGridOrbitals::computeLocalProduct(const ORBDTYPE* const array, - const int ld, LocalMatrices& ss, - const bool transpose) +template +void LocGridOrbitals::computeLocalProduct( + const ScalarType* const array, const int ld, + LocalMatrices& ss, const bool transpose) { assert(loc_numpt_ > 0); assert(loc_numpt_ <= ld); @@ -1538,39 
+1623,37 @@ void LocGridOrbitals::computeLocalProduct(const ORBDTYPE* const array, assert(grid_.vel() > 0.); assert(subdivx_ > 0); - const ORBDTYPE* const a = transpose ? array : block_vector_.vect(0); - const ORBDTYPE* const b = transpose ? block_vector_.vect(0) : array; + const ScalarType* const a = transpose ? array : block_vector_.vect(0); + const ScalarType* const b = transpose ? block_vector_.vect(0) : array; const int lda = transpose ? ld : lda_; const int ldb = transpose ? lda_ : ld; unsigned int const a_size = numpt_ * ss.m(); - ORBDTYPE* a_host_view - = MemorySpace::Memory::allocate_host_view( - a_size); - MemorySpace::Memory::copy_view_to_host( - const_cast(a), a_size, a_host_view); + ScalarType* a_host_view = MemorySpace::Memory::allocate_host_view(a_size); + MemorySpace::Memory::copy_view_to_host( + const_cast(a), a_size, a_host_view); unsigned int const b_size = numpt_ * ss.n(); - ORBDTYPE* b_host_view - = MemorySpace::Memory::allocate_host_view( - b_size); - MemorySpace::Memory::copy_view_to_host( - const_cast(b), b_size, b_host_view); + ScalarType* b_host_view = MemorySpace::Memory::allocate_host_view(b_size); + MemorySpace::Memory::copy_view_to_host( + const_cast(b), b_size, b_host_view); #ifdef MGMOL_USE_MIXEDP // use temporary float data for matrix ss - LocalMatrices ssf(ss.nmat(), ss.m(), ss.n()); + LocalMatrices ssf(ss.nmat(), ss.m(), ss.n()); #else - LocalMatrices& ssf(ss); + LocalMatrices& ssf(ss); #endif for (short iloc = 0; iloc < subdivx_; iloc++) { ssf.gemm(iloc, loc_numpt_, a_host_view + iloc * loc_numpt_, lda, b_host_view + iloc * loc_numpt_, ldb); } - MemorySpace::Memory::free_host_view( + MemorySpace::Memory::free_host_view( a_host_view); - MemorySpace::Memory::free_host_view( + MemorySpace::Memory::free_host_view( b_host_view); #ifdef MGMOL_USE_MIXEDP ss.copy(ssf); @@ -1579,8 +1662,9 @@ void LocGridOrbitals::computeLocalProduct(const ORBDTYPE* const array, ss.scal(grid_.vel()); } -void 
LocGridOrbitals::computeDiagonalElementsDotProduct( - const LocGridOrbitals& orbitals, std::vector& ss) +template +void LocGridOrbitals::computeDiagonalElementsDotProduct( + const LocGridOrbitals& orbitals, std::vector& ss) { assert(numst_ > 0); assert(grid_.vel() > 0.); @@ -1605,8 +1689,9 @@ void LocGridOrbitals::computeDiagonalElementsDotProduct( mmpi.allreduce(&tmp[0], &ss[0], numst_, MPI_SUM); } -void LocGridOrbitals::computeDiagonalElementsDotProductLocal( - const LocGridOrbitals& orbitals, std::vector& ss) +template +void LocGridOrbitals::computeDiagonalElementsDotProductLocal( + const LocGridOrbitals& orbitals, std::vector& ss) { assert(grid_.vel() > 0.); @@ -1653,13 +1738,16 @@ void LocGridOrbitals::computeDiagonalElementsDotProductLocal( } } -void LocGridOrbitals::computeGram( +template +void LocGridOrbitals::computeGram( dist_matrix::DistMatrix& gram_mat) { computeGram(*this, gram_mat); } -void LocGridOrbitals::computeGram(const LocGridOrbitals& orbitals, +template +void LocGridOrbitals::computeGram( + const LocGridOrbitals& orbitals, dist_matrix::DistMatrix& gram_mat) { SquareLocalMatrices ss( @@ -1675,7 +1763,8 @@ void LocGridOrbitals::computeGram(const LocGridOrbitals& orbitals, } // compute the lower-triangular part of the overlap matrix -void LocGridOrbitals::computeGram(const int verbosity) +template +void LocGridOrbitals::computeGram(const int verbosity) { assert(proj_matrices_ != nullptr); @@ -1704,7 +1793,8 @@ void LocGridOrbitals::computeGram(const int verbosity) overlap_tm_.stop(); } -void LocGridOrbitals::computeGramAndInvS(const int verbosity) +template +void LocGridOrbitals::computeGramAndInvS(const int verbosity) { assert(proj_matrices_ != nullptr); @@ -1714,21 +1804,26 @@ void LocGridOrbitals::computeGramAndInvS(const int verbosity) proj_matrices_->computeInvS(); } -void LocGridOrbitals::checkCond(const double tol, const bool flag_stop) +template +void LocGridOrbitals::checkCond( + const double tol, const bool flag_stop) { 
assert(proj_matrices_ != nullptr); proj_matrices_->checkCond(tol, flag_stop); } -double LocGridOrbitals::dotProduct(const LocGridOrbitals& orbitals) +template +double LocGridOrbitals::dotProduct( + const LocGridOrbitals& orbitals) { assert(dotProductManager_ != nullptr); return dotProductManager_->dotProduct(*this, orbitals); } -double LocGridOrbitals::dotProduct( - const LocGridOrbitals& orbitals, const short dot_type) +template +double LocGridOrbitals::dotProduct( + const LocGridOrbitals& orbitals, const short dot_type) { dot_product_tm_.start(); @@ -1749,7 +1844,9 @@ double LocGridOrbitals::dotProduct( return dot; } -void LocGridOrbitals::orthonormalizeLoewdin(const bool overlap_uptodate, +template +void LocGridOrbitals::orthonormalizeLoewdin( + const bool overlap_uptodate, SquareLocalMatrices* matrixTransform, const bool update_matrices) { @@ -1788,7 +1885,8 @@ void LocGridOrbitals::orthonormalizeLoewdin(const bool overlap_uptodate, if (matrixTransform == nullptr) delete localP; } -double LocGridOrbitals::norm() const +template +double LocGridOrbitals::norm() const { Control& ct = *(Control::instance()); @@ -1801,7 +1899,8 @@ double LocGridOrbitals::norm() const return norm; } -double LocGridOrbitals::normState(const int gid) const +template +double LocGridOrbitals::normState(const int gid) const { assert(gid >= 0); @@ -1833,7 +1932,9 @@ double LocGridOrbitals::normState(const int gid) const return grid_.vel() * norm; } -void LocGridOrbitals::orthonormalize2states(const int st1, const int st2) +template +void LocGridOrbitals::orthonormalize2states( + const int st1, const int st2) { assert(st1 >= 0); assert(st2 >= 0); @@ -1974,8 +2075,9 @@ void LocGridOrbitals::orthonormalize2states(const int st1, const int st2) #endif } -void LocGridOrbitals::multiplyByMatrix2states( - const int st1, const int st2, const double* mat, LocGridOrbitals& product) +template +void LocGridOrbitals::multiplyByMatrix2states(const int st1, + const int st2, const double* mat, 
LocGridOrbitals& product) { assert(st1 >= 0); assert(st2 >= 0); @@ -2022,7 +2124,8 @@ void LocGridOrbitals::multiplyByMatrix2states( } } -void LocGridOrbitals::computeDiagonalGram( +template +void LocGridOrbitals::computeDiagonalGram( VariableSizeMatrix& diagS) const { const double vel = grid_.vel(); @@ -2054,7 +2157,8 @@ void LocGridOrbitals::computeDiagonalGram( #endif } -void LocGridOrbitals::computeInvNorms2( +template +void LocGridOrbitals::computeInvNorms2( std::vector>& inv_norms2) const { const int initTabSize = 4096; @@ -2086,7 +2190,8 @@ void LocGridOrbitals::computeInvNorms2( } } -void LocGridOrbitals::normalize() +template +void LocGridOrbitals::normalize() { normalize_tm_.start(); @@ -2175,8 +2280,10 @@ void LocGridOrbitals::normalize() } // modify argument orbitals, by projecting out its component -// along LocGridOrbitals -void LocGridOrbitals::projectOut(LocGridOrbitals& orbitals, const double scale) +// along LocGridOrbitals +template +void LocGridOrbitals::projectOut( + LocGridOrbitals& orbitals, const double scale) { projectOut(orbitals.psi(0), lda_, scale); @@ -2191,8 +2298,9 @@ void LocGridOrbitals::projectOut(LocGridOrbitals& orbitals, const double scale) orbitals.incrementIterativeIndex(); } -void LocGridOrbitals::projectOut( - ORBDTYPE* const array, const int lda, const double scale) +template +void LocGridOrbitals::projectOut( + ScalarType* const array, const int lda, const double scale) { assert(lda > 1); assert(loc_numpt_ > 0); @@ -2213,14 +2321,14 @@ void LocGridOrbitals::projectOut( #endif proj_matrices_->applyInvS(pmatrix); - ORBDTYPE* tproduct = new ORBDTYPE[loc_numpt_ * chromatic_number_]; - memset(tproduct, 0, loc_numpt_ * chromatic_number_ * sizeof(ORBDTYPE)); + ScalarType* tproduct = new ScalarType[loc_numpt_ * chromatic_number_]; + memset(tproduct, 0, loc_numpt_ * chromatic_number_ * sizeof(ScalarType)); // loop over subdomains for (short iloc = 0; iloc < subdivx_; iloc++) { - ORBDTYPE* phi = getPsi(0, iloc); - ORBDTYPE* parray = 
array + iloc * loc_numpt_; + ScalarType* phi = getPsi(0, iloc); + ScalarType* parray = array + iloc * loc_numpt_; MATDTYPE* localMat_iloc = pmatrix.getRawPtr(iloc); @@ -2238,7 +2346,8 @@ void LocGridOrbitals::projectOut( delete[] tproduct; } -void LocGridOrbitals::initRand() +template +void LocGridOrbitals::initRand() { Control& ct = *(Control::instance()); @@ -2344,8 +2453,10 @@ void LocGridOrbitals::initRand() } // Compute nstates column of Psi^T*A*Psi starting at column 0 -void LocGridOrbitals::addDotWithNcol2Matrix( - LocGridOrbitals& Apsi, dist_matrix::DistMatrix& matrix) const +template +void LocGridOrbitals::addDotWithNcol2Matrix( + LocGridOrbitals& Apsi, + dist_matrix::DistMatrix& matrix) const { addDot_tm_.start(); @@ -2388,7 +2499,8 @@ void LocGridOrbitals::addDotWithNcol2Matrix( addDot_tm_.stop(); } -void LocGridOrbitals::computeGlobalIndexes( +template +void LocGridOrbitals::computeGlobalIndexes( std::shared_ptr lrs) { all_overlapping_gids_ = lrs->getOverlapGids(); @@ -2417,7 +2529,8 @@ void LocGridOrbitals::computeGlobalIndexes( } } -void LocGridOrbitals::printTimers(std::ostream& os) +template +void LocGridOrbitals::printTimers(std::ostream& os) { matB_tm_.print(os); invBmat_tm_.print(os); @@ -2432,7 +2545,9 @@ void LocGridOrbitals::printTimers(std::ostream& os) axpy_tm_.print(os); } -void LocGridOrbitals::initWF(const std::shared_ptr lrs) +template +void LocGridOrbitals::initWF( + const std::shared_ptr lrs) { Control& ct = *(Control::instance()); @@ -2466,10 +2581,10 @@ void LocGridOrbitals::initWF(const std::shared_ptr lrs) if (ct.globalColoring()) { // smooth out random functions - pb::Laph4M myoper(grid_); - pb::GridFunc gf_work( + pb::Laph4M myoper(grid_); + pb::GridFunc gf_work( grid_, ct.bcWF[0], ct.bcWF[1], ct.bcWF[2]); - pb::GridFunc gf_psi( + pb::GridFunc gf_psi( grid_, ct.bcWF[0], ct.bcWF[1], ct.bcWF[2]); if (onpe0 && ct.verbose > 2) @@ -2513,17 +2628,4 @@ void LocGridOrbitals::initWF(const std::shared_ptr lrs) #endif } -template void 
LocGridOrbitals::setDataWithGhosts( - pb::GridFuncVector* data_wghosts); -template void LocGridOrbitals::setDataWithGhosts( - pb::GridFuncVector* data_wghosts); - -template void LocGridOrbitals::setPsi( - const pb::GridFunc& gf_work, const int ist); -template void LocGridOrbitals::setPsi( - const pb::GridFunc& gf_work, const int ist); - -template void LocGridOrbitals::setPsi( - const pb::GridFuncVector& gf_work); -template void LocGridOrbitals::setPsi( - const pb::GridFuncVector& gf_work); +template class LocGridOrbitals; diff --git a/src/LocGridOrbitals.h b/src/LocGridOrbitals.h index 2e47e7d9..a19f42a6 100644 --- a/src/LocGridOrbitals.h +++ b/src/LocGridOrbitals.h @@ -24,7 +24,6 @@ #include "SaveData.h" #include "SinCosOps.h" #include "SquareLocalMatrices.h" -#include "global.h" #include "hdf5.h" #include @@ -32,15 +31,12 @@ #include #include -class Potentials; -template -class ProjectedMatrices; class ProjectedMatricesInterface; class LocalizationRegions; class MasksSet; -class LocGridOrbitals; class Masks4Orbitals; +template class LocGridOrbitals : public Orbitals { private: @@ -65,7 +61,7 @@ class LocGridOrbitals : public Orbitals static int numpt_; static int loc_numpt_; - static DotProductManager* dotProductManager_; + static DotProductManager>* dotProductManager_; static int data_wghosts_index_; @@ -80,7 +76,7 @@ class LocGridOrbitals : public Orbitals int chromatic_number_; // map gid -> function storage (for each subdomain) - std::vector>* gidToStorage_; + std::vector>* gidToStorage_; // pointers to objects owned outside class ProjectedMatricesInterface* proj_matrices_; @@ -89,7 +85,7 @@ class LocGridOrbitals : public Orbitals //////////////////////////////////////////////////////// // instance specific data //////////////////////////////////////////////////////// - BlockVector block_vector_; + BlockVector block_vector_; //////////////////////////////////////////////////////// // @@ -97,19 +93,19 @@ class LocGridOrbitals : public Orbitals // void 
copySharedData(const LocGridOrbitals& A); - const ORBDTYPE* getGidStorage(const int st, const short iloc) const; + const ScalarType* getGidStorage(const int st, const short iloc) const; int packStates(std::shared_ptr lrs); void setAssignedIndexes(); - void projectOut(ORBDTYPE* const, const int, const double scale = 1.); + void projectOut(ScalarType* const, const int, const double scale = 1.); void multiply_by_matrix(const int first_color, const int ncolors, const DISTMATDTYPE* const matrix, LocGridOrbitals& product) const; void multiply_by_matrix(const int, const int, const DISTMATDTYPE* const, - ORBDTYPE*, const int) const; + ScalarType*, const int) const; void multiply_by_matrix(const dist_matrix::DistMatrix& matrix, - ORBDTYPE* const product, const int ldp); + ScalarType* const product, const int ldp); void scal(const int i, const double alpha) { block_vector_.scal(i, alpha); } - virtual void assign(const int i, const ORBDTYPE* const v, const int n = 1) + virtual void assign(const int i, const ScalarType* const v, const int n = 1) { block_vector_.assign(i, v, n); } @@ -118,13 +114,13 @@ class LocGridOrbitals : public Orbitals LocGridOrbitals& operator=(const LocGridOrbitals& orbitals); LocGridOrbitals(); - void computeMatB(const LocGridOrbitals&, const pb::Lap&); + void computeMatB(const LocGridOrbitals&, const pb::Lap&); void matrixToLocalMatrix(const short, const DISTMATDTYPE* const, DISTMATDTYPE* const, const int, const int) const; void matrixToLocalMatrix( const short, const DISTMATDTYPE* const, DISTMATDTYPE* const) const; - void computeLocalProduct(const ORBDTYPE* const, const int, + void computeLocalProduct(const ScalarType* const, const int, LocalMatrices&, const bool transpose = false); @@ -135,12 +131,12 @@ class LocGridOrbitals : public Orbitals void initFourier(); void initRand(); - ORBDTYPE* psi(const int i) const { return block_vector_.vect(i); } + ScalarType* psi(const int i) const { return block_vector_.vect(i); } - void app_mask(const int, 
ORBDTYPE*, const short level) const; + void app_mask(const int, ScalarType*, const short level) const; void multiplyByMatrix( const SquareLocalMatrices& matrix, - ORBDTYPE* product, const int ldp) const; + ScalarType* product, const int ldp) const; void setup(MasksSet* masks, MasksSet* corrmasks, std::shared_ptr lrs); @@ -246,7 +242,7 @@ class LocGridOrbitals : public Orbitals block_vector_.setDataWithGhosts(data_wghosts); } - pb::GridFunc& getFuncWithGhosts(const int i) + pb::GridFunc& getFuncWithGhosts(const int i) { //(*MPIdata::sout)<<" data_wghosts_index_="<axpy(-2., tmp_orbitals_minus1); - new_orbitals->axpy(1., *orbitals_minus2_); + new_orbitals->axpy((ORBDTYPE)-2., tmp_orbitals_minus1); + new_orbitals->axpy((ORBDTYPE)1., *orbitals_minus2_); delete orbitals_minus2_; } @@ -95,7 +95,7 @@ void OrbitalsExtrapolationOrder3::extrapolate_orbitals( (*MPIdata::sout) << "Extrapolate orbitals using 2nd order " "scheme only for this step..." << std::endl; - new_orbitals->axpy(-1., tmp_orbitals_minus1); + new_orbitals->axpy((ORBDTYPE)-1., tmp_orbitals_minus1); } orbitals_minus2_ = orbitals_minus1_; @@ -122,5 +122,5 @@ void OrbitalsExtrapolationOrder3::extrapolate_orbitals( } } -template class OrbitalsExtrapolationOrder3; -template class OrbitalsExtrapolationOrder3; +template class OrbitalsExtrapolationOrder3>; +template class OrbitalsExtrapolationOrder3>; diff --git a/src/OrthoAndersonMix.cc b/src/OrthoAndersonMix.cc index 64fad2b3..8838cc8d 100644 --- a/src/OrthoAndersonMix.cc +++ b/src/OrthoAndersonMix.cc @@ -25,5 +25,5 @@ void OrthoAndersonMix::postprocessUpdate() } } -template class OrthoAndersonMix; -template class OrthoAndersonMix; +template class OrthoAndersonMix>; +template class OrthoAndersonMix>; diff --git a/src/PBEonGrid.cc b/src/PBEonGrid.cc index 2c0603e3..e2d98c39 100644 --- a/src/PBEonGrid.cc +++ b/src/PBEonGrid.cc @@ -133,5 +133,5 @@ double PBEonGrid::getExc() const return mygrid.vel() * pbe_->computeRhoDotExc(); } -template class PBEonGrid; -template 
class PBEonGrid; +template class PBEonGrid>; +template class PBEonGrid>; diff --git a/src/PBEonGridSpin.cc b/src/PBEonGridSpin.cc index 8fe58b90..aada0fae 100644 --- a/src/PBEonGridSpin.cc +++ b/src/PBEonGridSpin.cc @@ -185,5 +185,5 @@ double PBEonGridSpin::getExc() const return exc * mygrid.vel(); } -template class PBEonGridSpin; -template class PBEonGridSpin; +template class PBEonGridSpin>; +template class PBEonGridSpin>; diff --git a/src/PolakRibiereSolver.cc b/src/PolakRibiereSolver.cc index 3f46a00e..ca25b1cc 100644 --- a/src/PolakRibiereSolver.cc +++ b/src/PolakRibiereSolver.cc @@ -262,7 +262,7 @@ double PolakRibiereSolver::computeBeta( OrbitalsType& work_orbitals) const { work_orbitals.assign(*r_k_); - work_orbitals.axpy(-1., *r_km1_); + work_orbitals.axpy((ORBDTYPE)(-1.), *r_km1_); double beta = z_k_->dotProduct(work_orbitals, 2); @@ -453,7 +453,7 @@ int PolakRibiereSolver::solve(OrbitalsType& orbitals, if (onpe0 && ct.verbose > 1) os_ << " PolakRibiereSolver: beta=" << beta << std::endl; p_k_->scal(beta); - p_k_->axpy(1., *z_k_); + p_k_->axpy((ORBDTYPE)1., *z_k_); if (beta > 0.1) p_k_->scal(1. / (1. + beta)); } @@ -462,7 +462,7 @@ int PolakRibiereSolver::solve(OrbitalsType& orbitals, alpha_k = alpha_; // make new trial step - orbitals.axpy(alpha_k, *p_k_); + orbitals.axpy((ORBDTYPE)alpha_k, *p_k_); // save current "k" vectors into "km1" vectors if (with_preconditioner_) @@ -480,7 +480,7 @@ int PolakRibiereSolver::solve(OrbitalsType& orbitals, << alpha_k << "..." << std::endl; // half step back - orbitals.axpy(-1. * alpha_k, *p_k_); + orbitals.axpy((ORBDTYPE)(-1. 
* alpha_k), *p_k_); // return DM to value before trial step dm_strategy_->reset(); @@ -577,5 +577,5 @@ int PolakRibiereSolver::solve(OrbitalsType& orbitals, return retval; } -template class PolakRibiereSolver; -template class PolakRibiereSolver; +template class PolakRibiereSolver>; +template class PolakRibiereSolver>; diff --git a/src/Rho.cc b/src/Rho.cc index bd89b6a2..7b104573 100644 --- a/src/Rho.cc +++ b/src/Rho.cc @@ -302,7 +302,7 @@ void Rho::computeRho( { proj_matrices.updateSubMatX(); - if (std::is_same::value) + if (std::is_same>::value) { SquareLocalMatrices& localX( (orbitals.projMatrices())->getLocalX()); @@ -569,33 +569,34 @@ void Rho::printTimers(std::ostream& os) compute_blas_tm_.print(os); } -template class Rho; -template class Rho; +template class Rho>; +template class Rho>; -template double Rho::dotWithRho( +template double Rho>::dotWithRho( const double* const func) const; -template double Rho::dotWithRho( +template double Rho>::dotWithRho( const double* const func) const; -template void -Rho::computeRho>( - ExtendedGridOrbitals&, ExtendedGridOrbitals&, - const dist_matrix::DistMatrix&, +template void Rho>::computeRho< + dist_matrix::DistMatrix>(ExtendedGridOrbitals&, + ExtendedGridOrbitals&, const dist_matrix::DistMatrix&, const dist_matrix::DistMatrix&, const dist_matrix::DistMatrix&, const dist_matrix::DistMatrix&); +template void Rho>::computeRho< + dist_matrix::DistMatrix>( + ExtendedGridOrbitals&, const dist_matrix::DistMatrix&); template void -Rho::computeRho>( - ExtendedGridOrbitals&, const dist_matrix::DistMatrix&); -template void Rho::computeRho>( - LocGridOrbitals&, const dist_matrix::DistMatrix&); +Rho>::computeRho>( + LocGridOrbitals&, const dist_matrix::DistMatrix&); #ifdef MGMOL_USE_MIXEDP -template double Rho::dotWithRho( +template double Rho>::dotWithRho( const float* const func) const; #endif -template void Rho::computeRho( - ExtendedGridOrbitals&, const ReplicatedMatrix&); -template void Rho::computeRho( - 
ExtendedGridOrbitals&, ExtendedGridOrbitals&, const ReplicatedMatrix&, - const ReplicatedMatrix&, const ReplicatedMatrix&, const ReplicatedMatrix&); -template void Rho::computeRho( - LocGridOrbitals&, const ReplicatedMatrix&); +template void Rho>::computeRho( + ExtendedGridOrbitals&, const ReplicatedMatrix&); +template void Rho>::computeRho( + ExtendedGridOrbitals&, ExtendedGridOrbitals&, + const ReplicatedMatrix&, const ReplicatedMatrix&, const ReplicatedMatrix&, + const ReplicatedMatrix&); +template void Rho>::computeRho( + LocGridOrbitals&, const ReplicatedMatrix&); diff --git a/src/SinCosOps.cc b/src/SinCosOps.cc index e1c3fdd6..cd3f7a8c 100644 --- a/src/SinCosOps.cc +++ b/src/SinCosOps.cc @@ -829,5 +829,5 @@ void SinCosOps::computeDiag(const T& orbitals, compute_tm_.stop(); } -template class SinCosOps; -template class SinCosOps; +template class SinCosOps>; +template class SinCosOps>; diff --git a/src/SpreadPenalty.cc b/src/SpreadPenalty.cc index d3979f5c..3e13f31e 100644 --- a/src/SpreadPenalty.cc +++ b/src/SpreadPenalty.cc @@ -317,5 +317,5 @@ double SpreadPenalty::evaluateEnergy(const T& phi) return alpha_ * total_energy; } -template class SpreadPenalty; -template class SpreadPenalty; +template class SpreadPenalty>; +template class SpreadPenalty>; diff --git a/src/SpreadPenaltyVolume.cc b/src/SpreadPenaltyVolume.cc index 334c3455..24ad725f 100644 --- a/src/SpreadPenaltyVolume.cc +++ b/src/SpreadPenaltyVolume.cc @@ -307,5 +307,5 @@ double SpreadPenaltyVolume::evaluateEnergy(const T& phi) return 0.; } -template class SpreadPenaltyVolume; -template class SpreadPenaltyVolume; +template class SpreadPenaltyVolume>; +template class SpreadPenaltyVolume>; diff --git a/src/SpreadsAndCenters.cc b/src/SpreadsAndCenters.cc index d06c54e7..cfe5335c 100644 --- a/src/SpreadsAndCenters.cc +++ b/src/SpreadsAndCenters.cc @@ -545,5 +545,5 @@ void SpreadsAndCenters::computeSinCosDiag( mat, orbitals.getAllOverlappingGids(), orbitals.getLocalGids()); } -template class 
SpreadsAndCenters; -template class SpreadsAndCenters; +template class SpreadsAndCenters>; +template class SpreadsAndCenters>; diff --git a/src/SubspaceProjector.cc b/src/SubspaceProjector.cc index fe412085..67702cb4 100644 --- a/src/SubspaceProjector.cc +++ b/src/SubspaceProjector.cc @@ -82,5 +82,5 @@ void SubspaceProjector::projectOut( orbitals.incrementIterativeIndex(); } -template class SubspaceProjector; -template class SubspaceProjector; +template class SubspaceProjector>; +template class SubspaceProjector>; diff --git a/src/computeHij.cc b/src/computeHij.cc index 2786d4c6..5abd2a2c 100644 --- a/src/computeHij.cc +++ b/src/computeHij.cc @@ -30,8 +30,9 @@ template <> template <> -void MGmol::computeHij(LocGridOrbitals& orbitals_i, - LocGridOrbitals& orbitals_j, const Ions& ions, +void MGmol>::computeHij( + LocGridOrbitals& orbitals_i, + LocGridOrbitals& orbitals_j, const Ions& ions, const KBPsiMatrixSparse* const kbpsi_i, const KBPsiMatrixSparse* const kbpsi_j, VariableSizeMatrix& mat, const bool consolidate) @@ -72,8 +73,9 @@ void MGmol::computeHij(LocGridOrbitals& orbitals_i, } template <> -void MGmol::computeHij(LocGridOrbitals& orbitals_i, - LocGridOrbitals& orbitals_j, const Ions& ions, +void MGmol>::computeHij( + LocGridOrbitals& orbitals_i, + LocGridOrbitals& orbitals_j, const Ions& ions, const KBPsiMatrixSparse* const kbpsi, VariableSizeMatrix& mat, const bool consolidate) { @@ -147,8 +149,9 @@ void MGmol::computeHij_private(OrbitalsType& orbitals_i, template <> template <> -void MGmol::computeHij(LocGridOrbitals& orbitals_i, - LocGridOrbitals& orbitals_j, const Ions& ions, +void MGmol>::computeHij( + LocGridOrbitals& orbitals_i, + LocGridOrbitals& orbitals_j, const Ions& ions, const KBPsiMatrixSparse* const kbpsi, const KBPsiMatrixSparse* const kbpsi_j, dist_matrix::DistMatrix& hij, const bool consolidate) @@ -160,8 +163,9 @@ void MGmol::computeHij(LocGridOrbitals& orbitals_i, template <> template <> -void MGmol::computeHij(ExtendedGridOrbitals& 
orbitals_i, - ExtendedGridOrbitals& orbitals_j, const Ions& ions, +void MGmol>::computeHij( + ExtendedGridOrbitals& orbitals_i, + ExtendedGridOrbitals& orbitals_j, const Ions& ions, const KBPsiMatrixSparse* const kbpsi, const KBPsiMatrixSparse* const kbpsi_j, dist_matrix::DistMatrix& hij, const bool consolidate) @@ -385,5 +389,5 @@ void MGmol::getHpsiAndTheta(Ions& ions, OrbitalsType& phi, get_Hpsi_and_Hij_tm_.stop(); } -template class MGmol; -template class MGmol; +template class MGmol>; +template class MGmol>; diff --git a/src/lbfgsrlx.cc b/src/lbfgsrlx.cc index ce05c1a3..20326a94 100644 --- a/src/lbfgsrlx.cc +++ b/src/lbfgsrlx.cc @@ -138,7 +138,7 @@ void MGmol::lbfgsrlx(OrbitalsType** orbitals, Ions& ions) } } -template void MGmol::lbfgsrlx( - LocGridOrbitals** orbitals, Ions& ions); -template void MGmol::lbfgsrlx( - ExtendedGridOrbitals** orbitals, Ions& ions); +template void MGmol>::lbfgsrlx( + LocGridOrbitals** orbitals, Ions& ions); +template void MGmol>::lbfgsrlx( + ExtendedGridOrbitals** orbitals, Ions& ions); diff --git a/src/main.cc b/src/main.cc index 621b72de..7b69aeb3 100644 --- a/src/main.cc +++ b/src/main.cc @@ -96,11 +96,13 @@ int main(int argc, char** argv) { MGmolInterface* mgmol; if (ct.isLocMode()) - mgmol = new MGmol(global_comm, *MPIdata::sout, - coords_filename, lrs_filename, constraints_filename); + mgmol = new MGmol>(global_comm, + *MPIdata::sout, coords_filename, lrs_filename, + constraints_filename); else - mgmol = new MGmol(global_comm, *MPIdata::sout, - coords_filename, lrs_filename, constraints_filename); + mgmol = new MGmol>(global_comm, + *MPIdata::sout, coords_filename, lrs_filename, + constraints_filename); mgmol->setup(); diff --git a/src/md.cc b/src/md.cc index 027991c6..2e5fe0f9 100644 --- a/src/md.cc +++ b/src/md.cc @@ -727,5 +727,5 @@ void MGmol::loadRestartFile(const std::string filename) return; } -template class MGmol; -template class MGmol; +template class MGmol>; +template class MGmol>; diff --git a/src/mlwf.cc 
b/src/mlwf.cc index 3efdfeb2..dc920134 100644 --- a/src/mlwf.cc +++ b/src/mlwf.cc @@ -339,5 +339,5 @@ int MGmol::get_NOLMO(NOLMOTransform& noot, OrbitalsType& orbitals, return 0; } -template class MGmol; -template class MGmol; +template class MGmol>; +template class MGmol>; diff --git a/src/quench.cc b/src/quench.cc index c4b45d2a..56305e59 100644 --- a/src/quench.cc +++ b/src/quench.cc @@ -52,8 +52,8 @@ Timer quench_evnl_tm("quench_evnl"); Timer updateCenters_tm("MGmol::updateCenters"); template <> -void MGmol::adaptLR( - const SpreadsAndCenters* /*spreadf*/, +void MGmol>::adaptLR( + const SpreadsAndCenters>* /*spreadf*/, const OrbitalsTransform* /*ot*/) { } @@ -397,7 +397,8 @@ void MGmol::disentangleOrbitals(OrbitalsType& orbitals, } template <> -void MGmol::applyAOMMprojection(LocGridOrbitals& orbitals) +void MGmol>::applyAOMMprojection( + LocGridOrbitals& orbitals) { aomm_.reset(new AOMMprojector(orbitals, lrs_)); aomm_->projectOut(orbitals); @@ -409,8 +410,9 @@ void MGmol::applyAOMMprojection(OrbitalsType&) } template <> -int MGmol::outerSolve(LocGridOrbitals& orbitals, - LocGridOrbitals& work_orbitals, Ions& ions, const int max_steps, +int MGmol>::outerSolve( + LocGridOrbitals& orbitals, + LocGridOrbitals& work_orbitals, Ions& ions, const int max_steps, const int iprint, double& last_eks) { int retval @@ -423,7 +425,7 @@ int MGmol::outerSolve(LocGridOrbitals& orbitals, case OuterSolverType::ABPG: case OuterSolverType::NLCG: { - DFTsolver solver(hamiltonian_.get(), + DFTsolver> solver(hamiltonian_.get(), proj_matrices_.get(), energy_.get(), electrostat_.get(), this, ions, rho_.get(), dm_strategy_.get(), os_); @@ -435,9 +437,10 @@ int MGmol::outerSolve(LocGridOrbitals& orbitals, case OuterSolverType::PolakRibiere: { - PolakRibiereSolver solver(hamiltonian_.get(), - proj_matrices_.get(), energy_.get(), electrostat_.get(), this, - ions, rho_.get(), dm_strategy_.get(), os_); + PolakRibiereSolver> solver( + hamiltonian_.get(), proj_matrices_.get(), energy_.get(), + 
electrostat_.get(), this, ions, rho_.get(), dm_strategy_.get(), + os_); retval = solver.solve( orbitals, work_orbitals, ions, max_steps, iprint, last_eks); @@ -667,5 +670,5 @@ int MGmol::quench(OrbitalsType& orbitals, Ions& ions, return retval; } -template class MGmol; -template class MGmol; +template class MGmol>; +template class MGmol>; diff --git a/src/readInput.cc b/src/readInput.cc index bd6a7dd1..bcb823c4 100644 --- a/src/readInput.cc +++ b/src/readInput.cc @@ -201,5 +201,5 @@ int MGmol::readCoordinates( return 0; } -template class MGmol; -template class MGmol; +template class MGmol>; +template class MGmol>; diff --git a/src/restart.cc b/src/restart.cc index 532043af..b144c0ab 100644 --- a/src/restart.cc +++ b/src/restart.cc @@ -213,5 +213,5 @@ int MGmol::read_restart_data( return 0; } -template class MGmol; -template class MGmol; +template class MGmol>; +template class MGmol>; diff --git a/src/runfire.cc b/src/runfire.cc index 3239e1b1..544685a9 100644 --- a/src/runfire.cc +++ b/src/runfire.cc @@ -137,7 +137,7 @@ void MGmol::runfire(OrbitalsType** orbitals, Ions& ions) } } -template void MGmol::runfire( - LocGridOrbitals** orbitals, Ions& ions); -template void MGmol::runfire( - ExtendedGridOrbitals** orbitals, Ions& ions); +template void MGmol>::runfire( + LocGridOrbitals** orbitals, Ions& ions); +template void MGmol>::runfire( + ExtendedGridOrbitals** orbitals, Ions& ions); diff --git a/src/setup.cc b/src/setup.cc index 9b4c3d22..c7b67c60 100644 --- a/src/setup.cc +++ b/src/setup.cc @@ -194,5 +194,5 @@ int MGmol::setupConstraintsFromInput(const std::string filename) return 0; } -template class MGmol; -template class MGmol; +template class MGmol>; +template class MGmol>; diff --git a/tests/DMandEnergyAndForces/testDMandEnergyAndForces.cc b/tests/DMandEnergyAndForces/testDMandEnergyAndForces.cc index a5a086fe..72e5dc68 100644 --- a/tests/DMandEnergyAndForces/testDMandEnergyAndForces.cc +++ b/tests/DMandEnergyAndForces/testDMandEnergyAndForces.cc @@ -87,11 
+87,13 @@ int main(int argc, char** argv) MGmolInterface* mgmol; if (ct.isLocMode()) - mgmol = new MGmol(global_comm, *MPIdata::sout, - input_filename, lrs_filename, constraints_filename); + mgmol = new MGmol>(global_comm, + *MPIdata::sout, input_filename, lrs_filename, + constraints_filename); else - mgmol = new MGmol(global_comm, *MPIdata::sout, - input_filename, lrs_filename, constraints_filename); + mgmol = new MGmol>(global_comm, + *MPIdata::sout, input_filename, lrs_filename, + constraints_filename); if (MPIdata::onpe0) { @@ -158,9 +160,9 @@ int main(int argc, char** argv) std::shared_ptr projmatrices = mgmol->getProjectedMatrices(); - ExtendedGridOrbitals orbitals("new_orbitals", mygrid, mymesh->subdivx(), - ct.numst, ct.bcWF, projmatrices.get(), nullptr, nullptr, nullptr, - nullptr); + ExtendedGridOrbitals orbitals("new_orbitals", mygrid, + mymesh->subdivx(), ct.numst, ct.bcWF, projmatrices.get(), nullptr, + nullptr, nullptr, nullptr); const pb::PEenv& myPEenv = mymesh->peenv(); HDFrestart h5file("WF", myPEenv, ct.out_restart_file_type); diff --git a/tests/EnergyAndForces/testEnergyAndForces.cc b/tests/EnergyAndForces/testEnergyAndForces.cc index c8f7ab95..f8d1264b 100644 --- a/tests/EnergyAndForces/testEnergyAndForces.cc +++ b/tests/EnergyAndForces/testEnergyAndForces.cc @@ -87,11 +87,13 @@ int main(int argc, char** argv) MGmolInterface* mgmol; if (ct.isLocMode()) - mgmol = new MGmol(global_comm, *MPIdata::sout, - input_filename, lrs_filename, constraints_filename); + mgmol = new MGmol>(global_comm, + *MPIdata::sout, input_filename, lrs_filename, + constraints_filename); else - mgmol = new MGmol(global_comm, *MPIdata::sout, - input_filename, lrs_filename, constraints_filename); + mgmol = new MGmol>(global_comm, + *MPIdata::sout, input_filename, lrs_filename, + constraints_filename); if (MPIdata::onpe0) { diff --git a/tests/RestartEnergyAndForces/testRestartEnergyAndForces.cc b/tests/RestartEnergyAndForces/testRestartEnergyAndForces.cc index 
058b0a8e..776e7983 100644 --- a/tests/RestartEnergyAndForces/testRestartEnergyAndForces.cc +++ b/tests/RestartEnergyAndForces/testRestartEnergyAndForces.cc @@ -85,8 +85,9 @@ int main(int argc, char** argv) std::cout << "-------------------------" << std::endl; } - MGmolInterface* mgmol = new MGmol(global_comm, - *MPIdata::sout, input_filename, lrs_filename, constraints_filename); + MGmolInterface* mgmol = new MGmol>( + global_comm, *MPIdata::sout, input_filename, lrs_filename, + constraints_filename); if (MPIdata::onpe0) { @@ -142,9 +143,9 @@ int main(int argc, char** argv) std::shared_ptr projmatrices = mgmol->getProjectedMatrices(); - ExtendedGridOrbitals orbitals("new_orbitals", mygrid, mymesh->subdivx(), - ct.numst, ct.bcWF, projmatrices.get(), nullptr, nullptr, nullptr, - nullptr); + ExtendedGridOrbitals orbitals("new_orbitals", mygrid, + mymesh->subdivx(), ct.numst, ct.bcWF, projmatrices.get(), nullptr, + nullptr, nullptr, nullptr); // read numst_ wavefunction int nread = orbitals.read_func_hdf5(h5file, name); diff --git a/tests/RhoVhRestart/testRhoVhRestart.cc b/tests/RhoVhRestart/testRhoVhRestart.cc index ce40272d..b2cd8294 100644 --- a/tests/RhoVhRestart/testRhoVhRestart.cc +++ b/tests/RhoVhRestart/testRhoVhRestart.cc @@ -178,26 +178,27 @@ int main(int argc, char** argv) // Enter main scope { - MGmolInterface* mgmol = new MGmol(global_comm, - *MPIdata::sout, input_filename, lrs_filename, constraints_filename); + MGmolInterface* mgmol = new MGmol>( + global_comm, *MPIdata::sout, input_filename, lrs_filename, + constraints_filename); mgmol->setup(); /* load a restart file */ - MGmol* mgmol_ext - = dynamic_cast*>(mgmol); + MGmol>* mgmol_ext + = dynamic_cast>*>(mgmol); mgmol_ext->loadRestartFile(ct.restart_file); if (MPIdata::onpe0) std::cout << "=============================" << std::endl; if (MPIdata::onpe0) std::cout << "testRhoRestart..." 
<< std::endl; - status = testRhoRestart(mgmol); + status = testRhoRestart>(mgmol); if (status < 0) return status; if (MPIdata::onpe0) std::cout << "=============================" << std::endl; if (MPIdata::onpe0) std::cout << "testPotRestart..." << std::endl; - status = testPotRestart(mgmol); + status = testPotRestart>(mgmol); if (status < 0) return status; delete mgmol; diff --git a/tests/WFEnergyAndForces/testWFEnergyAndForces.cc b/tests/WFEnergyAndForces/testWFEnergyAndForces.cc index f9039abe..ae07a3fa 100644 --- a/tests/WFEnergyAndForces/testWFEnergyAndForces.cc +++ b/tests/WFEnergyAndForces/testWFEnergyAndForces.cc @@ -87,11 +87,13 @@ int main(int argc, char** argv) MGmolInterface* mgmol; if (ct.isLocMode()) - mgmol = new MGmol(global_comm, *MPIdata::sout, - input_filename, lrs_filename, constraints_filename); + mgmol = new MGmol>(global_comm, + *MPIdata::sout, input_filename, lrs_filename, + constraints_filename); else - mgmol = new MGmol(global_comm, *MPIdata::sout, - input_filename, lrs_filename, constraints_filename); + mgmol = new MGmol>(global_comm, + *MPIdata::sout, input_filename, lrs_filename, + constraints_filename); if (MPIdata::onpe0) { @@ -158,9 +160,9 @@ int main(int argc, char** argv) std::shared_ptr projmatrices = mgmol->getProjectedMatrices(); - ExtendedGridOrbitals orbitals("new_orbitals", mygrid, mymesh->subdivx(), - ct.numst, ct.bcWF, projmatrices.get(), nullptr, nullptr, nullptr, - nullptr); + ExtendedGridOrbitals orbitals("new_orbitals", mygrid, + mymesh->subdivx(), ct.numst, ct.bcWF, projmatrices.get(), nullptr, + nullptr, nullptr, nullptr); const pb::PEenv& myPEenv = mymesh->peenv(); HDFrestart h5file("WF", myPEenv, ct.out_restart_file_type); From 6f274c3c76c2519b05562fe2944a4ee677eca463 Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Tue, 2 Dec 2025 10:36:19 -0500 Subject: [PATCH 92/99] Add test for MVP ReplicatedMatrix (#392) --- tests/CMakeLists.txt | 7 +++++++ tests/MVP/mvp.cfg | 3 ++- tests/MVPReplicated/mvp.cfg | 35 
+++++++++++++++++++++++++++++++++++ 3 files changed, 44 insertions(+), 1 deletion(-) create mode 100644 tests/MVPReplicated/mvp.cfg diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 9fa70160..03dffc89 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -517,6 +517,13 @@ add_test(NAME testMVP ${CMAKE_CURRENT_SOURCE_DIR}/MVP/mvp.cfg ${CMAKE_CURRENT_SOURCE_DIR}/MVP/coords.in ${CMAKE_CURRENT_SOURCE_DIR}/../potentials) +add_test(NAME testMVPReplicated + COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/MVP/test.py + ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 4 ${MPIEXEC_PREFLAGS} + ${CMAKE_CURRENT_BINARY_DIR}/../src/mgmol-opt + ${CMAKE_CURRENT_SOURCE_DIR}/MVPReplicated/mvp.cfg + ${CMAKE_CURRENT_SOURCE_DIR}/MVP/coords.in + ${CMAKE_CURRENT_SOURCE_DIR}/../potentials) add_test(NAME testMVPmix COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/MVPmix/test.py ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 4 ${MPIEXEC_PREFLAGS} diff --git a/tests/MVP/mvp.cfg b/tests/MVP/mvp.cfg index 868e5703..28bca7e2 100644 --- a/tests/MVP/mvp.cfg +++ b/tests/MVP/mvp.cfg @@ -19,7 +19,7 @@ type=QUENCH [Quench] solver=PSD max_steps=300 -atol=1.e-7 +atol=2.e-7 ortho_freq=10 [Orbitals] nempty=10 @@ -27,6 +27,7 @@ initial_type=random temperature=300. [ProjectedMatrices] solver=exact +replicated=false [DensityMatrix] solver=MVP nb_inner_it=2 diff --git a/tests/MVPReplicated/mvp.cfg b/tests/MVPReplicated/mvp.cfg new file mode 100644 index 00000000..9978efa2 --- /dev/null +++ b/tests/MVPReplicated/mvp.cfg @@ -0,0 +1,35 @@ +verbosity=2 +xcFunctional=LDA +FDtype=4th +[Mesh] +nx=32 +ny=32 +nz=32 +[Domain] +ox=0. +oy=0. +oz=0. +lx=15.3 +ly=15.3 +lz=15.3 +[Potentials] +pseudopotential=pseudo.Al_LDA_FHI +[Run] +type=QUENCH +[Quench] +solver=PSD +max_steps=300 +atol=2.e-7 +ortho_freq=10 +[Orbitals] +nempty=10 +initial_type=random +temperature=300. 
+[ProjectedMatrices] +solver=exact +replicated=true +[DensityMatrix] +solver=MVP +nb_inner_it=2 +[Restart] +output_level=2 From 628caba9b5a33191e326367b32e49898826431d7 Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Tue, 2 Dec 2025 15:47:37 -0500 Subject: [PATCH 93/99] Some fixes in Hamiltonian and its usage in MVP (#393) --- src/Hamiltonian.cc | 40 ++++------------------------------------ src/LocGridOrbitals.h | 7 +++++++ src/MVPSolver.cc | 9 ++++++--- 3 files changed, 17 insertions(+), 39 deletions(-) diff --git a/src/Hamiltonian.cc b/src/Hamiltonian.cc index 77a27093..aed9dbad 100644 --- a/src/Hamiltonian.cc +++ b/src/Hamiltonian.cc @@ -222,6 +222,8 @@ void Hamiltonian>::addHlocal2matrix( LocGridOrbitals& phi1, LocGridOrbitals& phi2, ReplicatedMatrix& hij, const bool force) { + (void)phi1; + (void)phi2; (void)hij; applyLocal(phi2, force); @@ -284,39 +286,5 @@ void Hamiltonian>::addHlocal2matrix( mat.insertMatrixElements(ss, phi1.getOverlappingGids(), ct.numst); } -template Hamiltonian>::Hamiltonian(); -template Hamiltonian>::Hamiltonian(); - -template Hamiltonian>::~Hamiltonian(); -template Hamiltonian>::~Hamiltonian(); - -template void Hamiltonian>::setup( - pb::Grid const&, int); -template void Hamiltonian>::setup( - pb::Grid const&, int); - -template const LocGridOrbitals& -Hamiltonian>::applyLocal( - LocGridOrbitals&, const bool); -template const ExtendedGridOrbitals& -Hamiltonian>::applyLocal( - ExtendedGridOrbitals&, const bool); -template void Hamiltonian>::addHlocalij( - LocGridOrbitals&, LocGridOrbitals&, - ProjectedMatricesInterface* proj_matrices); -template void Hamiltonian>::addHlocalij( - ExtendedGridOrbitals&, ExtendedGridOrbitals&, - ProjectedMatricesInterface* proj_matrices); -template void Hamiltonian>::addHlocalij( - LocGridOrbitals&, ProjectedMatricesInterface* proj_matrices); -template void Hamiltonian>::addHlocalij( - ExtendedGridOrbitals&, ProjectedMatricesInterface* proj_matrices); -template void Hamiltonian>::addHlocal2matrix( - 
LocGridOrbitals&, LocGridOrbitals&, - VariableSizeMatrix& mat, const bool force); -template void Hamiltonian>::addHlocal2matrix( - LocGridOrbitals&, LocGridOrbitals&, - dist_matrix::DistMatrix& hij, const bool force); -template void Hamiltonian>::addHlocal2matrix( - LocGridOrbitals&, LocGridOrbitals&, ReplicatedMatrix&, - const bool force); +template class Hamiltonian>; +template class Hamiltonian>; diff --git a/src/LocGridOrbitals.h b/src/LocGridOrbitals.h index a19f42a6..a2cd525d 100644 --- a/src/LocGridOrbitals.h +++ b/src/LocGridOrbitals.h @@ -35,6 +35,7 @@ class ProjectedMatricesInterface; class LocalizationRegions; class MasksSet; class Masks4Orbitals; +class ReplicatedMatrix; template class LocGridOrbitals : public Orbitals @@ -355,6 +356,12 @@ class LocGridOrbitals : public Orbitals void addDotWithNcol2Matrix( LocGridOrbitals&, dist_matrix::DistMatrix&) const; + void addDotWithNcol2Matrix(LocGridOrbitals&, ReplicatedMatrix&) const + { + std::cerr << "LocGridOrbitals::addDotWithNcol2Matrix not implemented " + "for ReplicatedMatrix" + << std::endl; + } void scal(const double alpha) { diff --git a/src/MVPSolver.cc b/src/MVPSolver.cc index 3ab04651..3be7fa10 100644 --- a/src/MVPSolver.cc +++ b/src/MVPSolver.cc @@ -211,6 +211,8 @@ int MVPSolver::solve(OrbitalsType& orbitals) kbpsi.computeHvnlMatrix(&kbpsi, ions_, h11_nl); + OrbitalsType hphi("MVP_hphi", orbitals); + for (int inner_it = 0; inner_it < n_inner_steps_; inner_it++) { if (onpe0 && ct.verbose > 1) @@ -239,7 +241,8 @@ int MVPSolver::solve(OrbitalsType& orbitals) // compute h11 for the current potential by adding local part to // nonlocal components MatrixType h11(h11_nl); - hamiltonian_->addHlocal2matrix(orbitals, orbitals, h11, false); + hamiltonian_->applyLocal(numst_, orbitals, hphi); + orbitals.addDotWithNcol2Matrix(hphi, h11); current_proj_mat->assignH(h11); current_proj_mat->setHB2H(); @@ -318,8 +321,8 @@ int MVPSolver::solve(OrbitalsType& orbitals) // update h11 h11 = h11_nl; - 
hamiltonian_->addHlocal2matrix( - orbitals, orbitals, h11, false); + hamiltonian_->applyLocal(numst_, orbitals, hphi); + orbitals.addDotWithNcol2Matrix(hphi, h11); proj_mat_work_->assignH(h11); proj_mat_work_->setHB2H(); From 31cb9611e47f850ac1b7799f257c94bbe7f013ce Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Thu, 4 Dec 2025 08:12:40 -0500 Subject: [PATCH 94/99] Avoid recompute complete H*Phi in MVP and Davidson (#394) * Use delta potential in Davidson and MVP --- src/BlockVector.cc | 22 ++++++++++++++++++++++ src/BlockVector.h | 8 ++++++++ src/DavidsonSolver.cc | 28 +++++++++++++++------------- src/ExtendedGridOrbitals.h | 6 ++++++ src/Hamiltonian.cc | 8 ++++++++ src/Hamiltonian.h | 5 +++++ src/LocGridOrbitals.h | 6 ++++++ src/MGmol.cc | 4 ++++ src/MVPSolver.cc | 15 +++++++++++---- src/Potentials.cc | 1 + src/Potentials.h | 2 ++ 11 files changed, 88 insertions(+), 17 deletions(-) diff --git a/src/BlockVector.cc b/src/BlockVector.cc index 21e2854c..a1923581 100644 --- a/src/BlockVector.cc +++ b/src/BlockVector.cc @@ -478,6 +478,27 @@ void BlockVector::axpy(const double alpha, LinearAlgebraUtils::MPaxpy( locnumel_, alpha, bv.vect_[ix] + shift, vect_[iy] + shift); } + +template +void BlockVector::applyDiagonalOp( + const std::vector& diag, + BlockVector& dst) const +{ + diagop_tm_.start(); + + const double* const dd = diag.data(); + + for (unsigned int j = 0; j < vect_.size(); j++) + { + const ScalarType* __restrict__ srcj = vect_[j]; + ScalarType* __restrict__ dstj = dst.vect_[j]; + for (int i = 0; i < numel_; i++) + dstj[i] = (ScalarType)(dd[i] * (double)srcj[i]); + } + + diagop_tm_.stop(); +} + template void BlockVector::hasnan(const int j) const { @@ -534,6 +555,7 @@ void BlockVector::printTimers(std::ostream& os) scal_tm_.print(os); opminus_tm_.print(os); copy_tm_.print(os); + diagop_tm_.print(os); } template diff --git a/src/BlockVector.h b/src/BlockVector.h index f424d78f..33016d62 100644 --- a/src/BlockVector.h +++ b/src/BlockVector.h @@ -34,6 
+34,7 @@ class BlockVector static Timer scal_tm_; static Timer opminus_tm_; static Timer copy_tm_; + static Timer diagop_tm_; static short n_instances_; static short subdivx_; @@ -137,6 +138,9 @@ class BlockVector return vect_[i]; } + void applyDiagonalOp(const std::vector& diag, + BlockVector& dst) const; + ScalarType maxAbsValue() const; template @@ -331,4 +335,8 @@ Timer BlockVector::opminus_tm_( template Timer BlockVector::copy_tm_("BlockVector::copy"); + +template +Timer BlockVector::diagop_tm_( + "BlockVector::diagop"); #endif diff --git a/src/DavidsonSolver.cc b/src/DavidsonSolver.cc index 85f6fc9f..636de977 100644 --- a/src/DavidsonSolver.cc +++ b/src/DavidsonSolver.cc @@ -521,22 +521,29 @@ int DavidsonSolver::solve( kbpsi_2.computeHvnlMatrix(&kbpsi_2, ions_, h22nl); kbpsi_1.computeHvnlMatrix(&kbpsi_2, ions_, h12nl); + + h12 = h12nl; + h22 = h22nl; } else { - h11 = h11nl; - hamiltonian_->applyLocal(numst_, orbitals, hphi); + hamiltonian_->applyDeltaPot(orbitals, hphi); orbitals.addDotWithNcol2Matrix(hphi, h11); } - // compute H*P and store in hphi - hamiltonian_->applyLocal(numst_, work_orbitals, hphi); + if (inner_it == 0) + { + // compute H*P and store in hphi + hamiltonian_->applyLocal(numst_, work_orbitals, hphi); + } + else + { + hamiltonian_->applyDeltaPot(work_orbitals, hphi); + } // update h22, h12 and h21 - h12 = h12nl; orbitals.addDotWithNcol2Matrix(hphi, h12); - h22 = h22nl; work_orbitals.addDotWithNcol2Matrix(hphi, h22); h21.transpose(1., h12, 0.); @@ -609,16 +616,11 @@ int DavidsonSolver::solve( energy_->saveVofRho(); // update h11, h22, h12, and h21 - h11 = h11nl; - hamiltonian_->applyLocal(numst_, orbitals, hphi); + hamiltonian_->applyDeltaPot(orbitals, hphi); orbitals.addDotWithNcol2Matrix(hphi, h11); - hamiltonian_->applyLocal(numst_, work_orbitals, hphi); - - h22 = h22nl; + hamiltonian_->applyDeltaPot(work_orbitals, hphi); work_orbitals.addDotWithNcol2Matrix(hphi, h22); - - h12 = h12nl; orbitals.addDotWithNcol2Matrix(hphi, h12); 
h21.transpose(1., h12, 0.); diff --git a/src/ExtendedGridOrbitals.h b/src/ExtendedGridOrbitals.h index 52ece157..c641459d 100644 --- a/src/ExtendedGridOrbitals.h +++ b/src/ExtendedGridOrbitals.h @@ -281,6 +281,12 @@ class ExtendedGridOrbitals : public Orbitals assert(numst_ < 10000); return numst_; } + void applyDiagonalOp( + const std::vector& v, ExtendedGridOrbitals& hphi) const + { + block_vector_.applyDiagonalOp(v, hphi.block_vector_); + } + short subdivx(void) const { return 1; } void printChromaticNumber(std::ostream& os) const { diff --git a/src/Hamiltonian.cc b/src/Hamiltonian.cc index aed9dbad..601fdc24 100644 --- a/src/Hamiltonian.cc +++ b/src/Hamiltonian.cc @@ -161,6 +161,14 @@ void Hamiltonian::applyLocal(const int ncolors, T& phi, T& hphi) apply_Hloc_tm_.stop(); } +template +void Hamiltonian::applyDeltaPot(const T& phi, T& hphi) +{ + const std::vector& dv(pot_->dv()); + + phi.applyDiagonalOp(dv, hphi); +} + // add to hij the elements // corresponding to the local part of the Hamiltonian template <> diff --git a/src/Hamiltonian.h b/src/Hamiltonian.h index 0c87ceda..7263dff6 100644 --- a/src/Hamiltonian.h +++ b/src/Hamiltonian.h @@ -42,6 +42,11 @@ class Hamiltonian const OrbitalsType& applyLocal(OrbitalsType& phi, const bool force = false); void applyLocal(const int nstates, OrbitalsType& phi, OrbitalsType& hphi); + /*! 
+ * Apply potential difference dv to phi + */ + void applyDeltaPot(const OrbitalsType& phi, OrbitalsType& hphi); + template void addHlocal2matrix(OrbitalsType& orbitals1, OrbitalsType& orbitals2, MatrixType& mat, const bool force); diff --git a/src/LocGridOrbitals.h b/src/LocGridOrbitals.h index a2cd525d..ee49497c 100644 --- a/src/LocGridOrbitals.h +++ b/src/LocGridOrbitals.h @@ -363,6 +363,12 @@ class LocGridOrbitals : public Orbitals << std::endl; } + void applyDiagonalOp( + const std::vector& v, LocGridOrbitals& hphi) const + { + block_vector_.applyDiagonalOp(v, hphi.block_vector_); + } + void scal(const double alpha) { block_vector_.scal(alpha); diff --git a/src/MGmol.cc b/src/MGmol.cc index a6af740e..cf57a746 100644 --- a/src/MGmol.cc +++ b/src/MGmol.cc @@ -902,8 +902,12 @@ void MGmol::printTimers() proj_matrices_->printTimers(os_); ShortSightedInverse::printTimers(os_); if (std::is_same>::value) + { MVPSolver, dist_matrix::DistMatrix>::printTimers(os_); + MVPSolver, + ReplicatedMatrix>::printTimers(os_); + } VariableSizeMatrixInterface::printTimers(os_); DataDistribution::printTimers(os_); PackedCommunicationBuffer::printTimers(os_); diff --git a/src/MVPSolver.cc b/src/MVPSolver.cc index 3be7fa10..97bcc825 100644 --- a/src/MVPSolver.cc +++ b/src/MVPSolver.cc @@ -213,6 +213,8 @@ int MVPSolver::solve(OrbitalsType& orbitals) OrbitalsType hphi("MVP_hphi", orbitals); + MatrixType h11(h11_nl); + for (int inner_it = 0; inner_it < n_inner_steps_; inner_it++) { if (onpe0 && ct.verbose > 1) @@ -240,8 +242,14 @@ int MVPSolver::solve(OrbitalsType& orbitals) // compute h11 for the current potential by adding local part to // nonlocal components - MatrixType h11(h11_nl); - hamiltonian_->applyLocal(numst_, orbitals, hphi); + if (inner_it == 0) + { + hamiltonian_->applyLocal(numst_, orbitals, hphi); + } + else + { + hamiltonian_->applyDeltaPot(orbitals, hphi); + } orbitals.addDotWithNcol2Matrix(hphi, h11); current_proj_mat->assignH(h11); @@ -320,8 +328,7 @@ int 
MVPSolver::solve(OrbitalsType& orbitals) energy_->saveVofRho(); // update h11 - h11 = h11_nl; - hamiltonian_->applyLocal(numst_, orbitals, hphi); + hamiltonian_->applyDeltaPot(orbitals, hphi); orbitals.addDotWithNcol2Matrix(hphi, h11); proj_mat_work_->assignH(h11); diff --git a/src/Potentials.cc b/src/Potentials.cc index 4da9ab9a..2fd556f4 100644 --- a/src/Potentials.cc +++ b/src/Potentials.cc @@ -155,6 +155,7 @@ double Potentials::updateVtot(const std::vector>& rho) double minus = -1.; LinearAlgebraUtils::MPaxpy( size_, minus, &vtot_[0], &dv_[0]); + LinearAlgebraUtils::MPscal(size_, minus, &dv_[0]); evalNormDeltaVtotRho(rho); diff --git a/src/Potentials.h b/src/Potentials.h index c7062234..f5dc4636 100644 --- a/src/Potentials.h +++ b/src/Potentials.h @@ -155,6 +155,8 @@ class Potentials POTDTYPE* vtot() { return vtot_.data(); } RHODTYPE* rho_comp() { return rho_comp_.data(); } + const std::vector& dv() { return dv_; } + const std::vector& vnuc() const { return v_nuc_; } const std::vector& vh_rho() const { return vh_rho_; } From dfdcd734c93c9ba55e7639c4d13027c1235270cc Mon Sep 17 00:00:00 2001 From: Jean-Luc Fattebert Date: Mon, 8 Dec 2025 08:09:36 -0500 Subject: [PATCH 95/99] Bug fix in Poisson PCG Mehrstellen r.h.s. (#396) --- src/PCGSolver.cc | 11 +++++++++-- src/pb/Mgm.h | 7 ++----- tests/Chebyshev/test.py | 7 +++++-- tests/Cl2_ONCVPSP_LDA/test.py | 4 ++-- tests/Fatom/test.py | 2 +- tests/MVP/test.py | 5 +++-- tests/ShortSighted/test.py | 6 +++--- tests/SpinO2/test.py | 6 +++--- tests/SpinO2LDA/test.py | 6 +++--- tests/SpreadPenalty/test.py | 2 +- 10 files changed, 32 insertions(+), 24 deletions(-) diff --git a/src/PCGSolver.cc b/src/PCGSolver.cc index 7a5fd157..254b1a23 100644 --- a/src/PCGSolver.cc +++ b/src/PCGSolver.cc @@ -176,8 +176,15 @@ bool PCGSolver::solve( /* compute Ax */ oper_.apply(gf_phi, lhs); // set r = b - pb::GridFunc res(gf_rhs); - oper_.transform(res); + pb::GridFunc rhs(gf_rhs); + + // transform r.h.s. 
to account for dielectric model + oper_.transform(rhs); + + // apply Mehrstellen r.h.s. if appropriate + pb::GridFunc res(finegrid, bc_[0], bc_[1], bc_[2]); + oper_.rhs(rhs, res); + // compute r = r - Ax res -= lhs; diff --git a/src/pb/Mgm.h b/src/pb/Mgm.h index 4b5ad68d..a821261a 100644 --- a/src/pb/Mgm.h +++ b/src/pb/Mgm.h @@ -35,12 +35,10 @@ bool Mgm(T1& A, T2& vh, const GridFunc& rho, const short cogr, // Compute r.h.s. from rho GridFunc res(rho); - A.transform(res); + A.transform(res); // to account for dielectric model GridFunc rhs(finegrid, bcx, bcy, bcz); - A.rhs(res, rhs); + A.rhs(res, rhs); // to account for possible Mehrstellen operator - // Hartree units - // work GridFunc GridFunc lhs(finegrid, bcx, bcy, bcz); short bcwork[3] = { bcx, bcy, bcz }; @@ -54,7 +52,6 @@ bool Mgm(T1& A, T2& vh, const GridFunc& rho, const short cogr, nb_sweeps = 0; for (short i = 0; i < max_sweeps; i++) { - A.apply(vh, lhs); // res=rhs-lhs; res.diff(rhs, lhs); diff --git a/tests/Chebyshev/test.py b/tests/Chebyshev/test.py index 5cb9a284..28718ee3 100644 --- a/tests/Chebyshev/test.py +++ b/tests/Chebyshev/test.py @@ -70,21 +70,24 @@ print("Chebyshev-MVP test FAILED for taking too many iterations") sys.exit(1) +energy_ref = -108.264614049136 +tol = 1.e-6 print("Check energy...") last_energy = eval(energies[-1]) print("Energy = {}".format(last_energy)) -if last_energy>-108.0868: +if abs(last_energy-energy_ref)>tol: print("Last energy = {}".format(last_energy)) sys.exit(1) tol = 1.e-5 +entropy_ref = 0.17653976 print("Check entropy...") for line in lines: if line.count(b'-TS'): words=line.split() entropy = eval(words[3]) print("Entropy = {}".format(entropy)) - entropy_diff = entropy+0.17819 + entropy_diff = entropy+entropy_ref if abs(entropy_diff)>tol: print("Check entropy test FAILED.
Entropy difference = {}".format(abs(entropy_diff))) sys.exit(1) diff --git a/tests/Cl2_ONCVPSP_LDA/test.py b/tests/Cl2_ONCVPSP_LDA/test.py index e7ff345a..dcb9bff3 100755 --- a/tests/Cl2_ONCVPSP_LDA/test.py +++ b/tests/Cl2_ONCVPSP_LDA/test.py @@ -36,7 +36,7 @@ lines=output.split(b'\n') tol = 4.e-6 -Fz = 1.2e-3 +Fz = -7.33e-04 for line in lines: num_matches = line.count(b'%%') if num_matches: @@ -51,7 +51,7 @@ for i in range(5,7): force = eval(words[i]) if abs(force)>tol: - print("force = {}".format(force)) + print("Force larger than tol, force = {}".format(force)) sys.exit(1) #check value of force in z direction if abs(eval(words[7])-Fz)>2.e-5: diff --git a/tests/Fatom/test.py b/tests/Fatom/test.py index db960c3a..def0b3a9 100755 --- a/tests/Fatom/test.py +++ b/tests/Fatom/test.py @@ -65,7 +65,7 @@ print("ERROR Eigenvalue 0 = {}".format(eval(eigenvalues[0]))) sys.exit(1) for ii in range(3): - if abs(eval(eigenvalues[1+ii])+0.409)>tole: + if abs(eval(eigenvalues[1+ii])+0.410)>tole: print("ERROR Eigenvalue {} = {}".format(1+ii,eval(eigenvalues[1+ii]))) sys.exit(1) sys.exit(0) diff --git a/tests/MVP/test.py b/tests/MVP/test.py index 57d9522e..8a55cc5b 100644 --- a/tests/MVP/test.py +++ b/tests/MVP/test.py @@ -87,12 +87,13 @@ print(eigenvalues) tol = 1.e-4 -eigenvalue0 = -0.208 +eigenvalue0 = -0.210 if abs(eigenvalues[0]-eigenvalue0)>tol: print("Expected eigenvalue 0 to be {}".format(eigenvalue0)) sys.exit(1) -eigenvalue50 = 0.208 +eigenvalue50 = 0.205 if abs(eigenvalues[50]-eigenvalue50)>tol: + print("Eeigenvalue 50 = {}".format(eigenvalues[50])) print("Expected eigenvalue 50 to be {}".format(eigenvalue50)) sys.exit(1) diff --git a/tests/ShortSighted/test.py b/tests/ShortSighted/test.py index 68a617aa..25f7290b 100755 --- a/tests/ShortSighted/test.py +++ b/tests/ShortSighted/test.py @@ -67,8 +67,8 @@ print("Check energies...") tol = 1.e-3 count = 0 -energy1_ref = -83.904 -energy4_ref = -83.871 +energy1_ref = -83.929 +energy4_ref = -83.896 for line in lines: 
num_matches1 = line.count(b'IONIC') @@ -93,7 +93,7 @@ tol = 1.e-1 count = 0 temperature1_ref = 948.253 -temperature4_ref = 916.029 +temperature4_ref = 916.336 for line in lines: num_matches1 = line.count(b'Kinetic') diff --git a/tests/SpinO2/test.py b/tests/SpinO2/test.py index 5d5e823c..8d08bfd9 100755 --- a/tests/SpinO2/test.py +++ b/tests/SpinO2/test.py @@ -43,15 +43,15 @@ words=line.split() energy = eval(words[5][:-1]) -ref_energy = -31.805 +ref_energy = -31.808 print("energy = {}".format(energy)) if abs(ref_energy-energy) > 1.e-3: - print("Incorrect energy!") + print("Expected energy = {}".format(ref_energy)) sys.exit(1) #make sure forces are below tolerance tol = 6.e-4 -Fz = -1.06e-2 +Fz = -0.96e-2 for line in lines: #find output lines with forces if line.count(b'##'): diff --git a/tests/SpinO2LDA/test.py b/tests/SpinO2LDA/test.py index 77557ff0..26010c0a 100755 --- a/tests/SpinO2LDA/test.py +++ b/tests/SpinO2LDA/test.py @@ -43,15 +43,15 @@ words=line.split() energy = eval(words[5][:-1]) -ref_energy = -31.6105 +ref_energy = -31.6130 print("energy = {}".format(energy)) if abs(ref_energy-energy) > 1.e-3: - print("Incorrect energy!") + print("Incorrect energy, expected {}".format(ref_energy)) sys.exit(1) #make sure forces are below tolerance tol = 4.e-4 -Fz = 1.e-2 +Fz = 1.06e-2 for line in lines: #find output lines with forces if line.count(b'##'): diff --git a/tests/SpreadPenalty/test.py b/tests/SpreadPenalty/test.py index ea70f074..059f3320 100755 --- a/tests/SpreadPenalty/test.py +++ b/tests/SpreadPenalty/test.py @@ -79,7 +79,7 @@ #we tolerate an energy difference since the initial wave functions #are very delocalized and the spread penalty remains active all along -energy_ref = -17.16448 +energy_ref = -17.1660 tol = 5.e-4 if abs(energy-energy_ref) > tol: print("Test failed: last energy value incorrect!") From 69ee55aee2eaa53073b582ec3e7f4b382f4ef7c5 Mon Sep 17 00:00:00 2001 From: Daniel Osei-Kuffuor Date: Mon, 8 Dec 2025 11:15:05 -0800 Subject: [PATCH 
96/99] Add Convergence tolerance option for Poisson problem and some minor cleanup. (#395) * Add input option for PCG convergence tolerance. * Change default options for PCG * Renamed CG to PCG in input files. * Updated build script for LC - build.pel --- examples/AOMM/Si216.cfg | 2 +- examples/C200H272_1.05gcc/mgmol_md.cfg | 2 +- examples/C200H272_1.05gcc/mgmol_opt.cfg | 2 +- examples/C200H272_1.05gcc/mgmol_quench.cfg | 2 +- examples/Cu32/mvp.cfg | 2 +- examples/D144localPotentials/mgmol_md.cfg | 2 +- examples/D144localPotentials/mgmol_quench.cfg | 2 +- examples/Li128/mgmol_quench_hmvp.cfg | 2 +- examples/Li128/mgmol_quench_mvp.cfg | 2 +- examples/Li16/mvp.cfg | 2 +- examples/Li2GTH/davidson.cfg | 2 +- examples/ShortSighted/mgmol_md.cfg | 2 +- examples/ShortSighted/mgmol_quench.cfg | 2 +- examples/Water4x4y4z/md.cfg | 2 +- examples/Water4x4y4z/quench.cfg | 2 +- scripts/build_pel.sh | 26 +++++++++++-------- scripts/modules.pel | 14 +++++----- src/Control.cc | 12 ++++++--- src/Control.h | 6 ++++- src/Electrostatic.cc | 5 ++-- src/read_config.cc | 12 +++++---- tests/Chebyshev/cheb.cfg | 2 +- tests/Davidson/davidson.cfg | 2 +- tests/DavidsonReplicated/davidson.cfg | 2 +- tests/ProjectedMatrices/quenchExact.cfg | 2 +- .../ProjectedMatrices/quenchShortSighted.cfg | 2 +- tests/RhoVhRestart/md.cfg | 5 ++++ tests/RhoVhRestart/mgmol.cfg | 5 ++++ tests/RhoVhRestart/restart.cfg | 5 ++++ tests/ShortSighted/md.cfg | 2 +- tests/ShortSighted/quench.cfg | 2 +- 31 files changed, 83 insertions(+), 51 deletions(-) diff --git a/examples/AOMM/Si216.cfg b/examples/AOMM/Si216.cfg index a8c8e2c6..9756a4da 100644 --- a/examples/AOMM/Si216.cfg +++ b/examples/AOMM/Si216.cfg @@ -15,7 +15,7 @@ lz=30.78 [Potentials] pseudopotential=pseudo.Si [Poisson] -solver=CG +solver=PCG [Run] type=QUENCH [Quench] diff --git a/examples/C200H272_1.05gcc/mgmol_md.cfg b/examples/C200H272_1.05gcc/mgmol_md.cfg index 393f83ca..b11e9109 100644 --- a/examples/C200H272_1.05gcc/mgmol_md.cfg +++ 
b/examples/C200H272_1.05gcc/mgmol_md.cfg @@ -16,7 +16,7 @@ lz=30.568 pseudopotential=pseudo.C_pbe pseudopotential=pseudo.H_pbe [Poisson] -solver=CG +solver=PCG [Run] type=MD [MD] diff --git a/examples/C200H272_1.05gcc/mgmol_opt.cfg b/examples/C200H272_1.05gcc/mgmol_opt.cfg index a1fe927f..96420be8 100644 --- a/examples/C200H272_1.05gcc/mgmol_opt.cfg +++ b/examples/C200H272_1.05gcc/mgmol_opt.cfg @@ -16,7 +16,7 @@ lz=30.568 pseudopotential=pseudo.C_pbe pseudopotential=pseudo.H_pbe [Poisson] -solver=CG +solver=PCG [Run] type=MD [MD] diff --git a/examples/C200H272_1.05gcc/mgmol_quench.cfg b/examples/C200H272_1.05gcc/mgmol_quench.cfg index ef3781ff..38d38907 100644 --- a/examples/C200H272_1.05gcc/mgmol_quench.cfg +++ b/examples/C200H272_1.05gcc/mgmol_quench.cfg @@ -16,7 +16,7 @@ lz=30.568 pseudopotential=pseudo.C_pbe pseudopotential=pseudo.H_pbe [Poisson] -solver=CG +solver=PCG [Run] type=QUENCH [Quench] diff --git a/examples/Cu32/mvp.cfg b/examples/Cu32/mvp.cfg index 69085aef..c4c29d61 100644 --- a/examples/Cu32/mvp.cfg +++ b/examples/Cu32/mvp.cfg @@ -15,7 +15,7 @@ lz=13.6 [Potentials] pseudopotential=pseudo.Cu_ONCVPSP_LDA [Poisson] -solver=CG +solver=PCG [Run] type=QUENCH [Quench] diff --git a/examples/D144localPotentials/mgmol_md.cfg b/examples/D144localPotentials/mgmol_md.cfg index c9a509a8..6e4e9359 100644 --- a/examples/D144localPotentials/mgmol_md.cfg +++ b/examples/D144localPotentials/mgmol_md.cfg @@ -15,7 +15,7 @@ lz=21.2406 [Potentials] pseudopotential=pseudo.H [Poisson] -solver=CG +solver=PCG [Run] type=MD [MD] diff --git a/examples/D144localPotentials/mgmol_quench.cfg b/examples/D144localPotentials/mgmol_quench.cfg index 896acfa1..a090ba64 100644 --- a/examples/D144localPotentials/mgmol_quench.cfg +++ b/examples/D144localPotentials/mgmol_quench.cfg @@ -15,7 +15,7 @@ lz=21.2406 [Potentials] pseudopotential=pseudo.H [Poisson] -solver=CG +solver=PCG [Run] type=QUENCH [Quench] diff --git a/examples/Li128/mgmol_quench_hmvp.cfg 
b/examples/Li128/mgmol_quench_hmvp.cfg index 9ce53512..7a1198e8 100644 --- a/examples/Li128/mgmol_quench_hmvp.cfg +++ b/examples/Li128/mgmol_quench_hmvp.cfg @@ -15,7 +15,7 @@ lz= 26.52 [Potentials] pseudopotential=pseudo.Li_ONCVPSP_LDA [Poisson] -solver=CG +solver=PCG [Run] type=QUENCH [Quench] diff --git a/examples/Li128/mgmol_quench_mvp.cfg b/examples/Li128/mgmol_quench_mvp.cfg index d2d3c1dd..41ed3731 100644 --- a/examples/Li128/mgmol_quench_mvp.cfg +++ b/examples/Li128/mgmol_quench_mvp.cfg @@ -15,7 +15,7 @@ lz= 26.52 [Potentials] pseudopotential=pseudo.Li_ONCVPSP_LDA [Poisson] -solver=CG +solver=PCG [Run] type=QUENCH [Quench] diff --git a/examples/Li16/mvp.cfg b/examples/Li16/mvp.cfg index a2fe2f97..efea30df 100644 --- a/examples/Li16/mvp.cfg +++ b/examples/Li16/mvp.cfg @@ -15,7 +15,7 @@ lz=13.26 [Potentials] pseudopotential=pseudo.Li_ONCVPSP_LDA [Poisson] -solver=CG +solver=PCG [Run] type=QUENCH [Quench] diff --git a/examples/Li2GTH/davidson.cfg b/examples/Li2GTH/davidson.cfg index d211b812..929ef604 100644 --- a/examples/Li2GTH/davidson.cfg +++ b/examples/Li2GTH/davidson.cfg @@ -15,7 +15,7 @@ lz=18. 
[Potentials] pseudopotential=pseudo.Li_GTH_PBE [Poisson] -solver=CG +solver=PCG bcx=0 bcy=0 bcz=0 diff --git a/examples/ShortSighted/mgmol_md.cfg b/examples/ShortSighted/mgmol_md.cfg index 18bc419b..7c485c80 100644 --- a/examples/ShortSighted/mgmol_md.cfg +++ b/examples/ShortSighted/mgmol_md.cfg @@ -15,7 +15,7 @@ lz=21.2406 [Potentials] pseudopotential=pseudo.D_tm_pbe [Poisson] -solver=CG +solver=PCG max_steps_initial=20 max_steps=20 bcx=periodic diff --git a/examples/ShortSighted/mgmol_quench.cfg b/examples/ShortSighted/mgmol_quench.cfg index 17499e60..a89e6b47 100644 --- a/examples/ShortSighted/mgmol_quench.cfg +++ b/examples/ShortSighted/mgmol_quench.cfg @@ -15,7 +15,7 @@ lz=21.2406 [Potentials] pseudopotential=pseudo.D_tm_pbe [Poisson] -solver=CG +solver=PCG max_steps_initial=20 max_steps=20 bcx=periodic diff --git a/examples/Water4x4y4z/md.cfg b/examples/Water4x4y4z/md.cfg index bdf99eac..749da19f 100644 --- a/examples/Water4x4y4z/md.cfg +++ b/examples/Water4x4y4z/md.cfg @@ -16,7 +16,7 @@ lz= 93.84 pseudopotential=pseudo.O_ONCV_PBE_SG15 pseudopotential=pseudo.D_ONCV_PBE_SG15 [Poisson] -solver=CG +solver=PCG [Run] type=MD [MD] diff --git a/examples/Water4x4y4z/quench.cfg b/examples/Water4x4y4z/quench.cfg index 7f58a871..d50f860c 100644 --- a/examples/Water4x4y4z/quench.cfg +++ b/examples/Water4x4y4z/quench.cfg @@ -16,7 +16,7 @@ lz= 93.84 pseudopotential=pseudo.O_ONCV_PBE_SG15 pseudopotential=pseudo.D_ONCV_PBE_SG15 [Poisson] -solver=CG +solver=PCG [Run] type=QUENCH [Quench] diff --git a/scripts/build_pel.sh b/scripts/build_pel.sh index 0264cafb..bb7d72dd 100755 --- a/scripts/build_pel.sh +++ b/scripts/build_pel.sh @@ -1,31 +1,31 @@ ##! /bin/csh -f ## An example script to build on LLNL Peloton systems. ## For now, this script assumes intel/ mkl libraries are being used. - + # load some modules source scripts/modules.pel - + # set some environment variables. 
Set them explicitly or use loaded module path (preferred) # Here we use an explicit path for scalapack to be consistent with the path for the blas libraries and avoid # benign cmake warnings -setenv SCALAPACK_ROOT /usr/tce/packages/mkl/mkl-2019.0/ +setenv SCALAPACK_ROOT ${MKLROOT} setenv HDF5_ROOT ${HDF5} - + # We need to define the cmake blas vendor option here to find the right one. set BLAS_VENDOR = Intel10_64lp - + # manually set the location of BLACS libraries for scalapack -set BLACS_LIB = ${SCALAPACK_ROOT}/lib - +set BLACS_LIB = ${SCALAPACK_ROOT}/lib/intel64 + set MGMOL_ROOT = `pwd` - + set INSTALL_DIR = ${MGMOL_ROOT}/mgmol_install mkdir -p ${INSTALL_DIR} - + set BUILD_DIR = ${MGMOL_ROOT}/mgmol_build mkdir -p ${BUILD_DIR} cd ${BUILD_DIR} - + # call cmake cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_DIR} \ -DCMAKE_CXX_COMPILER=mpic++ \ @@ -33,8 +33,12 @@ cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_DIR} \ -DMPIEXEC_NUMPROC_FLAG="-n" \ -DBLA_VENDOR=${BLAS_VENDOR} \ -DSCALAPACK_BLACS_LIBRARY=${BLACS_LIB}/libmkl_blacs_intelmpi_lp64.so \ + -DCMAKE_BUILD_TYPE=Release \ + -DMPIEXEC_EXECUTABLE=/usr/bin/srun \ .. 
- + # call make install make -j make install + + diff --git a/scripts/modules.pel b/scripts/modules.pel index 5e4efa85..bdad8018 100644 --- a/scripts/modules.pel +++ b/scripts/modules.pel @@ -1,7 +1,9 @@ -module load intel/19.0.4 -module load hdf5-parallel/1.10.2 -module load boost/1.69.0 -module load mkl/2019.0 +##TOSS4 options +module load intel/2022.1.0 +module load hdf5-parallel/1.14.0 +module load mkl module load cmake/3.14.5 -module load python/3.7.2 - +module load python +module load boost +## manually add boost path +setenv LD_LIBRARY_PATH ${BOOST_ROOT}/lib:$LD_LIBRARY_PATH diff --git a/src/Control.cc b/src/Control.cc index a76abb03..428e7a4f 100644 --- a/src/Control.cc +++ b/src/Control.cc @@ -41,9 +41,10 @@ Control::Control() lrs_extrapolation = 1; // default lrs_compute = 0; system_charge_ = 0.; - poisson_pc_nu1 = 2; - poisson_pc_nu2 = 2; + poisson_pc_nu1 = 1; + poisson_pc_nu2 = 1; poisson_pc_nlev = 10; + poisson_conv_tol = 1.e-8; poisson_pc_data_ = 32; coloring_algo_ = 0; maxDistanceAtomicInfo_ = 8.; @@ -444,7 +445,7 @@ void Control::sync(void) memset(&int_buffer[0], 0, size_int_buffer * sizeof(int)); } - const short size_float_buffer = 44; + const short size_float_buffer = 45; float* float_buffer = new float[size_float_buffer]; if (mype_ == 0) { @@ -491,6 +492,7 @@ void Control::sync(void) float_buffer[41] = pair_mlwf_distance_threshold_; float_buffer[42] = e0_; float_buffer[43] = dm_tol; + float_buffer[44] = poisson_conv_tol; } else { @@ -689,6 +691,7 @@ void Control::sync(void) pair_mlwf_distance_threshold_ = float_buffer[41]; e0_ = float_buffer[42]; dm_tol = float_buffer[43]; + poisson_conv_tol = float_buffer[44]; max_electronic_steps_loose_ = max_electronic_steps; delete[] short_buffer; @@ -1402,7 +1405,7 @@ void Control::setOptions(const boost::program_options::variables_map& vm) if (str.compare("periodic") == 0) bcWF[2] = 1; str = vm["Poisson.solver"].as(); - if (str.compare("CG") == 0) diel_flag_ = 10; + if (str.compare("CG") == 0 || 
str.compare("PCG") == 0) diel_flag_ = 10; if (str.compare("MG") == 0) diel_flag_ = 0; str = vm["Poisson.diel"].as(); @@ -1421,6 +1424,7 @@ void Control::setOptions(const boost::program_options::variables_map& vm) drho0_ = vm["Poisson.beta"].as(); e0_ = vm["Poisson.e0"].as(); poisson_pc_data_ = vm["Poisson.precond_precision"].as(); + poisson_conv_tol = vm["Poisson.conv_tol"].as(); str = vm["ProjectedMatrices.solver"].as(); if (str.compare("short_sighted") == 0) short_sighted = 1; diff --git a/src/Control.h b/src/Control.h index c0b28d31..115828e9 100644 --- a/src/Control.h +++ b/src/Control.h @@ -368,7 +368,7 @@ class Control poisson_pc_nlev = nlev; } - // 10 or larger means CG, otherwise MG V-cycles + // 10 or larger means PCG, otherwise MG V-cycles bool MGPoissonSolver() { return (diel_flag_ / 10 == 0); } bool LangevinThermostat() { return (thermostat_type == 1); } @@ -399,6 +399,7 @@ class Control // dielectric model for solvation short diel; + // Parameters for MG solver/ preconditioner for Poisson problem short poisson_pc_nu1; short poisson_pc_nu2; short poisson_pc_nlev; @@ -472,6 +473,9 @@ class Control // Number of v-cycles for hartree solution short vh_its; + + // convergence tolerance for solving Poisson problem using PCG. 
+ float poisson_conv_tol; // Max number of changes of potential short max_changes_pot; diff --git a/src/Electrostatic.cc b/src/Electrostatic.cc index d912f5f3..cbefab6f 100644 --- a/src/Electrostatic.cc +++ b/src/Electrostatic.cc @@ -254,9 +254,10 @@ void Electrostatic::setup(const short max_sweeps) { Control& ct = *(Control::instance()); const short nu1 = ct.poisson_pc_nu1; - const short nu2 = ct.poisson_pc_nu1; + const short nu2 = ct.poisson_pc_nu2; const short max_nlevs = ct.poisson_pc_nlev; - poisson_solver_->setup(nu1, nu2, max_sweeps, 1.e-16, max_nlevs); + const float conv_tol = ct.poisson_conv_tol; + poisson_solver_->setup(nu1, nu2, max_sweeps, conv_tol, max_nlevs); } template diff --git a/src/read_config.cc b/src/read_config.cc index de11dff9..d02aecb9 100644 --- a/src/read_config.cc +++ b/src/read_config.cc @@ -237,7 +237,7 @@ int read_config(int argc, char** argv, po::variables_map& vm, "safety factor to use for static allocation of orbitals")( "Potentials.filterPseudo", po::value()->default_value('f'), "filter")("Poisson.solver", - po::value()->default_value("CG"), + po::value()->default_value("PCG"), "solver")("Poisson.e0", po::value()->default_value(78.36), "continuum solvent: epsilon0")("Poisson.rho0", po::value()->default_value(0.0004), @@ -245,14 +245,16 @@ int read_config(int argc, char** argv, po::variables_map& vm, po::value()->default_value(1.3), "continuum solvent: beta")("Poisson.FDtype", po::value()->default_value("Mehrstellen"), - "FDtype")("Poisson.nu1", po::value()->default_value(2), - "nu_1")("Poisson.nu2", po::value()->default_value(2), - "nu_2")("Poisson.max_steps", po::value()->default_value(20), + "FDtype")("Poisson.nu1", po::value()->default_value(1), + "MG pre-smoothing sweeps")("Poisson.nu2", po::value()->default_value(1), + "MG post-smoothing sweeps")("Poisson.max_steps", po::value()->default_value(20), "max. nb. steps Poisson solver")("Poisson.max_steps_initial", po::value()->default_value(20), "max. nb. 
steps Poisson solver in first solve")( "Poisson.max_levels", po::value()->default_value(10), - "max. nb. MG levels Poisson solver")("Poisson.reset", + "max. nb. levels for MG solver or Precon")("Poisson.conv_tol", + po::value()->default_value(1.e-8), + "Convergence tolerance for Poisson solver")("Poisson.reset", po::value()->default_value(false), "reset Hartree potential at each MD step")("ABPG.m", po::value()->default_value(1), diff --git a/tests/Chebyshev/cheb.cfg b/tests/Chebyshev/cheb.cfg index f6d67c02..43c95f50 100644 --- a/tests/Chebyshev/cheb.cfg +++ b/tests/Chebyshev/cheb.cfg @@ -15,7 +15,7 @@ lz=13.26 [Potentials] pseudopotential=pseudo.Li_ONCVPSP_LDA [Poisson] -solver=CG +solver=PCG [Run] type=QUENCH [Quench] diff --git a/tests/Davidson/davidson.cfg b/tests/Davidson/davidson.cfg index ce864055..e34f407b 100644 --- a/tests/Davidson/davidson.cfg +++ b/tests/Davidson/davidson.cfg @@ -15,7 +15,7 @@ lz=15.3 [Potentials] pseudopotential=pseudo.Al_LDA_FHI [Poisson] -solver=CG +solver=PCG [Run] type=QUENCH [Quench] diff --git a/tests/DavidsonReplicated/davidson.cfg b/tests/DavidsonReplicated/davidson.cfg index dc093a07..87f0ea1f 100644 --- a/tests/DavidsonReplicated/davidson.cfg +++ b/tests/DavidsonReplicated/davidson.cfg @@ -15,7 +15,7 @@ lz=15.3 [Potentials] pseudopotential=pseudo.Al_LDA_FHI [Poisson] -solver=CG +solver=PCG [Run] type=QUENCH [Quench] diff --git a/tests/ProjectedMatrices/quenchExact.cfg b/tests/ProjectedMatrices/quenchExact.cfg index 30c4d6a6..4e338690 100644 --- a/tests/ProjectedMatrices/quenchExact.cfg +++ b/tests/ProjectedMatrices/quenchExact.cfg @@ -15,7 +15,7 @@ lz=21.2406 [Potentials] pseudopotential=pseudo.D_ONCV_PBE_SG15 [Poisson] -solver=CG +solver=PCG [Run] type=QUENCH [Quench] diff --git a/tests/ProjectedMatrices/quenchShortSighted.cfg b/tests/ProjectedMatrices/quenchShortSighted.cfg index 47615ccd..15e5c903 100644 --- a/tests/ProjectedMatrices/quenchShortSighted.cfg +++ b/tests/ProjectedMatrices/quenchShortSighted.cfg @@ -15,7 +15,7 
@@ lz=21.2406 [Potentials] pseudopotential=pseudo.D_ONCV_PBE_SG15 [Poisson] -solver=CG +solver=PCG [Run] type=QUENCH [Quench] diff --git a/tests/RhoVhRestart/md.cfg b/tests/RhoVhRestart/md.cfg index 2b8a378b..5fba5590 100644 --- a/tests/RhoVhRestart/md.cfg +++ b/tests/RhoVhRestart/md.cfg @@ -15,6 +15,11 @@ lz=9. [Potentials] pseudopotential=pseudo.O_ONCV_PBE_SG15 pseudopotential=pseudo.H_ONCV_PBE_SG15 +[Poisson] +solver=PCG +conv_tol=1.e-16 +nu1=2 +nu2=2 [Run] type=MD [MD] diff --git a/tests/RhoVhRestart/mgmol.cfg b/tests/RhoVhRestart/mgmol.cfg index eee7f11c..aff5795d 100644 --- a/tests/RhoVhRestart/mgmol.cfg +++ b/tests/RhoVhRestart/mgmol.cfg @@ -15,6 +15,11 @@ lz=9. [Potentials] pseudopotential=pseudo.O_ONCV_PBE_SG15 pseudopotential=pseudo.H_ONCV_PBE_SG15 +[Poisson] +solver=PCG +conv_tol=1.e-16 +nu1=2 +nu2=2 [Run] type=QUENCH [Quench] diff --git a/tests/RhoVhRestart/restart.cfg b/tests/RhoVhRestart/restart.cfg index 20f0293a..74b1abc5 100644 --- a/tests/RhoVhRestart/restart.cfg +++ b/tests/RhoVhRestart/restart.cfg @@ -15,6 +15,11 @@ lz=9. 
[Potentials] pseudopotential=pseudo.O_ONCV_PBE_SG15 pseudopotential=pseudo.H_ONCV_PBE_SG15 +[Poisson] +solver=PCG +conv_tol=1.e-16 +nu1=2 +nu2=2 [Run] type=QUENCH [Quench] diff --git a/tests/ShortSighted/md.cfg b/tests/ShortSighted/md.cfg index 012deb56..5730d62e 100644 --- a/tests/ShortSighted/md.cfg +++ b/tests/ShortSighted/md.cfg @@ -15,7 +15,7 @@ lz=21.2406 [Potentials] pseudopotential=pseudo.D_ONCV_PBE_SG15 [Poisson] -solver=CG +solver=PCG [Run] type=MD [MD] diff --git a/tests/ShortSighted/quench.cfg b/tests/ShortSighted/quench.cfg index d6821bba..8b21d4cd 100644 --- a/tests/ShortSighted/quench.cfg +++ b/tests/ShortSighted/quench.cfg @@ -15,7 +15,7 @@ lz=21.2406 [Potentials] pseudopotential=pseudo.D_ONCV_PBE_SG15 [Poisson] -solver=CG +solver=PCG [Run] type=QUENCH [Quench] From 5908a8097969f19a1956199717852ad85d6ea1c5 Mon Sep 17 00:00:00 2001 From: Siu Wun Cheung Date: Tue, 16 Dec 2025 11:19:02 -0800 Subject: [PATCH 97/99] Resolve errors --- CMakeLists.txt | 2 +- src/ExtendedGridOrbitals.cc | 26 +++++++++++++++++++++++++ src/ExtendedGridOrbitals.h | 2 ++ src/rom.cc | 38 ++++++++++++++++++------------------- 4 files changed, 48 insertions(+), 20 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2dab46d4..23d058f0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -151,7 +151,7 @@ if(USE_LIBROM) find_package(libROM REQUIRED) if(libROM_FOUND) - set(MGMOL_HAS_LIBROM 1) + add_definitions(-DMGMOL_HAS_LIBROM) endif(libROM_FOUND) endif(USE_LIBROM) diff --git a/src/ExtendedGridOrbitals.cc b/src/ExtendedGridOrbitals.cc index 589e1f5b..48eafce6 100644 --- a/src/ExtendedGridOrbitals.cc +++ b/src/ExtendedGridOrbitals.cc @@ -30,6 +30,10 @@ #include #include +#ifdef MGMOL_HAS_LIBROM +#include "librom.h" +#endif + #define ORBITAL_OCCUPATION 2. 
std::string getDatasetName(const std::string& name, const int color); @@ -1810,6 +1814,28 @@ void ExtendedGridOrbitals::initWF( #endif } +#ifdef MGMOL_HAS_LIBROM +template +void ExtendedGridOrbitals::set(std::string file_path, int rdim) +{ + const int dim = getLocNumpt(); + + CAROM::BasisReader reader(file_path); + CAROM::Matrix* orbital_basis = reader.getSpatialBasis(rdim); + + Control& ct = *(Control::instance()); + Mesh* mymesh = Mesh::instance(); + pb::GridFunc gf_psi(mymesh->grid(), ct.bcWF[0], ct.bcWF[1], ct.bcWF[2]); + CAROM::Vector psi; + for (int i = 0; i < rdim; ++i) + { + orbital_basis->getColumn(i, psi); + gf_psi.assign(psi.getData()); + setPsi(gf_psi, i); + } +} +#endif + template void ExtendedGridOrbitals::axpy( const ORBDTYPE alpha, const ExtendedGridOrbitals&); diff --git a/src/ExtendedGridOrbitals.h b/src/ExtendedGridOrbitals.h index 77887bb4..e1fb667c 100644 --- a/src/ExtendedGridOrbitals.h +++ b/src/ExtendedGridOrbitals.h @@ -402,9 +402,11 @@ class ExtendedGridOrbitals : public Orbitals const pb::Grid& mygrid = mymesh->grid(); return mygrid.maxDomainSize(); } + #ifdef MGMOL_HAS_LIBROM void set(std::string file_path, int rdim); #endif + }; #endif diff --git a/src/rom.cc b/src/rom.cc index 5085f1d3..863d76e9 100644 --- a/src/rom.cc +++ b/src/rom.cc @@ -92,25 +92,25 @@ void MGmol::project_orbital(std::string file_path, int rdim, Orbit } } -template -void ExtendedGridOrbitals::set(std::string file_path, int rdim) -{ - const int dim = getLocNumpt(); - - CAROM::BasisReader reader(file_path); - CAROM::Matrix* orbital_basis = reader.getSpatialBasis(rdim); - - Control& ct = *(Control::instance()); - Mesh* mymesh = Mesh::instance(); - pb::GridFunc gf_psi(mymesh->grid(), ct.bcWF[0], ct.bcWF[1], ct.bcWF[2]); - CAROM::Vector psi; - for (int i = 0; i < rdim; ++i) - { - orbital_basis->getColumn(i, psi); - gf_psi.assign(psi.getData()); - setPsi(gf_psi, i); - } -} +//template +//void ExtendedGridOrbitals::set(std::string file_path, int rdim) +//{ +// const int 
dim = getLocNumpt(); + +// CAROM::BasisReader reader(file_path); +// CAROM::Matrix* orbital_basis = reader.getSpatialBasis(rdim); + +// Control& ct = *(Control::instance()); +// Mesh* mymesh = Mesh::instance(); +// pb::GridFunc gf_psi(mymesh->grid(), ct.bcWF[0], ct.bcWF[1], ct.bcWF[2]); +// CAROM::Vector psi; +// for (int i = 0; i < rdim; ++i) +// { +// orbital_basis->getColumn(i, psi); +// gf_psi.assign(psi.getData()); +// setPsi(gf_psi, i); +// } +//} template class MGmol>; template class MGmol>; From 5f0ed94db450ff53b3c418430eef07bda66f87b8 Mon Sep 17 00:00:00 2001 From: Siu Wun Cheung Date: Wed, 17 Dec 2025 08:39:08 -0800 Subject: [PATCH 98/99] Minor change to scripts --- examples/PinnedH2O/job.ref | 2 +- examples/PinnedH2O/job.rom_3DOF | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/PinnedH2O/job.ref b/examples/PinnedH2O/job.ref index 32024e6f..f9826e8e 100644 --- a/examples/PinnedH2O/job.ref +++ b/examples/PinnedH2O/job.ref @@ -8,7 +8,7 @@ date setenv OMP_NUM_THREADS 1 #setenv KMP_DETERMINISTIC_REDUCTION 1 -set ncpus = 8 +set ncpus = 1 set case = 2 set maindir = /p/lustre2/cheung26/mgmol diff --git a/examples/PinnedH2O/job.rom_3DOF b/examples/PinnedH2O/job.rom_3DOF index 0e38688b..b7dc5d6c 100644 --- a/examples/PinnedH2O/job.rom_3DOF +++ b/examples/PinnedH2O/job.rom_3DOF @@ -8,7 +8,7 @@ date setenv OMP_NUM_THREADS 1 #setenv KMP_DETERMINISTIC_REDUCTION 1 -set ncpus = 8 +set ncpus = 1 set case = 2 set maindir = /p/lustre2/cheung26/mgmol From aa6217f15cd6c1563343b38164993e0a09f0dfde Mon Sep 17 00:00:00 2001 From: Siu Wun Cheung Date: Wed, 17 Dec 2025 14:55:19 -0800 Subject: [PATCH 99/99] Minor fix in names --- examples/PinnedH2O/mgmol_rom_3DOF_test2.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/PinnedH2O/mgmol_rom_3DOF_test2.cfg b/examples/PinnedH2O/mgmol_rom_3DOF_test2.cfg index 24273c40..bb2912d9 100644 --- a/examples/PinnedH2O/mgmol_rom_3DOF_test2.cfg +++ 
b/examples/PinnedH2O/mgmol_rom_3DOF_test2.cfg @@ -38,7 +38,7 @@ output_level=4 [ROM] stage=online_pinned_H2O_3dof [ROM.offline] -basis_file=/usr/workspace/nlrom/MGmol/PinnedH2O_3DOF/data_8/PinnedH2O_3DOF_orbitals_basis_2_2 +basis_file=/usr/workspace/nlrom/MGmol/PinnedH2O_3DOF/data_1/PinnedH2O_3DOF_orbitals_basis_2_2 [ROM.basis] compare_md=false number_of_orbital_basis=34