diff --git a/cmake/gauxc-dep-versions.cmake b/cmake/gauxc-dep-versions.cmake index cd3969d8..265b92c1 100644 --- a/cmake/gauxc-dep-versions.cmake +++ b/cmake/gauxc-dep-versions.cmake @@ -11,7 +11,7 @@ set( GAUXC_CUTLASS_REPOSITORY https://github.com/NVIDIA/cutlass.git ) set( GAUXC_CUTLASS_REVISION v2.10.0 ) set( GAUXC_EXCHCXX_REPOSITORY https://github.com/wavefunction91/ExchCXX.git ) -set( GAUXC_EXCHCXX_REVISION 21a4700a826ec0beae1311a1d59677393bcb168f ) +set( GAUXC_EXCHCXX_REVISION ef88e168249e38c526353fe7c48aab4bc697c99d ) set( GAUXC_GAU2GRID_REPOSITORY https://github.com/dgasmith/gau2grid.git ) set( GAUXC_GAU2GRID_REVISION v2.0.6 ) diff --git a/include/gauxc/load_balancer.hpp b/include/gauxc/load_balancer.hpp index 738464f2..c7b55e1d 100644 --- a/include/gauxc/load_balancer.hpp +++ b/include/gauxc/load_balancer.hpp @@ -102,6 +102,12 @@ class LoadBalancer { const shell_pair_type& shell_pairs() const; const shell_pair_type& shell_pairs(); + /// Return the underlying protonic BasisSet instance used to generate this LoadBalancer + const basis_type& protonic_basis() const; + + /// Return the protonic BasisSetMap instance corresponding to protonic basis/molecule + const basis_map_type& protonic_basis_map() const; + /// Return the runtime handle used to construct this LoadBalancer const RuntimeEnvironment& runtime() const; @@ -163,6 +169,10 @@ class LoadBalancerFactory { LoadBalancer get_instance( const RuntimeEnvironment& rt, const Molecule& mol, const MolGrid& mg, const BasisSet& bs); + LoadBalancer get_instance( const RuntimeEnvironment& rt, + const Molecule& mol, const MolGrid& mg, const BasisSet& bs, + const BasisSet& bs2 ); + /** * @brief Generate a shared pointer to a LoadBalancer instance per kernel and * execution space specfication @@ -180,6 +190,11 @@ class LoadBalancerFactory { const RuntimeEnvironment& rt, const Molecule& mol, const MolGrid& mg, const BasisSet&); + std::shared_ptr get_shared_instance( + const RuntimeEnvironment& rt, + const Molecule& mol, const MolGrid& mg, const BasisSet&, + const BasisSet& ); + private: ExecutionSpace ex_; ///< Execution space for the generated LoadBalancer instances diff --git a/include/gauxc/xc_integrator.hpp b/include/gauxc/xc_integrator.hpp index e08da39c..fecd0c7b 100644 --- a/include/gauxc/xc_integrator.hpp +++ b/include/gauxc/xc_integrator.hpp @@ -34,6 +34,8 @@ class XCIntegrator { using exc_vxc_type_rks = std::tuple< value_type, matrix_type >; using exc_vxc_type_uks = std::tuple< value_type, matrix_type, matrix_type >; using exc_vxc_type_gks = std::tuple< value_type, matrix_type, matrix_type, matrix_type, matrix_type >; + using exc_vxc_type_neo_rks = std::tuple< value_type, value_type, matrix_type, matrix_type, matrix_type >; + using exc_vxc_type_neo_uks = std::tuple< value_type, value_type, matrix_type, matrix_type, matrix_type, matrix_type >; using exc_grad_type = std::vector< value_type >; using exx_type = matrix_type; @@ -64,7 +66,11 @@ class XCIntegrator { exc_vxc_type_uks eval_exc_vxc ( const MatrixType&, const MatrixType&, const IntegratorSettingsXC& = IntegratorSettingsXC{} ); exc_vxc_type_gks eval_exc_vxc ( const MatrixType&, const MatrixType&, const MatrixType&, const MatrixType&, - const IntegratorSettingsXC& = IntegratorSettingsXC{}); + const IntegratorSettingsXC& = IntegratorSettingsXC{} ); + exc_vxc_type_neo_rks neo_eval_exc_vxc ( const MatrixType&, const MatrixType&, const MatrixType&, + const IntegratorSettingsXC& = IntegratorSettingsXC{} ); + exc_vxc_type_neo_uks neo_eval_exc_vxc ( const MatrixType&, const MatrixType&, const MatrixType&, const MatrixType&, + const IntegratorSettingsXC& = IntegratorSettingsXC{} ); exc_grad_type eval_exc_grad( const MatrixType& ); diff --git a/include/gauxc/xc_integrator/impl.hpp b/include/gauxc/xc_integrator/impl.hpp index 85a655cc..b70b08dd 100644 --- a/include/gauxc/xc_integrator/impl.hpp +++ b/include/gauxc/xc_integrator/impl.hpp @@ -70,9 +70,26 @@ template typename XCIntegrator::exc_vxc_type_gks XCIntegrator::eval_exc_vxc( const MatrixType& Ps, const MatrixType& Pz, const MatrixType& Py, const MatrixType& Px, const IntegratorSettingsXC& ks_settings ) { - if( not pimpl_ ) GAUXC_PIMPL_NOT_INITIALIZED(); - return pimpl_->eval_exc_vxc(Ps, Pz, Py, Px, ks_settings); - }; + if( not pimpl_ ) GAUXC_PIMPL_NOT_INITIALIZED(); + return pimpl_->eval_exc_vxc(Ps, Pz, Py, Px, ks_settings); +}; + +template +typename XCIntegrator::exc_vxc_type_neo_rks + XCIntegrator::neo_eval_exc_vxc( const MatrixType& elec_Ps, const MatrixType& prot_Ps, const MatrixType& prot_Pz, + const IntegratorSettingsXC& ks_settings ){ + if( not pimpl_ ) GAUXC_PIMPL_NOT_INITIALIZED(); + return pimpl_->neo_eval_exc_vxc(elec_Ps, prot_Ps, prot_Pz, ks_settings); +}; + +template +typename XCIntegrator::exc_vxc_type_neo_uks + XCIntegrator::neo_eval_exc_vxc( const MatrixType& elec_Ps, const MatrixType& elec_Pz, const MatrixType& prot_Ps, const MatrixType& prot_Pz, + const IntegratorSettingsXC& ks_settings ){ + if( not pimpl_ ) GAUXC_PIMPL_NOT_INITIALIZED(); + return pimpl_->neo_eval_exc_vxc(elec_Ps, elec_Pz, prot_Ps, prot_Pz, ks_settings); +}; + template typename XCIntegrator::exc_grad_type diff --git a/include/gauxc/xc_integrator/integrator_factory.hpp b/include/gauxc/xc_integrator/integrator_factory.hpp index d63d23be..2790d3a9 100644 --- a/include/gauxc/xc_integrator/integrator_factory.hpp +++ b/include/gauxc/xc_integrator/integrator_factory.hpp @@ -89,6 +89,58 @@ class XCIntegratorFactory { return get_shared_instance( std::make_shared(func), lb ); } + /** Generate XCIntegrator instance + * + * @param[in] func XC functional + * @param[in] epcfunc EPC functional + * @param[in] lb Preconstructed Load Balancer instance + */ + std::shared_ptr get_shared_instance( + std::shared_ptr func, + std::shared_ptr epcfunc, + std::shared_ptr lb ) { + + // Create Local Work Driver + auto lwd = LocalWorkDriverFactory::make_local_work_driver( ex_, + lwd_kernel_, local_work_settings_ ); + + // Create Reduction Driver + auto rd = ReductionDriverFactory::get_shared_instance( + lb->runtime(), rd_kernel_ ); + + // Create Integrator instance + std::transform( input_type_.begin(), input_type_.end(), input_type_.begin(), + ::toupper ); + + if(!epcfunc->is_polarized()) + GAUXC_GENERIC_EXCEPTION("EPC FUNCTIONAL NOT POLARIZED"); + + if( input_type_ == "REPLICATED" ) + return std::make_shared( + ReplicatedXCIntegratorFactory::make_integrator_impl( + ex_, integrator_kernel_, func, epcfunc, lb, std::move(lwd), rd + ) + ); + else GAUXC_GENERIC_EXCEPTION("INTEGRATOR TYPE NOT RECOGNIZED"); + + return nullptr; + + } + + auto get_shared_instance( const functional_type& func, const functional_type& epcfunc, + const LoadBalancer& lb ) { + return get_shared_instance( std::make_shared(func), + std::make_shared(epcfunc), + std::make_shared(lb) ); + } + + auto get_shared_instance( const functional_type& func, const functional_type& epcfunc, + std::shared_ptr lb ) { + return get_shared_instance( std::make_shared(func), + std::make_shared(epcfunc), + lb ); + } + template integrator_type get_instance( Args&&... args ) { diff --git a/include/gauxc/xc_integrator/replicated/impl.hpp b/include/gauxc/xc_integrator/replicated/impl.hpp index a892f5e3..674bd60a 100644 --- a/include/gauxc/xc_integrator/replicated/impl.hpp +++ b/include/gauxc/xc_integrator/replicated/impl.hpp @@ -157,6 +157,59 @@ typename ReplicatedXCIntegrator::exc_vxc_type_gks } +template +typename ReplicatedXCIntegrator::exc_vxc_type_neo_rks + ReplicatedXCIntegrator::neo_eval_exc_vxc_( const MatrixType& elec_Ps, const MatrixType& prot_Ps, const MatrixType& prot_Pz, + const IntegratorSettingsXC& ks_settings ) { + + if( not pimpl_ ) GAUXC_PIMPL_NOT_INITIALIZED(); + matrix_type elec_VXCs( elec_Ps.rows(), elec_Ps.cols() ); + matrix_type prot_VXCs( prot_Ps.rows(), prot_Ps.cols() ); + matrix_type prot_VXCz( prot_Pz.rows(), prot_Pz.cols() ); + value_type elec_EXC; + value_type prot_EXC; + + pimpl_->neo_eval_exc_vxc( elec_Ps.rows(), elec_Ps.cols(), prot_Ps.rows(), prot_Ps.cols(), + elec_Ps.data(), elec_Ps.rows(), + prot_Ps.data(), prot_Ps.rows(), + prot_Pz.data(), prot_Pz.rows(), + elec_VXCs.data(), elec_VXCs.rows(), + prot_VXCs.data(), prot_VXCs.rows(), + prot_VXCz.data(), prot_VXCz.rows(), + &elec_EXC, &prot_EXC, ks_settings ); + + return std::make_tuple( elec_EXC, prot_EXC, elec_VXCs, prot_VXCs, prot_VXCz ); + +} + +template +typename ReplicatedXCIntegrator::exc_vxc_type_neo_uks + ReplicatedXCIntegrator::neo_eval_exc_vxc_( const MatrixType& elec_Ps, const MatrixType& elec_Pz, const MatrixType& prot_Ps, const MatrixType& prot_Pz, + const IntegratorSettingsXC& ks_settings ) { + + if( not pimpl_ ) GAUXC_PIMPL_NOT_INITIALIZED(); + matrix_type elec_VXCs( elec_Ps.rows(), elec_Ps.cols() ); + matrix_type elec_VXCz( elec_Pz.rows(), elec_Pz.cols() ); + matrix_type prot_VXCs( prot_Ps.rows(), prot_Ps.cols() ); + matrix_type prot_VXCz( prot_Pz.rows(), prot_Pz.cols() ); + value_type elec_EXC; + value_type prot_EXC; + + pimpl_->neo_eval_exc_vxc( elec_Ps.rows(), elec_Ps.cols(), prot_Ps.rows(), prot_Ps.cols(), + elec_Ps.data(), elec_Ps.rows(), + elec_Pz.data(), elec_Pz.rows(), + prot_Ps.data(), prot_Ps.rows(), + prot_Pz.data(), prot_Pz.rows(), + elec_VXCs.data(), elec_VXCs.rows(), + elec_VXCz.data(), elec_VXCz.rows(), + prot_VXCs.data(), prot_VXCs.rows(), + prot_VXCz.data(), prot_VXCz.rows(), + &elec_EXC, &prot_EXC, ks_settings ); + + return std::make_tuple( elec_EXC, prot_EXC, elec_VXCs, elec_VXCz, prot_VXCs, prot_VXCz ); + +} + template typename ReplicatedXCIntegrator::exc_grad_type ReplicatedXCIntegrator::eval_exc_grad_( const MatrixType& P ) { diff --git a/include/gauxc/xc_integrator/replicated/replicated_xc_device_integrator.hpp b/include/gauxc/xc_integrator/replicated/replicated_xc_device_integrator.hpp index 4721243c..e8f82aff 100644 --- a/include/gauxc/xc_integrator/replicated/replicated_xc_device_integrator.hpp +++ b/include/gauxc/xc_integrator/replicated/replicated_xc_device_integrator.hpp @@ -57,6 +57,15 @@ struct ReplicatedXCDeviceIntegratorFactory { std::shared_ptr rd ); + static ptr_return_t make_integrator_impl( + std::string integrator_kernel, + std::shared_ptr func, + std::shared_ptr epcfunc, + std::shared_ptr lb, + std::unique_ptr&& lwd, + std::shared_ptr rd + ); + }; diff --git a/include/gauxc/xc_integrator/replicated/replicated_xc_host_integrator.hpp b/include/gauxc/xc_integrator/replicated/replicated_xc_host_integrator.hpp index 57685599..d1fd41d1 100644 --- a/include/gauxc/xc_integrator/replicated/replicated_xc_host_integrator.hpp +++ b/include/gauxc/xc_integrator/replicated/replicated_xc_host_integrator.hpp @@ -56,6 +56,15 @@ struct ReplicatedXCHostIntegratorFactory { std::unique_ptr&& lwd, std::shared_ptr rd ); + + static ptr_return_t make_integrator_impl( + std::string integrator_kernel, + std::shared_ptr func, + std::shared_ptr epcfunc, + std::shared_ptr lb, + std::unique_ptr&& lwd, + std::shared_ptr rd + ); }; diff --git a/include/gauxc/xc_integrator/replicated/replicated_xc_integrator_factory.hpp b/include/gauxc/xc_integrator/replicated/replicated_xc_integrator_factory.hpp index d75a92d8..0eecb4bd 100644 --- a/include/gauxc/xc_integrator/replicated/replicated_xc_integrator_factory.hpp +++ b/include/gauxc/xc_integrator/replicated/replicated_xc_integrator_factory.hpp @@ -73,6 +73,59 @@ struct ReplicatedXCIntegratorFactory { } + + + /** Generate a ReplicatedXCIntegrator instance + * + * @param[in] ex Execution space for integrator instance + * @param[in] integration_kernel Name of integration scaffold to load ("Default", "Reference", etc) + * @param[in] func XC functional to integrate + * @param[in] epcfunc EPC functional to integrate + * @param[in] lb Pregenerated LoadBalancer instance + * @param[in] lwd Local Work Driver + */ + static ptr_return_t make_integrator_impl( + ExecutionSpace ex, + std::string integrator_kernel, + std::shared_ptr func, + std::shared_ptr epcfunc, + std::shared_ptr lb, + std::unique_ptr&& lwd, + std::shared_ptr rd + ) { + + + + switch(ex) { + + using host_factory = + detail::ReplicatedXCHostIntegratorFactory; + case ExecutionSpace::Host: + return std::make_unique( + host_factory::make_integrator_impl( + integrator_kernel, func, epcfunc, lb, std::move(lwd), rd + ) + ); + + #ifdef GAUXC_HAS_DEVICE + using device_factory = + detail::ReplicatedXCDeviceIntegratorFactory; + case ExecutionSpace::Device: + return std::make_unique( + device_factory::make_integrator_impl( + integrator_kernel, func, epcfunc, lb, std::move(lwd), rd + ) + ); + #endif + + default: + GAUXC_GENERIC_EXCEPTION("ReplicatedXCIntegrator ExecutionSpace Not Supported"); + } + + return nullptr; + + } + }; diff --git a/include/gauxc/xc_integrator/replicated/replicated_xc_integrator_impl.hpp b/include/gauxc/xc_integrator/replicated/replicated_xc_integrator_impl.hpp index 70c33db5..3622b275 100644 --- a/include/gauxc/xc_integrator/replicated/replicated_xc_integrator_impl.hpp +++ b/include/gauxc/xc_integrator/replicated/replicated_xc_integrator_impl.hpp @@ -29,6 +29,7 @@ class ReplicatedXCIntegratorImpl { protected: std::shared_ptr< functional_type > func_; ///< XC functional + std::shared_ptr< functional_type > epcfunc_; ///< EPC functional for NEO std::shared_ptr< LoadBalancer > load_balancer_; ///< Load Balancer std::unique_ptr< LocalWorkDriver > local_work_driver_; ///< Local Work Driver std::shared_ptr< ReductionDriver > reduction_driver_; ///< Reduction Driver @@ -73,7 +74,24 @@ class ReplicatedXCIntegratorImpl { value_type* VXCy, int64_t ldvxcy, value_type* VXCx, int64_t ldvxcx, value_type* EXC, const IntegratorSettingsXC& ks_settings ) = 0; - + virtual void neo_eval_exc_vxc_( int64_t elec_m, int64_t elec_mn, int64_t prot_m, int64_t prot_n, + const value_type* elec_Ps, int64_t elec_ldps, + const value_type* prot_Ps, int64_t prot_ldps, + const value_type* prot_Pz, int64_t prot_ldpz, + value_type* elec_VXCs, int64_t elec_ldvxcs, + value_type* prot_VXCs, int64_t prot_ldvxcs, + value_type* prot_VXCz, int64_t prot_ldvxcz, + value_type* elec_EXC, value_type* prot_EXC, const IntegratorSettingsXC& ks_settings) = 0; + virtual void neo_eval_exc_vxc_( int64_t elec_m, int64_t elec_n, int64_t prot_m, int64_t prot_n, + const value_type* elec_Ps, int64_t elec_ldps, + const value_type* elec_Pz, int64_t elec_ldpz, + const value_type* prot_Ps, int64_t prot_ldps, + const value_type* prot_Pz, int64_t prot_ldpz, + value_type* elec_VXCs, int64_t elec_ldvxcs, + value_type* elec_VXCz, int64_t elec_ldvxcz, + value_type* prot_VXCs, int64_t prot_ldvxcs, + value_type* prot_VXCz, int64_t prot_ldvxcz, + value_type* elec_EXC, value_type* prot_EXC, const IntegratorSettingsXC& ks_settings) = 0; virtual void eval_exc_grad_( int64_t m, int64_t n, const value_type* P, int64_t ldp, value_type* EXC_GRAD ) = 0; virtual void eval_exx_( int64_t m, int64_t n, const value_type* P, @@ -88,6 +106,13 @@ class ReplicatedXCIntegratorImpl { std::shared_ptr< ReductionDriver> rd ); + ReplicatedXCIntegratorImpl( std::shared_ptr< functional_type > func, + std::shared_ptr< functional_type > epcfunc, + std::shared_ptr< LoadBalancer > lb, + std::unique_ptr< LocalWorkDriver >&& lwd, + std::shared_ptr< ReductionDriver> rd + ); + virtual ~ReplicatedXCIntegratorImpl() noexcept; void integrate_den( int64_t m, int64_t n, const value_type* P, @@ -128,7 +153,26 @@ class ReplicatedXCIntegratorImpl { value_type* VXCy, int64_t ldvxcy, value_type* VXCx, int64_t ldvxcx, value_type* EXC, const IntegratorSettingsXC& ks_settings ); - + + void neo_eval_exc_vxc( int64_t elec_m, int64_t elec_n, int64_t prot_m, int64_t prot_n, + const value_type* elec_Ps, int64_t elec_ldps, + const value_type* prot_Ps, int64_t prot_ldps, + const value_type* prot_Pz, int64_t prot_ldpz, + value_type* elec_VXCs, int64_t elec_ldvxcs, + value_type* prot_VXCs, int64_t prot_ldvxcs, + value_type* prot_VXCz, int64_t prot_ldvxcz, + value_type* elec_EXC, value_type* prot_EXC, const IntegratorSettingsXC& ks_settings ); + + void neo_eval_exc_vxc( int64_t elec_m, int64_t elec_n, int64_t prot_m, int64_t prot_n, + const value_type* elec_Ps, int64_t elec_ldps, + const value_type* elec_Pz, int64_t elec_ldpz, + const value_type* prot_Ps, int64_t prot_ldps, + const value_type* prot_Pz, int64_t prot_ldpz, + value_type* elec_VXCs, int64_t elec_ldvxcs, + value_type* elec_VXCz, int64_t elec_ldvxcz, + value_type* prot_VXCs, int64_t prot_ldvxcs, + value_type* prot_VXCz, int64_t prot_ldvxcz, + value_type* elec_EXC, value_type* prot_EXC, const IntegratorSettingsXC& ks_settings ); void eval_exc_grad( int64_t m, int64_t n, const value_type* P, int64_t ldp, value_type* EXC_GRAD ); diff --git a/include/gauxc/xc_integrator/replicated_xc_integrator.hpp b/include/gauxc/xc_integrator/replicated_xc_integrator.hpp index ac93a4f0..e79057ec 100644 --- a/include/gauxc/xc_integrator/replicated_xc_integrator.hpp +++ b/include/gauxc/xc_integrator/replicated_xc_integrator.hpp @@ -31,6 +31,8 @@ class ReplicatedXCIntegrator : public XCIntegratorImpl { using exc_vxc_type_rks = typename XCIntegratorImpl::exc_vxc_type_rks; using exc_vxc_type_uks = typename XCIntegratorImpl::exc_vxc_type_uks; using exc_vxc_type_gks = typename XCIntegratorImpl::exc_vxc_type_gks; + using exc_vxc_type_neo_rks = typename XCIntegratorImpl::exc_vxc_type_neo_rks; + using exc_vxc_type_neo_uks = typename XCIntegratorImpl::exc_vxc_type_neo_uks; using exc_grad_type = typename XCIntegratorImpl::exc_grad_type; using exx_type = typename XCIntegratorImpl::exx_type; @@ -46,6 +48,8 @@ class ReplicatedXCIntegrator : public XCIntegratorImpl { exc_vxc_type_rks eval_exc_vxc_ ( const MatrixType&, const IntegratorSettingsXC& ) override; exc_vxc_type_uks eval_exc_vxc_ ( const MatrixType&, const MatrixType&, const IntegratorSettingsXC&) override; exc_vxc_type_gks eval_exc_vxc_ ( const MatrixType&, const MatrixType&, const MatrixType&, const MatrixType&, const IntegratorSettingsXC& ) override; + exc_vxc_type_neo_rks neo_eval_exc_vxc_ ( const MatrixType&, const MatrixType&, const MatrixType&, const IntegratorSettingsXC& ) override; + exc_vxc_type_neo_uks neo_eval_exc_vxc_ ( const MatrixType&, const MatrixType&, const MatrixType&, const MatrixType&, const IntegratorSettingsXC& ) override; exc_grad_type eval_exc_grad_( const MatrixType& ) override; exx_type eval_exx_ ( const MatrixType&, const IntegratorSettingsEXX& ) override; const util::Timer& get_timings_() const override; diff --git a/include/gauxc/xc_integrator/xc_integrator_impl.hpp b/include/gauxc/xc_integrator/xc_integrator_impl.hpp index 1406bf8e..713adb86 100644 --- a/include/gauxc/xc_integrator/xc_integrator_impl.hpp +++ b/include/gauxc/xc_integrator/xc_integrator_impl.hpp @@ -23,6 +23,8 @@ class XCIntegratorImpl { using exc_vxc_type_rks = typename XCIntegrator::exc_vxc_type_rks; using exc_vxc_type_uks = typename XCIntegrator::exc_vxc_type_uks; using exc_vxc_type_gks = typename XCIntegrator::exc_vxc_type_gks; + using exc_vxc_type_neo_rks = typename XCIntegrator::exc_vxc_type_neo_rks; + using exc_vxc_type_neo_uks = typename XCIntegrator::exc_vxc_type_neo_uks; using exc_grad_type = typename XCIntegrator::exc_grad_type; using exx_type = typename XCIntegrator::exx_type; @@ -38,6 +40,10 @@ class XCIntegratorImpl { virtual exc_vxc_type_uks eval_exc_vxc_ ( const MatrixType& Ps, const MatrixType& Pz, const IntegratorSettingsXC& ks_settings ) = 0; virtual exc_vxc_type_gks eval_exc_vxc_ ( const MatrixType& Ps, const MatrixType& Pz, const MatrixType& Py, const MatrixType& Px, const IntegratorSettingsXC& ks_settings ) = 0; + virtual exc_vxc_type_neo_rks neo_eval_exc_vxc_ ( const MatrixType& elec_Ps, const MatrixType& prot_Ps, const MatrixType& prot_Pz, + const IntegratorSettingsXC& ks_settings ) = 0; + virtual exc_vxc_type_neo_uks neo_eval_exc_vxc_ ( const MatrixType& elec_Ps, const MatrixType& elec_Pz, const MatrixType& prot_Ps, const MatrixType& prot_Pz, + const IntegratorSettingsXC& ks_settings ) = 0; virtual exc_grad_type eval_exc_grad_( const MatrixType& P ) = 0; virtual exx_type eval_exx_ ( const MatrixType& P, const IntegratorSettingsEXX& settings ) = 0; @@ -106,6 +112,16 @@ class XCIntegratorImpl { exc_vxc_type_gks eval_exc_vxc( const MatrixType& Ps, const MatrixType& Pz, const MatrixType& Py, const MatrixType& Px, const IntegratorSettingsXC& ks_settings ) { return eval_exc_vxc_(Ps, Pz, Py, Px, ks_settings); } + + exc_vxc_type_neo_rks neo_eval_exc_vxc( const MatrixType& elec_Ps, const MatrixType& prot_Ps, const MatrixType& prot_Pz, + const IntegratorSettingsXC& ks_settings){ + return neo_eval_exc_vxc_(elec_Ps, prot_Ps, prot_Pz, ks_settings); + } + + exc_vxc_type_neo_uks neo_eval_exc_vxc( const MatrixType& elec_Ps, const MatrixType& elec_Pz, const MatrixType& prot_Ps, const MatrixType& prot_Pz, + const IntegratorSettingsXC& ks_settings){ + return neo_eval_exc_vxc_(elec_Ps, elec_Pz, prot_Ps, prot_Pz, ks_settings); + } /** Integrate EXC gradient for RKS * diff --git a/include/gauxc/xc_task.hpp b/include/gauxc/xc_task.hpp index 1f70418f..27cb26c3 100644 --- a/include/gauxc/xc_task.hpp +++ b/include/gauxc/xc_task.hpp @@ -55,6 +55,7 @@ struct XCTask { } screening_data bfn_screening; + screening_data protonic_bfn_screening; screening_data cou_screening; void merge_with( const XCTask& other ) { diff --git a/src/load_balancer/host/load_balancer_host_factory.cxx b/src/load_balancer/host/load_balancer_host_factory.cxx index 94db35de..ada7d93f 100644 --- a/src/load_balancer/host/load_balancer_host_factory.cxx +++ b/src/load_balancer/host/load_balancer_host_factory.cxx @@ -41,5 +41,35 @@ std::shared_ptr LoadBalancerHostFactory::get_shared_instance( } +std::shared_ptr LoadBalancerHostFactory::get_shared_instance( + std::string kernel_name, const RuntimeEnvironment& rt, + const Molecule& mol, const MolGrid& mg, const BasisSet& basis, + const BasisSet& protonic_basis +) { + + std::transform(kernel_name.begin(), kernel_name.end(), + kernel_name.begin(), ::toupper ); + + + if( kernel_name == "DEFAULT" or kernel_name == "REPLICATED" ) + kernel_name = "REPLICATED-PETITE"; + + std::unique_ptr ptr = nullptr; + if( kernel_name == "REPLICATED-PETITE" ) + ptr = std::make_unique( + rt, mol, mg, basis, protonic_basis + ); + + if( kernel_name == "REPLICATED-FILLIN" ) + ptr = std::make_unique( + rt, mol, mg, basis, protonic_basis + ); + + if( ! ptr ) GAUXC_GENERIC_EXCEPTION("Load Balancer Kernel Not Recognized: " + kernel_name); + + return std::make_shared(std::move(ptr)); + +} + } diff --git a/src/load_balancer/host/load_balancer_host_factory.hpp b/src/load_balancer/host/load_balancer_host_factory.hpp index d02e5c03..0af2e919 100644 --- a/src/load_balancer/host/load_balancer_host_factory.hpp +++ b/src/load_balancer/host/load_balancer_host_factory.hpp @@ -17,6 +17,12 @@ struct LoadBalancerHostFactory { const Molecule& mol, const MolGrid& mg, const BasisSet& basis ); + static std::shared_ptr get_shared_instance( + std::string kernel_name, const RuntimeEnvironment& rt, + const Molecule& mol, const MolGrid& mg, const BasisSet& basis, + const BasisSet& protonic_basis + ); + }; diff --git a/src/load_balancer/host/replicated_host_load_balancer.cxx b/src/load_balancer/host/replicated_host_load_balancer.cxx index 5a3bb9c9..ddf7e24c 100644 --- a/src/load_balancer/host/replicated_host_load_balancer.cxx +++ b/src/load_balancer/host/replicated_host_load_balancer.cxx @@ -58,7 +58,15 @@ std::vector< XCTask > HostReplicatedLoadBalancer::create_local_tasks_() const { // Microbatch Screening auto [shell_list, nbe] = micro_batch_screen( (*this->basis_), lo, up ); + // If there's a NEO protonic basis, then do microbatch screening on it + std::vector protonic_shell_list; + size_t protonic_nbe = 0; + if (this->protonic_basis_) + std::tie(protonic_shell_list, protonic_nbe) = micro_batch_screen((*this->protonic_basis_), lo, up); + // Course grain screening + // For NEO, skip task when electronic shell list is empty + // (Protonic system doesnt have XC. It only has EPC) if( not shell_list.size() ) continue; // Copy task data @@ -71,6 +79,10 @@ std::vector< XCTask > HostReplicatedLoadBalancer::create_local_tasks_() const { task.bfn_screening.shell_list = std::move(shell_list); task.bfn_screening.nbe = nbe; task.dist_nearest = molmeta_->dist_nearest()[iCurrent]; + if(this->protonic_basis_){ + task.protonic_bfn_screening.shell_list = std::move(protonic_shell_list); + task.protonic_bfn_screening.nbe = protonic_nbe; + } #pragma omp critical temp_tasks.push_back( diff --git a/src/load_balancer/load_balancer.cxx b/src/load_balancer/load_balancer.cxx index 9329fef8..aad80ddd 100644 --- a/src/load_balancer/load_balancer.cxx +++ b/src/load_balancer/load_balancer.cxx @@ -93,6 +93,15 @@ const LoadBalancer::shell_pair_type& LoadBalancer::shell_pairs() { return pimpl_->shell_pairs(); } +const LoadBalancer::basis_type& LoadBalancer::protonic_basis() const { + if( not pimpl_ ) GAUXC_PIMPL_NOT_INITIALIZED(); + return pimpl_->protonic_basis(); +} +const LoadBalancer::basis_map_type& LoadBalancer::protonic_basis_map() const { + if( not pimpl_ ) GAUXC_PIMPL_NOT_INITIALIZED(); + return pimpl_->protonic_basis_map(); +} + LoadBalancerState& LoadBalancer::state() { if( not pimpl_ ) GAUXC_PIMPL_NOT_INITIALIZED(); return pimpl_->state(); diff --git a/src/load_balancer/load_balancer_factory.cxx b/src/load_balancer/load_balancer_factory.cxx index b14ddbee..cd5c19aa 100644 --- a/src/load_balancer/load_balancer_factory.cxx +++ b/src/load_balancer/load_balancer_factory.cxx @@ -50,6 +50,41 @@ LoadBalancer LoadBalancerFactory::get_instance( } +std::shared_ptr LoadBalancerFactory::get_shared_instance( + const RuntimeEnvironment& rt, + const Molecule& mol, const MolGrid& mg, const BasisSet& basis, + const BasisSet& protonic_basis +) { + + switch(ex_) { + case ExecutionSpace::Host: + using host_factory = LoadBalancerHostFactory; + return host_factory::get_shared_instance(kernel_name_, + rt, mol, mg, basis, protonic_basis ); + #ifdef GAUXC_ENABLE_DEVICE + case ExecutionSpace::Device: + GAUXC_GENERIC_EXCEPTION("2 basis with GPU NYI"); + //using device_factory = LoadBalancerDeviceFactory; + //return device_factory::get_shared_instance(kernel_name_, + // rt, mol, mg, basis ); + #endif + default: + GAUXC_GENERIC_EXCEPTION("Unrecognized Execution Space"); + } + + +} + +LoadBalancer LoadBalancerFactory::get_instance( + const RuntimeEnvironment& rt, + const Molecule& mol, const MolGrid& mg, const BasisSet& basis, + const BasisSet& protonic_basis +) { + + auto ptr = get_shared_instance(rt, mol, mg, basis, protonic_basis); + return LoadBalancer(std::move(*ptr)); + +} } diff --git a/src/load_balancer/load_balancer_impl.cxx b/src/load_balancer/load_balancer_impl.cxx index 06dbbd19..ce1722e0 100644 --- a/src/load_balancer/load_balancer_impl.cxx +++ b/src/load_balancer/load_balancer_impl.cxx @@ -21,6 +21,17 @@ LoadBalancerImpl::LoadBalancerImpl( const RuntimeEnvironment& rt, const Molecule } +LoadBalancerImpl::LoadBalancerImpl(const RuntimeEnvironment& rt, const Molecule& mol, + const MolGrid& mg, const basis_type& basis, const basis_type& protonic_basis, + std::shared_ptr molmeta ) : + LoadBalancerImpl(rt, mol, mg, basis, molmeta) { + + // Unique initializations for the protonic basis + protonic_basis_ = std::make_shared(protonic_basis); + protonic_basis_map_ = std::make_shared(*protonic_basis_, mol); + +} + LoadBalancerImpl::LoadBalancerImpl( const RuntimeEnvironment& rt, const Molecule& mol, const MolGrid& mg, const basis_type& basis, const MolMeta& molmeta ) : LoadBalancerImpl( rt, mol, mg, basis, std::make_shared(molmeta) ) { } @@ -29,6 +40,13 @@ LoadBalancerImpl::LoadBalancerImpl( const RuntimeEnvironment& rt, const Molecule const MolGrid& mg, const basis_type& basis ) : LoadBalancerImpl( rt, mol, mg, basis, std::make_shared(mol) ) { } +LoadBalancerImpl::LoadBalancerImpl( const RuntimeEnvironment& rt, const Molecule& mol, + const MolGrid& mg, const basis_type& basis, const basis_type& protonic_basis ) : + LoadBalancerImpl( rt, mol, mg, basis, protonic_basis, std::make_shared(mol) ) { } + +LoadBalancerImpl::LoadBalancerImpl( const RuntimeEnvironment& rt, const Molecule& mol, + const MolGrid& mg, const basis_type& basis, const basis_type& protonic_basis, const MolMeta& molmeta ) : + LoadBalancerImpl( rt, mol, mg, basis, protonic_basis, std::make_shared(molmeta) ) { } LoadBalancerImpl::LoadBalancerImpl( const LoadBalancerImpl& ) = default; LoadBalancerImpl::LoadBalancerImpl( LoadBalancerImpl&& ) noexcept = default; @@ -120,6 +138,18 @@ const LoadBalancerImpl::shell_pair_type& LoadBalancerImpl::shell_pairs() { return *shell_pairs_; } +const LoadBalancerImpl::basis_type& LoadBalancerImpl::protonic_basis() const { + if(!protonic_basis_) + GAUXC_GENERIC_EXCEPTION("No Protonic Basis Found in LoadBalancerImpl::protonic_basis()"); + return *protonic_basis_; +} + +const LoadBalancerImpl::basis_map_type& LoadBalancerImpl::protonic_basis_map() const { + if(!protonic_basis_map_) + GAUXC_GENERIC_EXCEPTION("No Protonic Basis Found in LoadBalancerImpl::protonic_basis_map()"); + return *protonic_basis_map_; +} + const RuntimeEnvironment& LoadBalancerImpl::runtime() const { return runtime_; } diff --git a/src/load_balancer/load_balancer_impl.hpp b/src/load_balancer/load_balancer_impl.hpp index 566279a1..d2c68086 100644 --- a/src/load_balancer/load_balancer_impl.hpp +++ b/src/load_balancer/load_balancer_impl.hpp @@ -29,6 +29,9 @@ class LoadBalancerImpl { std::shared_ptr molmeta_; std::shared_ptr basis_map_; std::shared_ptr shell_pairs_; + // Protonic basis information if doing Nuclear-Electronic Orbital (NEO) theory + std::shared_ptr protonic_basis_; + std::shared_ptr protonic_basis_map_; std::vector< XCTask > local_tasks_; @@ -48,6 +51,13 @@ class LoadBalancerImpl { const basis_type&, const MolMeta& ); LoadBalancerImpl( const RuntimeEnvironment&, const Molecule&, const MolGrid& mg, const basis_type&, std::shared_ptr ); + + LoadBalancerImpl( const RuntimeEnvironment&, const Molecule&, const MolGrid& mg, + const basis_type&, const basis_type& ); + LoadBalancerImpl( const RuntimeEnvironment&, const Molecule&, const MolGrid& mg, + const basis_type&, const basis_type&, const MolMeta& ); + LoadBalancerImpl( const RuntimeEnvironment&, const Molecule&, const MolGrid& mg, + const basis_type&, const basis_type&, std::shared_ptr ); LoadBalancerImpl( const LoadBalancerImpl& ); LoadBalancerImpl( LoadBalancerImpl&& ) noexcept; @@ -74,6 +84,8 @@ class LoadBalancerImpl { const basis_map_type& basis_map() const; const shell_pair_type& shell_pairs() const; const shell_pair_type& shell_pairs(); + const basis_type& protonic_basis() const; + const basis_map_type& protonic_basis_map() const; LoadBalancerState& state(); diff --git a/src/load_balancer/rebalance.cxx b/src/load_balancer/rebalance.cxx index c66086dd..fe885d97 100644 --- a/src/load_balancer/rebalance.cxx +++ b/src/load_balancer/rebalance.cxx @@ -177,6 +177,8 @@ auto rebalance(TaskIterator begin, TaskIterator end, const CostFunctor& cost, MP mpi_buffer.pack(task.weights); mpi_buffer.pack(task.bfn_screening.shell_list); mpi_buffer.pack(task.bfn_screening.nbe); + mpi_buffer.pack(task.protonic_bfn_screening.shell_list); + mpi_buffer.pack(task.protonic_bfn_screening.nbe); mpi_buffer.pack(task.cou_screening.shell_list); mpi_buffer.pack(task.cou_screening.shell_pair_list); mpi_buffer.pack(task.cou_screening.shell_pair_idx_list); @@ -296,6 +298,8 @@ auto rebalance(TaskIterator begin, TaskIterator end, const CostFunctor& cost, MP mpi_buffer.unpack(task.weights); mpi_buffer.unpack(task.bfn_screening.shell_list); mpi_buffer.unpack(task.bfn_screening.nbe); + mpi_buffer.unpack(task.protonic_bfn_screening.shell_list); + mpi_buffer.unpack(task.protonic_bfn_screening.nbe); mpi_buffer.unpack(task.cou_screening.shell_list); mpi_buffer.unpack(task.cou_screening.shell_pair_list); mpi_buffer.unpack(task.cou_screening.shell_pair_idx_list); diff --git a/src/xc_integrator/replicated/device/incore_replicated_xc_device_integrator.hpp b/src/xc_integrator/replicated/device/incore_replicated_xc_device_integrator.hpp index 30403175..4ba0e9ad 100644 --- a/src/xc_integrator/replicated/device/incore_replicated_xc_device_integrator.hpp +++ b/src/xc_integrator/replicated/device/incore_replicated_xc_device_integrator.hpp @@ -68,7 +68,26 @@ class IncoreReplicatedXCDeviceIntegrator : value_type* VXCy, int64_t ldvxcy, value_type* VXCx, int64_t ldvxcx, value_type* EXC, const IntegratorSettingsXC& settings ) override; - + + void neo_eval_exc_vxc_( int64_t elec_m, int64_t elec_n, int64_t prot_m, int64_t prot_n, + const value_type* elec_Ps, int64_t elec_ldps, + const value_type* prot_Ps, int64_t prot_ldps, + const value_type* prot_Pz, int64_t prot_ldpz, + value_type* elec_VXCs, int64_t elec_ldvxcs, + value_type* prot_VXCs, int64_t prot_ldvxcs, + value_type* prot_VXCz, int64_t prot_ldvxcz, + value_type* elec_EXC, value_type* prot_EXC, const IntegratorSettingsXC& settings ) override; + + void neo_eval_exc_vxc_( int64_t elec_m, int64_t elec_n, int64_t prot_m, int64_t prot_n, + const value_type* elec_Ps, int64_t elec_ldps, + const value_type* elec_Pz, int64_t elec_ldpz, + const value_type* prot_Ps, int64_t prot_ldps, + const value_type* prot_Pz, int64_t prot_ldpz, + value_type* elec_VXCs, int64_t elec_ldvxcs, + value_type* elec_VXCz, int64_t elec_ldvxcz, + value_type* prot_VXCs, int64_t prot_ldvxcs, + value_type* prot_VXCz, int64_t prot_ldvxcz, + value_type* elec_EXC, value_type* prot_EXC, const IntegratorSettingsXC& settings ) override; void eval_exc_grad_( int64_t m, int64_t n, const value_type* P, int64_t ldp, value_type* EXC_GRAD ) override; diff --git a/src/xc_integrator/replicated/device/incore_replicated_xc_device_integrator_exc_vxc.hpp b/src/xc_integrator/replicated/device/incore_replicated_xc_device_integrator_exc_vxc.hpp index c8cae61d..ef6c6592 100644 --- a/src/xc_integrator/replicated/device/incore_replicated_xc_device_integrator_exc_vxc.hpp +++ b/src/xc_integrator/replicated/device/incore_replicated_xc_device_integrator_exc_vxc.hpp @@ -214,6 +214,38 @@ void IncoreReplicatedXCDeviceIntegrator:: } +template +void IncoreReplicatedXCDeviceIntegrator:: + neo_eval_exc_vxc_( int64_t elec_m, int64_t elec_n, int64_t prot_m, int64_t prot_n, + const value_type* elec_Ps, int64_t elec_ldps, + const value_type* prot_Ps, int64_t prot_ldps, + const value_type* prot_Pz, int64_t prot_ldpz, + value_type* elec_VXCs, int64_t elec_ldvxcs, + value_type* prot_VXCs, int64_t prot_ldvxcs, + value_type* prot_VXCz, int64_t prot_ldvxcz, + value_type* elec_EXC, value_type* prot_EXC, const IntegratorSettingsXC& settings ) { + GauXC::util::unused(elec_m,elec_n,prot_m,prot_n,elec_Ps,elec_ldps,prot_Ps,prot_ldps,prot_Pz,prot_ldpz, + elec_VXCs,elec_ldvxcs,prot_VXCs,prot_ldvxcs,prot_VXCz,prot_ldvxcz,elec_EXC,prot_EXC,settings); + GAUXC_GENERIC_EXCEPTION("NEO-RKS NOT YET IMPLEMENTED FOR DEVICE"); +} + +template +void IncoreReplicatedXCDeviceIntegrator:: + neo_eval_exc_vxc_( int64_t elec_m, int64_t elec_n, int64_t prot_m, int64_t prot_n, + const value_type* elec_Ps, int64_t elec_ldps, + const value_type* elec_Pz, int64_t elec_ldpz, + const value_type* prot_Ps, int64_t prot_ldps, + const value_type* prot_Pz, int64_t prot_ldpz, + value_type* elec_VXCs, int64_t elec_ldvxcs, + value_type* elec_VXCz, int64_t elec_ldvxcz, + value_type* prot_VXCs, int64_t prot_ldvxcs, + value_type* prot_VXCz, int64_t prot_ldvxcz, + value_type* elec_EXC, value_type* prot_EXC, const IntegratorSettingsXC& settings ) { + GauXC::util::unused(elec_m,elec_n,prot_m,prot_n,elec_Ps,elec_ldps,elec_Pz,elec_ldpz,prot_Ps,prot_ldps,prot_Pz,prot_ldpz, + elec_VXCs,elec_ldvxcs,elec_VXCz,elec_ldvxcz,prot_VXCs,prot_ldvxcs,prot_VXCz,prot_ldvxcz,elec_EXC,prot_EXC,settings); + GAUXC_GENERIC_EXCEPTION("NEO-UKS NOT YET IMPLEMENTED FOR DEVICE"); +} + template void IncoreReplicatedXCDeviceIntegrator:: exc_vxc_local_work_( const basis_type& basis, const value_type* Ps, int64_t ldps, diff --git a/src/xc_integrator/replicated/device/replicated_xc_device_integrator.cxx b/src/xc_integrator/replicated/device/replicated_xc_device_integrator.cxx index 605de1c6..c76e3e76 100644 --- a/src/xc_integrator/replicated/device/replicated_xc_device_integrator.cxx +++ b/src/xc_integrator/replicated/device/replicated_xc_device_integrator.cxx @@ -52,6 +52,43 @@ typename ReplicatedXCDeviceIntegratorFactory::ptr_return_t GAUXC_GENERIC_EXCEPTION("Integrator Kernel " + integrator_kernel + " Not Recognized"); +} + + +template +typename ReplicatedXCDeviceIntegratorFactory::ptr_return_t + ReplicatedXCDeviceIntegratorFactory::make_integrator_impl( + std::string integrator_kernel, + std::shared_ptr func, + std::shared_ptr epcfunc, + std::shared_ptr lb, + std::unique_ptr&& lwd, + std::shared_ptr rd + ) { + + // Make sure that the LWD is a valid LocalDeviceWorkDriver + if(not dynamic_cast(lwd.get())) { + GAUXC_GENERIC_EXCEPTION("Passed LWD Not valid for Device ExSpace"); + } + + std::transform(integrator_kernel.begin(), integrator_kernel.end(), + integrator_kernel.begin(), ::toupper ); + + if( integrator_kernel == "DEFAULT" ) integrator_kernel = "INCORE"; + + if( integrator_kernel == "INCORE" ) + return std::make_unique>( + func, epcfunc, lb, std::move(lwd), rd + ); + else if( integrator_kernel == "SHELLBATCHED" ) + return std::make_unique>( + func, epcfunc, lb, std::move(lwd), rd + ); + + else + GAUXC_GENERIC_EXCEPTION("INTEGRATOR KERNEL " + integrator_kernel + " NOT RECOGNIZED"); + + } template struct ReplicatedXCDeviceIntegratorFactory; diff --git a/src/xc_integrator/replicated/host/reference_replicated_xc_host_integrator.cxx b/src/xc_integrator/replicated/host/reference_replicated_xc_host_integrator.cxx index 731eaf84..8e659421 100644 --- a/src/xc_integrator/replicated/host/reference_replicated_xc_host_integrator.cxx +++ b/src/xc_integrator/replicated/host/reference_replicated_xc_host_integrator.cxx @@ -8,6 +8,7 @@ #include "reference_replicated_xc_host_integrator_integrate_den.hpp" #include "reference_replicated_xc_host_integrator_exc.hpp" #include "reference_replicated_xc_host_integrator_exc_vxc.hpp" +#include "reference_replicated_xc_host_integrator_exc_vxc_neo.hpp" #include "reference_replicated_xc_host_integrator_exc_grad.hpp" #include "reference_replicated_xc_host_integrator_exx.hpp" diff --git a/src/xc_integrator/replicated/host/reference_replicated_xc_host_integrator.hpp b/src/xc_integrator/replicated/host/reference_replicated_xc_host_integrator.hpp index bf5d4d61..3383189c 100644 --- a/src/xc_integrator/replicated/host/reference_replicated_xc_host_integrator.hpp +++ b/src/xc_integrator/replicated/host/reference_replicated_xc_host_integrator.hpp @@ -70,6 +70,25 @@ class ReferenceReplicatedXCHostIntegrator : value_type* VXCx, int64_t ldvxcx, value_type* EXC, const IntegratorSettingsXC& ks_settings ) override; + void neo_eval_exc_vxc_( int64_t m1, int64_t n1, int64_t m2, int64_t n2, + const value_type* Ps, int64_t ldps, + const value_type* prot_Ps, int64_t prot_ldps, + const value_type* prot_Pz, int64_t prot_ldpz, + value_type* VXCs, int64_t ldvxcs, + value_type* prot_VXCs, int64_t prot_ldvxcs, + value_type* prot_VXCz, int64_t prot_ldvxcz, + value_type* EXC1, value_type* prot_EXC, const IntegratorSettingsXC& ks_settings ) override; + + void neo_eval_exc_vxc_( int64_t m1, int64_t n1, int64_t m2, int64_t n2, + const value_type* Ps, int64_t ldps, + const value_type* Pz, int64_t ldpz, + const value_type* prot_Ps, int64_t prot_ldps, + const value_type* prot_Pz, int64_t prot_ldpz, + value_type* VXCs, int64_t ldvxcs, + value_type* VXCz, int64_t ldvxcz, + value_type* prot_VXCs, int64_t prot_ldvxcs, + value_type* prot_VXCz, int64_t prot_ldvxcz, + value_type* EXC1, value_type* prot_EXC, const IntegratorSettingsXC& ks_settings ) override; /// RKS EXC Gradient void eval_exc_grad_( int64_t m, int64_t n, const value_type* P, @@ -97,6 +116,19 @@ class ReferenceReplicatedXCHostIntegrator : value_type* VXCx, int64_t ldvxcx, value_type* EXC, value_type *N_EL, const IntegratorSettingsXC& ks_settings, task_iterator task_begin, task_iterator task_end ); + + void neo_exc_vxc_local_work_( const value_type* Ps, int64_t ldps, + const value_type* Pz, int64_t ldpz, + const value_type* prot_Ps, int64_t prot_ldps, + const value_type* prot_Pz, int64_t prot_ldpz, + value_type* VXCs, int64_t ldvxcs, + value_type* VXCz, int64_t ldvxcz, + value_type* prot_VXCs, int64_t prot_ldvxcs, + value_type* prot_VXCz, int64_t prot_ldvxcz, + value_type* EXC1, value_type* prot_EXC, + value_type *N_EL, value_type *N_PROT, + const IntegratorSettingsXC& ks_settings, + task_iterator task_begin, task_iterator task_end ); // Implemetation details of exc_grad void exc_grad_local_work_( const value_type* P, int64_t ldp, value_type* EXC_GRAD ); diff --git a/src/xc_integrator/replicated/host/reference_replicated_xc_host_integrator_exc_vxc_neo.hpp b/src/xc_integrator/replicated/host/reference_replicated_xc_host_integrator_exc_vxc_neo.hpp new file mode 100644 index 00000000..6aa67471 --- /dev/null +++ b/src/xc_integrator/replicated/host/reference_replicated_xc_host_integrator_exc_vxc_neo.hpp @@ -0,0 +1,620 @@ +/** + * GauXC Copyright (c) 2020-2023, The Regents of the University of California, + * through Lawrence Berkeley National Laboratory (subject to receipt of + * any required approvals from the U.S. Dept. of Energy). All rights reserved. + * + * See LICENSE.txt for details + */ +#pragma once + +#include "reference_replicated_xc_host_integrator.hpp" +#include "integrator_util/integrator_common.hpp" +#include "host/local_host_work_driver.hpp" +#include "host/blas.hpp" +#include +#include +#include + +namespace GauXC { +namespace detail { + +template +void ReferenceReplicatedXCHostIntegrator:: + neo_eval_exc_vxc_( int64_t elec_m, int64_t elec_n, int64_t prot_m, int64_t prot_n, + const value_type* elec_Ps, int64_t elec_ldps, + const value_type* prot_Ps, int64_t prot_ldps, + const value_type* prot_Pz, int64_t prot_ldpz, + value_type* elec_VXCs, int64_t elec_ldvxcs, + value_type* prot_VXCs, int64_t prot_ldvxcs, + value_type* prot_VXCz, int64_t prot_ldvxcz, + value_type* elec_EXC, value_type* prot_EXC, const IntegratorSettingsXC& settings){ + + const auto& elec_basis = this->load_balancer_->basis(); + const auto& prot_basis = this->load_balancer_->protonic_basis(); + + // Check that P / VXC are sane + const int64_t elec_nbf = elec_basis.nbf(); + const int64_t prot_nbf = prot_basis.nbf(); + + if( elec_m != elec_n | prot_m != prot_n) + GAUXC_GENERIC_EXCEPTION("P/VXC Must Be Square"); + if( elec_m != elec_nbf | prot_m != prot_nbf) + GAUXC_GENERIC_EXCEPTION("P/VXC Must Have Same Dimension as Basis"); + if( elec_ldps < elec_nbf | prot_ldps < prot_nbf | prot_ldpz < prot_nbf ) + GAUXC_GENERIC_EXCEPTION("Invalid LDP"); + if( elec_ldvxcs < elec_nbf | prot_ldvxcs < prot_nbf | prot_ldvxcz < prot_nbf ) + GAUXC_GENERIC_EXCEPTION("Invalid LDVXC"); + + // Get Tasks + auto& tasks = this->load_balancer_->get_tasks(); + + // Temporary electron count to judge integrator accuracy + value_type N_EL; + // Temporary proton count to judge integrator accuracy + value_type N_PROT; + + // Compute Local contributions to EXC / VXC + this->timer_.time_op("XCIntegrator.LocalWork", [&](){ + neo_exc_vxc_local_work_( elec_Ps, elec_ldps, + nullptr, 0, + prot_Ps, prot_ldps, + prot_Pz, prot_ldpz, + elec_VXCs, elec_ldvxcs, + nullptr, 0, + prot_VXCs, prot_ldvxcs, + prot_VXCz, prot_ldvxcz, + elec_EXC, prot_EXC, + &N_EL, &N_PROT, settings, + tasks.begin(), tasks.end() ); + }); + + + // Reduce Results + this->timer_.time_op("XCIntegrator.Allreduce", [&](){ + + if( not this->reduction_driver_->takes_host_memory() ) + GAUXC_GENERIC_EXCEPTION("This Module Only Works With Host Reductions"); + + this->reduction_driver_->allreduce_inplace( elec_VXCs, elec_nbf*elec_nbf, ReductionOp::Sum ); + this->reduction_driver_->allreduce_inplace( prot_VXCs, prot_nbf*prot_nbf, ReductionOp::Sum ); + this->reduction_driver_->allreduce_inplace( prot_VXCz, prot_nbf*prot_nbf, ReductionOp::Sum ); + this->reduction_driver_->allreduce_inplace( elec_EXC, 1, ReductionOp::Sum ); + this->reduction_driver_->allreduce_inplace( prot_EXC, 1, ReductionOp::Sum ); + this->reduction_driver_->allreduce_inplace( &N_EL, 1, ReductionOp::Sum ); + this->reduction_driver_->allreduce_inplace( &N_PROT, 1, ReductionOp::Sum ); + + }); + +} + +template +void ReferenceReplicatedXCHostIntegrator:: + neo_eval_exc_vxc_( int64_t elec_m, int64_t elec_n, int64_t prot_m, int64_t prot_n, + const value_type* elec_Ps, int64_t elec_ldps, + const value_type* elec_Pz, int64_t elec_ldpz, + const value_type* prot_Ps, int64_t prot_ldps, + const value_type* prot_Pz, int64_t prot_ldpz, + value_type* elec_VXCs, int64_t elec_ldvxcs, + value_type* elec_VXCz, int64_t elec_ldvxcz, + value_type* prot_VXCs, int64_t prot_ldvxcs, + value_type* prot_VXCz, int64_t prot_ldvxcz, + value_type* elec_EXC, value_type* prot_EXC, const IntegratorSettingsXC& settings ){ + + const auto& elec_basis = this->load_balancer_->basis(); + const auto& prot_basis = this->load_balancer_->protonic_basis(); + + // Check that P / VXC are sane + const int64_t elec_nbf = elec_basis.nbf(); + const int64_t prot_nbf = prot_basis.nbf(); + + if( elec_m != elec_n | prot_m != prot_n) + GAUXC_GENERIC_EXCEPTION("P/VXC Must Be Square"); + if( elec_m != elec_nbf | prot_m != prot_nbf) + GAUXC_GENERIC_EXCEPTION("P/VXC Must Have Same Dimension as Basis"); + if( elec_ldps < elec_nbf | elec_ldpz < elec_nbf | prot_ldps < prot_nbf | prot_ldpz < prot_nbf ) + GAUXC_GENERIC_EXCEPTION("Invalid LDP"); + if( elec_ldvxcs < elec_nbf | elec_ldvxcz < elec_nbf | prot_ldvxcs < prot_nbf | prot_ldvxcz < prot_nbf ) + GAUXC_GENERIC_EXCEPTION("Invalid LDVXC"); + + // Get Tasks + auto& tasks = this->load_balancer_->get_tasks(); + + // Temporary electron count to judge integrator accuracy + value_type N_EL; + // Temporary proton count to judge integrator accuracy + value_type N_PROT; + + // Compute Local contributions to EXC / VXC + this->timer_.time_op("XCIntegrator.LocalWork", [&](){ + neo_exc_vxc_local_work_( elec_Ps, elec_ldps, + elec_Pz, elec_ldpz, + prot_Ps, prot_ldps, + prot_Pz, prot_ldpz, + elec_VXCs, elec_ldvxcs, + elec_VXCz, elec_ldvxcz, + prot_VXCs, prot_ldvxcs, + prot_VXCz, prot_ldvxcz, + elec_EXC, prot_EXC, + &N_EL, &N_PROT, settings, + tasks.begin(), tasks.end() ); + }); + + + // Reduce Results + this->timer_.time_op("XCIntegrator.Allreduce", [&](){ + + if( not this->reduction_driver_->takes_host_memory() ) + GAUXC_GENERIC_EXCEPTION("This Module Only Works With Host Reductions"); + + this->reduction_driver_->allreduce_inplace( elec_VXCs, elec_nbf*elec_nbf, ReductionOp::Sum ); + this->reduction_driver_->allreduce_inplace( elec_VXCz, elec_nbf*elec_nbf, ReductionOp::Sum ); + this->reduction_driver_->allreduce_inplace( prot_VXCs, prot_nbf*prot_nbf, ReductionOp::Sum ); + this->reduction_driver_->allreduce_inplace( prot_VXCz, prot_nbf*prot_nbf, ReductionOp::Sum ); + this->reduction_driver_->allreduce_inplace( elec_EXC, 1, ReductionOp::Sum ); + this->reduction_driver_->allreduce_inplace( prot_EXC, 1, ReductionOp::Sum ); + this->reduction_driver_->allreduce_inplace( &N_EL, 1, ReductionOp::Sum ); + this->reduction_driver_->allreduce_inplace( &N_PROT, 1, ReductionOp::Sum ); + + }); + +} + + +template +void ReferenceReplicatedXCHostIntegrator:: + neo_exc_vxc_local_work_( const value_type* elec_Ps, int64_t elec_ldps, + const value_type* elec_Pz, int64_t elec_ldpz, + const value_type* prot_Ps, int64_t prot_ldps, + const value_type* prot_Pz, int64_t prot_ldpz, + value_type* elec_VXCs, int64_t elec_ldvxcs, + value_type* elec_VXCz, int64_t elec_ldvxcz, + value_type* prot_VXCs, int64_t prot_ldvxcs, + value_type* prot_VXCz, int64_t prot_ldvxcz, + value_type* elec_EXC, value_type* prot_EXC, + value_type *N_EL, value_type *N_PROT, + const IntegratorSettingsXC& settings, + task_iterator task_begin, task_iterator task_end) { + + // Determine is electronic subsystem is RKS or UKS + const bool is_uks = (elec_Pz != nullptr) and (elec_VXCz != nullptr); + const bool is_rks = not is_uks; + // TODO: Integrate with GKS + // TODO: Integrate with mGGA + + // Cast LWD to LocalHostWorkDriver + auto* lwd = dynamic_cast(this->local_work_driver_.get()); + + // Setup Aliases + const auto& func = *this->func_; + const auto& epcfunc = *this->epcfunc_; + const auto& basis = this->load_balancer_->basis(); + const auto& mol = this->load_balancer_->molecule(); + + // Get basis map + BasisSetMap basis_map(basis,mol); + + const int32_t nbf = basis.nbf(); + + // Get Protonic basis information + const auto& protonic_basis = this->load_balancer_->protonic_basis(); + BasisSetMap protonic_basis_map(protonic_basis,mol); + const int32_t protonic_nbf = protonic_basis.nbf(); + + // Sort tasks on size (XXX: maybe doesnt matter?) + auto task_comparator = []( const XCTask& a, const XCTask& b ) { + return (a.points.size() * a.bfn_screening.nbe) > (b.points.size() * b.bfn_screening.nbe); + }; + + auto& tasks = this->load_balancer_->get_tasks(); + std::sort( task_begin, task_end, task_comparator ); + + + // Check that Partition Weights have been calculated + auto& lb_state = this->load_balancer_->state(); + if( not lb_state.modified_weights_are_stored ) { + GAUXC_GENERIC_EXCEPTION("Weights Have Not Beed Modified"); + } + + + // Zero out integrands + + std::fill(elec_VXCs, elec_VXCs + nbf * elec_ldvxcs, 0.0); + if(is_uks) std::fill(elec_VXCz, elec_VXCz + nbf * elec_ldvxcz, 0.0); + + std::fill(prot_VXCs, prot_VXCs + protonic_nbf * prot_ldvxcs, 0.0); + std::fill(prot_VXCz, prot_VXCz + protonic_nbf * prot_ldvxcz, 0.0); + + *elec_EXC = 0.; + *prot_EXC = 0.; + + double NEL_WORK = 0.0; + double NPROT_WORK = 0.0; + double EXC_WORK = 0.0; + double EPC_WORK = 0.0; + + // Loop over tasks + const size_t ntasks = tasks.size(); + + #pragma omp parallel + { + + XCHostData host_data; // Thread local host data + + #pragma omp for schedule(dynamic) + for( size_t iT = 0; iT < ntasks; ++iT ) { + + //std::cout << iT << "/" << ntasks << std::endl; + //printf("%lu / %lu\n", iT, ntasks); + // Alias current task + const auto& task = tasks[iT]; + + // Get tasks constants + const int32_t npts = task.points.size(); + const int32_t nbe = task.bfn_screening.nbe; + const int32_t nshells = task.bfn_screening.shell_list.size(); + + const auto* points = task.points.data()->data(); + const auto* weights = task.weights.data(); + const int32_t* shell_list = task.bfn_screening.shell_list.data(); + + const int32_t protonic_nbe = task.protonic_bfn_screening.nbe; + const int32_t protonic_nshells = task.protonic_bfn_screening.shell_list.size(); + const int32_t* protonic_shell_list = task.protonic_bfn_screening.shell_list.data(); + + // Check if there's protonic shells to evaluate. If not, only electronic EXC/VXC will be calculated + bool evalProtonic = (protonic_nshells != 0); + + // Allocate enough memory for batch + + const size_t spin_dim_scal = is_rks ? 1 : 2; + + // Things that every calc needs + + + // Use same scratch for both electronic and protonic + const int32_t scr_dim = std::max(nbe, protonic_nbe); + host_data.nbe_scr .resize(scr_dim * scr_dim); + + //----------------------Start Electronic System Setup------------------------ + host_data.zmat .resize(npts * nbe * spin_dim_scal); + host_data.eps .resize(npts); + host_data.vrho .resize(npts * spin_dim_scal); + + // LDA data requirements + if( func.is_lda() ){ + host_data.basis_eval .resize( npts * nbe ); + host_data.den_scr .resize( npts * spin_dim_scal); + } + + // GGA data requirements + const size_t gga_dim_scal = is_rks ? 1 : 3; + if( func.is_gga() ){ + host_data.basis_eval .resize( 4 * npts * nbe ); + host_data.den_scr .resize( spin_dim_scal * 4 * npts ); + host_data.gamma .resize( gga_dim_scal * npts ); + host_data.vgamma .resize( gga_dim_scal * npts ); + } + + // Alias/Partition out scratch memory + auto* basis_eval = host_data.basis_eval.data(); + auto* den_eval = host_data.den_scr.data(); + auto* nbe_scr = host_data.nbe_scr.data(); + auto* zmat = host_data.zmat.data(); + + decltype(zmat) zmat_z = nullptr; + if(!is_rks) { + zmat_z = zmat + nbe * npts; + } + + auto* eps = host_data.eps.data(); + auto* gamma = host_data.gamma.data(); + auto* vrho = host_data.vrho.data(); + auto* vgamma = host_data.vgamma.data(); + + value_type* dbasis_x_eval = nullptr; + value_type* dbasis_y_eval = nullptr; + value_type* dbasis_z_eval = nullptr; + value_type* dden_x_eval = nullptr; + value_type* dden_y_eval = nullptr; + value_type* dden_z_eval = nullptr; + + if( func.is_gga() ) { + dbasis_x_eval = basis_eval + npts * nbe; + dbasis_y_eval = dbasis_x_eval + npts * nbe; + dbasis_z_eval = dbasis_y_eval + npts * nbe; + dden_x_eval = den_eval + spin_dim_scal * npts; + dden_y_eval = dden_x_eval + spin_dim_scal * npts; + dden_z_eval = dden_y_eval + spin_dim_scal * npts; + } + + //----------------------End Electronic System Setup------------------------ + + + //----------------------Start Protonic System Setup------------------------ + // Set Up Memory (assuming UKS) + host_data.epc .resize( npts ); + host_data.protonic_vrho .resize( npts * 2 ); + host_data.protonic_zmat .resize( npts * protonic_nbe * 2 ); + // LDA + host_data.protonic_basis_eval .resize( npts * protonic_nbe ); + host_data.protonic_den_scr .resize( npts * 2); + // Alias/Partition out scratch memory + auto* protonic_basis_eval = host_data.protonic_basis_eval.data(); + auto* protonic_den_eval = host_data.protonic_den_scr.data(); + auto* protonic_zmat = host_data.protonic_zmat.data(); + decltype(protonic_zmat) protonic_zmat_z = protonic_zmat + protonic_nbe * npts; + + auto* epc = host_data.epc.data(); + auto* protonic_vrho = host_data.protonic_vrho.data(); + // No GGA for NEO yet + //----------------------End Protonic System Setup------------------------ + + + //std::cout << "Task: " << iT << "/" << ntasks << std::endl; + //std::cout << "Electronic nbe: " << nbe << std::endl; + //std::cout << "Protonic nbe: " << protonic_nbe << std::endl; + + //----------------------Start Calculating Electronic Density & UV Variable------------------------ + // Get the submatrix map for batch + std::vector< std::array > submat_map; + std::tie(submat_map, std::ignore) = + gen_compressed_submat_map(basis_map, task.bfn_screening.shell_list, nbf, nbf); + + // Evaluate Collocation (+ Grad) + if( func.is_gga() ) + lwd->eval_collocation_gradient( npts, nshells, nbe, points, basis, shell_list, + basis_eval, dbasis_x_eval, dbasis_y_eval, dbasis_z_eval ); + else + lwd->eval_collocation( npts, nshells, nbe, points, basis, shell_list, + basis_eval ); + + // Evaluate X matrix (fac * P * B) -> store in Z + const auto xmat_fac = is_rks ? 2.0 : 1.0; // TODO Fix for spinor RKS input + lwd->eval_xmat( npts, nbf, nbe, submat_map, xmat_fac, elec_Ps, elec_ldps, basis_eval, nbe, + zmat, nbe, nbe_scr ); + + // X matrix for Pz + if(not is_rks) { + lwd->eval_xmat( npts, nbf, nbe, submat_map, 1.0, elec_Pz, elec_ldpz, basis_eval, nbe, + zmat_z, nbe, nbe_scr ); + } + + // Evaluate U and V variables + if( func.is_gga() ) { + if(is_rks) { + lwd->eval_uvvar_gga_rks( npts, nbe, basis_eval, dbasis_x_eval, dbasis_y_eval, + dbasis_z_eval, zmat, nbe, den_eval, dden_x_eval, dden_y_eval, dden_z_eval, + gamma ); + } else if(is_uks) { + lwd->eval_uvvar_gga_uks( npts, nbe, basis_eval, dbasis_x_eval, dbasis_y_eval, + dbasis_z_eval, zmat, nbe, zmat_z, nbe, den_eval, dden_x_eval, + dden_y_eval, dden_z_eval, gamma ); + } + } else { + if(is_rks) { + lwd->eval_uvvar_lda_rks( npts, nbe, basis_eval, zmat, nbe, den_eval ); + } else if(is_uks) { + lwd->eval_uvvar_lda_uks( npts, nbe, basis_eval, zmat, nbe, zmat_z, nbe, + den_eval ); + } + } + + // Evaluate XC functional + if( func.is_gga() ) + func.eval_exc_vxc( npts, den_eval, gamma, eps, vrho, vgamma ); + else + func.eval_exc_vxc( npts, den_eval, eps, vrho ); + //----------------------End Calculating Electronic Density & UV Variable------------------------ + + + + + //----------------------Start Calculating Protonic Density & UV Variable------------------------ + std::vector< std::array > protonic_submat_map; + if(evalProtonic){ + std::tie(protonic_submat_map, std::ignore) = + gen_compressed_submat_map(protonic_basis_map, task.protonic_bfn_screening.shell_list, protonic_nbf, protonic_nbf); + + // Evaluate Collocation + lwd->eval_collocation( npts, protonic_nshells, protonic_nbe, points, protonic_basis, protonic_shell_list, + protonic_basis_eval ); + + // Evaluate X matrix (P * B) -> store in Z + lwd->eval_xmat( npts, protonic_nbf, protonic_nbe, protonic_submat_map, 1.0, prot_Ps, prot_ldps, protonic_basis_eval, protonic_nbe, + protonic_zmat, protonic_nbe, nbe_scr ); + lwd->eval_xmat( npts, protonic_nbf, protonic_nbe, protonic_submat_map, 1.0, prot_Pz, prot_ldpz, protonic_basis_eval, protonic_nbe, + protonic_zmat_z, protonic_nbe, nbe_scr ); + + // Evaluate U and V variables + lwd->eval_uvvar_lda_uks( npts, protonic_nbe, protonic_basis_eval, protonic_zmat, protonic_nbe, protonic_zmat_z, + protonic_nbe, protonic_den_eval ); + + // No protonic XC functional. Fill with eps and vrho to be 0.0 + std::fill_n(epc, npts, 0.); + std::fill_n(protonic_vrho, npts*2, 0.); + } + //----------------------End Calculating Protonic Density & UV Variable------------------------ + + + + + //----------------------Start EPC functional Evaluation--------------------------------------- + if(evalProtonic){ + // Prepare for kernal input + for (int32_t iPt = 0; iPt < npts; iPt++ ){ + // Treat total erho as spin-up, treat total prho as spin down + protonic_den_eval[2*iPt+1] = protonic_den_eval[2*iPt] + protonic_den_eval[2*iPt+1]; + protonic_den_eval[2*iPt] = is_rks ? den_eval[iPt] : (den_eval[2*iPt] + den_eval[2*iPt+1]); + } + + // EPC Functional Evaluation (Calling ExchCXX Builtin Function) + epcfunc.eval_exc_vxc( npts, protonic_den_eval, epc, protonic_vrho ); + + // Digest kernal output + for (int32_t iPt = 0; iPt < npts; iPt++ ){ + // assign df/derho + vrho[spin_dim_scal*iPt] += protonic_vrho[2*iPt]; + if(not is_rks) vrho[spin_dim_scal*iPt+1] += protonic_vrho[2*iPt]; + // assign df/dprho + protonic_vrho[2*iPt] = protonic_vrho[2*iPt+1]; + protonic_vrho[2*iPt+1] = 0.0; + // change back protonic density to original state + protonic_den_eval[2*iPt] = protonic_den_eval[2*iPt+1]; + protonic_den_eval[2*iPt+1] = 0.0; + } + } // End if(evalProtonic) + //----------------------End EPC functional Evaluation--------------------------------------- + + + + //----------------------Begin Evaluating Electronic ZMat---------------------------- + // Factor weights into XC results + for( int32_t i = 0; i < npts; ++i ) { + eps[i] *= weights[i]; + epc[i] *= weights[i]; + vrho[spin_dim_scal*i] *= weights[i]; + if(not is_rks) vrho[spin_dim_scal*i+1] *= weights[i]; + } + + if( func.is_gga() ){ + for( int32_t i = 0; i < npts; ++i ) { + vgamma[gga_dim_scal*i] *= weights[i]; + if(not is_rks) { + vgamma[gga_dim_scal*i+1] *= weights[i]; + vgamma[gga_dim_scal*i+2] *= weights[i]; + } + } + } + + // Evaluate Z matrix for VXC + if( func.is_gga() ) { + if(is_rks) { + lwd->eval_zmat_gga_vxc_rks( npts, nbe, vrho, vgamma, basis_eval, dbasis_x_eval, + dbasis_y_eval, dbasis_z_eval, dden_x_eval, dden_y_eval, + dden_z_eval, zmat, nbe); + } else if(is_uks) { + lwd->eval_zmat_gga_vxc_uks( npts, nbe, vrho, vgamma, basis_eval, dbasis_x_eval, + dbasis_y_eval, dbasis_z_eval, dden_x_eval, dden_y_eval, + dden_z_eval, zmat, nbe, zmat_z, nbe); + } + } else { + if(is_rks) { + lwd->eval_zmat_lda_vxc_rks( npts, nbe, vrho, basis_eval, zmat, nbe ); + } else if(is_uks) { + lwd->eval_zmat_lda_vxc_uks( npts, nbe, vrho, basis_eval, zmat, nbe, zmat_z, nbe ); + } + } + //----------------------End Evaluating Electronic ZMat---------------------------- + + + + + //----------------------Begin Evaluating Protonic ZMat---------------------------- + if(evalProtonic){ + // Factor weights into XC results + for( int32_t i = 0; i < npts; ++i ) { + protonic_vrho[2*i] *= weights[i]; + protonic_vrho[2*i+1] *= weights[i]; + } + + // Evaluate Z matrix for VXC + lwd->eval_zmat_lda_vxc_uks( npts, protonic_nbe, protonic_vrho, protonic_basis_eval, protonic_zmat, protonic_nbe, + protonic_zmat_z, protonic_nbe ); + } + //----------------------End Evaluating Protonic ZMat---------------------------- + + // Scalar integrations + double NEL_local = 0.0; + double NPROT_local = 0.0; + double EXC_local = 0.0; + double EPC_local = 0.0; + for( int32_t i = 0; i < npts; ++i ) { + const auto den = is_rks ? den_eval[i] : (den_eval[2*i] + den_eval[2*i+1]); + NEL_local += weights[i] * den; + EXC_local += eps[i] * den; + EPC_local += epc[i] * den;; + } + // Protonic XC (EPC) + if(evalProtonic){ + for( int32_t i = 0; i < npts; ++i ) { + const auto protonic_den = protonic_den_eval[2*i] + protonic_den_eval[2*i+1]; + NPROT_local += weights[i] * protonic_den; + EPC_local += epc[i] * protonic_den; + } + } + + // Atomic updates + #pragma omp atomic + NEL_WORK += NEL_local; + #pragma omp atomic + NPROT_WORK += NPROT_local; + #pragma omp atomic + EXC_WORK += EXC_local; + #pragma omp atomic + EPC_WORK += EPC_local; + + + // Incremeta LT of VXC + { + + // Increment Electronic XC (VXC+EPC) + lwd->inc_vxc( npts, nbf, nbe, basis_eval, submat_map, zmat, nbe, elec_VXCs, elec_ldvxcs, nbe_scr ); + if(not is_rks) + lwd->inc_vxc( npts, nbf, nbe, basis_eval, submat_map, zmat_z, nbe, elec_VXCz, elec_ldvxcz, nbe_scr ); + + // Increment Protonic XC (EPC) + // Scalar integrations + if(evalProtonic){ + lwd->inc_vxc( npts, protonic_nbf, protonic_nbe, protonic_basis_eval, protonic_submat_map, + protonic_zmat, protonic_nbe, prot_VXCs, prot_ldvxcs, nbe_scr ); + lwd->inc_vxc( npts, protonic_nbf, protonic_nbe, protonic_basis_eval, protonic_submat_map, + protonic_zmat_z, protonic_nbe, prot_VXCz, prot_ldvxcz, nbe_scr ); + } + + } + + } // Loop over tasks + + } // End OpenMP region + + // Set scalar return values + *N_EL = NEL_WORK; + *N_PROT = NPROT_WORK; + *elec_EXC = EXC_WORK + EPC_WORK; + *prot_EXC = EPC_WORK; + + //std::cout << "N_EL = " << std::setprecision(12) << std::scientific << *N_EL << std::endl; + //std::cout << "N_PROT = " << std::setprecision(12) << std::scientific << *N_PROT << std::endl; + //std::cout << "elec_EXC = " << std::setprecision(12) << std::scientific << *elec_EXC << std::endl; + //std::cout << "prot_EXC = " << std::setprecision(12) << std::scientific << *prot_EXC << std::endl; + + // Symmetrize Electronic VXC + for( int32_t j = 0; j < nbf; ++j ) + for( int32_t i = j+1; i < nbf; ++i ) + elec_VXCs[ j + i*elec_ldvxcs ] = elec_VXCs[ i + j*elec_ldvxcs ]; + if(not is_rks) { + for( int32_t j = 0; j < nbf; ++j ) { + for( int32_t i = j+1; i < nbf; ++i ) { + elec_VXCz[ j + i*elec_ldvxcz ] = elec_VXCz[ i + j*elec_ldvxcz ]; + } + } + } + + // Symmetrize Protonic VXC + for( int32_t j = 0; j < protonic_nbf; ++j ){ + for( int32_t i = j+1; i < protonic_nbf; ++i ){ + prot_VXCs[ j + i*prot_ldvxcs ] = prot_VXCs[ i + j*prot_ldvxcs ]; + prot_VXCz[ j + i*prot_ldvxcz ] = prot_VXCz[ i + j*prot_ldvxcz ]; + } + } + + + + + + +} + + +} +} diff --git a/src/xc_integrator/replicated/host/replicated_xc_host_integrator.cxx b/src/xc_integrator/replicated/host/replicated_xc_host_integrator.cxx index 13953dd4..274e6bee 100644 --- a/src/xc_integrator/replicated/host/replicated_xc_host_integrator.cxx +++ b/src/xc_integrator/replicated/host/replicated_xc_host_integrator.cxx @@ -54,6 +54,46 @@ typename ReplicatedXCHostIntegratorFactory::ptr_return_t return nullptr; +} + + +template +typename ReplicatedXCHostIntegratorFactory::ptr_return_t + ReplicatedXCHostIntegratorFactory::make_integrator_impl( + std::string integrator_kernel, + std::shared_ptr func, + std::shared_ptr epcfunc, + std::shared_ptr lb, + std::unique_ptr&& lwd, + std::shared_ptr rd + ) { + + // Make sure that the LWD is a valid LocalHostWorkDriver + if(not dynamic_cast(lwd.get())) { + GAUXC_GENERIC_EXCEPTION("Passed LWD Not valid for Host ExSpace"); + } + + std::transform(integrator_kernel.begin(), integrator_kernel.end(), + integrator_kernel.begin(), ::toupper ); + + if( integrator_kernel == "DEFAULT" ) integrator_kernel = "REFERENCE"; + + if( integrator_kernel == "REFERENCE" ) + return std::make_unique>( + func, epcfunc, lb, std::move(lwd), rd + ); + + else if( integrator_kernel == "SHELLBATCHED" ) + return std::make_unique>( + func, epcfunc, lb, std::move(lwd), rd + ); + + else + GAUXC_GENERIC_EXCEPTION("INTEGRATOR KERNEL: " + integrator_kernel + " NOT RECOGNIZED"); + + return nullptr; + + } template class ReplicatedXCHostIntegratorFactory; diff --git a/src/xc_integrator/replicated/host/xc_host_data.hpp b/src/xc_integrator/replicated/host/xc_host_data.hpp index 5649d523..decebb43 100644 --- a/src/xc_integrator/replicated/host/xc_host_data.hpp +++ b/src/xc_integrator/replicated/host/xc_host_data.hpp @@ -30,6 +30,14 @@ struct XCHostData { std::vector nbe_scr; std::vector den_scr; std::vector basis_eval; + + std::vector epc; + std::vector protonic_vrho; + + std::vector protonic_zmat; + std::vector protonic_gmat; + std::vector protonic_den_scr; + std::vector protonic_basis_eval; inline XCHostData() {} diff --git a/src/xc_integrator/replicated/replicated_xc_integrator_impl.cxx b/src/xc_integrator/replicated/replicated_xc_integrator_impl.cxx index b1d50523..1a79292e 100644 --- a/src/xc_integrator/replicated/replicated_xc_integrator_impl.cxx +++ b/src/xc_integrator/replicated/replicated_xc_integrator_impl.cxx @@ -19,6 +19,17 @@ ReplicatedXCIntegratorImpl:: func_(func), load_balancer_(lb), local_work_driver_(std::move(lwd)), reduction_driver_(rd){ } +template +ReplicatedXCIntegratorImpl:: + ReplicatedXCIntegratorImpl( std::shared_ptr< functional_type > func, + std::shared_ptr< functional_type > epcfunc, + std::shared_ptr< LoadBalancer > lb, + std::unique_ptr< LocalWorkDriver >&& lwd, + std::shared_ptr< ReductionDriver > rd) : + ReplicatedXCIntegratorImpl(func, lb, std::move(lwd), rd) { + epcfunc_ = epcfunc; +} + template ReplicatedXCIntegratorImpl:: ~ReplicatedXCIntegratorImpl() noexcept = default; @@ -81,7 +92,7 @@ void ReplicatedXCIntegratorImpl:: int64_t ldpz, value_type* VXCs, int64_t ldvxcs, value_type* VXCz, int64_t ldvxcz, - value_type* EXC, const IntegratorSettingsXC& ks_settings) { + value_type* EXC, const IntegratorSettingsXC& ks_settings ) { eval_exc_vxc_(m,n,Ps,ldps, Pz,ldpz, @@ -117,6 +128,56 @@ void ReplicatedXCIntegratorImpl:: } +template +void ReplicatedXCIntegratorImpl:: + neo_eval_exc_vxc( int64_t elec_m, int64_t elec_n, int64_t prot_m, int64_t prot_n, + const value_type* elec_Ps, int64_t elec_ldps, + const value_type* prot_Ps, int64_t prot_ldps, + const value_type* prot_Pz, int64_t prot_ldpz, + value_type* elec_VXCs, int64_t elec_ldvxcs, + value_type* prot_VXCs, int64_t prot_ldvxcs, + value_type* prot_VXCz, int64_t prot_ldvxcz, + value_type* elec_EXC, value_type* prot_EXC, + const IntegratorSettingsXC& ks_settings ){ + + neo_eval_exc_vxc_(elec_m,elec_n,prot_m,prot_n, + elec_Ps, elec_ldps, + prot_Ps, prot_ldps, + prot_Pz, prot_ldpz, + elec_VXCs,elec_ldvxcs, + prot_VXCs,prot_ldvxcs, + prot_VXCz,prot_ldvxcz, + elec_EXC, prot_EXC, ks_settings); + +} + +template +void ReplicatedXCIntegratorImpl:: + neo_eval_exc_vxc( int64_t elec_m, int64_t elec_n, int64_t prot_m, int64_t prot_n, + const value_type* elec_Ps, int64_t elec_ldps, + const value_type* elec_Pz, int64_t elec_ldpz, + const value_type* prot_Ps, int64_t prot_ldps, + const value_type* prot_Pz, int64_t prot_ldpz, + value_type* elec_VXCs, int64_t elec_ldvxcs, + value_type* elec_VXCz, int64_t elec_ldvxcz, + value_type* prot_VXCs, int64_t prot_ldvxcs, + value_type* prot_VXCz, int64_t prot_ldvxcz, + value_type* elec_EXC, value_type* prot_EXC, + const IntegratorSettingsXC& ks_settings ){ + + neo_eval_exc_vxc_(elec_m,elec_n,prot_m,prot_n, + elec_Ps, elec_ldps, + elec_Pz, elec_ldpz, + prot_Ps, prot_ldps, + prot_Pz, prot_ldpz, + elec_VXCs,elec_ldvxcs, + elec_VXCz,elec_ldvxcz, + prot_VXCs,prot_ldvxcs, + prot_VXCz,prot_ldvxcz, + elec_EXC, prot_EXC, ks_settings); + +} + template void ReplicatedXCIntegratorImpl:: eval_exc_grad( int64_t m, int64_t n, const value_type* P, diff --git a/src/xc_integrator/shell_batched/shell_batched_replicated_xc_integrator.hpp b/src/xc_integrator/shell_batched/shell_batched_replicated_xc_integrator.hpp index c6201a73..aaba9e16 100644 --- a/src/xc_integrator/shell_batched/shell_batched_replicated_xc_integrator.hpp +++ b/src/xc_integrator/shell_batched/shell_batched_replicated_xc_integrator.hpp @@ -81,6 +81,25 @@ class ShellBatchedReplicatedXCIntegrator : value_type* VXCx, int64_t ldvxcx, value_type* EXC, const IntegratorSettingsXC& ks_settings ) override; + void neo_eval_exc_vxc_( int64_t elec_m, int64_t elec_n, int64_t prot_m, int64_t prot_n, + const value_type* elec_Ps, int64_t elec_ldps, + const value_type* prot_Ps, int64_t prot_ldps, + const value_type* prot_Pz, int64_t prot_ldpz, + value_type* elec_VXCs, int64_t elec_ldvxcs, + value_type* prot_VXCs, int64_t prot_ldvxcs, + value_type* prot_VXCz, int64_t prot_ldvxcz, + value_type* elec_EXC, value_type* prot_EXC, const IntegratorSettingsXC& settings ) override; + + void neo_eval_exc_vxc_( int64_t elec_m, int64_t elec_n, int64_t prot_m, int64_t prot_n, + const value_type* elec_Ps, int64_t elec_ldps, + const value_type* elec_Pz, int64_t elec_ldpz, + const value_type* prot_Ps, int64_t prot_ldps, + const value_type* prot_Pz, int64_t prot_ldpz, + value_type* elec_VXCs, int64_t elec_ldvxcs, + value_type* elec_VXCz, int64_t elec_ldvxcz, + value_type* prot_VXCs, int64_t prot_ldvxcs, + value_type* prot_VXCz, int64_t prot_ldvxcz, + value_type* elec_EXC, value_type* prot_EXC, const IntegratorSettingsXC& settings ) override; /// RKS EXC Gradient void eval_exc_grad_( int64_t m, int64_t n, const value_type* P, diff --git a/src/xc_integrator/shell_batched/shell_batched_replicated_xc_integrator_exc_vxc.hpp b/src/xc_integrator/shell_batched/shell_batched_replicated_xc_integrator_exc_vxc.hpp index 4b56692b..9646a343 100644 --- a/src/xc_integrator/shell_batched/shell_batched_replicated_xc_integrator_exc_vxc.hpp +++ b/src/xc_integrator/shell_batched/shell_batched_replicated_xc_integrator_exc_vxc.hpp @@ -149,6 +149,45 @@ void ShellBatchedReplicatedXCIntegrator +void ShellBatchedReplicatedXCIntegrator:: + neo_eval_exc_vxc_( int64_t elec_m, int64_t elec_n, int64_t prot_m, int64_t prot_n, + const value_type* elec_Ps, int64_t elec_ldps, + const value_type* prot_Ps, int64_t prot_ldps, + const value_type* prot_Pz, int64_t prot_ldpz, + value_type* elec_VXCs, int64_t elec_ldvxcs, + value_type* prot_VXCs, int64_t prot_ldvxcs, + value_type* prot_VXCz, int64_t prot_ldvxcz, + value_type* elec_EXC, value_type* prot_EXC, const IntegratorSettingsXC& settings ) { + + + GauXC::util::unused(elec_m, elec_n, prot_m, prot_n, elec_Ps, elec_ldps, + prot_Ps, prot_ldps, prot_Pz, prot_ldpz, elec_VXCs, elec_ldvxcs, + prot_VXCs, prot_ldvxcs, prot_VXCz, prot_ldvxcz, elec_EXC, prot_EXC, settings); + GAUXC_GENERIC_EXCEPTION("NEO-UKS ShellBatched Not Yet Implemented"); + +} + +template +void ShellBatchedReplicatedXCIntegrator:: + neo_eval_exc_vxc_( int64_t elec_m, int64_t elec_n, int64_t prot_m, int64_t prot_n, + const value_type* elec_Ps, int64_t elec_ldps, + const value_type* elec_Pz, int64_t elec_ldpz, + const value_type* prot_Ps, int64_t prot_ldps, + const value_type* prot_Pz, int64_t prot_ldpz, + value_type* elec_VXCs, int64_t elec_ldvxcs, + value_type* elec_VXCz, int64_t elec_ldvxcz, + value_type* prot_VXCs, int64_t prot_ldvxcs, + value_type* prot_VXCz, int64_t prot_ldvxcz, + value_type* elec_EXC, value_type* prot_EXC, const IntegratorSettingsXC& settings ) { + + GauXC::util::unused(elec_m, elec_n, prot_m, prot_n, elec_Ps, elec_ldps, elec_Pz, elec_ldpz, + prot_Ps, prot_ldps, prot_Pz, prot_ldpz, elec_VXCs, elec_ldvxcs, elec_VXCz, elec_ldvxcz, + prot_VXCs, prot_ldvxcs, prot_VXCz, prot_ldvxcz, elec_EXC, prot_EXC, settings); + GAUXC_GENERIC_EXCEPTION("NEO-UKS ShellBatched Not Yet Implemented"); + +} + template void ShellBatchedReplicatedXCIntegrator:: exc_vxc_local_work_( const basis_type& basis, diff --git a/tests/ini_input.hpp b/tests/ini_input.hpp index bd0de809..82796815 100644 --- a/tests/ini_input.hpp +++ b/tests/ini_input.hpp @@ -23,9 +23,8 @@ * \param [in/out] s std::string to be trimmed */ static inline std::string& trim_left(std::string &s) { - s.erase(s.begin(), std::find_if(s.begin(), s.end(), - std::not1(std::ptr_fun(std::isspace)))); - return s; + s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](int c) {return !std::isspace(c);})); + return s; }; // trim_left @@ -35,9 +34,8 @@ static inline std::string& trim_left(std::string &s) { * \param [in/out] s std::string to be trimmed */ static inline std::string& trim_right(std::string &s) { - s.erase(std::find_if(s.rbegin(), s.rend(), - std::not1(std::ptr_fun(std::isspace))).base(), s.end()); - return s; + s.erase(std::find_if(s.rbegin(), s.rend(), [](int c) {return !std::isspace(c);}).base(), s.end()); + return s; }; // trim_right diff --git a/tests/ref_data/coh2_blyp_epc17-2_cc-pvdz_pb4d_ssf.hdf5 b/tests/ref_data/coh2_blyp_epc17-2_cc-pvdz_pb4d_ssf.hdf5 new file mode 100644 index 00000000..dfa79658 Binary files /dev/null and b/tests/ref_data/coh2_blyp_epc17-2_cc-pvdz_pb4d_ssf.hdf5 differ diff --git a/tests/ref_data/coh2_blyp_epc17-2_cc-pvdz_pb4d_ssf_uks.hdf5 b/tests/ref_data/coh2_blyp_epc17-2_cc-pvdz_pb4d_ssf_uks.hdf5 new file mode 100644 index 00000000..08d65f7e Binary files /dev/null and b/tests/ref_data/coh2_blyp_epc17-2_cc-pvdz_pb4d_ssf_uks.hdf5 differ diff --git a/tests/ref_data/coh2_blyp_epc17-2_sto-3g_protsp_ssf.hdf5 b/tests/ref_data/coh2_blyp_epc17-2_sto-3g_protsp_ssf.hdf5 new file mode 100644 index 00000000..52c934fc Binary files /dev/null and b/tests/ref_data/coh2_blyp_epc17-2_sto-3g_protsp_ssf.hdf5 differ diff --git a/tests/ref_data/coh2_blyp_epc18-2_cc-pvdz_pb4d_ssf.hdf5 b/tests/ref_data/coh2_blyp_epc18-2_cc-pvdz_pb4d_ssf.hdf5 new file mode 100644 index 00000000..f1dfb03c Binary files /dev/null and b/tests/ref_data/coh2_blyp_epc18-2_cc-pvdz_pb4d_ssf.hdf5 differ diff --git a/tests/xc_integrator.cxx b/tests/xc_integrator.cxx index 88527fa8..16a30fd5 100644 --- a/tests/xc_integrator.cxx +++ b/tests/xc_integrator.cxx @@ -22,14 +22,15 @@ using namespace GauXC; void test_xc_integrator( ExecutionSpace ex, const RuntimeEnvironment& rt, std::string reference_file, - functional_type& func, + std::shared_ptr func, PruningScheme pruning_scheme, bool check_grad, bool check_integrate_den, bool check_k, std::string integrator_kernel = "Default", std::string reduction_kernel = "Default", - std::string lwd_kernel = "Default") { + std::string lwd_kernel = "Default", + std::shared_ptr epcfunc = nullptr) { // Read the reference file using matrix_type = Eigen::MatrixXd; @@ -39,6 +40,11 @@ void test_xc_integrator( ExecutionSpace ex, const RuntimeEnvironment& rt, double EXC_ref; std::vector EXC_GRAD_ref; bool has_k = false, has_exc_grad = false, rks = true, uks = false, gks = false; + // For NEO-DFT Test: + bool neo = false; + BasisSet protonic_basis; + matrix_type protonic_Ps, protonic_Pz, protonic_VXCs_ref, protonic_VXCz_ref; + double protonic_EXC_ref; { read_hdf5_record( mol, reference_file, "/MOLECULE" ); read_hdf5_record( basis, reference_file, "/BASIS" ); @@ -68,9 +74,15 @@ void test_xc_integrator( ExecutionSpace ex, const RuntimeEnvironment& rt, gks=true; } + if (file.exist("/PROTONIC_DENSITY_SCALAR") and file.exist("/PROTONIC_DENSITY_Z")) + neo=true; + + if(neo and !integrator_kernel.compare("ShellBatched")) return; + auto dset = file.getDataSet(den); auto dims = dset.getDimensions(); + P = matrix_type( dims[0], dims[1] ); VXC_ref = matrix_type( dims[0], dims[1] ); if (not rks) { @@ -123,9 +135,39 @@ void test_xc_integrator( ExecutionSpace ex, const RuntimeEnvironment& rt, dset = file.getDataSet("/K"); dset.read( K_ref.data() ); } + + if (neo) { + + read_hdf5_record( protonic_basis, reference_file, "/PROTONIC_BASIS" ); + + std::string prot_den_s="/PROTONIC_DENSITY_SCALAR"; + std::string prot_den_z="/PROTONIC_DENSITY_Z"; + std::string prot_vxc_s="/PROTONIC_VXC_SCALAR"; + std::string prot_vxc_z="/PROTONIC_VXC_Z"; + + auto protonic_dset = file.getDataSet(prot_den_s); + auto protonic_dims = protonic_dset.getDimensions(); + + protonic_Ps = matrix_type( protonic_dims[0], protonic_dims[1] ); + protonic_Pz = matrix_type( protonic_dims[0], protonic_dims[1] ); + protonic_VXCs_ref = matrix_type( protonic_dims[0], protonic_dims[1] ); + protonic_VXCz_ref = matrix_type( protonic_dims[0], protonic_dims[1] ); + + protonic_dset.read( protonic_Ps.data() ); + protonic_dset = file.getDataSet(prot_vxc_s); + protonic_dset.read( protonic_VXCs_ref.data() ); + + protonic_dset = file.getDataSet(prot_den_z); + protonic_dset.read( protonic_Pz.data() ); + protonic_dset = file.getDataSet(prot_vxc_z); + protonic_dset.read( protonic_VXCz_ref.data() ); + + protonic_dset = file.getDataSet("/PROTONIC_EXC"); + protonic_dset.read( &protonic_EXC_ref ); + } } - if( (uks or gks) and ex == ExecutionSpace::Device and func.is_mgga() ) return; + if( (uks or gks) and ex == ExecutionSpace::Device and func->is_mgga() ) return; for( auto& sh : basis ) sh.set_shell_tolerance( std::numeric_limits::epsilon() ); @@ -135,14 +177,21 @@ void test_xc_integrator( ExecutionSpace ex, const RuntimeEnvironment& rt, // Construct Load Balancer LoadBalancerFactory lb_factory(ExecutionSpace::Host, "Default"); - auto lb = lb_factory.get_instance(rt, mol, mg, basis); + std::unique_ptr lb; + if (neo) { + for( auto& sh : protonic_basis ) + sh.set_shell_tolerance( std::numeric_limits::epsilon() ); + lb = std::make_unique( lb_factory.get_instance(rt, mol, mg, basis, protonic_basis) ); + } else{ + lb = std::make_unique( lb_factory.get_instance(rt, mol, mg, basis) ); + } // Construct Weights Module MolecularWeightsFactory mw_factory( ex, "Default", MolecularWeightsSettings{} ); auto mw = mw_factory.get_instance(); // Apply partition weights - mw.modify_weights(lb); + mw.modify_weights(*lb); // Construct XC Functional //auto Spin = uks ? ExchCXX::Spin::Polarized : ExchCXX::Spin::Unpolarized; @@ -151,39 +200,75 @@ void test_xc_integrator( ExecutionSpace ex, const RuntimeEnvironment& rt, // Construct XCIntegrator XCIntegratorFactory integrator_factory( ex, "Replicated", integrator_kernel, lwd_kernel, reduction_kernel ); - auto integrator = integrator_factory.get_instance( func, lb ); + std::unique_ptr> integrator; + if(neo){ + integrator = std::make_unique>( integrator_factory.get_instance(*func, *epcfunc, *lb) ); + }else{ + integrator = std::make_unique>( integrator_factory.get_instance(*func, *lb) ); + } // Integrate Density if( check_integrate_den and rks) { auto N_EL_ref = std::accumulate( mol.begin(), mol.end(), 0ul, [](const auto& a, const auto &b) { return a + b.Z.get(); }); - auto N_EL = integrator.integrate_den( P ); + auto N_EL = integrator->integrate_den( P ); // Factor of 2 b/c P is the alpha density for RKS CHECK( N_EL == Approx(N_EL_ref/2.0).epsilon(1e-6) ); } // Integrate EXC/VXC if ( rks ) { - auto [ EXC, VXC ] = integrator.eval_exc_vxc( P ); + double EXC, protonic_EXC; + matrix_type VXC, protonic_VXCs, protonic_VXCz; + + if(neo) std::tie( EXC, protonic_EXC, VXC, protonic_VXCs, protonic_VXCz ) = integrator->neo_eval_exc_vxc( P, protonic_Ps, protonic_Pz ); + else std::tie( EXC, VXC ) = integrator->eval_exc_vxc( P ); // Check EXC/VXC auto VXC_diff_nrm = ( VXC - VXC_ref ).norm(); CHECK( EXC == Approx( EXC_ref ) ); CHECK( VXC_diff_nrm / basis.nbf() < 1e-10 ); + + if ( neo ) { + auto protonic_VXCs_diff_nrm = ( protonic_VXCs - protonic_VXCs_ref ).norm(); + auto protonic_VXCz_diff_nrm = ( protonic_VXCz - protonic_VXCz_ref ).norm(); + CHECK( protonic_EXC == Approx( protonic_EXC_ref ) ); + CHECK( protonic_VXCs_diff_nrm / protonic_basis.nbf() < 1e-10 ); + CHECK( protonic_VXCz_diff_nrm / protonic_basis.nbf() < 1e-10 ); + } + // Check if the integrator propagates state correctly - { - auto [ EXC1, VXC1 ] = integrator.eval_exc_vxc( P ); + { + double EXC1, protonic_EXC1; + matrix_type VXC1, protonic_VXCs1, protonic_VXCz1; + + if(neo) std::tie( EXC1, protonic_EXC1, VXC1, protonic_VXCs1, protonic_VXCz1 ) = integrator->neo_eval_exc_vxc( P, protonic_Ps, protonic_Pz ); + else std::tie( EXC1, VXC1 ) = integrator->eval_exc_vxc( P ); + CHECK( EXC1 == Approx( EXC_ref ) ); auto VXC1_diff_nrm = ( VXC1 - VXC_ref ).norm(); CHECK( VXC1_diff_nrm / basis.nbf() < 1e-10 ); + + if ( neo ) { + auto protonic_VXCs1_diff_nrm = ( protonic_VXCs1 - protonic_VXCs_ref ).norm(); + auto protonic_VXCz1_diff_nrm = ( protonic_VXCz1 - protonic_VXCz_ref ).norm(); + CHECK( protonic_EXC1 == Approx( protonic_EXC_ref ) ); + CHECK( protonic_VXCs1_diff_nrm / protonic_basis.nbf() < 1e-10 ); + CHECK( protonic_VXCz1_diff_nrm / protonic_basis.nbf() < 1e-10 ); + } } // Check EXC-only path - auto EXC2 = integrator.eval_exc( P ); + if(neo) return; // NEO EXC-only NYI + auto EXC2 = integrator->eval_exc( P ); CHECK(EXC2 == Approx(EXC)); } else if (uks) { - auto [ EXC, VXC, VXCz ] = integrator.eval_exc_vxc( P, Pz ); + double EXC, protonic_EXC; + matrix_type VXC, VXCz, protonic_VXCs, protonic_VXCz; + + if(neo) std::tie( EXC, protonic_EXC, VXC, VXCz, protonic_VXCs, protonic_VXCz ) = integrator->neo_eval_exc_vxc( P, Pz, protonic_Ps, protonic_Pz ); + else std::tie( EXC, VXC, VXCz ) = integrator->eval_exc_vxc( P, Pz ); // Check EXC/VXC auto VXC_diff_nrm = ( VXC - VXC_ref ).norm(); @@ -191,21 +276,44 @@ void test_xc_integrator( ExecutionSpace ex, const RuntimeEnvironment& rt, CHECK( EXC == Approx( EXC_ref ) ); CHECK( VXC_diff_nrm / basis.nbf() < 1e-10 ); CHECK( VXCz_diff_nrm / basis.nbf() < 1e-10 ); + + if ( neo ) { + auto protonic_VXCs_diff_nrm = ( protonic_VXCs - protonic_VXCs_ref ).norm(); + auto protonic_VXCz_diff_nrm = ( protonic_VXCz - protonic_VXCz_ref ).norm(); + CHECK( protonic_EXC == Approx( protonic_EXC_ref ) ); + CHECK( protonic_VXCs_diff_nrm / protonic_basis.nbf() < 1e-10 ); + CHECK( protonic_VXCz_diff_nrm / protonic_basis.nbf() < 1e-10 ); + } + // Check if the integrator propagates state correctly - { - auto [ EXC1, VXC1, VXCz1 ] = integrator.eval_exc_vxc( P, Pz ); - CHECK( EXC1 == Approx( EXC_ref ) ); + { + double EXC1, protonic_EXC1; + matrix_type VXC1, VXCz1, protonic_VXCs1, protonic_VXCz1; + + if(neo) std::tie( EXC1, protonic_EXC1, VXC1, VXCz1, protonic_VXCs1, protonic_VXCz1 ) = integrator->neo_eval_exc_vxc( P, Pz, protonic_Ps, protonic_Pz ); + else std::tie( EXC1, VXC1, VXCz1 ) = integrator->eval_exc_vxc( P, Pz ); + auto VXC1_diff_nrm = ( VXC1 - VXC_ref ).norm(); auto VXCz1_diff_nrm = ( VXCz1 - VXCz_ref ).norm(); + CHECK( EXC1 == Approx( EXC_ref ) ); CHECK( VXC1_diff_nrm / basis.nbf() < 1e-10 ); CHECK( VXCz1_diff_nrm / basis.nbf() < 1e-10 ); + + if ( neo ) { + auto protonic_VXCs1_diff_nrm = ( protonic_VXCs1 - protonic_VXCs_ref ).norm(); + auto protonic_VXCz1_diff_nrm = ( protonic_VXCz1 - protonic_VXCz_ref ).norm(); + CHECK( protonic_EXC1 == Approx( protonic_EXC_ref ) ); + CHECK( protonic_VXCs1_diff_nrm / protonic_basis.nbf() < 1e-10 ); + CHECK( protonic_VXCz1_diff_nrm / protonic_basis.nbf() < 1e-10 ); + } } // Check EXC-only path - auto EXC2 = integrator.eval_exc( P, Pz ); + if(neo) return; // NEO EXC-only NYI + auto EXC2 = integrator->eval_exc( P, Pz ); CHECK(EXC2 == Approx(EXC)); } else if (gks) { - auto [ EXC, VXC, VXCz, VXCy, VXCx ] = integrator.eval_exc_vxc( P, Pz, Py, Px ); + auto [ EXC, VXC, VXCz, VXCy, VXCx ] = integrator->eval_exc_vxc( P, Pz, Py, Px ); // Check EXC/VXC auto VXC_diff_nrm = ( VXC - VXC_ref ).norm(); @@ -220,7 +328,7 @@ void test_xc_integrator( ExecutionSpace ex, const RuntimeEnvironment& rt, CHECK( VXCx_diff_nrm / basis.nbf() < 1e-10 ); // Check if the integrator propagates state correctly { - auto [ EXC1, VXC1, VXCz1, VXCy1, VXCx1] = integrator.eval_exc_vxc( P, Pz, Py, Px ); + auto [ EXC1, VXC1, VXCz1, VXCy1, VXCx1] = integrator->eval_exc_vxc( P, Pz, Py, Px ); CHECK( EXC1 == Approx( EXC_ref ) ); auto VXC1_diff_nrm = ( VXC1 - VXC_ref ).norm(); auto VXCz1_diff_nrm = ( VXCz1 - VXCz_ref ).norm(); @@ -233,7 +341,7 @@ void test_xc_integrator( ExecutionSpace ex, const RuntimeEnvironment& rt, } // Check EXC-only path - auto EXC2 = integrator.eval_exc( P, Pz, Py, Px ); + auto EXC2 = integrator->eval_exc( P, Pz, Py, Px ); CHECK(EXC2 == Approx(EXC)); } @@ -241,7 +349,7 @@ void test_xc_integrator( ExecutionSpace ex, const RuntimeEnvironment& rt, // Check EXC Grad if( check_grad and has_exc_grad and rks) { - auto EXC_GRAD = integrator.eval_exc_grad( P ); + auto EXC_GRAD = integrator->eval_exc_grad( P ); using map_type = Eigen::Map; map_type EXC_GRAD_ref_map( EXC_GRAD_ref.data(), mol.size(), 3 ); map_type EXC_GRAD_map( EXC_GRAD.data(), mol.size(), 3 ); @@ -256,14 +364,15 @@ void test_xc_integrator( ExecutionSpace ex, const RuntimeEnvironment& rt, std::cout << "Skiping device sn-K + L > 2" << std::endl; return; } - auto K = integrator.eval_exx( P ); + auto K = integrator->eval_exx( P ); CHECK((K - K.transpose()).norm() < std::numeric_limits::epsilon()); // Symmetric CHECK( (K - K_ref).norm() / basis.nbf() < 1e-7 ); } } -void test_integrator(std::string reference_file, functional_type& func, PruningScheme pruning_scheme) { +void test_integrator(std::string reference_file, std::shared_ptr func, PruningScheme pruning_scheme, + std::shared_ptr epcfunc = nullptr) { #ifdef GAUXC_HAS_DEVICE auto rt = DeviceRuntimeEnvironment(GAUXC_MPI_CODE(MPI_COMM_WORLD,) 0.9); @@ -275,11 +384,11 @@ void test_integrator(std::string reference_file, functional_type& func, PruningS SECTION( "Host" ) { SECTION("Reference") { test_xc_integrator( ExecutionSpace::Host, rt, reference_file, func, - pruning_scheme, true, true, true ); + pruning_scheme, true, true, true, "Default", "Default", "Default", epcfunc); } SECTION("ShellBatched") { test_xc_integrator( ExecutionSpace::Host, rt, reference_file, func, - pruning_scheme, false, false, false, "ShellBatched" ); + pruning_scheme, false, false, false, "ShellBatched", "Default", "Default", epcfunc); } } #endif @@ -300,7 +409,7 @@ void test_integrator(std::string reference_file, functional_type& func, PruningS #ifdef GAUXC_HAS_MAGMA SECTION( "Incore - MPI Reduction - MAGMA" ) { - if(not func.is_mgga() and not func.is_polarized()) { + if(not func->is_mgga() and not func->is_polarized()) { test_xc_integrator( ExecutionSpace::Device, rt, reference_file, func, pruning_scheme, false, true, check_k, "Default", "Default", @@ -311,7 +420,7 @@ void test_integrator(std::string reference_file, functional_type& func, PruningS #ifdef GAUXC_HAS_CUTLASS SECTION( "Incore - MPI Reduction - CUTLASS" ) { - if(not func.is_mgga() and not func.is_polarized()) { + if(not func->is_mgga() and not func->is_polarized()) { test_xc_integrator( ExecutionSpace::Device, rt, reference_file, func, pruning_scheme, false, true, false, "Default", "Default", @@ -339,8 +448,8 @@ void test_integrator(std::string reference_file, functional_type& func, PruningS } -functional_type make_functional(ExchCXX::Functional func_key, ExchCXX::Spin spin) { - return functional_type(ExchCXX::Backend::builtin, func_key, spin); +std::shared_ptr make_functional(ExchCXX::Functional func_key, ExchCXX::Spin spin) { + return std::make_shared(ExchCXX::Backend::builtin, func_key, spin); } @@ -353,6 +462,8 @@ TEST_CASE( "XC Integrator", "[xc-integrator]" ) { auto blyp = ExchCXX::Functional::BLYP; auto scan = ExchCXX::Functional::SCAN; auto r2scanl = ExchCXX::Functional::R2SCANL; + auto epc17_2 = ExchCXX::Functional::EPC17_2; + auto epc18_2 = ExchCXX::Functional::EPC18_2; // LDA Test SECTION( "Benzene / SVWN5 / cc-pVDZ" ) { @@ -447,4 +558,35 @@ TEST_CASE( "XC Integrator", "[xc-integrator]" ) { test_integrator(GAUXC_REF_DATA_PATH "/h2o2_def2-qzvp.hdf5", func, PruningScheme::Unpruned ); } -} + + // EPC Tests + // epc-17-2 Test (small basis) + SECTION( "COH2 / BLYP,EPC-17-2 / sto-3g, prot-sp" ) { + auto func = make_functional(blyp, unpol); + auto epcfunc = make_functional(epc17_2, pol); + test_integrator(GAUXC_REF_DATA_PATH "/coh2_blyp_epc17-2_sto-3g_protsp_ssf.hdf5", + func, PruningScheme::Unpruned, epcfunc); + } + // epc-17-2 Test (larger basis) + SECTION( "COH2 / BLYP,EPC-17-2 / cc-pVDZ, prot-PB4-D" ) { + auto func = make_functional(blyp, unpol); + auto epcfunc = make_functional(epc17_2, pol); + test_integrator(GAUXC_REF_DATA_PATH "/coh2_blyp_epc17-2_cc-pvdz_pb4d_ssf.hdf5", + func, PruningScheme::Unpruned, epcfunc); + } + // epc-18-2 Test + SECTION( "COH2 / BLYP,EPC-18-2 / cc-pVDZ, prot-PB4-D" ) { + auto func = make_functional(blyp, unpol); + auto epcfunc = make_functional(epc18_2, pol); + test_integrator(GAUXC_REF_DATA_PATH "/coh2_blyp_epc18-2_cc-pvdz_pb4d_ssf.hdf5", + func, PruningScheme::Unpruned, epcfunc); + } + // UKS NEO epc-17-2 Test + SECTION( "OH2+ / BLYP,EPC-17-2 / cc-pVDZ, prot-PB4-D" ) { + auto func = make_functional(blyp, pol); + auto epcfunc = make_functional(epc17_2, pol); + test_integrator(GAUXC_REF_DATA_PATH "/coh2_blyp_epc17-2_cc-pvdz_pb4d_ssf_uks.hdf5", + func, PruningScheme::Unpruned, epcfunc); + } +} +