-
Notifications
You must be signed in to change notification settings - Fork 169
Description
Describe the issue
When I run the following code with the given AMGX solver configuration, I notice that AMGX properly sets up matrix A and vector b, and correctly uploads the initial x vector; however, it stalls at the solve stage. Is it unable to proceed with the solution?
Code:
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include <amgx_c.h>
#define CHECK_AMGX(call)
do {
AMGX_RC rc = (call);
if (rc != AMGX_RC_OK) {
std::cerr << "AMGX error at " << FILE
<< ":" << LINE
<< " - error code: " << rc
<< std::endl;
std::exit(1);
}
} while (0)
inline int idx(int i, int j, int nx) { return i + j * nx; }
void coo_to_csr(int n, int nnz,
const std::vector &coo_row,
const std::vector &coo_col,
const std::vector &coo_val,
std::vector &row_ptr,
std::vector &col_ind,
std::vector &values)
{
row_ptr.assign(n + 1, 0);
col_ind.resize(nnz);
values.resize(nnz);
for (int k = 0; k < nnz; ++k) {
int r = coo_row[k];
if (r < 0 || r >= n) {
std::cerr << "coo_to_csr: row index out of range: " << r << std::endl;
std::exit(1);
}
row_ptr[r + 1]++;
}
for (int i = 0; i < n; ++i) {
row_ptr[i + 1] += row_ptr[i];
}
std::vector<int> offset = row_ptr;
for (int k = 0; k < nnz; ++k) {
int r = coo_row[k];
int dest = offset[r]++;
col_ind[dest] = coo_col[k];
values[dest] = coo_val[k];
}
}
// A[p,p] = 1, rhs[p] = 0
void build_2d_poisson_coo_periodic(
int nx, int ny, double hx, double hy,
const std::function<double(int,int)> &f,
std::vector &coo_row,
std::vector &coo_col,
std::vector &coo_val,
std::vector &rhs)
{
const int N = nx * ny;
rhs.assign(N, 0.0);
coo_row.clear(); coo_col.clear(); coo_val.clear();
coo_row.reserve(static_cast<size_t>(N) * 5);
coo_col.reserve(static_cast<size_t>(N) * 5);
coo_val.reserve(static_cast<size_t>(N) * 5);
const double cx = 1.0 / (hx * hx);
const double cy = 1.0 / (hy * hy);
auto try_add = [&](int r, int c, double v) {
coo_row.push_back(r);
coo_col.push_back(c);
coo_val.push_back(v);
};
for (int j = 0; j < ny; ++j) {
for (int i = 0; i < nx; ++i) {
int p = idx(i, j, nx);
if (i == 0 && j == 0) {
try_add(p, p, 1.0);
rhs[p] = 0.0;
continue;
}
int iL = (i - 1 + nx) % nx;
int iR = (i + 1) % nx;
int jD = (j - 1 + ny) % ny;
int jU = (j + 1) % ny;
int qL = idx(iL, j, nx);
int qR = idx(iR, j, nx);
int qD = idx(i, jD, nx);
int qU = idx(i, jU, nx);
double diag = 2.0 * (cx + cy);
double b = f(i, j); // f(i,j) = 8π^2 sin(2πx) sin(2πy)
try_add(p, p, diag);
try_add(p, qL, -cx);
try_add(p, qR, -cx);
try_add(p, qD, -cy);
try_add(p, qU, -cy);
rhs[p] = b;
}
}
}
std::vector analytical_solution(int nx, int ny, double hx, double hy)
{
const double PI = 3.14159265358979323846;
std::vector exact(nx * ny, 0.0);
for (int j = 0; j < ny; ++j) {
double y = j * hy;
for (int i = 0; i < nx; ++i) {
double x = i * hx;
exact[idx(i, j, nx)] = std::sin(2.0 * PI * x) * std::sin(2.0 * PI * y);
}
}
return exact;
}
struct ErrorStats {
double linf_abs;
double l2_abs;
double l2_rel;
};
ErrorStats compute_errors(const std::vector &u_num,
const std::vector &u_ex,
double hx, double hy)
{
double linf = 0.0;
double l2 = 0.0;
double l2_ex = 0.0;
const int N = static_cast(u_num.size());
for (int k = 0; k < N; ++k) {
double d = std::abs(u_num[k] - u_ex[k]);
linf = std::max(linf, d);
l2 += d * d;
l2_ex += u_ex[k] * u_ex[k];
}
l2 = std::sqrt(l2 * hx * hy);
l2_ex = std::sqrt(l2_ex * hx * hy);
ErrorStats s;
s.linf_abs = linf;
s.l2_abs = l2;
s.l2_rel = (l2_ex > 0.0) ? (l2 / l2_ex) : 0.0;
return s;
}
int main()
{
const int nx = 3201;
const int ny = 3201;
const double Lx = 1.0, Ly = 1.0;
const double hx = Lx / (nx - 1);
const double hy = Ly / (ny - 1);
const int N = nx * ny;
const double PI = 3.14159265358979323846;
auto f = [&](int i, int j) {
double x = i * hx;
double y = j * hy;
return 8.0 * PI * PI *
std::sin(2.0 * PI * x) *
std::sin(2.0 * PI * y);
};
std::vector<int> coo_row, coo_col;
std::vector<double> coo_val, rhs;
build_2d_poisson_coo_periodic(nx, ny, hx, hy,
f,
coo_row, coo_col, coo_val, rhs);
const int nnz = static_cast<int>(coo_row.size());
std::vector<int> row_ptr, col_ind;
std::vector<double> values;
coo_to_csr(N, nnz, coo_row, coo_col, coo_val, row_ptr, col_ind, values);
CHECK_AMGX(AMGX_initialize());
// CHECK_AMGX(AMGX_initialize_plugins());
AMGX_config_handle cfg;
// Reading from JSON configuration file (filename: cheaper_Solver.json)
CHECK_AMGX(AMGX_config_create_from_file(
&cfg,
"./cheaper_Solver.json"));
AMGX_resources_handle rsrc;
CHECK_AMGX(AMGX_resources_create_simple(&rsrc, cfg));
AMGX_matrix_handle A;
AMGX_vector_handle b, x;
AMGX_solver_handle solver;
CHECK_AMGX(AMGX_matrix_create(&A, rsrc, AMGX_mode_dDDI));
CHECK_AMGX(AMGX_vector_create(&b, rsrc, AMGX_mode_dDDI));
CHECK_AMGX(AMGX_vector_create(&x, rsrc, AMGX_mode_dDDI));
CHECK_AMGX(AMGX_solver_create(&solver, rsrc, AMGX_mode_dDDI, cfg));
std::cout << "Uploading matrix to AMGX...\n";
CHECK_AMGX(AMGX_matrix_upload_all(
A, N, nnz,
1, 1,
row_ptr.data(),
col_ind.data(),
values.data(),
nullptr
));
std::cout << "Uploading RHS and initial value...\n";
CHECK_AMGX(AMGX_vector_upload(b, N, 1, rhs.data()));
std::vector h_x0(N, 0.0);
CHECK_AMGX(AMGX_vector_upload(x, N, 1, h_x0.data()));
const double ABS_TOL = 1e-7;
const int MAX_OUTER_IT = 20;
std::cout << "Setup and solve...\n";
CHECK_AMGX(AMGX_solver_setup(solver, A));
double abs_residual = 1e30;
int outer_iter = 0;
while (abs_residual > ABS_TOL && outer_iter < MAX_OUTER_IT)
{
std::cout << "\n=== AMGX Outer Iteration " << outer_iter << " ===\n";
CHECK_AMGX(AMGX_solver_solve(solver, b, x));
AMGX_SOLVE_STATUS status;
CHECK_AMGX(AMGX_solver_get_status(solver, &status));
int iters = 0;
CHECK_AMGX(AMGX_solver_get_iterations_number(solver, &iters));
abs_residual = 0.0;
CHECK_AMGX(AMGX_solver_get_iteration_residual(
solver,
iters,
0, // block_idx
&abs_residual));
std::cout << " inner iters = " << iters
<< ", |r|_L2 = " << std::scientific << abs_residual << "\n";
++outer_iter;
}
if (abs_residual > ABS_TOL) {
std::cerr << "Warning: After " << outer_iter
<< " outer iterations, the absolute residual is still " << std::scientific << abs_residual
<< " > tol = " << ABS_TOL << "\n";
}
std::vector<double> u(N);
CHECK_AMGX(AMGX_vector_download(x, u.data()));
auto exact = analytical_solution(nx, ny, hx, hy);
auto err = compute_errors(u, exact, hx, hy);
CHECK_AMGX(AMGX_solver_destroy(solver));
CHECK_AMGX(AMGX_vector_destroy(x));
CHECK_AMGX(AMGX_vector_destroy(b));
CHECK_AMGX(AMGX_matrix_destroy(A));
CHECK_AMGX(AMGX_resources_destroy(rsrc));
CHECK_AMGX(AMGX_config_destroy(cfg));
CHECK_AMGX(AMGX_finalize());
return 0;
}
std::cout << "\nProgram execution completed!\n";
AMGX solver configuration
{
"config_version": 2,
"solver": "PCG",
"preconditioner": {
"solver": "AMG",
"algorithm": "AGGREGATION",
"cycle": "F",
"max_levels": 30,
"presweeps": 2,
"postsweeps": 2,
"coarsest_sweeps": 4,
"max_iters": 1,
"smoother": "JACOBI_L1",
"selector": "SIZE_2",
"strength_threshold": 0.25,
"interpolator": "D2",
"max_row_sum": 0.9,
"coarse_solver": "JACOBI_L1"
},
"convergence": "ABSOLUTE",
"max_iters": 1000,
"tolerance": 1e-7,
"norm": "L2",
"print_solve_stats": 0,
"obtain_timings": 1,
"monitor_residual": 1,
"store_res_history": 1,
"print_grid_stats": 0,
"matrix_coloring_scheme": "MIN_MAX",
"dense_lu_num_rows": 128,
"reorder_cols_by_color": 1,
"print_config": 1
}
RUN log
AMGX version 2.5.0
Built on Nov 9 2025, 19:52:31
Compiled with CUDA Runtime 11.5, using CUDA driver 11.0
Device 0: A100-SXM-80GB
AMG Configuration:
Default values:
GS_L1_variant = 0
affinity_iterations = 4
affinity_vectors = 4
aggregate_size = 2
aggregation_edge_weight_component = 0
aggregation_passes = 3
aggressive_interpolator = MULTIPASS
aggressive_levels = 0
aggressive_selector = DEFAULT
algorithm = CLASSICAL
alt_rel_tolerance = 1e-12
amg_consolidation_flag = 0
amg_host_levels_rows = -1
block_convert = 0
block_format = ROW_MAJOR
boundary_coloring = SYNC_COLORS
cf_smoothing_mode = 0
cheby_max_lambda = 1
cheby_min_lambda = 0.125
chebyshev_lambda_estimate_mode = 0
chebyshev_polynomial_order = 5
coarseAgenerator = LOW_DEG
coarseAgenerator_coarse = LOW_DEG
coarse_smoother = BLOCK_JACOBI
coarse_solver = DENSE_LU_SOLVER
coarsen_threshold = 1
coarsest_sweeps = 2
coloring_custom_arg =
coloring_level = 1
coloring_try_remove_last_colors = 0
communicator = MPI
complex_conversion = 0
convergence = ABSOLUTE
convergence_analysis = 0
cycle = V
cycle_iters = 2
dense_lu_max_rows = 0
dense_lu_num_rows = 128
determinism_flag = 0
device_alloc_scaling_factor = 10
device_alloc_scaling_threshold = 16384
device_consolidation_pool_size = 268435456
device_mem_pool_max_alloc_size = 20971520
device_mem_pool_size = 268435456
device_mem_pool_size_limit = 0
energymin_interpolator = EM
energymin_selector = CR
error_scaling = 0
exact_coarse_solve = 0
exception_handling = 0
filter_weights = 0
filter_weights_alpha = 0.5
fine_level_consolidation = 0
fine_levels = -1
fine_smoother = BLOCK_JACOBI
finest_sweeps = -1
full_ghost_level = 0
geometric_dim = 2
ghost_offdiag_limit = 0
gmres_krylov_dim = 0
gmres_n_restart = 20
halo_coloring = LAST
handshaking_phases = 1
high_priority_stream = 0
ilu_sparsity_level = 0
initial_color = 0
insert_diag_while_reordering = 0
intensive_smoothing = 0
interp_max_elements = -1
interp_truncation_factor = 1.1
interpolator = D1
jacobi_iters = 5
kaczmarz_coloring_needed = 1
kpz_mu = 4
kpz_order = 3
late_rejection = 0
matrix_coloring_scheme = MIN_MAX
matrix_consolidation_lower_threshold = 0
matrix_consolidation_upper_threshold = 1000
matrix_halo_exchange = 0
matrix_writer = matrixmarket
max_coarse_iters = 100
max_iters = 100
max_levels = 100
max_matching_iterations = 15
max_num_hash = 7
max_row_sum = 1.1
max_unassigned_percentage = 0.05
max_uncolored_percentage = 0.15
merge_singletons = 1
min_coarse_rows = 2
min_fine_rows = 1
min_rows_latency_hiding = -1
modified_handshake = 0
monitor_residual = 0
norm = L2
notay_weights = 0
num_colors = 10
num_streams = 0
obtain_timings = 0
postsweeps = 1
preconditioner = AMG
presweeps = 1
print_aggregation_info = 0
print_coloring_info = 0
print_config = 0
print_grid_stats = 0
print_solve_stats = 0
print_vis_data = 0
rel_div_tolerance = -1
relaxation_factor = 0.9
reorder_cols_by_color = 0
reuse_scale = 0
rhs_from_a = 0
scaling = NONE
scaling_smoother_steps = 2
selector = PMIS
separation_exterior = OWNED
separation_interior = INTERIOR
serial_matching = 0
serialize_threads = 0
smoother = BLOCK_JACOBI
solver = AMG
solver_verbose = 0
spmm_gmem_size = 1024
spmm_max_attempts = 6
spmm_no_sort = 1
spmm_verbose = 0
store_res_history = 0
strength = AHAT
strength_threshold = 0.25
structure_reuse_levels = 0
subspace_dim_s = 8
symmetric_GS = 0
tolerance = 1e-12
use_bsrxmv = 0
use_cuda_ipc_consolidation = 0
use_cusparse_kernels = 0
use_opt_kernels = 0
use_scalar_norm = 0
use_sum_stopping_criteria = 0
verbosity_level = 3
weakness_bound = 2147483647
weight_formula = 0
User-defined parameters:
Current_scope:parameter_name(new_scope) = parameter_value
default:convergence = ABSOLUTE
default:dense_lu_num_rows = 128
default:matrix_coloring_scheme = MIN_MAX
default:max_iters = 1000
default:monitor_residual = 1
default:norm = L2
default:obtain_timings = 1
default:preconditioner(default_sub_preconditioner) = AMG
default:print_config = 1
default:print_grid_stats = 0
default:print_solve_stats = 0
default:reorder_cols_by_color = 1
default:solver = PCG
default:store_res_history = 1
default:tolerance = 1e-07
default_sub_preconditioner:algorithm = AGGREGATION
default_sub_preconditioner:coarse_solver = JACOBI_L1
default_sub_preconditioner:coarsest_sweeps = 4
default_sub_preconditioner:cycle = F
default_sub_preconditioner:interpolator = D2
default_sub_preconditioner:max_iters = 1
default_sub_preconditioner:max_levels = 30
default_sub_preconditioner:max_row_sum = 0.9
default_sub_preconditioner:postsweeps = 2
default_sub_preconditioner:presweeps = 2
default_sub_preconditioner:selector = SIZE_2
default_sub_preconditioner:smoother = JACOBI_L1
default_sub_preconditioner:strength_threshold = 0.25
Uploading matrix to AMGX...
Uploading RHS and initial value...
Setup and solve...
=== AMGX Outer Iteration 0 ===