Skip to content

[Issue]I think I might have encountered a bug. When I use the following configuration, the AMGX solver freezes. I'm not even sure if it has entered the solving state—it just stalls. #353

@Haiqing-Fang

Description

@Haiqing-Fang

Describe the issue
When I run the following code with the given AMGX solver configuration, I notice that AMGX properly sets up matrix A and vector b, and correctly uploads the initial x vector; however, it stalls at the solve stage. Is it unable to proceed with the solution?
Code:
#include
#include
#include
#include
#include
#include
#include
#include
#include

#include <amgx_c.h>

#define CHECK_AMGX(call)
do {
AMGX_RC rc = (call);
if (rc != AMGX_RC_OK) {
std::cerr << "AMGX error at " << FILE
<< ":" << LINE
<< " - error code: " << rc
<< std::endl;
std::exit(1);
}
} while (0)

inline int idx(int i, int j, int nx) { return i + j * nx; }

void coo_to_csr(int n, int nnz,
const std::vector &coo_row,
const std::vector &coo_col,
const std::vector &coo_val,
std::vector &row_ptr,
std::vector &col_ind,
std::vector &values)
{
row_ptr.assign(n + 1, 0);
col_ind.resize(nnz);
values.resize(nnz);

for (int k = 0; k < nnz; ++k) {
    int r = coo_row[k];
    if (r < 0 || r >= n) {
        std::cerr << "coo_to_csr: row index out of range: " << r << std::endl;
        std::exit(1);
    }
    row_ptr[r + 1]++;
}

for (int i = 0; i < n; ++i) {
    row_ptr[i + 1] += row_ptr[i];
}


std::vector<int> offset = row_ptr;
for (int k = 0; k < nnz; ++k) {
    int r = coo_row[k];
    int dest = offset[r]++;
    col_ind[dest] = coo_col[k];
    values[dest]  = coo_val[k];
}

}

// A[p,p] = 1, rhs[p] = 0
void build_2d_poisson_coo_periodic(
int nx, int ny, double hx, double hy,
const std::function<double(int,int)> &f,
std::vector &coo_row,
std::vector &coo_col,
std::vector &coo_val,
std::vector &rhs)
{
const int N = nx * ny;
rhs.assign(N, 0.0);

coo_row.clear(); coo_col.clear(); coo_val.clear();
coo_row.reserve(static_cast<size_t>(N) * 5);
coo_col.reserve(static_cast<size_t>(N) * 5);
coo_val.reserve(static_cast<size_t>(N) * 5);

const double cx = 1.0 / (hx * hx);
const double cy = 1.0 / (hy * hy);

auto try_add = [&](int r, int c, double v) {
    coo_row.push_back(r);
    coo_col.push_back(c);
    coo_val.push_back(v);
};

for (int j = 0; j < ny; ++j) {
    for (int i = 0; i < nx; ++i) {
        int p = idx(i, j, nx);

        if (i == 0 && j == 0) {
            try_add(p, p, 1.0);
            rhs[p] = 0.0;
            continue;
        }

        int iL = (i - 1 + nx) % nx;
        int iR = (i + 1) % nx;
        int jD = (j - 1 + ny) % ny;
        int jU = (j + 1) % ny;

        int qL = idx(iL, j,  nx);
        int qR = idx(iR, j,  nx);
        int qD = idx(i,  jD, nx);
        int qU = idx(i,  jU, nx);

        double diag = 2.0 * (cx + cy);
        double b    = f(i, j);  // f(i,j) = 8π^2 sin(2πx) sin(2πy)

        try_add(p, p, diag);

        try_add(p, qL, -cx);
        try_add(p, qR, -cx);

        try_add(p, qD, -cy);
        try_add(p, qU, -cy);

        rhs[p] = b;
    }
}

}

std::vector analytical_solution(int nx, int ny, double hx, double hy)
{
const double PI = 3.14159265358979323846;
std::vector exact(nx * ny, 0.0);
for (int j = 0; j < ny; ++j) {
double y = j * hy;
for (int i = 0; i < nx; ++i) {
double x = i * hx;
exact[idx(i, j, nx)] = std::sin(2.0 * PI * x) * std::sin(2.0 * PI * y);
}
}
return exact;
}

struct ErrorStats {
double linf_abs;
double l2_abs;
double l2_rel;
};

ErrorStats compute_errors(const std::vector &u_num,
const std::vector &u_ex,
double hx, double hy)
{
double linf = 0.0;
double l2 = 0.0;
double l2_ex = 0.0;
const int N = static_cast(u_num.size());
for (int k = 0; k < N; ++k) {
double d = std::abs(u_num[k] - u_ex[k]);
linf = std::max(linf, d);
l2 += d * d;
l2_ex += u_ex[k] * u_ex[k];
}

l2 = std::sqrt(l2 * hx * hy);
l2_ex = std::sqrt(l2_ex * hx * hy);

ErrorStats s;
s.linf_abs = linf;
s.l2_abs   = l2;
s.l2_rel   = (l2_ex > 0.0) ? (l2 / l2_ex) : 0.0;
return s;

}

int main()
{

const int nx = 3201;      
const int ny = 3201;    
const double Lx = 1.0, Ly = 1.0;
const double hx = Lx / (nx - 1);   
const double hy = Ly / (ny - 1);
const int N = nx * ny;



const double PI = 3.14159265358979323846;


auto f = [&](int i, int j) {
    double x = i * hx;
    double y = j * hy;
    return 8.0 * PI * PI *
           std::sin(2.0 * PI * x) *
           std::sin(2.0 * PI * y);
};



std::vector<int> coo_row, coo_col;
std::vector<double> coo_val, rhs;
build_2d_poisson_coo_periodic(nx, ny, hx, hy,
                              f,
                              coo_row, coo_col, coo_val, rhs);

const int nnz = static_cast<int>(coo_row.size());



std::vector<int> row_ptr, col_ind;
std::vector<double> values;

coo_to_csr(N, nnz, coo_row, coo_col, coo_val, row_ptr, col_ind, values);



CHECK_AMGX(AMGX_initialize());
// CHECK_AMGX(AMGX_initialize_plugins());

AMGX_config_handle cfg;

// Reading from JSON configuration file (filename: cheaper_Solver.json)
CHECK_AMGX(AMGX_config_create_from_file(
&cfg,
"./cheaper_Solver.json"));

AMGX_resources_handle rsrc;
CHECK_AMGX(AMGX_resources_create_simple(&rsrc, cfg));

AMGX_matrix_handle A;
AMGX_vector_handle b, x;
AMGX_solver_handle solver;

CHECK_AMGX(AMGX_matrix_create(&A, rsrc, AMGX_mode_dDDI));
CHECK_AMGX(AMGX_vector_create(&b, rsrc, AMGX_mode_dDDI));
CHECK_AMGX(AMGX_vector_create(&x, rsrc, AMGX_mode_dDDI));
CHECK_AMGX(AMGX_solver_create(&solver, rsrc, AMGX_mode_dDDI, cfg));

std::cout << "Uploading matrix to AMGX...\n";
CHECK_AMGX(AMGX_matrix_upload_all(
A, N, nnz,
1, 1,
row_ptr.data(),
col_ind.data(),
values.data(),
nullptr
));

std::cout << "Uploading RHS and initial value...\n";
CHECK_AMGX(AMGX_vector_upload(b, N, 1, rhs.data()));
std::vector h_x0(N, 0.0);
CHECK_AMGX(AMGX_vector_upload(x, N, 1, h_x0.data()));

const double ABS_TOL      = 1e-7;   
const int    MAX_OUTER_IT = 20;      

std::cout << "Setup and solve...\n";
CHECK_AMGX(AMGX_solver_setup(solver, A));

double abs_residual = 1e30;   
int    outer_iter   = 0;

while (abs_residual > ABS_TOL && outer_iter < MAX_OUTER_IT)
{

std::cout << "\n=== AMGX Outer Iteration " << outer_iter << " ===\n";
CHECK_AMGX(AMGX_solver_solve(solver, b, x));

    AMGX_SOLVE_STATUS status;
    CHECK_AMGX(AMGX_solver_get_status(solver, &status));


    int iters = 0;
    CHECK_AMGX(AMGX_solver_get_iterations_number(solver, &iters));

    abs_residual = 0.0;
    CHECK_AMGX(AMGX_solver_get_iteration_residual(
        solver,
        iters,   
        0,       // block_idx
        &abs_residual));

    std::cout << "  inner iters = " << iters
              << ", |r|_L2 = " << std::scientific << abs_residual << "\n";

    ++outer_iter;
}

if (abs_residual > ABS_TOL) {
std::cerr << "Warning: After " << outer_iter
<< " outer iterations, the absolute residual is still " << std::scientific << abs_residual
<< " > tol = " << ABS_TOL << "\n";
}

std::vector<double> u(N);
CHECK_AMGX(AMGX_vector_download(x, u.data()));


auto exact = analytical_solution(nx, ny, hx, hy);
auto err   = compute_errors(u, exact, hx, hy);




CHECK_AMGX(AMGX_solver_destroy(solver));
CHECK_AMGX(AMGX_vector_destroy(x));
CHECK_AMGX(AMGX_vector_destroy(b));
CHECK_AMGX(AMGX_matrix_destroy(A));
CHECK_AMGX(AMGX_resources_destroy(rsrc));
CHECK_AMGX(AMGX_config_destroy(cfg));
CHECK_AMGX(AMGX_finalize());


return 0;

}
std::cout << "\nProgram execution completed!\n";

AMGX solver configuration
{
"config_version": 2,
"solver": "PCG",
"preconditioner": {
"solver": "AMG",
"algorithm": "AGGREGATION",
"cycle": "F",
"max_levels": 30,
"presweeps": 2,
"postsweeps": 2,
"coarsest_sweeps": 4,
"max_iters": 1,
"smoother": "JACOBI_L1",
"selector": "SIZE_2",
"strength_threshold": 0.25,
"interpolator": "D2",
"max_row_sum": 0.9,
"coarse_solver": "JACOBI_L1"
},

"convergence": "ABSOLUTE",
"max_iters": 1000,        
"tolerance": 1e-7,
"norm": "L2",
"print_solve_stats": 0,
"obtain_timings": 1,
"monitor_residual": 1,
"store_res_history": 1,
"print_grid_stats": 0,
"matrix_coloring_scheme": "MIN_MAX",
"dense_lu_num_rows": 128,
"reorder_cols_by_color": 1,
"print_config": 1

}

RUN log

AMGX version 2.5.0
Built on Nov 9 2025, 19:52:31
Compiled with CUDA Runtime 11.5, using CUDA driver 11.0
Device 0: A100-SXM-80GB
AMG Configuration:

Default values:

        GS_L1_variant = 0
        affinity_iterations = 4
        affinity_vectors = 4
        aggregate_size = 2
        aggregation_edge_weight_component = 0
        aggregation_passes = 3
        aggressive_interpolator = MULTIPASS
        aggressive_levels = 0
        aggressive_selector = DEFAULT
        algorithm = CLASSICAL
        alt_rel_tolerance = 1e-12
        amg_consolidation_flag = 0
        amg_host_levels_rows = -1
        block_convert = 0
        block_format = ROW_MAJOR
        boundary_coloring = SYNC_COLORS
        cf_smoothing_mode = 0
        cheby_max_lambda = 1
        cheby_min_lambda = 0.125
        chebyshev_lambda_estimate_mode = 0
        chebyshev_polynomial_order = 5
        coarseAgenerator = LOW_DEG
        coarseAgenerator_coarse = LOW_DEG
        coarse_smoother = BLOCK_JACOBI
        coarse_solver = DENSE_LU_SOLVER
        coarsen_threshold = 1
        coarsest_sweeps = 2
        coloring_custom_arg = 
        coloring_level = 1
        coloring_try_remove_last_colors = 0
        communicator = MPI
        complex_conversion = 0
        convergence = ABSOLUTE
        convergence_analysis = 0
        cycle = V
        cycle_iters = 2
        dense_lu_max_rows = 0
        dense_lu_num_rows = 128
        determinism_flag = 0
        device_alloc_scaling_factor = 10
        device_alloc_scaling_threshold = 16384
        device_consolidation_pool_size = 268435456
        device_mem_pool_max_alloc_size = 20971520
        device_mem_pool_size = 268435456
        device_mem_pool_size_limit = 0
        energymin_interpolator = EM
        energymin_selector = CR
        error_scaling = 0
        exact_coarse_solve = 0
        exception_handling = 0
        filter_weights = 0
        filter_weights_alpha = 0.5
        fine_level_consolidation = 0
        fine_levels = -1
        fine_smoother = BLOCK_JACOBI
        finest_sweeps = -1
        full_ghost_level = 0
        geometric_dim = 2
        ghost_offdiag_limit = 0
        gmres_krylov_dim = 0
        gmres_n_restart = 20
        halo_coloring = LAST
        handshaking_phases = 1
        high_priority_stream = 0
        ilu_sparsity_level = 0
        initial_color = 0
        insert_diag_while_reordering = 0
        intensive_smoothing = 0
        interp_max_elements = -1
        interp_truncation_factor = 1.1
        interpolator = D1
        jacobi_iters = 5
        kaczmarz_coloring_needed = 1
        kpz_mu = 4
        kpz_order = 3
        late_rejection = 0
        matrix_coloring_scheme = MIN_MAX
        matrix_consolidation_lower_threshold = 0
        matrix_consolidation_upper_threshold = 1000
        matrix_halo_exchange = 0
        matrix_writer = matrixmarket
        max_coarse_iters = 100
        max_iters = 100
        max_levels = 100
        max_matching_iterations = 15
        max_num_hash = 7
        max_row_sum = 1.1
        max_unassigned_percentage = 0.05
        max_uncolored_percentage = 0.15
        merge_singletons = 1
        min_coarse_rows = 2
        min_fine_rows = 1
        min_rows_latency_hiding = -1
        modified_handshake = 0
        monitor_residual = 0
        norm = L2
        notay_weights = 0
        num_colors = 10
        num_streams = 0
        obtain_timings = 0
        postsweeps = 1
        preconditioner = AMG
        presweeps = 1
        print_aggregation_info = 0
        print_coloring_info = 0
        print_config = 0
        print_grid_stats = 0
        print_solve_stats = 0
        print_vis_data = 0
        rel_div_tolerance = -1
        relaxation_factor = 0.9
        reorder_cols_by_color = 0
        reuse_scale = 0
        rhs_from_a = 0
        scaling = NONE
        scaling_smoother_steps = 2
        selector = PMIS
        separation_exterior = OWNED
        separation_interior = INTERIOR
        serial_matching = 0
        serialize_threads = 0
        smoother = BLOCK_JACOBI
        solver = AMG
        solver_verbose = 0
        spmm_gmem_size = 1024
        spmm_max_attempts = 6
        spmm_no_sort = 1
        spmm_verbose = 0
        store_res_history = 0
        strength = AHAT
        strength_threshold = 0.25
        structure_reuse_levels = 0
        subspace_dim_s = 8
        symmetric_GS = 0
        tolerance = 1e-12
        use_bsrxmv = 0
        use_cuda_ipc_consolidation = 0
        use_cusparse_kernels = 0
        use_opt_kernels = 0
        use_scalar_norm = 0
        use_sum_stopping_criteria = 0
        verbosity_level = 3
        weakness_bound = 2147483647
        weight_formula = 0

User-defined parameters:
Current_scope:parameter_name(new_scope) = parameter_value

        default:convergence = ABSOLUTE
        default:dense_lu_num_rows = 128
        default:matrix_coloring_scheme = MIN_MAX
        default:max_iters = 1000
        default:monitor_residual = 1
        default:norm = L2
        default:obtain_timings = 1
        default:preconditioner(default_sub_preconditioner) = AMG
        default:print_config = 1
        default:print_grid_stats = 0
        default:print_solve_stats = 0
        default:reorder_cols_by_color = 1
        default:solver = PCG
        default:store_res_history = 1
        default:tolerance = 1e-07
        default_sub_preconditioner:algorithm = AGGREGATION
        default_sub_preconditioner:coarse_solver = JACOBI_L1
        default_sub_preconditioner:coarsest_sweeps = 4
        default_sub_preconditioner:cycle = F
        default_sub_preconditioner:interpolator = D2
        default_sub_preconditioner:max_iters = 1
        default_sub_preconditioner:max_levels = 30
        default_sub_preconditioner:max_row_sum = 0.9
        default_sub_preconditioner:postsweeps = 2
        default_sub_preconditioner:presweeps = 2
        default_sub_preconditioner:selector = SIZE_2
        default_sub_preconditioner:smoother = JACOBI_L1
        default_sub_preconditioner:strength_threshold = 0.25

Uploading matrix to AMGX...
Uploading RHS and initial value...
Setup and solve...

=== AMGX Outer Iteration 0 ===

Metadata

Metadata

Assignees

No one assigned

    Labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions