Program Listing for File bigfeta_dist_solve.c¶
↰ Return to documentation for file (bigfeta/distributed/src/bigfeta_dist_solve.c
)
static char help[] = "usage:\n"
"em_dist_solve -input <input_file_path> -output <output_file_path> <ksp options>\n"
"ksp options:\n"
" direct solve with pastix: -ksp_type preonly -pc_type lu\n"
"see:\n"
" https://www.mcs.anl.gov/petsc/petsc-current/docs/manualpages/KSP/KSPPREONLY.html\n"
" https://www.mcs.anl.gov/petsc/petsc-3.11/docs/manualpages/Mat/MATSOLVERPASTIX.html#MATSOLVERPASTIX\n";
#include <sys/resource.h>
#include <stdio.h>
#include <petsctime.h>
#include "bigfeta_dist.h"
#include "hw_config.h"
int
main (int argc, char **args)
{
KSP ksp; //linear solver context
PetscMPIInt rank, size; //MPI rank and size
char fileinarg[PETSC_MAX_PATH_LEN]; //input file name
char sln_output[PETSC_MAX_PATH_LEN]; //input file name
char *dir, *sln_input, **csrnames; //various strings
int nfiles; //number of files
PetscInt **metadata; //metadata read from index.txt
PetscInt local_firstfile, local_lastfile; //local file indices
PetscInt global_nrow, global_ncol, global_nnz; //global index info
PetscInt local_nrow, local_nnz, local_row0; //local index info
PetscInt local_rowN;
PetscInt *local_indptr, *local_jcol; //index arrays for local CSR
PetscScalar *local_data, *local_weights; //data for local CSR and weights
PetscScalar **local_rhs;
PetscBool flg, trunc; //boolean used in checking command line
PetscErrorCode ierr; //error code that gets passed around.
PetscLogDouble tall0, tall1; //timers
int i;
Mat A, W, ATW, K, L; //K and the matrices that build it
Vec ATWRHS[2];
Vec rhs[2], x0[2], Lm[2], x[2]; //vectors associated with the solve(s)
PetscLogDouble t0, t1; //some timers
PetscReal norm[2], norm2[2];
PetscLogStage stage;
int mpisupp;
/* Command line handling and setup */
MPI_Init_thread (0, 0, MPI_THREAD_MULTIPLE, &mpisupp);
ierr = PetscInitialize (&argc, &args, (char *) 0, help);
if (ierr)
return ierr;
ierr =
PetscOptionsGetString (NULL, NULL, "-input", fileinarg,
PETSC_MAX_PATH_LEN, &flg);
CHKERRQ (ierr);
ierr =
PetscOptionsGetString (NULL, NULL, "-output", sln_output,
PETSC_MAX_PATH_LEN, &flg);
CHKERRQ (ierr);
ierr = PetscOptionsHasName (NULL, NULL, "-truncate", &trunc);
CHKERRQ (ierr);
PetscTime (&tall0);
sln_input = strdup (fileinarg);
dir = strdup (dirname (fileinarg));
ierr = MPI_Comm_rank (PETSC_COMM_WORLD, &rank);
CHKERRQ (ierr);
ierr = MPI_Comm_size (PETSC_COMM_WORLD, &size);
CHKERRQ (ierr);
PetscLogStageRegister ("Distributed Read", &stage);
PetscLogStagePush (stage);
/* count the numberof hdf5 CSR files */
ierr = CountFiles (PETSC_COMM_WORLD, sln_input, &nfiles);
CHKERRQ (ierr);
/* allocate for file names and metadata */
csrnames = (char **) malloc (nfiles * sizeof (char *));
metadata = (PetscInt **) malloc (nfiles * sizeof (PetscInt *));
for (i = 0; i < nfiles; i++)
{
csrnames[i] = (char *) malloc (PETSC_MAX_PATH_LEN * sizeof (char));
metadata[i] = (PetscInt *) malloc (4 * sizeof (PetscInt));
}
/* read in the metadata */
ierr =
ReadMetadata (PETSC_COMM_WORLD, sln_input, nfiles, csrnames, metadata);
CHKERRQ (ierr);
/* what does the hardware look like? */
node * nodes;
int count, j, nnodes, thisnode, noderank;
MPI_Comm MPI_COMM_NODE;
nodes = hw_config (MPI_COMM_WORLD, &nnodes, &thisnode);
split_files (nodes, nnodes, nfiles);
MPI_Comm_split (MPI_COMM_WORLD, thisnode, rank, &MPI_COMM_NODE);
MPI_Comm_rank (MPI_COMM_NODE, &noderank);
/* what files will this rank read */
count = 0;
for (i=0; i<nnodes; i++)
{
if (rank == 0)
{
disp_node (nodes[i]);
}
for (j=0; j<nodes[i].nrank; j++)
{
if ((thisnode == i) && (noderank == j))
{
local_firstfile = count;
local_lastfile = count + nodes[i].files[j] - 1;
}
count += nodes[i].files[j];
}
}
//ierr =
// SetFiles (PETSC_COMM_WORLD, nfiles, &local_firstfile, &local_lastfile);
//CHKERRQ (ierr);
/* how many rows and nnz per worker */
GetGlobalLocalCounts (nfiles, metadata, local_firstfile, local_lastfile,
&global_nrow, &global_ncol, &global_nnz, &local_nrow,
&local_nnz, &local_row0);
if (rank == 0)
{
printf ("input file: %s\n", sln_input);
printf ("%d ranks will handle %d files\n", size, nfiles);
}
/* how many solves */
PetscInt nsolve;
ierr = CountSolves (PETSC_COMM_WORLD, sln_input, &nsolve);
CHKERRQ (ierr);
/* allocate space for local CSR arrays */
local_indptr = (PetscInt *) calloc (local_nrow + 1, sizeof (PetscInt));
local_jcol = (PetscInt *) calloc (local_nnz, sizeof (PetscInt));
local_data = (PetscScalar *) calloc (local_nnz, sizeof (PetscScalar));
local_weights = (PetscScalar *) calloc (local_nrow, sizeof (PetscScalar));
local_rhs = (PetscScalar **) malloc (nsolve * sizeof (PetscInt *));
for (i = 0; i < nsolve; i++)
{
local_rhs[i] =
(PetscScalar *) calloc (local_nrow, sizeof (PetscScalar));
}
/* read in local hdf5 files and concatenate into CSR arrays */
ierr =
ReadLocalCSR (PETSC_COMM_SELF, csrnames, local_firstfile, local_lastfile,
nsolve, local_indptr, local_jcol, local_data, local_weights,
local_rhs);
CHKERRQ (ierr);
/* Create distributed A! */
MatCreateMPIAIJWithArrays (PETSC_COMM_WORLD, local_nrow, PETSC_DECIDE,
global_nrow, global_ncol, local_indptr,
local_jcol, local_data, &A);
free (local_jcol);
free (local_data);
free (local_indptr);
if (rank == 0)
{
printf ("A matrix created\n");
}
PetscLogStagePop ();
/* Create distributed rhs */
PetscScalar *u_local_ptr;
for (i = 0; i < nsolve; i++)
{
VecCreate (PETSC_COMM_WORLD, &rhs[i]);
VecSetType (rhs[i], VECMPI);
VecSetSizes (rhs[i], local_nrow, global_nrow);
VecSet (rhs[i], 0.0);
VecGetArray (rhs[i], &u_local_ptr);
for (int j = 0; j < local_nrow; j++)
{
u_local_ptr[j] = local_rhs[i][j];
}
ierr = VecRestoreArray (rhs[i], &u_local_ptr);
VecAssemblyBegin (rhs[i]);
VecAssemblyEnd (rhs[i]);
}
free (u_local_ptr);
for (i = 0; i < nsolve; i++)
{
free (local_rhs[i]);
}
free (local_rhs);
PetscLogStageRegister ("Create K", &stage);
PetscLogStagePush (stage);
/* Create the W matrix */
ierr =
CreateW (PETSC_COMM_WORLD, local_weights, local_nrow, local_row0,
global_nrow, &W);
free (local_weights);
/* Start the K matrix with K = AT*W*A */
ierr = MatTransposeMatMult (A, W, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &ATW);
CHKERRQ (ierr);
ierr = MatMatMult (ATW, A, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &K);
CHKERRQ (ierr);
/* find out how the rows are distributed */
MatGetOwnershipRange (K, &local_row0, &local_rowN);
MatGetSize (K, &global_nrow, NULL);
local_nrow = local_rowN - local_row0;
/* read in the regularization */
ierr =
CreateL (PETSC_COMM_WORLD, sln_input, local_nrow, global_nrow, trunc, &L);
if (rank == 0)
{
printf ("L created\n");
}
/* K = K+L */
MatSetOption (K, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE);
ierr = MatAXPY (K, (PetscScalar) 1.0, L, SUBSET_NONZERO_PATTERN);
CHKERRQ (ierr);
MatSetOption (K, MAT_SYMMETRIC, PETSC_TRUE);
// appropriate for Cholesky
// ierr = MatConvert (K, MATMPISBAIJ, MAT_INPLACE_MATRIX, &K);
// CHKERRQ (ierr);
if (rank == 0)
{
printf ("K matrix created\n");
}
PetscLogStagePop ();
PetscLogStageRegister ("Get x0", &stage);
PetscLogStagePush (stage);
/* Read in the x0 vector(s) */
ierr =
Readx0 (PETSC_COMM_WORLD, sln_input, local_nrow, global_nrow, nsolve,
trunc, x0);
CHKERRQ (ierr);
/* Create Lm vectors */
for (i = 0; i < nsolve; i++)
{
ierr = VecDuplicate (x0[i], &Lm[i]);
ierr = VecDuplicate (x0[i], &ATWRHS[i]);
CHKERRQ (ierr);
ierr = MatMult (L, x0[i], Lm[i]);
CHKERRQ (ierr);
ierr = MatMult (ATW, rhs[i], ATWRHS[i]);
CHKERRQ (ierr);
ierr = VecAXPY (Lm[i], 1.0, ATWRHS[i]);
CHKERRQ (ierr);
VecDestroy (&ATWRHS[i]);
}
if (rank == 0)
{
printf ("Lm(s) created\n");
}
PetscLogStagePop ();
PetscLogStageRegister ("Solve", &stage);
PetscLogStagePush (stage);
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Create the linear solver and set various options
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
ierr = KSPCreate (PETSC_COMM_WORLD, &ksp);
CHKERRQ (ierr);
ierr = KSPSetOperators (ksp, K, K);
CHKERRQ (ierr);
ierr = KSPSetFromOptions (ksp);
CHKERRQ (ierr);
ierr = KSPSetReusePreconditioner(ksp, PETSC_TRUE);
CHKERRQ (ierr);
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Solve the linear system
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
char xname[20];
PetscViewer viewer;
if (rank == 0)
{
CopyDataSetstoSolutionOut (PETSC_COMM_SELF, sln_input, sln_output);
}
ierr =
PetscViewerHDF5Open (PETSC_COMM_WORLD, sln_output, FILE_MODE_APPEND,
&viewer);
CHKERRQ (ierr);
for (i = 0; i < nsolve; i++)
{
PetscTime (&t0);
ierr = VecDuplicate (x0[i], &x[i]);
CHKERRQ (ierr);
ierr = KSPSolve (ksp, Lm[i], x[i]);
CHKERRQ (ierr);
PetscTime (&t1);
if (rank == 0)
{
printf ("solve %d: %0.1f sec\n", i, t1 - t0);
}
sprintf (xname, "x_%d", i);
ierr = PetscObjectSetName ((PetscObject) x[i], xname);
CHKERRQ (ierr);
ierr = VecView (x[i], viewer);
CHKERRQ (ierr);
}
ierr = PetscViewerDestroy (&viewer);
CHKERRQ (ierr);
PetscLogStagePop ();
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Check solution and clean up
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
PetscReal *precision = (PetscReal *) calloc (nsolve, sizeof (PetscReal));
char results_out[1000], strout[1000], tmp[400];
sprintf (strout, " precision [norm(Kx-Lm)/norm(Lm)] =");
sprintf (results_out, "{\"precision\": [");
for (i = 0; i < nsolve; i++)
{
//from here on, x0 is replaced by err
ierr = VecScale (Lm[i], (PetscScalar) - 1.0);
CHKERRQ (ierr);
ierr = MatMultAdd (K, x[i], Lm[i], x0[i]);
CHKERRQ (ierr); //err0 = Kx0-Lm0
ierr = VecNorm (x0[i], NORM_2, &norm[i]);
CHKERRQ (ierr); //NORM_2 denotes sqrt(sum_i |x_i|^2)
ierr = VecNorm (Lm[i], NORM_2, &norm2[i]);
CHKERRQ (ierr); //NORM_2 denotes sqrt(sum_i |x_i|^2)
precision[i] = norm[i] / norm2[i];
sprintf (tmp, " %0.1e", precision[i]);
strcat (strout, tmp);
strcat (results_out, tmp);
if (i != nsolve - 1)
{
strcat (strout, ",");
strcat (results_out, ",");
}
}
strcat (strout, "\n");
strcat (results_out, "],");
MatDestroy (&K);
PetscInt mA, nA, c0, cn;
ierr = MatGetSize (A, &mA, &nA);
CHKERRQ (ierr);
ierr = MatGetOwnershipRange (A, &c0, &cn);
CHKERRQ (ierr);
strcat (strout, " error [norm(Ax-b)] =");
strcat (results_out, "\"error\": [");
for (i = 0; i < nsolve; i++)
{
ierr = VecCreate (PETSC_COMM_WORLD, &x0[i]);
CHKERRQ (ierr);
ierr = VecSetType (x0[i], VECMPI);
CHKERRQ (ierr);
ierr = VecSetSizes (x0[i], cn - c0, mA);
CHKERRQ (ierr);
ierr = MatMult (A, x[i], x0[i]);
CHKERRQ (ierr); //err0 = Ax0
ierr = VecAXPY (x0[i], -1.0, rhs[i]);
CHKERRQ (ierr);
ierr = VecNorm (x0[i], NORM_2, &norm[i]);
CHKERRQ (ierr); //NORM_2 denotes sqrt(sum_i |x_i|^2)
sprintf (tmp, " %0.1f", norm[i]);
strcat (strout, tmp);
strcat (results_out, tmp);
if (i != nsolve - 1)
{
strcat (strout, ",");
strcat (results_out, ",");
}
}
strcat (strout, "\n");
strcat (results_out, "],");
//calculate the mean and standard deviation
PetscReal errmean[2], errstd[2];
PetscInt sz;
strcat (strout, " [mean(Ax) +/- std(Ax)] =");
strcat (results_out, " \"err\": [");
for (i = 0; i < nsolve; i++)
{
VecGetSize (x0[i], &sz);
VecSum (x0[i], &errmean[i]);
errmean[i] /= sz;
VecShift (x0[i], -1.0 * errmean[i]);
VecNorm (x0[i], NORM_2, &errstd[i]);
errstd[i] /= sqrt (sz);
sprintf (tmp, " %0.2f +/- %0.2f", errmean[i], errstd[i]);
strcat (strout, tmp);
sprintf (tmp, "[%0.2f, %0.2f]", errmean[i], errstd[i]);
strcat (results_out, tmp);
if (i != nsolve - 1)
{
strcat (strout, ",");
strcat (results_out, ",");
}
}
strcat (strout, "\n");
strcat (results_out, "]}");
if (rank == 0)
{
printf (strout);
hid_t fileout = H5Fopen (sln_output, H5F_ACC_RDWR, H5P_DEFAULT);
hid_t memtype = H5Tcopy (H5T_C_S1);
hsize_t dims[1] = { 1 };
hid_t space = H5Screate_simple (1, dims, NULL);
H5Tset_size (memtype, 1000);
hid_t dsetout =
H5Dcreate (fileout, "results", memtype, space, H5P_DEFAULT,
H5P_DEFAULT, H5P_DEFAULT);
H5Dwrite (dsetout, memtype, H5S_ALL, H5S_ALL, H5P_DEFAULT, results_out);
H5Dclose (dsetout);
H5Sclose (space);
H5Tclose (memtype);
H5Fclose (fileout);
ierr = PetscViewerDestroy (&viewer);
CHKERRQ (ierr);
}
//cleanup
for (i = 0; i < nsolve; i++)
{
VecDestroy (&Lm[i]);
VecDestroy (&x[i]);
VecDestroy (&x0[i]);
VecDestroy (&rhs[i]);
}
MatDestroy (&A);
MatDestroy (&ATW);
KSPDestroy (&ksp);
free (dir);
free (sln_input);
for (i = 0; i < nfiles; i++)
{
free (csrnames[i]);
free (metadata[i]);
}
free (csrnames);
free (metadata);
PetscTime (&tall1);
if (rank == 0)
{
printf ("rank %d total time: %0.1f sec\n", rank, tall1 - tall0);
}
ierr = PetscFinalize ();
CHKERRQ (ierr);
return ierr;
}