Actual source code: cycliccuda.cu

slepc-3.16.0 2021-09-30
Report Typos and Errors
  1: /*
  2:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  3:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  4:    Copyright (c) 2002-2021, Universitat Politecnica de Valencia, Spain

  6:    This file is part of SLEPc.
  7:    SLEPc is distributed under a 2-clause BSD license (see LICENSE).
  8:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  9: */
 10: /*
 11:    SLEPc singular value solver: "cyclic" (CUDA implementation)
 12: */
 13: #include <slepc/private/svdimpl.h>
 14: #include "../src/svd/impls/cyclic/cyclic.h"

 16: PetscErrorCode MatMult_Cyclic_CUDA(Mat B,Vec x,Vec y)
 17: {
 18:   PetscErrorCode    ierr;
 19:   SVD_CYCLIC_SHELL  *ctx;
 20:   const PetscScalar *d_px;
 21:   PetscScalar       *d_py;
 22:   PetscInt          m;

 25:   MatShellGetContext(B,&ctx);
 26:   MatGetLocalSize(ctx->A,&m,NULL);
 27:   VecCUDAGetArrayRead(x,&d_px);
 28:   VecCUDAGetArrayWrite(y,&d_py);
 29:   VecCUDAPlaceArray(ctx->x1,d_px);
 30:   VecCUDAPlaceArray(ctx->x2,d_px+m);
 31:   VecCUDAPlaceArray(ctx->y1,d_py);
 32:   VecCUDAPlaceArray(ctx->y2,d_py+m);
 33:   MatMult(ctx->A,ctx->x2,ctx->y1);
 34:   MatMult(ctx->AT,ctx->x1,ctx->y2);
 35:   VecCUDAResetArray(ctx->x1);
 36:   VecCUDAResetArray(ctx->x2);
 37:   VecCUDAResetArray(ctx->y1);
 38:   VecCUDAResetArray(ctx->y2);
 39:   VecCUDARestoreArrayRead(x,&d_px);
 40:   VecCUDARestoreArrayWrite(y,&d_py);
 41:   return(0);
 42: }

 44: PetscErrorCode MatMult_ECross_CUDA(Mat B,Vec x,Vec y)
 45: {
 46:   PetscErrorCode    ierr;
 47:   SVD_CYCLIC_SHELL  *ctx;
 48:   const PetscScalar *d_px;
 49:   PetscScalar       *d_py;
 50:   PetscInt          mn,m,n;

 53:   MatShellGetContext(B,&ctx);
 54:   MatGetLocalSize(ctx->A,NULL,&n);
 55:   VecGetLocalSize(y,&mn);
 56:   m = mn-n;
 57:   VecCUDAGetArrayRead(x,&d_px);
 58:   VecCUDAGetArrayWrite(y,&d_py);
 59:   VecCUDAPlaceArray(ctx->x1,d_px);
 60:   VecCUDAPlaceArray(ctx->x2,d_px+m);
 61:   VecCUDAPlaceArray(ctx->y1,d_py);
 62:   VecCUDAPlaceArray(ctx->y2,d_py+m);
 63:   VecCopy(ctx->x1,ctx->y1);
 64:   MatMult(ctx->A,ctx->x2,ctx->w);
 65:   MatMult(ctx->AT,ctx->w,ctx->y2);
 66:   VecCUDAResetArray(ctx->x1);
 67:   VecCUDAResetArray(ctx->x2);
 68:   VecCUDAResetArray(ctx->y1);
 69:   VecCUDAResetArray(ctx->y2);
 70:   VecCUDARestoreArrayRead(x,&d_px);
 71:   VecCUDARestoreArrayWrite(y,&d_py);
 72:   return(0);
 73: }