22#ifdef DEAL_II_WITH_CUDA
32 template <
typename Number>
53 cusparseSpMatDescr_t &sp_descr)
62 reinterpret_cast<void *
>(
const_cast<float *
>(
A_val_dev)),
79 cusparseSpMatDescr_t &sp_descr)
88 reinterpret_cast<void *
>(
const_cast<double *
>(
A_val_dev)),
103 const cusparseSpMatDescr_t sp_descr,
109 float beta = add ? 1. : 0.;
119 reinterpret_cast<void *
>(
const_cast<float *
>(
x)),
127 reinterpret_cast<void *
>(
const_cast<float *
>(
y)),
176 const cusparseSpMatDescr_t sp_descr,
182 double beta = add ? 1. : 0.;
192 reinterpret_cast<void *
>(
const_cast<double *
>(
x)),
200 reinterpret_cast<void *
>(
const_cast<double *
>(
y)),
244 template <
typename Number>
247 const Number * val_dev,
248 const int * column_index_dev,
249 const int * row_ptr_dev,
257 for (
int j = row_ptr_dev[row];
j < row_ptr_dev[row + 1]; ++
j)
264 template <
typename Number>
267 const Number * val_dev,
269 const int *row_ptr_dev,
277 sums[row] = (Number)0.;
278 for (
int j = row_ptr_dev[row];
j < row_ptr_dev[row + 1]; ++
j)
286 template <
typename Number>
299 template <
typename Number>
314 template <
typename Number>
316 : cusparse_handle(
other.cusparse_handle)
318 , n_rows(
other.n_rows)
319 , n_cols(
other.n_cols)
324 , sp_descr(
other.sp_descr)
329 other.descr =
nullptr;
330 other.sp_descr =
nullptr;
335 template <
typename Number>
338 if (descr !=
nullptr)
346 if (sp_descr !=
nullptr)
360 template <
typename Number>
364 cusparse_handle =
other.cusparse_handle;
366 n_rows =
other.n_rows;
367 n_cols =
other.n_cols;
368 val_dev = std::move(
other.val_dev);
369 column_index_dev = std::move(
other.column_index_dev);
370 row_ptr_dev = std::move(
other.row_ptr_dev);
372 sp_descr =
other.sp_descr;
377 other.descr =
nullptr;
378 other.sp_descr =
nullptr;
385 template <
typename Number>
396 std::vector<Number> val;
398 std::vector<int> column_index;
399 column_index.reserve(nnz);
404 for (
int row = 0; row < n_rows; ++row)
407 unsigned int counter = 0;
410 val.emplace_back(p->value());
411 column_index.emplace_back(p->column());
417 unsigned int const offset =
row_ptr[row];
420 unsigned int pos = 1;
421 while ((column_index[offset +
pos] < row) && (
pos < counter))
423 val[offset +
pos - 1] = val[offset +
pos];
424 column_index[offset +
pos - 1] = column_index[offset +
pos];
432 val_dev.reset(Utilities::CUDA::allocate_device_data<Number>(nnz));
435 nnz *
sizeof(Number),
440 column_index_dev.reset(Utilities::CUDA::allocate_device_data<int>(nnz));
442 error_code =
cudaMemcpy(column_index_dev.get(),
449 row_ptr_dev.reset(Utilities::CUDA::allocate_device_data<int>(
row_ptr_size));
473 column_index_dev.get(),
479 template <
typename Number>
484 const int n_blocks = 1 + (nnz - 1) /
block_size;
485 internal::scale<Number>
486 <<<n_blocks,
block_size>>>(val_dev.get(), factor, nnz);
494 template <
typename Number>
500 const int n_blocks = 1 + (nnz - 1) /
block_size;
501 internal::scale<Number>
502 <<<n_blocks,
block_size>>>(val_dev.get(), 1. / factor, nnz);
510 template <
typename Number>
528 template <
typename Number>
546 template <
typename Number>
564 template <
typename Number>
582 template <
typename Number>
595 template <
typename Number>
609 template <
typename Number>
617 dst.
sadd(-1., 1., b);
624 template <
typename Number>
629 const int n_blocks = 1 + (nnz - 1) /
block_size;
630 internal::l1_norm<Number>
633 column_index_dev.get(),
643 template <
typename Number>
648 const int n_blocks = 1 + (nnz - 1) /
block_size;
649 internal::linfty_norm<Number>
652 column_index_dev.get(),
662 template <
typename Number>
669 nnz *
sizeof(Number),
678 template <
typename Number>
679 std::tuple<Number *, int *, int *, cusparseMatDescr_t, cusparseSpMatDescr_t>
682 return std::make_tuple(val_dev.get(),
683 column_index_dev.get(),
value_type * data() const noexcept
void vmult_add(LinearAlgebra::CUDAWrappers::Vector< Number > &dst, const LinearAlgebra::CUDAWrappers::Vector< Number > &src) const
std::tuple< Number *, int *, int *, cusparseMatDescr_t, cusparseSpMatDescr_t > get_cusparse_matrix() const
void Tvmult_add(LinearAlgebra::CUDAWrappers::Vector< Number > &dst, const LinearAlgebra::CUDAWrappers::Vector< Number > &src) const
void Tvmult(LinearAlgebra::CUDAWrappers::Vector< Number > &dst, const LinearAlgebra::CUDAWrappers::Vector< Number > &src) const
void reinit(Utilities::CUDA::Handle &handle, const ::SparseMatrix< Number > &sparse_matrix_host)
Number frobenius_norm() const
Number matrix_norm_square(const LinearAlgebra::CUDAWrappers::Vector< Number > &v) const
SparseMatrix & operator*=(const Number factor)
Number residual(LinearAlgebra::CUDAWrappers::Vector< Number > &dst, const LinearAlgebra::CUDAWrappers::Vector< Number > &x, const LinearAlgebra::CUDAWrappers::Vector< Number > &b) const
SparseMatrix & operator/=(const Number factor)
SparseMatrix & operator=(CUDAWrappers::SparseMatrix< Number > &&)
void vmult(LinearAlgebra::CUDAWrappers::Vector< Number > &dst, const LinearAlgebra::CUDAWrappers::Vector< Number > &src) const
Number linfty_norm() const
Number matrix_scalar_product(const LinearAlgebra::CUDAWrappers::Vector< Number > &u, const LinearAlgebra::CUDAWrappers::Vector< Number > &v) const
virtual void sadd(const Number s, const Number a, const VectorSpaceVector< Number > &V) override
virtual real_type l2_norm() const override
Number * get_values() const
#define DEAL_II_NAMESPACE_OPEN
#define DEAL_II_NAMESPACE_CLOSE
#define AssertCusparse(error_code)
#define AssertCudaKernel()
static ::ExceptionBase & ExcZero()
#define Assert(cond, exc)
#define AssertIsFinite(number)
#define AssertNothrowCusparse(error_code)
#define AssertCuda(error_code)
void create_sp_mat_descr(int m, int n, int nnz, const float *A_val_dev, const int *A_row_ptr_dev, const int *A_column_index_dev, cusparseSpMatDescr_t &sp_descr)
__global__ void linfty_norm(const typename SparseMatrix< Number >::size_type n_rows, const Number *val_dev, const int *, const int *row_ptr_dev, Number *sums)
__global__ void l1_norm(const typename SparseMatrix< Number >::size_type n_rows, const Number *val_dev, const int *column_index_dev, const int *row_ptr_dev, Number *sums)
__global__ void scale(Number *val, const Number a, const typename SparseMatrix< Number >::size_type N)
void csrmv(cusparseHandle_t handle, bool transpose, int m, int n, const cusparseSpMatDescr_t sp_descr, const float *x, bool add, float *y)
::VectorizedArray< Number, width > abs(const ::VectorizedArray< Number, width > &)
cusparseHandle_t cusparse_handle