20 #ifndef OPM_FPGASOLVER_BACKEND_HEADER_INCLUDED
21 #define OPM_FPGASOLVER_BACKEND_HEADER_INCLUDED
23 #include <opm/simulators/linalg/bda/BdaSolver.hpp>
24 #include <opm/simulators/linalg/bda/FPGABILU0.hpp>
26 #include <linearalgebra/ilu0bicgstab/xilinx/src/sda_app/bicgstab_solver_config.hpp>
27 #include <linearalgebra/ilu0bicgstab/xilinx/src/sda_app/common/opencl_lib.hpp>
28 #include <linearalgebra/ilu0bicgstab/xilinx/src/sda_app/common/fpga_functions_bicgstab.hpp>
36 template <
unsigned int block_size>
// Using-declarations that make the Base members `verbosity`, `tolerance` and
// `initialized` usable without qualification inside this class.
// NOTE(review): `Base` is not declared in this listing; presumably an alias
// for BdaSolver<block_size> — confirm against the full header.
46 using Base::verbosity;
48 using Base::tolerance;
49 using Base::initialized;
// Two int pointers, both initially null.
// NOTE(review): names suggest permutation maps used for reordering — confirm in the .cpp.
54 int *fromOrder =
nullptr, *toOrder =
nullptr;
// Boolean state flags, both initially false.
55 bool analysis_done =
false;
56 bool level_scheduling =
false;
// Owning pointer to a BlockedMatrix; empty until assigned.
59 std::unique_ptr<BlockedMatrix> mat =
nullptr;
// Owning pointer to a Preconditioner; empty until assigned.
// NOTE(review): `Preconditioner` is not declared in this listing; presumably an alias
// for the FPGABILU0 type from the included header — confirm.
61 std::unique_ptr<Preconditioner> prec =
nullptr;
// Array of untyped pointers and an int array, both initially null.
// NOTE(review): presumably filled by the preconditioner with processed data and its sizes — confirm.
64 void **processedPointers =
nullptr;
65 int *processedSizes =
nullptr;
// Unsigned counter starting at 0.
67 unsigned int fpga_calls = 0;
// Flag that defaults to true.
// NOTE(review): presumably switches per-call performance recording on — confirm.
68 bool perf_call_enabled =
true;
72 double s_preconditioner_create = 0.0;
73 double s_analysis = 0.0;
74 double s_reorder = 0.0;
75 double s_mem_setup = 0.0;
76 double s_mem_h2d = 0.0;
77 double s_kernel_exec = 0.0;
78 unsigned int n_kernel_exec_cycles = 0;
79 float n_kernel_exec_iters = 0.0;
80 double s_mem_d2h = 0.0;
82 double s_postprocess = 0.0;
83 bool converged =
false;
84 unsigned int converged_flags = 0;
85 } perf_call_metrics_t;
88 double s_initialization;
89 double s_preconditioner_setup;
90 double s_preconditioner_create;
91 double s_preconditioner_create_min,s_preconditioner_create_max,s_preconditioner_create_avg;
93 double s_analysis_min,s_analysis_max,s_analysis_avg;
95 double s_reorder_min,s_reorder_max,s_reorder_avg;
97 double s_mem_setup_min,s_mem_setup_max,s_mem_setup_avg;
99 double s_mem_h2d_min,s_mem_h2d_max,s_mem_h2d_avg;
100 double s_kernel_exec;
101 double s_kernel_exec_min,s_kernel_exec_max,s_kernel_exec_avg;
102 unsigned long n_kernel_exec_cycles;
103 unsigned long n_kernel_exec_cycles_min,n_kernel_exec_cycles_max,n_kernel_exec_cycles_avg;
104 float n_kernel_exec_iters;
105 float n_kernel_exec_iters_min,n_kernel_exec_iters_max,n_kernel_exec_iters_avg;
107 double s_mem_d2h_min,s_mem_d2h_max,s_mem_d2h_avg;
109 double s_solve_min,s_solve_max,s_solve_avg;
110 double s_postprocess;
111 double s_postprocess_min,s_postprocess_max,s_postprocess_avg;
112 unsigned int n_converged;
113 } perf_total_metrics_t;
// A vector of per-call metric records and one aggregate metric record.
// NOTE(review): presumably one perf_call entry is stored per solver invocation — confirm in the .cpp.
114 std::vector<perf_call_metrics_t> perf_call;
115 perf_total_metrics_t perf_total;
// Unsigned value starting at 0; the meaning of individual bits is not shown in this header.
118 unsigned int fpga_config_bits = 0;
// Flag that defaults to false.
119 bool fpga_disabled =
false;
// Debug and data buffer bookkeeping. The pointer members start out null;
// the plain integer members have no default member initializer here.
121 unsigned int debugbufferSize;
122 unsigned long int *debugBuffer =
nullptr;
123 unsigned int *databufferSize =
nullptr;
// Fixed-size array of RW_BUF byte pointers, all initially null.
// NOTE(review): RW_BUF is defined outside this listing.
124 unsigned char *dataBuffer[RW_BUF] = {
nullptr};
125 unsigned int debug_outbuf_words;
// Results buffer bookkeeping; arrays are sized by RES_BUF_MAX (defined outside this listing)
// and by the literal 6.
127 int resultsBufferNum;
128 unsigned int resultsBufferSize[RES_BUF_MAX];
129 unsigned int result_offsets[6];
// Kernel counters and status flags; none has a default member initializer here.
// NOTE(review): presumably filled from the debug buffer after a kernel run — confirm in the .cpp.
130 unsigned int kernel_cycles, kernel_iter_run;
132 unsigned char last_norm_idx;
133 bool kernel_aborted, kernel_signature, kernel_overflow;
134 bool kernel_noresults;
135 bool kernel_wrafterend, kernel_dbgfifofull;
// Option flags, both default to false.
136 bool use_residuals =
false;
137 bool use_LU_res =
false;
// Tunables with in-class defaults (2000000000 and 65535).
// NOTE(review): units are not stated in this header — confirm before changing.
140 unsigned int abort_cycles = 2000000000;
141 unsigned int debug_sample_rate = 65535;
// Element counts for the nnzValArrays / L_nnzValArrays / U_nnzValArrays pointer arrays, all starting at 0.
// NOTE(review): pairing with the same-named arrays is inferred from the names — confirm.
142 int nnzValArrays_size = 0;
143 int L_nnzValArrays_size = 0;
144 int U_nnzValArrays_size = 0;
// Raw pointers to host-side arrays; every one starts out null.
// NOTE(review): ownership and allocation are not visible in this header — check the
// destructor in the .cpp before changing how these are freed.
146 long unsigned int *setupArray =
nullptr;
// Unprefixed group of arrays.
// NOTE(review): presumably describes the system matrix in the layout the FPGA kernel expects — confirm.
147 double **nnzValArrays =
nullptr;
148 short unsigned int *columnIndexArray =
nullptr;
149 unsigned char *newRowOffsetArray =
nullptr;
150 unsigned int *PIndexArray =
nullptr;
151 unsigned int *colorSizesArray =
nullptr;
// Same five arrays with an L_ prefix.
// NOTE(review): presumably the lower-triangular ILU0 factor — confirm.
152 double **L_nnzValArrays =
nullptr;
153 short unsigned int *L_columnIndexArray =
nullptr;
154 unsigned char *L_newRowOffsetArray =
nullptr;
155 unsigned int *L_PIndexArray =
nullptr;
156 unsigned int *L_colorSizesArray =
nullptr;
// Same five arrays with a U_ prefix.
// NOTE(review): presumably the upper-triangular ILU0 factor — confirm.
157 double **U_nnzValArrays =
nullptr;
158 short unsigned int *U_columnIndexArray =
nullptr;
159 unsigned char *U_newRowOffsetArray =
nullptr;
160 unsigned int *U_PIndexArray =
nullptr;
161 unsigned int *U_colorSizesArray =
nullptr;
// Further double arrays, all initially null.
162 double *BLKDArray =
nullptr;
163 double *X1Array =
nullptr, *X2Array =
nullptr;
164 double *R1Array =
nullptr, *R2Array =
nullptr;
165 double *LresArray =
nullptr, *UresArray =
nullptr;
// Fixed-size array of RES_BUF_MAX double pointers, all initially null.
166 double *resultsBuffer[RES_BUF_MAX] = {
nullptr};
// OpenCL handle members (cl_device_id, cl_command_queue, cl_mem).
// device_id and commands have no default member initializer here.
168 cl_device_id device_id;
170 cl_command_queue commands;
// Array of RW_BUF cl_mem handles plus one separate cl_mem handle, all initially null.
// NOTE(review): presumably the device-side counterparts of dataBuffer / debugBuffer — confirm.
173 cl_mem cldata[RW_BUF] = {
nullptr};
174 cl_mem cldebug =
nullptr;
// Members prefixed hw_. None has a default member initializer in this header,
// so each must be assigned before it is read.
// NOTE(review): presumably these mirror limits and latencies reported by the
// loaded FPGA bitstream — confirm where they are populated in the .cpp.
176 unsigned int hw_x_vector_elem;
177 unsigned int hw_max_row_size;
178 unsigned int hw_max_column_size;
179 unsigned int hw_max_colors_size;
180 unsigned short hw_max_nnzs_per_row;
181 unsigned int hw_max_matrix_size;
183 bool hw_write_ilu0_results;
184 unsigned short hw_dma_data_width;
185 unsigned char hw_x_vector_latency;
186 unsigned char hw_add_latency;
187 unsigned char hw_mult_latency;
188 unsigned char hw_mult_num;
189 unsigned char hw_num_read_ports;
190 unsigned char hw_num_write_ports;
191 unsigned short hw_reset_cycles;
192 unsigned short hw_reset_settle;
// Option members with in-class defaults: flags default to false, the integer
// values to 0 and the C-string pointers to null.
// NOTE(review): names suggest debugging / dump controls — confirm how they are set.
194 bool reset_data_buffers =
false;
195 bool fill_results_buffers =
false;
196 int dump_data_buffers = 0;
197 bool dump_results =
false;
// Raw char pointers, initially null; ownership is not visible in this header.
198 char *data_dir =
nullptr;
199 char *basename =
nullptr;
// Unsigned short values starting at 0.
200 unsigned short rst_assert_cycles = 0;
201 unsigned short rst_settle_cycles = 0;
/// Declaration only; the definition is not part of this listing.
/// \param[in] N     integer size argument (NOTE(review): presumably number of rows — confirm)
/// \param[in] nnz   integer size argument (NOTE(review): presumably number of nonzeros — confirm)
/// \param[in] dim   integer size argument (NOTE(review): presumably the block dimension — confirm)
/// \param[in] vals  pointer to double values
/// \param[in] rows  pointer to int data (NOTE(review): presumably row pointers of the sparsity pattern — confirm)
/// \param[in] cols  pointer to int data (NOTE(review): presumably column indices of the sparsity pattern — confirm)
210 void initialize(
int N,
int nnz,
int dim,
double *vals,
int *rows,
int *cols);
/// Declaration only; the definition is not part of this listing.
/// \param[in] vals  pointer to double values (NOTE(review): presumably the new matrix nonzeros — confirm)
/// \param[in] b     pointer to double values (NOTE(review): presumably the right-hand side — confirm)
215 void update_system(
double *vals,
double *b);
/// Declaration only; takes no arguments and returns a bool.
/// NOTE(review): presumably true means the analysis succeeded — confirm in the .cpp.
219 bool analyse_matrix();
/// Declaration only; takes no arguments and returns a bool.
/// NOTE(review): presumably true means the preconditioner was created successfully — confirm in the .cpp.
223 bool create_preconditioner();
/// Declaration only; takes no arguments and returns nothing.
/// NOTE(review): presumably condenses the per-call metrics into the aggregate record — confirm in the .cpp.
230 void generate_statistics(
void);
/// Construct an fpgaSolver.
/// \param[in] fpga_bitstream           string taken by value (NOTE(review): presumably the path to the bitstream file — confirm)
/// \param[in] linear_solver_verbosity  integer verbosity setting
/// \param[in] maxit                    integer (NOTE(review): presumably the iteration limit — confirm)
/// \param[in] tolerance                double (NOTE(review): presumably the convergence tolerance — confirm)
/// \param[in] opencl_ilu_reorder       ILUReorder value (NOTE(review): presumably selects the reordering strategy — confirm)
240 FpgaSolverBackend(std::string fpga_bitstream,
int linear_solver_verbosity,
int maxit,
double tolerance, ILUReorder opencl_ilu_reorder);
/// Overrides a virtual function of a base class and returns a SolverStatus.
/// Declaration only; the definition is not part of this listing.
/// \param[in] N              integer size argument (NOTE(review): presumably number of rows — confirm)
/// \param[in] nnz            integer size argument (NOTE(review): presumably number of nonzeros — confirm)
/// \param[in] dim            integer size argument (NOTE(review): presumably the block dimension — confirm)
/// \param[in] vals           pointer to double values
/// \param[in] rows           pointer to int data
/// \param[in] cols           pointer to int data
/// \param[in] b              pointer to double values (NOTE(review): presumably the right-hand side — confirm)
/// \param[in] wellContribs   WellContributions passed by non-const reference
/// \param[inout] res         BdaResult passed by non-const reference (NOTE(review): presumably receives solver summary data — confirm)
256 SolverStatus solve_system(
int N,
int nnz,
int dim,
double *vals,
int *rows,
int *cols,
double *b,
WellContributions& wellContribs,
BdaResult &res)
override;
This class is based on InverseOperatorResult struct from dune/istl/solver.hh It is needed to prevent ...
Definition: BdaResult.hpp:31
This class serves to simplify choosing between different backend solvers, such as cusparseSolver and ...
Definition: BdaSolver.hpp:44
This struct resembles a blocked csr matrix, like Dune::BCRSMatrix.
Definition: BlockedMatrix.hpp:37
Definition: FPGABILU0.hpp:41
This class implements an ilu0-bicgstab solver on FPGA.
Definition: FPGASolverBackend.hpp:38
FpgaSolverBackend(std::string fpga_bitstream, int linear_solver_verbosity, int maxit, double tolerance, ILUReorder opencl_ilu_reorder)
Construct an fpgaSolver.
Definition: FPGASolverBackend.cpp:51
void get_result(double *x) override
Get result after linear solve, and perform postprocessing if necessary.
Definition: FPGASolverBackend.cpp:211
~FpgaSolverBackend()
Destroy an fpgaSolver, and free memory.
Definition: FPGASolverBackend.cpp:177
This class serves to eliminate the need to include the WellContributions into the matrix (with –matri...
Definition: WellContributions.hpp:53
This file contains a set of helper functions used by VFPProd / VFPInj.
Definition: BlackoilPhases.hpp:27