Actual source code: fninvsqrt.c
slepc-3.17.2 2022-08-09
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7: SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9: */
10: /*
11: Inverse square root function x^(-1/2)
12: */
14: #include <slepc/private/fnimpl.h>
15: #include <slepcblaslapack.h>
17: PetscErrorCode FNEvaluateFunction_Invsqrt(FN fn,PetscScalar x,PetscScalar *y)
18: {
20: #if !defined(PETSC_USE_COMPLEX)
22: #endif
23: *y = 1.0/PetscSqrtScalar(x);
24: PetscFunctionReturn(0);
25: }
27: PetscErrorCode FNEvaluateDerivative_Invsqrt(FN fn,PetscScalar x,PetscScalar *y)
28: {
30: #if !defined(PETSC_USE_COMPLEX)
32: #endif
33: *y = -1.0/(2.0*PetscPowScalarReal(x,1.5));
34: PetscFunctionReturn(0);
35: }
37: PetscErrorCode FNEvaluateFunctionMat_Invsqrt_Schur(FN fn,Mat A,Mat B)
38: {
39: PetscBLASInt n=0,ld,*ipiv,info;
40: PetscScalar *Ba,*Wa;
41: PetscInt m;
42: Mat W;
44: FN_AllocateWorkMat(fn,A,&W);
45: if (A!=B) MatCopy(A,B,SAME_NONZERO_PATTERN);
46: MatDenseGetArray(B,&Ba);
47: MatDenseGetArray(W,&Wa);
48: /* compute B = sqrtm(A) */
49: MatGetSize(A,&m,NULL);
50: PetscBLASIntCast(m,&n);
51: ld = n;
52: FNSqrtmSchur(fn,n,Ba,n,PETSC_FALSE);
53: /* compute B = A\B */
54: PetscMalloc1(ld,&ipiv);
55: PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&n,&n,Wa,&ld,ipiv,Ba,&ld,&info));
56: SlepcCheckLapackInfo("gesv",info);
57: PetscLogFlops(2.0*n*n*n/3.0+2.0*n*n*n);
58: PetscFree(ipiv);
59: MatDenseRestoreArray(W,&Wa);
60: MatDenseRestoreArray(B,&Ba);
61: FN_FreeWorkMat(fn,&W);
62: PetscFunctionReturn(0);
63: }
65: PetscErrorCode FNEvaluateFunctionMatVec_Invsqrt_Schur(FN fn,Mat A,Vec v)
66: {
67: PetscBLASInt n=0,ld,*ipiv,info,one=1;
68: PetscScalar *Ba,*Wa;
69: PetscInt m;
70: Mat B,W;
72: FN_AllocateWorkMat(fn,A,&B);
73: FN_AllocateWorkMat(fn,A,&W);
74: MatDenseGetArray(B,&Ba);
75: MatDenseGetArray(W,&Wa);
76: /* compute B_1 = sqrtm(A)*e_1 */
77: MatGetSize(A,&m,NULL);
78: PetscBLASIntCast(m,&n);
79: ld = n;
80: FNSqrtmSchur(fn,n,Ba,n,PETSC_TRUE);
81: /* compute B_1 = A\B_1 */
82: PetscMalloc1(ld,&ipiv);
83: PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&n,&one,Wa,&ld,ipiv,Ba,&ld,&info));
84: SlepcCheckLapackInfo("gesv",info);
85: PetscFree(ipiv);
86: MatDenseRestoreArray(W,&Wa);
87: MatDenseRestoreArray(B,&Ba);
88: MatGetColumnVector(B,v,0);
89: FN_FreeWorkMat(fn,&W);
90: FN_FreeWorkMat(fn,&B);
91: PetscFunctionReturn(0);
92: }
94: PetscErrorCode FNEvaluateFunctionMat_Invsqrt_DBP(FN fn,Mat A,Mat B)
95: {
96: PetscBLASInt n=0;
97: PetscScalar *T;
98: PetscInt m;
100: if (A!=B) MatCopy(A,B,SAME_NONZERO_PATTERN);
101: MatDenseGetArray(B,&T);
102: MatGetSize(A,&m,NULL);
103: PetscBLASIntCast(m,&n);
104: FNSqrtmDenmanBeavers(fn,n,T,n,PETSC_TRUE);
105: MatDenseRestoreArray(B,&T);
106: PetscFunctionReturn(0);
107: }
109: PetscErrorCode FNEvaluateFunctionMat_Invsqrt_NS(FN fn,Mat A,Mat B)
110: {
111: PetscBLASInt n=0;
112: PetscScalar *T;
113: PetscInt m;
115: if (A!=B) MatCopy(A,B,SAME_NONZERO_PATTERN);
116: MatDenseGetArray(B,&T);
117: MatGetSize(A,&m,NULL);
118: PetscBLASIntCast(m,&n);
119: FNSqrtmNewtonSchulz(fn,n,T,n,PETSC_TRUE);
120: MatDenseRestoreArray(B,&T);
121: PetscFunctionReturn(0);
122: }
124: PetscErrorCode FNEvaluateFunctionMat_Invsqrt_Sadeghi(FN fn,Mat A,Mat B)
125: {
126: PetscBLASInt n=0,ld,*ipiv,info;
127: PetscScalar *Ba,*Wa;
128: PetscInt m;
129: Mat W;
131: FN_AllocateWorkMat(fn,A,&W);
132: if (A!=B) MatCopy(A,B,SAME_NONZERO_PATTERN);
133: MatDenseGetArray(B,&Ba);
134: MatDenseGetArray(W,&Wa);
135: /* compute B = sqrtm(A) */
136: MatGetSize(A,&m,NULL);
137: PetscBLASIntCast(m,&n);
138: ld = n;
139: FNSqrtmSadeghi(fn,n,Ba,n);
140: /* compute B = A\B */
141: PetscMalloc1(ld,&ipiv);
142: PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&n,&n,Wa,&ld,ipiv,Ba,&ld,&info));
143: SlepcCheckLapackInfo("gesv",info);
144: PetscLogFlops(2.0*n*n*n/3.0+2.0*n*n*n);
145: PetscFree(ipiv);
146: MatDenseRestoreArray(W,&Wa);
147: MatDenseRestoreArray(B,&Ba);
148: FN_FreeWorkMat(fn,&W);
149: PetscFunctionReturn(0);
150: }
152: #if defined(PETSC_HAVE_CUDA)
153: PetscErrorCode FNEvaluateFunctionMat_Invsqrt_NS_CUDA(FN fn,Mat A,Mat B)
154: {
155: PetscBLASInt n=0;
156: PetscScalar *Ba;
157: PetscInt m;
159: if (A!=B) MatCopy(A,B,SAME_NONZERO_PATTERN);
160: MatDenseGetArray(B,&Ba);
161: MatGetSize(A,&m,NULL);
162: PetscBLASIntCast(m,&n);
163: FNSqrtmNewtonSchulz_CUDA(fn,n,Ba,n,PETSC_TRUE);
164: MatDenseRestoreArray(B,&Ba);
165: PetscFunctionReturn(0);
166: }
168: #if defined(PETSC_HAVE_MAGMA)
169: PetscErrorCode FNEvaluateFunctionMat_Invsqrt_DBP_CUDAm(FN fn,Mat A,Mat B)
170: {
171: PetscBLASInt n=0;
172: PetscScalar *T;
173: PetscInt m;
175: if (A!=B) MatCopy(A,B,SAME_NONZERO_PATTERN);
176: MatDenseGetArray(B,&T);
177: MatGetSize(A,&m,NULL);
178: PetscBLASIntCast(m,&n);
179: FNSqrtmDenmanBeavers_CUDAm(fn,n,T,n,PETSC_TRUE);
180: MatDenseRestoreArray(B,&T);
181: PetscFunctionReturn(0);
182: }
184: PetscErrorCode FNEvaluateFunctionMat_Invsqrt_Sadeghi_CUDAm(FN fn,Mat A,Mat B)
185: {
186: PetscBLASInt n=0,ld,*ipiv,info;
187: PetscScalar *Ba,*Wa;
188: PetscInt m;
189: Mat W;
191: FN_AllocateWorkMat(fn,A,&W);
192: if (A!=B) MatCopy(A,B,SAME_NONZERO_PATTERN);
193: MatDenseGetArray(B,&Ba);
194: MatDenseGetArray(W,&Wa);
195: /* compute B = sqrtm(A) */
196: MatGetSize(A,&m,NULL);
197: PetscBLASIntCast(m,&n);
198: ld = n;
199: FNSqrtmSadeghi_CUDAm(fn,n,Ba,n);
200: /* compute B = A\B */
201: PetscMalloc1(ld,&ipiv);
202: PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&n,&n,Wa,&ld,ipiv,Ba,&ld,&info));
203: SlepcCheckLapackInfo("gesv",info);
204: PetscLogFlops(2.0*n*n*n/3.0+2.0*n*n*n);
205: PetscFree(ipiv);
206: MatDenseRestoreArray(W,&Wa);
207: MatDenseRestoreArray(B,&Ba);
208: FN_FreeWorkMat(fn,&W);
209: PetscFunctionReturn(0);
210: }
211: #endif /* PETSC_HAVE_MAGMA */
212: #endif /* PETSC_HAVE_CUDA */
214: PetscErrorCode FNView_Invsqrt(FN fn,PetscViewer viewer)
215: {
216: PetscBool isascii;
217: char str[50];
218: const char *methodname[] = {
219: "Schur method for inv(A)*sqrtm(A)",
220: "Denman-Beavers (product form)",
221: "Newton-Schulz iteration",
222: "Sadeghi iteration"
223: #if defined(PETSC_HAVE_CUDA)
224: ,"Newton-Schulz iteration CUDA"
225: #if defined(PETSC_HAVE_MAGMA)
226: ,"Denman-Beavers (product form) CUDA/MAGMA",
227: "Sadeghi iteration CUDA/MAGMA"
228: #endif
229: #endif
230: };
231: const int nmeth=sizeof(methodname)/sizeof(methodname[0]);
233: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
234: if (isascii) {
235: if (fn->beta==(PetscScalar)1.0) {
236: if (fn->alpha==(PetscScalar)1.0) PetscViewerASCIIPrintf(viewer," Inverse square root: x^(-1/2)\n");
237: else {
238: SlepcSNPrintfScalar(str,sizeof(str),fn->alpha,PETSC_TRUE);
239: PetscViewerASCIIPrintf(viewer," Inverse square root: (%s*x)^(-1/2)\n",str);
240: }
241: } else {
242: SlepcSNPrintfScalar(str,sizeof(str),fn->beta,PETSC_TRUE);
243: if (fn->alpha==(PetscScalar)1.0) PetscViewerASCIIPrintf(viewer," Inverse square root: %s*x^(-1/2)\n",str);
244: else {
245: PetscViewerASCIIPrintf(viewer," Inverse square root: %s",str);
246: PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
247: SlepcSNPrintfScalar(str,sizeof(str),fn->alpha,PETSC_TRUE);
248: PetscViewerASCIIPrintf(viewer,"*(%s*x)^(-1/2)\n",str);
249: PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
250: }
251: }
252: if (fn->method<nmeth) PetscViewerASCIIPrintf(viewer," computing matrix functions with: %s\n",methodname[fn->method]);
253: }
254: PetscFunctionReturn(0);
255: }
257: SLEPC_EXTERN PetscErrorCode FNCreate_Invsqrt(FN fn)
258: {
259: fn->ops->evaluatefunction = FNEvaluateFunction_Invsqrt;
260: fn->ops->evaluatederivative = FNEvaluateDerivative_Invsqrt;
261: fn->ops->evaluatefunctionmat[0] = FNEvaluateFunctionMat_Invsqrt_Schur;
262: fn->ops->evaluatefunctionmat[1] = FNEvaluateFunctionMat_Invsqrt_DBP;
263: fn->ops->evaluatefunctionmat[2] = FNEvaluateFunctionMat_Invsqrt_NS;
264: fn->ops->evaluatefunctionmat[3] = FNEvaluateFunctionMat_Invsqrt_Sadeghi;
265: #if defined(PETSC_HAVE_CUDA)
266: fn->ops->evaluatefunctionmat[4] = FNEvaluateFunctionMat_Invsqrt_NS_CUDA;
267: #if defined(PETSC_HAVE_MAGMA)
268: fn->ops->evaluatefunctionmat[5] = FNEvaluateFunctionMat_Invsqrt_DBP_CUDAm;
269: fn->ops->evaluatefunctionmat[6] = FNEvaluateFunctionMat_Invsqrt_Sadeghi_CUDAm;
270: #endif /* PETSC_HAVE_MAGMA */
271: #endif /* PETSC_HAVE_CUDA */
272: fn->ops->evaluatefunctionmatvec[0] = FNEvaluateFunctionMatVec_Invsqrt_Schur;
273: fn->ops->view = FNView_Invsqrt;
274: PetscFunctionReturn(0);
275: }