Actual source code: bvtensor.c
slepc-3.16.0 2021-09-30
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-2021, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7: SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9: */
10: /*
11: Tensor BV that is represented in compact form as V = (I otimes U) S
12: */
14: #include <slepc/private/bvimpl.h>
15: #include <slepcblaslapack.h>
17: typedef struct {
18: BV U; /* first factor */
19: Mat S; /* second factor */
20: PetscScalar *qB; /* auxiliary matrix used in non-standard inner products */
21: PetscScalar *sw; /* work space */
22: PetscInt d; /* degree of the tensor BV */
23: PetscInt ld; /* leading dimension of a single block in S */
24: PetscInt puk; /* copy of the k value */
25: Vec u; /* auxiliary work vector */
26: } BV_TENSOR;
28: PetscErrorCode BVMultInPlace_Tensor(BV V,Mat Q,PetscInt s,PetscInt e)
29: {
30: PetscErrorCode ierr;
31: BV_TENSOR *ctx = (BV_TENSOR*)V->data;
32: PetscScalar *pS;
33: const PetscScalar *q;
34: PetscInt ldq,lds = ctx->ld*ctx->d;
37: MatGetSize(Q,&ldq,NULL);
38: MatDenseGetArray(ctx->S,&pS);
39: MatDenseGetArrayRead(Q,&q);
40: BVMultInPlace_BLAS_Private(V,lds,V->k-V->l,ldq,s-V->l,e-V->l,pS+(V->nc+V->l)*lds,q+V->l*ldq+V->l,PETSC_FALSE);
41: MatDenseRestoreArrayRead(Q,&q);
42: MatDenseRestoreArray(ctx->S,&pS);
43: return(0);
44: }
46: PetscErrorCode BVMultInPlaceHermitianTranspose_Tensor(BV V,Mat Q,PetscInt s,PetscInt e)
47: {
48: PetscErrorCode ierr;
49: BV_TENSOR *ctx = (BV_TENSOR*)V->data;
50: PetscScalar *pS;
51: const PetscScalar *q;
52: PetscInt ldq,lds = ctx->ld*ctx->d;
55: MatGetSize(Q,&ldq,NULL);
56: MatDenseGetArray(ctx->S,&pS);
57: MatDenseGetArrayRead(Q,&q);
58: BVMultInPlace_BLAS_Private(V,lds,V->k-V->l,ldq,s-V->l,e-V->l,pS+(V->nc+V->l)*lds,q+V->l*ldq+V->l,PETSC_TRUE);
59: MatDenseRestoreArrayRead(Q,&q);
60: MatDenseRestoreArray(ctx->S,&pS);
61: return(0);
62: }
64: PetscErrorCode BVDot_Tensor(BV X,BV Y,Mat M)
65: {
67: BV_TENSOR *x = (BV_TENSOR*)X->data,*y = (BV_TENSOR*)Y->data;
68: PetscScalar *m;
69: const PetscScalar *px,*py;
70: PetscInt ldm,lds = x->ld*x->d;
73: if (x->U!=y->U) SETERRQ(PetscObjectComm((PetscObject)X),PETSC_ERR_SUP,"BVDot() in BVTENSOR requires that both operands have the same U factor");
74: if (lds!=y->ld*y->d) SETERRQ2(PetscObjectComm((PetscObject)X),PETSC_ERR_ARG_SIZ,"Mismatching dimensions ld*d %D %D",lds,y->ld*y->d);
75: MatGetSize(M,&ldm,NULL);
76: MatDenseGetArrayRead(x->S,&px);
77: MatDenseGetArrayRead(y->S,&py);
78: MatDenseGetArray(M,&m);
79: BVDot_BLAS_Private(X,Y->k-Y->l,X->k-X->l,lds,ldm,py+(Y->nc+Y->l)*lds,px+(X->nc+X->l)*lds,m+X->l*ldm+Y->l,PETSC_FALSE);
80: MatDenseRestoreArray(M,&m);
81: MatDenseRestoreArrayRead(x->S,&px);
82: MatDenseRestoreArrayRead(y->S,&py);
83: return(0);
84: }
86: PetscErrorCode BVScale_Tensor(BV bv,PetscInt j,PetscScalar alpha)
87: {
89: BV_TENSOR *ctx = (BV_TENSOR*)bv->data;
90: PetscScalar *pS;
91: PetscInt lds = ctx->ld*ctx->d;
94: MatDenseGetArray(ctx->S,&pS);
95: if (j<0) {
96: BVScale_BLAS_Private(bv,(bv->k-bv->l)*lds,pS+(bv->nc+bv->l)*lds,alpha);
97: } else {
98: BVScale_BLAS_Private(bv,lds,pS+(bv->nc+j)*lds,alpha);
99: }
100: MatDenseRestoreArray(ctx->S,&pS);
101: return(0);
102: }
104: PetscErrorCode BVNorm_Tensor(BV bv,PetscInt j,NormType type,PetscReal *val)
105: {
106: PetscErrorCode ierr;
107: BV_TENSOR *ctx = (BV_TENSOR*)bv->data;
108: const PetscScalar *pS;
109: PetscInt lds = ctx->ld*ctx->d;
112: MatDenseGetArrayRead(ctx->S,&pS);
113: if (j<0) {
114: BVNorm_LAPACK_Private(bv,lds,bv->k-bv->l,pS+(bv->nc+bv->l)*lds,type,val,PETSC_FALSE);
115: } else {
116: BVNorm_LAPACK_Private(bv,lds,1,pS+(bv->nc+j)*lds,type,val,PETSC_FALSE);
117: }
118: MatDenseRestoreArrayRead(ctx->S,&pS);
119: return(0);
120: }
122: PetscErrorCode BVCopyColumn_Tensor(BV V,PetscInt j,PetscInt i)
123: {
125: BV_TENSOR *ctx = (BV_TENSOR*)V->data;
126: PetscScalar *pS;
127: PetscInt lds = ctx->ld*ctx->d;
130: MatDenseGetArray(ctx->S,&pS);
131: PetscArraycpy(pS+(V->nc+i)*lds,pS+(V->nc+j)*lds,lds);
132: MatDenseRestoreArray(ctx->S,&pS);
133: return(0);
134: }
136: static PetscErrorCode BVTensorNormColumn(BV bv,PetscInt j,PetscReal *norm)
137: {
138: PetscErrorCode ierr;
139: BV_TENSOR *ctx = (BV_TENSOR*)bv->data;
140: PetscBLASInt one=1,lds_;
141: PetscScalar sone=1.0,szero=0.0,*x,dot;
142: const PetscScalar *S;
143: PetscReal alpha=1.0,scale=0.0,aval;
144: PetscInt i,lds,ld=ctx->ld;
147: lds = ld*ctx->d;
148: MatDenseGetArrayRead(ctx->S,&S);
149: PetscBLASIntCast(lds,&lds_);
150: if (ctx->qB) {
151: x = ctx->sw;
152: PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&lds_,&lds_,&sone,ctx->qB,&lds_,S+j*lds,&one,&szero,x,&one));
153: dot = PetscRealPart(BLASdot_(&lds_,S+j*lds,&one,x,&one));
154: BV_SafeSqrt(bv,dot,norm);
155: } else {
156: /* Compute *norm = BLASnrm2_(&lds_,S+j*lds,&one); */
157: if (lds==1) *norm = PetscAbsScalar(S[j*lds]);
158: else {
159: for (i=0;i<lds;i++) {
160: aval = PetscAbsScalar(S[i+j*lds]);
161: if (aval!=0.0) {
162: if (scale<aval) {
163: alpha = 1.0 + alpha*PetscSqr(scale/aval);
164: scale = aval;
165: } else alpha += PetscSqr(aval/scale);
166: }
167: }
168: *norm = scale*PetscSqrtReal(alpha);
169: }
170: }
171: MatDenseRestoreArrayRead(ctx->S,&S);
172: return(0);
173: }
175: PetscErrorCode BVOrthogonalizeGS1_Tensor(BV bv,PetscInt k,Vec v,PetscBool *which,PetscScalar *h,PetscScalar *c,PetscReal *onorm,PetscReal *norm)
176: {
177: PetscErrorCode ierr;
178: BV_TENSOR *ctx = (BV_TENSOR*)bv->data;
179: PetscScalar *pS,*cc,*x,dot,sonem=-1.0,sone=1.0,szero=0.0;
180: PetscInt i,lds = ctx->ld*ctx->d;
181: PetscBLASInt lds_,k_,one=1;
182: const PetscScalar *omega;
185: if (v) SETERRQ(PetscObjectComm((PetscObject)bv),PETSC_ERR_SUP,"Orthogonalization against an external vector is not allowed in BVTENSOR");
186: MatDenseGetArray(ctx->S,&pS);
187: if (!c) {
188: VecGetArray(bv->buffer,&cc);
189: } else cc = c;
190: PetscBLASIntCast(lds,&lds_);
191: PetscBLASIntCast(k,&k_);
193: if (onorm) { BVTensorNormColumn(bv,k,onorm); }
195: if (ctx->qB) x = ctx->sw;
196: else x = pS+k*lds;
198: if (bv->orthog_type==BV_ORTHOG_MGS) { /* modified Gram-Schmidt */
200: if (bv->indef) { /* signature */
201: VecGetArrayRead(bv->omega,&omega);
202: }
203: for (i=-bv->nc;i<k;i++) {
204: if (which && i>=0 && !which[i]) continue;
205: if (ctx->qB) PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&lds_,&lds_,&sone,ctx->qB,&lds_,pS+k*lds,&one,&szero,x,&one));
206: /* c_i = (s_k, s_i) */
207: dot = PetscRealPart(BLASdot_(&lds_,pS+i*lds,&one,x,&one));
208: if (bv->indef) dot /= PetscRealPart(omega[i]);
209: BV_SetValue(bv,i,0,cc,dot);
210: /* s_k = s_k - c_i s_i */
211: dot = -dot;
212: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&lds_,&dot,pS+i*lds,&one,pS+k*lds,&one));
213: }
214: if (bv->indef) {
215: VecRestoreArrayRead(bv->omega,&omega);
216: }
218: } else { /* classical Gram-Schmidt */
219: if (ctx->qB) PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&lds_,&lds_,&sone,ctx->qB,&lds_,pS+k*lds,&one,&szero,x,&one));
221: /* cc = S_{0:k-1}^* s_k */
222: PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&lds_,&k_,&sone,pS,&lds_,x,&one,&szero,cc,&one));
224: /* s_k = s_k - S_{0:k-1} cc */
225: if (bv->indef) { BV_ApplySignature(bv,k,cc,PETSC_TRUE); }
226: PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&lds_,&k_,&sonem,pS,&lds_,cc,&one,&sone,pS+k*lds,&one));
227: if (bv->indef) { BV_ApplySignature(bv,k,cc,PETSC_FALSE); }
228: }
230: if (norm) { BVTensorNormColumn(bv,k,norm); }
231: BV_AddCoefficients(bv,k,h,cc);
232: MatDenseRestoreArray(ctx->S,&pS);
233: VecRestoreArray(bv->buffer,&cc);
234: return(0);
235: }
237: PetscErrorCode BVView_Tensor(BV bv,PetscViewer viewer)
238: {
239: PetscErrorCode ierr;
240: BV_TENSOR *ctx = (BV_TENSOR*)bv->data;
241: PetscViewerFormat format;
242: PetscBool isascii;
243: const char *bvname,*uname,*sname;
246: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
247: if (isascii) {
248: PetscViewerGetFormat(viewer,&format);
249: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
250: PetscViewerASCIIPrintf(viewer,"number of tensor blocks (degree): %D\n",ctx->d);
251: PetscViewerASCIIPrintf(viewer,"number of columns of U factor: %D\n",ctx->ld);
252: return(0);
253: }
254: BVView(ctx->U,viewer);
255: MatView(ctx->S,viewer);
256: if (format == PETSC_VIEWER_ASCII_MATLAB) {
257: PetscObjectGetName((PetscObject)bv,&bvname);
258: PetscObjectGetName((PetscObject)ctx->U,&uname);
259: PetscObjectGetName((PetscObject)ctx->S,&sname);
260: PetscViewerASCIIPrintf(viewer,"%s=kron(eye(%D),%s)*%s(:,1:%D);\n",bvname,ctx->d,uname,sname,bv->k);
261: }
262: } else {
263: BVView(ctx->U,viewer);
264: MatView(ctx->S,viewer);
265: }
266: return(0);
267: }
269: static PetscErrorCode BVTensorUpdateMatrix(BV V,PetscInt ini,PetscInt end)
270: {
272: BV_TENSOR *ctx = (BV_TENSOR*)V->data;
273: PetscInt i,j,r,c,l,k,ld=ctx->ld,lds=ctx->d*ctx->ld;
274: PetscScalar *qB,*sqB;
275: Vec u;
276: Mat A;
279: if (!V->matrix) return(0);
280: l = ctx->U->l; k = ctx->U->k;
281: /* update inner product matrix */
282: if (!ctx->qB) {
283: PetscCalloc2(lds*lds,&ctx->qB,lds,&ctx->sw);
284: VecDuplicate(ctx->U->t,&ctx->u);
285: }
286: ctx->U->l = 0;
287: for (r=0;r<ctx->d;r++) {
288: for (c=0;c<=r;c++) {
289: MatNestGetSubMat(V->matrix,r,c,&A);
290: if (A) {
291: qB = ctx->qB+c*ld*lds+r*ld;
292: for (i=ini;i<end;i++) {
293: BVGetColumn(ctx->U,i,&u);
294: MatMult(A,u,ctx->u);
295: ctx->U->k = i+1;
296: BVDotVec(ctx->U,ctx->u,qB+i*lds);
297: BVRestoreColumn(ctx->U,i,&u);
298: for (j=0;j<i;j++) qB[i+j*lds] = PetscConj(qB[j+i*lds]);
299: qB[i*lds+i] = PetscRealPart(qB[i+i*lds]);
300: }
301: if (c!=r) {
302: sqB = ctx->qB+r*ld*lds+c*ld;
303: for (i=ini;i<end;i++) for (j=0;j<=i;j++) {
304: sqB[i+j*lds] = PetscConj(qB[j+i*lds]);
305: sqB[j+i*lds] = qB[j+i*lds];
306: }
307: }
308: }
309: }
310: }
311: ctx->U->l = l; ctx->U->k = k;
312: return(0);
313: }
315: static PetscErrorCode BVTensorBuildFirstColumn_Tensor(BV V,PetscInt k)
316: {
318: BV_TENSOR *ctx = (BV_TENSOR*)V->data;
319: PetscInt i,nq=0;
320: PetscScalar *pS,*omega;
321: PetscReal norm;
322: PetscBool breakdown=PETSC_FALSE;
325: MatDenseGetArray(ctx->S,&pS);
326: for (i=0;i<ctx->d;i++) {
327: if (i>=k) {
328: BVSetRandomColumn(ctx->U,nq);
329: } else {
330: BVCopyColumn(ctx->U,i,nq);
331: }
332: BVOrthogonalizeColumn(ctx->U,nq,pS+i*ctx->ld,&norm,&breakdown);
333: if (!breakdown) {
334: BVScaleColumn(ctx->U,nq,1.0/norm);
335: pS[nq+i*ctx->ld] = norm;
336: nq++;
337: }
338: }
339: MatDenseRestoreArray(ctx->S,&pS);
340: if (!nq) SETERRQ1(PetscObjectComm((PetscObject)V),1,"Cannot build first column of tensor BV; U should contain k=%D nonzero columns",k);
341: BVTensorUpdateMatrix(V,0,nq);
342: BVTensorNormColumn(V,0,&norm);
343: BVScale_Tensor(V,0,1.0/norm);
344: if (V->indef) {
345: BV_AllocateSignature(V);
346: VecGetArray(V->omega,&omega);
347: omega[0] = (norm<0.0)? -1.0: 1.0;
348: VecRestoreArray(V->omega,&omega);
349: }
350: /* set active columns */
351: ctx->U->l = 0;
352: ctx->U->k = nq;
353: return(0);
354: }
356: /*@
357: BVTensorBuildFirstColumn - Builds the first column of the tensor basis vectors
358: V from the data contained in the first k columns of U.
360: Collective on V
362: Input Parameters:
363: + V - the basis vectors context
364: - k - the number of columns of U with relevant information
366: Notes:
367: At most d columns are considered, where d is the degree of the tensor BV.
368: Given V = (I otimes U) S, this function computes the first column of V, that
369: is, it computes the coefficients of the first column of S by orthogonalizing
370: the first d columns of U. If k is less than d (or linearly dependent columns
371: are found) then additional random columns are used.
373: The computed column has unit norm.
375: Level: advanced
377: .seealso: BVCreateTensor()
378: @*/
379: PetscErrorCode BVTensorBuildFirstColumn(BV V,PetscInt k)
380: {
386: PetscUseMethod(V,"BVTensorBuildFirstColumn_C",(BV,PetscInt),(V,k));
387: return(0);
388: }
390: static PetscErrorCode BVTensorCompress_Tensor(BV V,PetscInt newc)
391: {
393: BV_TENSOR *ctx = (BV_TENSOR*)V->data;
394: PetscInt nwu=0,nnc,nrow,lwa,r,c;
395: PetscInt i,j,k,n,lds=ctx->ld*ctx->d,deg=ctx->d,lock,cs1=V->k,rs1=ctx->U->k,rk=0,offu;
396: PetscScalar *S,*M,*Z,*pQ,*SS,*SS2,t,sone=1.0,zero=0.0,mone=-1.0,*p,*tau,*work,*qB,*sqB;
397: PetscReal *sg,tol,*rwork;
398: PetscBLASInt ld_,cs1_,rs1_,cs1tdeg,n_,info,lw_,newc_,newctdeg,nnc_,nrow_,nnctdeg,lds_,rk_;
399: Mat Q,A;
402: if (!cs1) return(0);
403: lwa = 6*ctx->ld*lds+2*cs1;
404: n = PetscMin(rs1,deg*cs1);
405: lock = ctx->U->l;
406: nnc = cs1-lock-newc;
407: nrow = rs1-lock;
408: PetscCalloc6(deg*newc*nnc,&SS,newc*nnc,&SS2,(rs1+lock+newc)*n,&pQ,deg*rs1,&tau,lwa,&work,6*n,&rwork);
409: offu = lock*(rs1+1);
410: M = work+nwu;
411: nwu += rs1*cs1*deg;
412: sg = rwork;
413: Z = work+nwu;
414: nwu += deg*cs1*n;
415: PetscBLASIntCast(n,&n_);
416: PetscBLASIntCast(nnc,&nnc_);
417: PetscBLASIntCast(cs1,&cs1_);
418: PetscBLASIntCast(rs1,&rs1_);
419: PetscBLASIntCast(newc,&newc_);
420: PetscBLASIntCast(newc*deg,&newctdeg);
421: PetscBLASIntCast(nnc*deg,&nnctdeg);
422: PetscBLASIntCast(cs1*deg,&cs1tdeg);
423: PetscBLASIntCast(lwa-nwu,&lw_);
424: PetscBLASIntCast(nrow,&nrow_);
425: PetscBLASIntCast(lds,&lds_);
426: MatDenseGetArray(ctx->S,&S);
428: if (newc>0) {
429: /* truncate columns associated with new converged eigenpairs */
430: for (j=0;j<deg;j++) {
431: for (i=lock;i<lock+newc;i++) {
432: PetscArraycpy(M+(i-lock+j*newc)*nrow,S+i*lds+j*ctx->ld+lock,nrow);
433: }
434: }
435: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
436: #if !defined (PETSC_USE_COMPLEX)
437: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("S","S",&nrow_,&newctdeg,M,&nrow_,sg,pQ+offu,&rs1_,Z,&n_,work+nwu,&lw_,&info));
438: #else
439: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("S","S",&nrow_,&newctdeg,M,&nrow_,sg,pQ+offu,&rs1_,Z,&n_,work+nwu,&lw_,rwork+n,&info));
440: #endif
441: SlepcCheckLapackInfo("gesvd",info);
442: PetscFPTrapPop();
443: /* SVD has rank min(newc,nrow) */
444: rk = PetscMin(newc,nrow);
445: for (i=0;i<rk;i++) {
446: t = sg[i];
447: PetscStackCallBLAS("BLASscal",BLASscal_(&newctdeg,&t,Z+i,&n_));
448: }
449: for (i=0;i<deg;i++) {
450: for (j=lock;j<lock+newc;j++) {
451: PetscArraycpy(S+j*lds+i*ctx->ld+lock,Z+(newc*i+j-lock)*n,rk);
452: PetscArrayzero(S+j*lds+i*ctx->ld+lock+rk,(ctx->ld-lock-rk));
453: }
454: }
455: /*
456: update columns associated with non-converged vectors, orthogonalize
457: against pQ so that next M has rank nnc+d-1 insted of nrow+d-1
458: */
459: for (i=0;i<deg;i++) {
460: PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&newc_,&nnc_,&nrow_,&sone,pQ+offu,&rs1_,S+(lock+newc)*lds+i*ctx->ld+lock,&lds_,&zero,SS+i*newc*nnc,&newc_));
461: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&nrow_,&nnc_,&newc_,&mone,pQ+offu,&rs1_,SS+i*newc*nnc,&newc_,&sone,S+(lock+newc)*lds+i*ctx->ld+lock,&lds_));
462: /* repeat orthogonalization step */
463: PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&newc_,&nnc_,&nrow_,&sone,pQ+offu,&rs1_,S+(lock+newc)*lds+i*ctx->ld+lock,&lds_,&zero,SS2,&newc_));
464: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&nrow_,&nnc_,&newc_,&mone,pQ+offu,&rs1_,SS2,&newc_,&sone,S+(lock+newc)*lds+i*ctx->ld+lock,&lds_));
465: for (j=0;j<newc*nnc;j++) *(SS+i*newc*nnc+j) += SS2[j];
466: }
467: }
469: /* truncate columns associated with non-converged eigenpairs */
470: for (j=0;j<deg;j++) {
471: for (i=lock+newc;i<cs1;i++) {
472: PetscArraycpy(M+(i-lock-newc+j*nnc)*nrow,S+i*lds+j*ctx->ld+lock,nrow);
473: }
474: }
475: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
476: #if !defined (PETSC_USE_COMPLEX)
477: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("S","S",&nrow_,&nnctdeg,M,&nrow_,sg,pQ+offu+newc*rs1,&rs1_,Z,&n_,work+nwu,&lw_,&info));
478: #else
479: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("S","S",&nrow_,&nnctdeg,M,&nrow_,sg,pQ+offu+newc*rs1,&rs1_,Z,&n_,work+nwu,&lw_,rwork+n,&info));
480: #endif
481: SlepcCheckLapackInfo("gesvd",info);
482: PetscFPTrapPop();
483: tol = PetscMax(rs1,deg*cs1)*PETSC_MACHINE_EPSILON*sg[0];
484: rk = 0;
485: for (i=0;i<PetscMin(nrow,nnctdeg);i++) if (sg[i]>tol) rk++;
486: rk = PetscMin(nnc+deg-1,rk);
487: /* the SVD has rank (at most) nnc+deg-1 */
488: for (i=0;i<rk;i++) {
489: t = sg[i];
490: PetscStackCallBLAS("BLASscal",BLASscal_(&nnctdeg,&t,Z+i,&n_));
491: }
492: /* update S */
493: PetscArrayzero(S+cs1*lds,(V->m-cs1)*lds);
494: k = ctx->ld-lock-newc-rk;
495: for (i=0;i<deg;i++) {
496: for (j=lock+newc;j<cs1;j++) {
497: PetscArraycpy(S+j*lds+i*ctx->ld+lock+newc,Z+(nnc*i+j-lock-newc)*n,rk);
498: PetscArrayzero(S+j*lds+i*ctx->ld+lock+newc+rk,k);
499: }
500: }
501: if (newc>0) {
502: for (i=0;i<deg;i++) {
503: p = SS+nnc*newc*i;
504: for (j=lock+newc;j<cs1;j++) {
505: for (k=0;k<newc;k++) S[j*lds+i*ctx->ld+lock+k] = *(p++);
506: }
507: }
508: }
510: /* orthogonalize pQ */
511: rk = rk+newc;
512: PetscBLASIntCast(rk,&rk_);
513: PetscBLASIntCast(cs1-lock,&nnc_);
514: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
515: PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&nrow_,&rk_,pQ+offu,&rs1_,tau,work+nwu,&lw_,&info));
516: SlepcCheckLapackInfo("geqrf",info);
517: for (i=0;i<deg;i++) {
518: PetscStackCallBLAS("BLAStrmm",BLAStrmm_("L","U","N","N",&rk_,&nnc_,&sone,pQ+offu,&rs1_,S+lock*lds+lock+i*ctx->ld,&lds_));
519: }
520: PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&nrow_,&rk_,&rk_,pQ+offu,&rs1_,tau,work+nwu,&lw_,&info));
521: SlepcCheckLapackInfo("orgqr",info);
522: PetscFPTrapPop();
524: /* update vectors U(:,idx) = U*Q(:,idx) */
525: rk = rk+lock;
526: for (i=0;i<lock;i++) pQ[i*(1+rs1)] = 1.0;
527: MatCreateSeqDense(PETSC_COMM_SELF,rs1,rk,pQ,&Q);
528: ctx->U->k = rs1;
529: BVMultInPlace(ctx->U,Q,lock,rk);
530: MatDestroy(&Q);
532: if (ctx->qB) {
533: /* update matrix qB */
534: PetscBLASIntCast(ctx->ld,&ld_);
535: PetscBLASIntCast(rk,&rk_);
536: for (r=0;r<ctx->d;r++) {
537: for (c=0;c<=r;c++) {
538: MatNestGetSubMat(V->matrix,r,c,&A);
539: if (A) {
540: qB = ctx->qB+r*ctx->ld+c*ctx->ld*lds;
541: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&rs1_,&rk_,&rs1_,&sone,qB,&lds_,pQ,&rs1_,&zero,work+nwu,&rs1_));
542: PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&rk_,&rk_,&rs1_,&sone,pQ,&rs1_,work+nwu,&rs1_,&zero,qB,&lds_));
543: for (i=0;i<rk;i++) {
544: for (j=0;j<i;j++) qB[i+j*lds] = PetscConj(qB[j+i*lds]);
545: qB[i+i*lds] = PetscRealPart(qB[i+i*lds]);
546: }
547: for (i=rk;i<ctx->ld;i++) {
548: PetscArrayzero(qB+i*lds,ctx->ld);
549: }
550: for (i=0;i<rk;i++) {
551: PetscArrayzero(qB+i*lds+rk,(ctx->ld-rk));
552: }
553: if (c!=r) {
554: sqB = ctx->qB+r*ctx->ld*lds+c*ctx->ld;
555: for (i=0;i<ctx->ld;i++) for (j=0;j<ctx->ld;j++) sqB[i+j*lds] = PetscConj(qB[j+i*lds]);
556: }
557: }
558: }
559: }
560: }
562: /* free work space */
563: PetscFree6(SS,SS2,pQ,tau,work,rwork);
564: MatDenseRestoreArray(ctx->S,&S);
566: /* set active columns */
567: if (newc) ctx->U->l += newc;
568: ctx->U->k = rk;
569: return(0);
570: }
572: /*@
573: BVTensorCompress - Updates the U and S factors of the tensor basis vectors
574: object V by means of an SVD, removing redundant information.
576: Collective on V
578: Input Parameters:
579: + V - the tensor basis vectors context
580: - newc - additional columns to be locked
582: Notes:
583: This function is typically used when restarting Krylov solvers. Truncating a
584: tensor BV V = (I otimes U) S to its leading columns amounts to keeping the
585: leading columns of S. However, to effectively reduce the size of the
586: decomposition, it is necessary to compress it in a way that fewer columns of
587: U are employed. This can be achieved by means of an update that involves the
588: SVD of the low-rank matrix [S_0 S_1 ... S_{d-1}], where S_i are the pieces of S.
590: If newc is nonzero, then newc columns are added to the leading columns of V.
591: This means that the corresponding columns of the U and S factors will remain
592: invariant in subsequent operations.
594: Level: advanced
596: .seealso: BVCreateTensor(), BVSetActiveColumns()
597: @*/
598: PetscErrorCode BVTensorCompress(BV V,PetscInt newc)
599: {
605: PetscUseMethod(V,"BVTensorCompress_C",(BV,PetscInt),(V,newc));
606: return(0);
607: }
609: static PetscErrorCode BVTensorGetDegree_Tensor(BV bv,PetscInt *d)
610: {
611: BV_TENSOR *ctx = (BV_TENSOR*)bv->data;
614: *d = ctx->d;
615: return(0);
616: }
618: /*@
619: BVTensorGetDegree - Returns the number of blocks (degree) of the tensor BV.
621: Not collective
623: Input Parameter:
624: . bv - the basis vectors context
626: Output Parameter:
627: . d - the degree
629: Level: advanced
631: .seealso: BVCreateTensor()
632: @*/
633: PetscErrorCode BVTensorGetDegree(BV bv,PetscInt *d)
634: {
640: PetscUseMethod(bv,"BVTensorGetDegree_C",(BV,PetscInt*),(bv,d));
641: return(0);
642: }
644: static PetscErrorCode BVTensorGetFactors_Tensor(BV V,BV *U,Mat *S)
645: {
646: BV_TENSOR *ctx = (BV_TENSOR*)V->data;
649: if (ctx->puk>-1) SETERRQ(PetscObjectComm((PetscObject)V),PETSC_ERR_ORDER,"Previous call to BVTensonGetFactors without a BVTensorRestoreFactors call");
650: ctx->puk = ctx->U->k;
651: if (U) *U = ctx->U;
652: if (S) *S = ctx->S;
653: return(0);
654: }
656: /*@C
657: BVTensorGetFactors - Returns the two factors involved in the definition of the
658: tensor basis vectors object, V = (I otimes U) S.
660: Logically Collective on V
662: Input Parameter:
663: . V - the basis vectors context
665: Output Parameters:
666: + U - the BV factor
667: - S - the Mat factor
669: Notes:
670: The returned factors are references (not copies) of the internal factors,
671: so modifying them will change the tensor BV as well. Some operations of the
672: tensor BV assume that U has orthonormal columns, so if the user modifies U
673: this restriction must be taken into account.
675: The returned factors must not be destroyed. BVTensorRestoreFactors() must
676: be called when they are no longer needed.
678: Pass a NULL vector for any of the arguments that is not needed.
680: Level: advanced
682: .seealso: BVTensorRestoreFactors()
683: @*/
684: PetscErrorCode BVTensorGetFactors(BV V,BV *U,Mat *S)
685: {
690: PetscUseMethod(V,"BVTensorGetFactors_C",(BV,BV*,Mat*),(V,U,S));
691: return(0);
692: }
694: static PetscErrorCode BVTensorRestoreFactors_Tensor(BV V,BV *U,Mat *S)
695: {
697: BV_TENSOR *ctx = (BV_TENSOR*)V->data;
700: PetscObjectStateIncrease((PetscObject)V);
701: if (U) *U = NULL;
702: if (S) *S = NULL;
703: BVTensorUpdateMatrix(V,ctx->puk,ctx->U->k);
704: ctx->puk = -1;
705: return(0);
706: }
708: /*@C
709: BVTensorRestoreFactors - Restore the two factors that were obtained with
710: BVTensorGetFactors().
712: Logically Collective on V
714: Input Parameters:
715: + V - the basis vectors context
716: . U - the BV factor (or NULL)
717: - S - the Mat factor (or NULL)
719: Nots:
720: The arguments must match the corresponding call to BVTensorGetFactors().
722: Level: advanced
724: .seealso: BVTensorGetFactors()
725: @*/
726: PetscErrorCode BVTensorRestoreFactors(BV V,BV *U,Mat *S)
727: {
734: PetscUseMethod(V,"BVTensorRestoreFactors_C",(BV,BV*,Mat*),(V,U,S));
735: return(0);
736: }
738: PetscErrorCode BVDestroy_Tensor(BV bv)
739: {
741: BV_TENSOR *ctx = (BV_TENSOR*)bv->data;
744: BVDestroy(&ctx->U);
745: MatDestroy(&ctx->S);
746: if (ctx->u) {
747: PetscFree2(ctx->qB,ctx->sw);
748: VecDestroy(&ctx->u);
749: }
750: PetscFree(bv->data);
751: PetscObjectComposeFunction((PetscObject)bv,"BVTensorBuildFirstColumn_C",NULL);
752: PetscObjectComposeFunction((PetscObject)bv,"BVTensorCompress_C",NULL);
753: PetscObjectComposeFunction((PetscObject)bv,"BVTensorGetDegree_C",NULL);
754: PetscObjectComposeFunction((PetscObject)bv,"BVTensorGetFactors_C",NULL);
755: PetscObjectComposeFunction((PetscObject)bv,"BVTensorRestoreFactors_C",NULL);
756: return(0);
757: }
759: SLEPC_EXTERN PetscErrorCode BVCreate_Tensor(BV bv)
760: {
762: BV_TENSOR *ctx;
765: PetscNewLog(bv,&ctx);
766: bv->data = (void*)ctx;
767: ctx->puk = -1;
769: bv->ops->multinplace = BVMultInPlace_Tensor;
770: bv->ops->multinplacetrans = BVMultInPlaceHermitianTranspose_Tensor;
771: bv->ops->dot = BVDot_Tensor;
772: bv->ops->scale = BVScale_Tensor;
773: bv->ops->norm = BVNorm_Tensor;
774: bv->ops->copycolumn = BVCopyColumn_Tensor;
775: bv->ops->gramschmidt = BVOrthogonalizeGS1_Tensor;
776: bv->ops->destroy = BVDestroy_Tensor;
777: bv->ops->view = BVView_Tensor;
779: PetscObjectComposeFunction((PetscObject)bv,"BVTensorBuildFirstColumn_C",BVTensorBuildFirstColumn_Tensor);
780: PetscObjectComposeFunction((PetscObject)bv,"BVTensorCompress_C",BVTensorCompress_Tensor);
781: PetscObjectComposeFunction((PetscObject)bv,"BVTensorGetDegree_C",BVTensorGetDegree_Tensor);
782: PetscObjectComposeFunction((PetscObject)bv,"BVTensorGetFactors_C",BVTensorGetFactors_Tensor);
783: PetscObjectComposeFunction((PetscObject)bv,"BVTensorRestoreFactors_C",BVTensorRestoreFactors_Tensor);
784: return(0);
785: }
787: /*@
788: BVCreateTensor - Creates a tensor BV that is represented in compact form
789: as V = (I otimes U) S, where U has orthonormal columns.
791: Collective on U
793: Input Parameters:
794: + U - a basis vectors object
795: - d - the number of blocks (degree) of the tensor BV
797: Output Parameter:
798: . V - the new basis vectors context
800: Notes:
801: The new basis vectors object is V = (I otimes U) S, where otimes denotes
802: the Kronecker product, I is the identity matrix of order d, and S is a
803: sequential matrix allocated internally. This compact representation is
804: used e.g. to represent the Krylov basis generated with the linearization
805: of a matrix polynomial of degree d.
807: The size of V (number of rows) is equal to d times n, where n is the size
808: of U. The dimensions of S are d times m rows and m-d+1 columns, where m is
809: the number of columns of U, so m should be at least d.
811: The communicator of V will be the same as U.
813: On input, the content of U is irrelevant. Alternatively, it may contain
814: some nonzero columns that will be used by BVTensorBuildFirstColumn().
816: Level: advanced
818: .seealso: BVTensorGetDegree(), BVTensorGetFactors(), BVTensorBuildFirstColumn()
819: @*/
820: PetscErrorCode BVCreateTensor(BV U,PetscInt d,BV *V)
821: {
823: PetscBool match;
824: PetscInt n,N,m;
825: BV_TENSOR *ctx;
830: PetscObjectTypeCompare((PetscObject)U,BVTENSOR,&match);
831: if (match) SETERRQ(PetscObjectComm((PetscObject)U),PETSC_ERR_SUP,"U cannot be of type tensor");
833: BVCreate(PetscObjectComm((PetscObject)U),V);
834: BVGetSizes(U,&n,&N,&m);
835: if (m<d) SETERRQ2(PetscObjectComm((PetscObject)U),PETSC_ERR_ARG_SIZ,"U has %D columns, it should have at least d=%D",m,d);
836: BVSetSizes(*V,d*n,d*N,m-d+1);
837: PetscObjectChangeTypeName((PetscObject)*V,BVTENSOR);
838: PetscLogEventBegin(BV_Create,*V,0,0,0);
839: BVCreate_Tensor(*V);
840: PetscLogEventEnd(BV_Create,*V,0,0,0);
842: ctx = (BV_TENSOR*)(*V)->data;
843: ctx->U = U;
844: ctx->d = d;
845: ctx->ld = m;
846: PetscObjectReference((PetscObject)U);
847: PetscLogObjectParent((PetscObject)*V,(PetscObject)U);
848: MatCreateSeqDense(PETSC_COMM_SELF,d*m,m-d+1,NULL,&ctx->S);
849: PetscLogObjectParent((PetscObject)*V,(PetscObject)ctx->S);
850: PetscObjectSetName((PetscObject)ctx->S,"S");
852: /* Copy user-provided attributes of U */
853: (*V)->orthog_type = U->orthog_type;
854: (*V)->orthog_ref = U->orthog_ref;
855: (*V)->orthog_eta = U->orthog_eta;
856: (*V)->orthog_block = U->orthog_block;
857: (*V)->vmm = U->vmm;
858: (*V)->rrandom = U->rrandom;
859: return(0);
860: }