Actual source code: dsgsvd.c

slepc-3.18.0 2022-10-01
Report Typos and Errors
  1: /*
  2:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  3:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  4:    Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain

  6:    This file is part of SLEPc.
  7:    SLEPc is distributed under a 2-clause BSD license (see LICENSE).
  8:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  9: */
 10: #include <slepc/private/dsimpl.h>
 11: #include <slepcblaslapack.h>

 13: typedef struct {
 14:   PetscInt m;              /* number of columns */
 15:   PetscInt p;              /* number of rows of B */
 16:   PetscInt tm;             /* number of rows of X after truncating */
 17:   PetscInt tp;             /* number of rows of V after truncating */
 18: } DS_GSVD;

 20: PetscErrorCode DSAllocate_GSVD(DS ds,PetscInt ld)
 21: {
 22:   DSAllocateMat_Private(ds,DS_MAT_A);
 23:   DSAllocateMat_Private(ds,DS_MAT_B);
 24:   DSAllocateMat_Private(ds,DS_MAT_X);
 25:   DSAllocateMat_Private(ds,DS_MAT_U);
 26:   DSAllocateMat_Private(ds,DS_MAT_V);
 27:   DSAllocateMat_Private(ds,DS_MAT_T);
 28:   DSAllocateMat_Private(ds,DS_MAT_D);
 29:   PetscFree(ds->perm);
 30:   PetscMalloc1(ld,&ds->perm);
 31:   return 0;
 32: }

 34: /*
 35:   In compact form, A is either in form (a) or (b):

 37:                      (a)                                            (b)
 38:     lower bidiagonal with upper arrow (n=m+1)         square upper bidiagonal with upper arrow (n=m)
 39:      0       l           k                 m-1
 40:     -----------------------------------------         0     l           k                   m-1
 41:     |*                   .                  |        -----------------------------------------
 42:     |  *                 .                  |        |*                 .                    |
 43:     |    *               .                  |        |  *               .                    |
 44:     |      *             .                  |        |    *             .                    |
 45:   l |. . . . o           o                  |      l |. . . o           o                    |
 46:     |          o         o                  |        |        o         o                    |
 47:     |            o       o                  |        |          o       o                    |
 48:     |              o     o                  |        |            o     o                    |
 49:     |                o   o                  |        |              o   o                    |
 50:     |                  o o                  |        |                o o                    |
 51:   k |. . . . . . . . . . o                  |      k |. . . . . . . . . o x                  |
 52:     |                    x x                |        |                    x x                |
 53:     |                      x x              |        |                      x x              |
 54:     |                        x x            |        |                        x x            |
 55:     |                          x x          |        |                          x x          |
 56:     |                            x x        |        |                            x x        |
 57:     |                              x x      |        |                              x x      |
 58:     |                                x x    |        |                                x x    |
 59:     |                                  x x  |        |                                  x x  |
 60:     |                                    x x|        |                                    x x|
 61: n-1 |                                      x|    n-1 |                                      x|
 62:     -----------------------------------------        -----------------------------------------

 64:   and B is square bidiagonal with upper arrow (p=m)

 66:      0       l           k                 m-1
 67:     -----------------------------------------
 68:     |*                   .                  |
 69:     |  *                 .                  |
 70:     |    *               .                  |
 71:     |      *             .                  |
 72:   l |. . . . o           o                  |
 73:     |          o         o                  |
 74:     |            o       o                  |
 75:     |              o     o                  |
 76:     |                o   o                  |
 77:     |                  o o                  |
 78:   k |. . . . . . . . . . o x                |
 79:     |                      x x              |
 80:     |                        x x            |
 81:     |                          x x          |
 82:     |                            x x        |
 83:     |                              x x      |
 84:     |                                x x    |
 85:     |                                  x x  |
 86:     |                                    x x|
 87: p-1 |                                      x|
 88:      ----------------------------------------
 89: */
 90: PetscErrorCode DSView_GSVD(DS ds,PetscViewer viewer)
 91: {
 92:   DS_GSVD           *ctx = (DS_GSVD*)ds->data;
 93:   PetscViewerFormat format;
 94:   PetscInt          i,j,r,k=ds->k,n=ds->n,m=ctx->m,p=ctx->p,rowsa,rowsb,colsa,colsb;
 95:   PetscReal         *T,*S,value;

 97:   PetscViewerGetFormat(viewer,&format);
 98:   if (format == PETSC_VIEWER_ASCII_INFO) return 0;
 99:   if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
100:     PetscViewerASCIIPrintf(viewer,"number of columns: %" PetscInt_FMT "\n",m);
101:     PetscViewerASCIIPrintf(viewer,"number of rows of B: %" PetscInt_FMT "\n",p);
102:     return 0;
103:   }
105:   if (ds->compact) {
106:     DSGetArrayReal(ds,DS_MAT_T,&T);
107:     DSGetArrayReal(ds,DS_MAT_D,&S);
108:     PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
109:     rowsa = n;
110:     colsa = ds->extrarow? m+1: m;
111:     rowsb = p;
112:     colsb = ds->extrarow? m+1: m;
113:     if (format == PETSC_VIEWER_ASCII_MATLAB) {
114:       PetscViewerASCIIPrintf(viewer,"%% Size = %" PetscInt_FMT " %" PetscInt_FMT "\n",rowsa,colsa);
115:       PetscViewerASCIIPrintf(viewer,"zzz = zeros(%" PetscInt_FMT ",3);\n",2*ds->n);
116:       PetscViewerASCIIPrintf(viewer,"zzz = [\n");
117:       for (i=0;i<PetscMin(rowsa,colsa);i++) PetscViewerASCIIPrintf(viewer,"%" PetscInt_FMT " %" PetscInt_FMT "  %18.16e\n",i+1,i+1,(double)T[i]);
118:       for (i=0;i<k;i++) PetscViewerASCIIPrintf(viewer,"%" PetscInt_FMT " %" PetscInt_FMT "  %18.16e\n",i+1,k+1,(double)T[i+ds->ld]);
119:       if (n>m) { /* A lower bidiagonal */
120:         for (i=k;i<rowsa-1;i++) PetscViewerASCIIPrintf(viewer,"%" PetscInt_FMT " %" PetscInt_FMT "  %18.16e\n",i+2,i+1,(double)T[i+ds->ld]);
121:       } else { /* A (square) upper bidiagonal */
122:         for (i=k;i<colsa-1;i++) PetscViewerASCIIPrintf(viewer,"%" PetscInt_FMT " %" PetscInt_FMT "  %18.16e\n",i+1,i+2,(double)T[i+ds->ld]);
123:       }
124:       PetscViewerASCIIPrintf(viewer,"];\n%s = spconvert(zzz);\n",DSMatName[DS_MAT_T]);
125:       PetscViewerASCIIPrintf(viewer,"%% Size = %" PetscInt_FMT " %" PetscInt_FMT "\n",rowsb,colsb);
126:       PetscViewerASCIIPrintf(viewer,"zzz = zeros(%" PetscInt_FMT ",3);\n",2*ds->n);
127:       PetscViewerASCIIPrintf(viewer,"zzz = [\n");
128:       for (i=0;i<rowsb;i++) PetscViewerASCIIPrintf(viewer,"%" PetscInt_FMT " %" PetscInt_FMT "  %18.16e\n",i+1,i+1,(double)S[i]);
129:       for (i=0;i<colsb-1;i++) {
130:         r = PetscMax(i+2,ds->k+1);
131:         PetscViewerASCIIPrintf(viewer,"%" PetscInt_FMT " %" PetscInt_FMT "  %18.16e\n",i+1,r,(double)T[i+2*ds->ld]);
132:       }
133:       PetscViewerASCIIPrintf(viewer,"];\n%s = spconvert(zzz);\n",DSMatName[DS_MAT_D]);
134:     } else {
135:       PetscViewerASCIIPrintf(viewer,"Matrix %s =\n",DSMatName[DS_MAT_T]);
136:       for (i=0;i<rowsa;i++) {
137:         for (j=0;j<colsa;j++) {
138:           if (i==j) value = T[i];
139:           else if (i<ds->k && j==ds->k) value = T[i+ds->ld];
140:           else if (n>m && i==j+1 && i>ds->k) value = T[j+ds->ld];
141:           else if (n<=m && i+1==j && i>=ds->k) value = T[i+ds->ld];
142:           else value = 0.0;
143:           PetscViewerASCIIPrintf(viewer," %18.16e ",(double)value);
144:         }
145:         PetscViewerASCIIPrintf(viewer,"\n");
146:       }
147:       PetscViewerASCIIPrintf(viewer,"Matrix %s =\n",DSMatName[DS_MAT_D]);
148:       for (i=0;i<rowsb;i++) {
149:         for (j=0;j<colsb;j++) {
150:           if (i==j) value = S[i];
151:           else if (i<ds->k && j==ds->k) value = T[PetscMin(i,j)+2*ds->ld];
152:           else if (i+1==j && i>=ds->k) value = T[i+2*ds->ld];
153:           else value = 0.0;
154:           PetscViewerASCIIPrintf(viewer," %18.16e ",(double)value);
155:         }
156:         PetscViewerASCIIPrintf(viewer,"\n");
157:       }
158:     }
159:     PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
160:     PetscViewerFlush(viewer);
161:     DSRestoreArrayReal(ds,DS_MAT_T,&T);
162:     DSRestoreArrayReal(ds,DS_MAT_D,&S);
163:   } else {
164:     DSViewMat(ds,viewer,DS_MAT_A);
165:     DSViewMat(ds,viewer,DS_MAT_B);
166:   }
167:   if (ds->state>DS_STATE_INTERMEDIATE) {
168:     DSViewMat(ds,viewer,DS_MAT_X);
169:     DSViewMat(ds,viewer,DS_MAT_U);
170:     DSViewMat(ds,viewer,DS_MAT_V);
171:   }
172:   return 0;
173: }

175: PetscErrorCode DSVectors_GSVD(DS ds,DSMatType mat,PetscInt *j,PetscReal *rnorm)
176: {
177:   switch (mat) {
178:     case DS_MAT_U:
179:     case DS_MAT_V:
180:       if (rnorm) *rnorm = 0.0;
181:       break;
182:     case DS_MAT_X:
183:       break;
184:     default:
185:       SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_ARG_OUTOFRANGE,"Invalid mat parameter");
186:   }
187:   return 0;
188: }

190: PetscErrorCode DSSort_GSVD(DS ds,PetscScalar *wr,PetscScalar *wi,PetscScalar *rr,PetscScalar *ri,PetscInt *k)
191: {
192:   DS_GSVD        *ctx = (DS_GSVD*)ds->data;
193:   PetscInt       t,l,ld=ds->ld,i,*perm,*perm2;
194:   PetscReal      *T=NULL,*D=NULL,*eig;
195:   PetscScalar    *A=NULL,*B=NULL;
196:   PetscBool      compact=ds->compact;

198:   if (!ds->sc) return 0;
200:   l = ds->l;
201:   t = ds->t;
202:   perm = ds->perm;
203:   PetscMalloc2(t,&eig,t,&perm2);
204:   if (compact) {
205:     DSGetArrayReal(ds,DS_MAT_T,&T);
206:     DSGetArrayReal(ds,DS_MAT_D,&D);
207:     for (i=0;i<t;i++) eig[i] = (D[i]==0)?PETSC_INFINITY:T[i]/D[i];
208:   } else {
209:     MatDenseGetArray(ds->omat[DS_MAT_A],&A);
210:     MatDenseGetArray(ds->omat[DS_MAT_B],&B);
211:     for (i=0;i<t;i++) eig[i] = (B[i+i*ld]==0)?PETSC_INFINITY:PetscRealPart(A[i+i*ld])/PetscRealPart(B[i*(1+ld)]);
212:   }
213:   DSSortEigenvaluesReal_Private(ds,eig,perm);
214:   PetscArraycpy(perm2,perm,t);
215:   for (i=l;i<t;i++) wr[i] = eig[perm[i]];
216:   if (compact) {
217:     PetscArraycpy(eig,T,t);
218:     for (i=l;i<t;i++) T[i] = eig[perm[i]];
219:     PetscArraycpy(eig,D,t);
220:     for (i=l;i<t;i++) D[i] = eig[perm[i]];
221:     DSRestoreArrayReal(ds,DS_MAT_T,&T);
222:     DSRestoreArrayReal(ds,DS_MAT_D,&D);
223:   } else {
224:     for (i=l;i<t;i++) eig[i] = PetscRealPart(A[i*(1+ld)]);
225:     for (i=l;i<t;i++) A[i*(1+ld)] = eig[perm[i]];
226:     for (i=l;i<t;i++) eig[i] = PetscRealPart(B[i*(1+ld)]);
227:     for (i=l;i<t;i++) B[i*(1+ld)] = eig[perm[i]];
228:     MatDenseRestoreArray(ds->omat[DS_MAT_A],&A);
229:     MatDenseRestoreArray(ds->omat[DS_MAT_B],&B);
230:   }
231:   DSPermuteColumns_Private(ds,l,t,ds->n,DS_MAT_U,perm2);
232:   PetscArraycpy(perm2,perm,t);
233:   DSPermuteColumns_Private(ds,l,t,ctx->m,DS_MAT_X,perm2);
234:   DSPermuteColumns_Private(ds,l,t,ctx->p,DS_MAT_V,perm);
235:   PetscFree2(eig,perm2);
236:   return 0;
237: }

239: PetscErrorCode DSUpdateExtraRow_GSVD(DS ds)
240: {
241:   DS_GSVD           *ctx = (DS_GSVD*)ds->data;
242:   PetscInt          i;
243:   PetscBLASInt      n=0,m=0,ld=0;
244:   const PetscScalar *U,*V;
245:   PetscReal         *T,*e,*f,alpha,beta,betah;

249:   PetscBLASIntCast(ds->n,&n);
250:   PetscBLASIntCast(ctx->m,&m);
251:   PetscBLASIntCast(ds->ld,&ld);
252:   DSGetArrayReal(ds,DS_MAT_T,&T);
253:   e = T+ld;
254:   f = T+2*ld;
255:   MatDenseGetArrayRead(ds->omat[DS_MAT_U],&U);
256:   MatDenseGetArrayRead(ds->omat[DS_MAT_V],&V);
257:   if (n<=m) {   /* upper variant, A is square upper bidiagonal */
258:     beta  = e[m-1];   /* in compact, we assume all entries are zero except the last one */
259:     betah = f[m-1];
260:     for (i=0;i<m;i++) {
261:       e[i] = PetscRealPart(beta*U[m-1+i*ld]);
262:       f[i] = PetscRealPart(betah*V[m-1+i*ld]);
263:     }
264:   } else {   /* lower variant, A is (m+1)xm lower bidiagonal */
265:     alpha = T[m];
266:     betah = f[m-1];
267:     for (i=0;i<m;i++) {
268:       e[i] = PetscRealPart(alpha*U[m+i*ld]);
269:       f[i] = PetscRealPart(betah*V[m-1+i*ld]);
270:     }
271:     T[m] = PetscRealPart(alpha*U[m+m*ld]);
272:   }
273:   ds->k = m;
274:   MatDenseRestoreArrayRead(ds->omat[DS_MAT_U],&U);
275:   MatDenseRestoreArrayRead(ds->omat[DS_MAT_V],&V);
276:   DSRestoreArrayReal(ds,DS_MAT_T,&T);
277:   return 0;
278: }

280: PetscErrorCode DSTruncate_GSVD(DS ds,PetscInt n,PetscBool trim)
281: {
282:   DS_GSVD     *ctx = (DS_GSVD*)ds->data;
283:   PetscScalar *U;
284:   PetscReal   *T;
285:   PetscInt    i,m=ctx->m,ld=ds->ld;
286:   PetscBool   lower=(ds->n>ctx->m)?PETSC_TRUE:PETSC_FALSE;

289:   if (trim) {
290:     ds->l   = 0;
291:     ds->k   = 0;
292:     ds->n   = lower? n+1: n;
293:     ctx->m  = n;
294:     ctx->p  = n;
295:     ds->t   = ds->n;   /* truncated length equal to the new dimension */
296:     ctx->tm = ctx->m;  /* must also keep the previous dimension of X */
297:     ctx->tp = ctx->p;  /* must also keep the previous dimension of V */
298:   } else {
299:     if (lower) {
300:       /* move value of diagonal element of arrow (alpha) */
301:       DSGetArrayReal(ds,DS_MAT_T,&T);
302:       T[n] = T[m];
303:       DSRestoreArrayReal(ds,DS_MAT_T,&T);
304:       /* copy last column of U so that it updates the next initial vector of U1 */
305:       MatDenseGetArray(ds->omat[DS_MAT_U],&U);
306:       for (i=0;i<=m;i++) U[i+n*ld] = U[i+m*ld];
307:       MatDenseRestoreArray(ds->omat[DS_MAT_U],&U);
308:     }
309:     ds->k   = (ds->extrarow)? n: 0;
310:     ds->t   = ds->n;   /* truncated length equal to previous dimension */
311:     ctx->tm = ctx->m;  /* must also keep the previous dimension of X */
312:     ctx->tp = ctx->p;  /* must also keep the previous dimension of V */
313:     ds->n   = lower? n+1: n;
314:     ctx->m  = n;
315:     ctx->p  = n;
316:   }
317:   return 0;
318: }

320: static PetscErrorCode DSSwitchFormat_GSVD(DS ds)
321: {
322:   DS_GSVD        *ctx = (DS_GSVD*)ds->data;
323:   PetscReal      *T,*D;
324:   PetscScalar    *A,*B;
325:   PetscInt       i,n=ds->n,k=ds->k,ld=ds->ld,m=ctx->m;

328:   /* switch from compact (arrow) to dense storage */
329:   /* bidiagonal associated to B is stored in D and T+2*ld */
330:   MatDenseGetArrayWrite(ds->omat[DS_MAT_A],&A);
331:   MatDenseGetArrayWrite(ds->omat[DS_MAT_B],&B);
332:   DSGetArrayReal(ds,DS_MAT_T,&T);
333:   DSGetArrayReal(ds,DS_MAT_D,&D);
334:   PetscArrayzero(A,ld*ld);
335:   PetscArrayzero(B,ld*ld);
336:   for (i=0;i<k;i++) {
337:     A[i+i*ld] = T[i];
338:     A[i+k*ld] = T[i+ld];
339:     B[i+i*ld] = D[i];
340:     B[i+k*ld] = T[i+2*ld];
341:   }
342:   /* B is upper bidiagonal */
343:   B[k+k*ld] = D[k];
344:   for (i=k+1;i<m;i++) {
345:     B[i+i*ld]   = D[i];
346:     B[i-1+i*ld] = T[i-1+2*ld];
347:   }
348:   /* A can be upper (square) or lower bidiagonal */
349:   for (i=k;i<m;i++) A[i+i*ld] = T[i];
350:   if (n>m) for (i=k;i<m;i++) A[i+1+i*ld] = T[i+ld];
351:   else for (i=k+1;i<m;i++) A[i-1+i*ld] = T[i-1+ld];
352:   MatDenseRestoreArrayWrite(ds->omat[DS_MAT_A],&A);
353:   MatDenseRestoreArrayWrite(ds->omat[DS_MAT_B],&B);
354:   DSRestoreArrayReal(ds,DS_MAT_T,&T);
355:   DSRestoreArrayReal(ds,DS_MAT_D,&D);
356:   return 0;
357: }

359: /*
360:   Compact format is used when [A;B] has orthonormal columns.
361:   In this case R=I and the GSVD of (A,B) is the CS decomposition
362: */
363: PetscErrorCode DSSolve_GSVD(DS ds,PetscScalar *wr,PetscScalar *wi)
364: {
365:   DS_GSVD        *ctx = (DS_GSVD*)ds->data;
366:   PetscInt       i,j;
367:   PetscBLASInt   n1,m1,info,lc = 0,n = 0,m = 0,p = 0,p1,l,k,q,ld,off,lwork,r;
368:   PetscScalar    *A,*B,*X,*U,*V,sone=1.0,smone=-1.0;
369:   PetscReal      *alpha,*beta,*T,*D;
370: #if !defined(SLEPC_MISSING_LAPACK_GGSVD3)
371:   PetscScalar    a,dummy;
372:   PetscReal      rdummy;
373:   PetscBLASInt   idummy;
374: #endif

377:   PetscBLASIntCast(ds->n,&m);
378:   PetscBLASIntCast(ctx->m,&n);
379:   PetscBLASIntCast(ctx->p,&p);
380:   PetscBLASIntCast(ds->l,&lc);
382:   /* In compact storage B is always nxn and A can be either nxn or (n+1)xn */
384:   PetscBLASIntCast(ds->ld,&ld);
385:   n1 = n-lc;     /* n1 = size of leading block, excl. locked + size of trailing block */
386:   m1 = m-lc;
387:   p1 = p-lc;
388:   off = lc+lc*ld;
389:   MatDenseGetArray(ds->omat[DS_MAT_A],&A);
390:   MatDenseGetArray(ds->omat[DS_MAT_B],&B);
391:   MatDenseGetArray(ds->omat[DS_MAT_X],&X);
392:   MatDenseGetArray(ds->omat[DS_MAT_U],&U);
393:   MatDenseGetArray(ds->omat[DS_MAT_V],&V);
394:   PetscArrayzero(X,ld*ld);
395:   for (i=0;i<lc;i++) X[i+i*ld] = 1.0;
396:   PetscArrayzero(U,ld*ld);
397:   for (i=0;i<lc;i++) U[i+i*ld] = 1.0;
398:   PetscArrayzero(V,ld*ld);
399:   for (i=0;i<lc;i++) V[i+i*ld] = 1.0;
400:   if (ds->compact) DSSwitchFormat_GSVD(ds);

402: #if !defined(SLEPC_MISSING_LAPACK_GGSVD3)
403:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
404:   /* workspace query and memory allocation */
405:   lwork = -1;
406: #if !defined (PETSC_USE_COMPLEX)
407:   PetscCallBLAS("LAPACKggsvd3",LAPACKggsvd3_("U","V","Q",&m1,&n1,&p1,&k,&l,&dummy,&ld,&dummy,&ld,&rdummy,&rdummy,&dummy,&ld,&dummy,&ld,&dummy,&ld,&a,&lwork,&idummy,&info));
408:   PetscBLASIntCast((PetscInt)a,&lwork);
409: #else
410:   PetscCallBLAS("LAPACKggsvd3",LAPACKggsvd3_("U","V","Q",&m1,&n1,&p1,&k,&l,&dummy,&ld,&dummy,&ld,&rdummy,&rdummy,&dummy,&ld,&dummy,&ld,&dummy,&ld,&a,&lwork,&rdummy,&idummy,&info));
411:   PetscBLASIntCast((PetscInt)PetscRealPart(a),&lwork);
412: #endif

414: #if !defined (PETSC_USE_COMPLEX)
415:   DSAllocateWork_Private(ds,lwork,2*ds->ld,ds->ld);
416:   alpha = ds->rwork;
417:   beta  = ds->rwork+ds->ld;
418:   PetscCallBLAS("LAPACKggsvd3",LAPACKggsvd3_("U","V","Q",&m1,&n1,&p1,&k,&l,A+off,&ld,B+off,&ld,alpha,beta,U+off,&ld,V+off,&ld,X+off,&ld,ds->work,&lwork,ds->iwork,&info));
419: #else
420:   DSAllocateWork_Private(ds,lwork,4*ds->ld,ds->ld);
421:   alpha = ds->rwork+2*ds->ld;
422:   beta  = ds->rwork+3*ds->ld;
423:   PetscCallBLAS("LAPACKggsvd3",LAPACKggsvd3_("U","V","Q",&m1,&n1,&p1,&k,&l,A+off,&ld,B+off,&ld,alpha,beta,U+off,&ld,V+off,&ld,X+off,&ld,ds->work,&lwork,ds->rwork,ds->iwork,&info));
424: #endif
425:   PetscFPTrapPop();
426:   SlepcCheckLapackInfo("ggsvd3",info);

428: #else  /* defined(SLEPC_MISSING_LAPACK_GGSVD3) */

430:   lwork = PetscMax(PetscMax(3*n,m),p)+n;
431:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
432: #if !defined (PETSC_USE_COMPLEX)
433:   DSAllocateWork_Private(ds,lwork,2*ds->ld,ds->ld);
434:   alpha = ds->rwork;
435:   beta  = ds->rwork+ds->ld;
436:   PetscCallBLAS("LAPACKggsvd",LAPACKggsvd_("U","V","Q",&m1,&n1,&p1,&k,&l,A+off,&ld,B+off,&ld,alpha,beta,U+off,&ld,V+off,&ld,X+off,&ld,ds->work,ds->iwork,&info));
437: #else
438:   DSAllocateWork_Private(ds,lwork,4*ds->ld,ds->ld);
439:   alpha = ds->rwork+2*ds->ld;
440:   beta  = ds->rwork+3*ds->ld;
441:   PetscCallBLAS("LAPACKggsvd",LAPACKggsvd_("U","V","Q",&m1,&n1,&p1,&k,&l,A+off,&ld,B+off,&ld,alpha,beta,U+off,&ld,V+off,&ld,X+off,&ld,ds->work,ds->rwork,ds->iwork,&info));
442: #endif
443:   PetscFPTrapPop();
444:   SlepcCheckLapackInfo("ggsvd",info);

446: #endif

449:   if (ds->compact) {
450:     DSGetArrayReal(ds,DS_MAT_T,&T);
451:     DSGetArrayReal(ds,DS_MAT_D,&D);
452:     /* R is the identity matrix (except the sign) */
453:     for (i=lc;i<n;i++) {
454:       if (PetscRealPart(A[i+i*ld])<0.0) { /* scale column i */
455:         for (j=lc;j<n;j++) X[j+i*ld] = -X[j+i*ld];
456:       }
457:     }
458:     PetscArrayzero(T+ld,m-1);
459:     PetscArrayzero(T+2*ld,n-1);
460:     for (i=lc;i<n;i++) {
461:       T[i] = alpha[i-lc];
462:       D[i] = beta[i-lc];
463:       if (D[i]==0.0) wr[i] = PETSC_INFINITY;
464:       else wr[i] = T[i]/D[i];
465:     }
466:     ds->t = n;
467:     DSRestoreArrayReal(ds,DS_MAT_D,&D);
468:     DSRestoreArrayReal(ds,DS_MAT_T,&T);
469:   } else {
470:     /* X = X*inv(R) */
471:     q = PetscMin(m,n);
472:     PetscCallBLAS("BLAStrsm",BLAStrsm_("R","U","N","N",&n,&q,&sone,A,&ld,X,&ld));
473:     if (m<n) {
474:       r = n-m;
475:       PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&r,&m,&sone,X,&ld,A,&ld,&smone,X+m*ld,&ld));
476:       PetscCallBLAS("BLAStrsm",BLAStrsm_("R","U","N","N",&n,&r,&sone,B+m*ld,&ld,X+m*ld,&ld));
477:     }
478:     if (k>0) {
479:       for (i=k;i<PetscMin(m,k+l);i++) {
480:         PetscArraycpy(X+(i-k)*ld,X+i*ld,ld);
481:         PetscArraycpy(U+(i-k)*ld,U+i*ld,ld);
482:       }
483:     }
484:     /* singular values */
485:     PetscArrayzero(A,ld*ld);
486:     PetscArrayzero(B,ld*ld);
487:     for (j=k;j<PetscMin(m,k+l);j++) {
488:       A[(j-k)*(1+ld)] = alpha[j];
489:       B[(j-k)*(1+ld)] = beta[j];
490:       wr[j-k] = alpha[j]/beta[j];
491:     }
492:     ds->t = PetscMin(m,k+l)-k; /* set number of computed values */
493:   }
494:   MatDenseRestoreArray(ds->omat[DS_MAT_A],&A);
495:   MatDenseRestoreArray(ds->omat[DS_MAT_B],&B);
496:   MatDenseRestoreArray(ds->omat[DS_MAT_X],&X);
497:   MatDenseRestoreArray(ds->omat[DS_MAT_U],&U);
498:   MatDenseRestoreArray(ds->omat[DS_MAT_V],&V);
499:   return 0;
500: }

502: PetscErrorCode DSCond_GSVD(DS ds,PetscReal *cond)
503: {
504:   DS_GSVD           *ctx = (DS_GSVD*)ds->data;
505:   PetscBLASInt      lwork,lrwork=0,info,m,n,p,ld;
506:   PetscScalar       *A,*work;
507:   const PetscScalar *M;
508:   PetscReal         *sigma,conda,condb;
509: #if defined(PETSC_USE_COMPLEX)
510:   PetscReal         *rwork;
511: #endif

513:   PetscBLASIntCast(ds->n,&m);
514:   PetscBLASIntCast(ctx->m,&n);
515:   PetscBLASIntCast(ctx->p,&p);
516:   PetscBLASIntCast(ds->ld,&ld);
517:   lwork = 5*n;
518: #if defined(PETSC_USE_COMPLEX)
519:   lrwork = 5*n;
520: #endif
521:   DSAllocateWork_Private(ds,ld*n+lwork,n+lrwork,0);
522:   A     = ds->work;
523:   work  = ds->work+ld*n;
524:   sigma = ds->rwork;
525: #if defined(PETSC_USE_COMPLEX)
526:   rwork = ds->rwork+n;
527: #endif
528:   if (ds->compact) DSSwitchFormat_GSVD(ds);

530:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
531:   MatDenseGetArrayRead(ds->omat[DS_MAT_A],&M);
532:   PetscArraycpy(A,M,ld*n);
533:   MatDenseRestoreArrayRead(ds->omat[DS_MAT_A],&M);
534: #if defined(PETSC_USE_COMPLEX)
535:   PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","N",&m,&n,A,&ld,sigma,NULL,&ld,NULL,&ld,work,&lwork,rwork,&info));
536: #else
537:   PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","N",&m,&n,A,&ld,sigma,NULL,&ld,NULL,&ld,work,&lwork,&info));
538: #endif
539:   SlepcCheckLapackInfo("gesvd",info);
540:   conda = sigma[0]/sigma[PetscMin(m,n)-1];

542:   MatDenseGetArrayRead(ds->omat[DS_MAT_B],&M);
543:   PetscArraycpy(A,M,ld*n);
544:   MatDenseRestoreArrayRead(ds->omat[DS_MAT_B],&M);
545: #if defined(PETSC_USE_COMPLEX)
546:   PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","N",&p,&n,A,&ld,sigma,NULL,&ld,NULL,&ld,work,&lwork,rwork,&info));
547: #else
548:   PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","N",&p,&n,A,&ld,sigma,NULL,&ld,NULL,&ld,work,&lwork,&info));
549: #endif
550:   SlepcCheckLapackInfo("gesvd",info);
551:   condb = sigma[0]/sigma[PetscMin(p,n)-1];
552:   PetscFPTrapPop();

554:   *cond = PetscMax(conda,condb);
555:   return 0;
556: }

558: PetscErrorCode DSSynchronize_GSVD(DS ds,PetscScalar eigr[],PetscScalar eigi[])
559: {
560:   DS_GSVD        *ctx = (DS_GSVD*)ds->data;
561:   PetscInt       ld=ds->ld,l=ds->l,k=0,kr=0;
562:   PetscMPIInt    m=ctx->m,rank,off=0,size,n,ldn,ld3;
563:   PetscScalar    *A,*U,*V,*X;
564:   PetscReal      *T;

566:   if (ds->compact) kr = 3*ld;
567:   else k = 2*(m-l)*ld;
568:   if (ds->state>DS_STATE_RAW) k += 3*(m-l)*ld;
569:   if (eigr) k += m-l;
570:   DSAllocateWork_Private(ds,k+kr,0,0);
571:   PetscMPIIntCast(k*sizeof(PetscScalar)+kr*sizeof(PetscReal),&size);
572:   PetscMPIIntCast(m-l,&n);
573:   PetscMPIIntCast(ld*(m-l),&ldn);
574:   PetscMPIIntCast(3*ld,&ld3);
575:   if (ds->compact) DSGetArrayReal(ds,DS_MAT_T,&T);
576:   else MatDenseGetArray(ds->omat[DS_MAT_A],&A);
577:   if (ds->state>DS_STATE_RAW) {
578:     MatDenseGetArray(ds->omat[DS_MAT_U],&U);
579:     MatDenseGetArray(ds->omat[DS_MAT_V],&V);
580:     MatDenseGetArray(ds->omat[DS_MAT_X],&X);
581:   }
582:   MPI_Comm_rank(PetscObjectComm((PetscObject)ds),&rank);
583:   if (!rank) {
584:     if (ds->compact) MPI_Pack(T,ld3,MPIU_REAL,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
585:     else MPI_Pack(A+l*ld,ldn,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
586:     if (ds->state>DS_STATE_RAW) {
587:       MPI_Pack(U+l*ld,ldn,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
588:       MPI_Pack(V+l*ld,ldn,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
589:       MPI_Pack(X+l*ld,ldn,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
590:     }
591:     if (eigr) MPI_Pack(eigr+l,n,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
592:   }
593:   MPI_Bcast(ds->work,size,MPI_BYTE,0,PetscObjectComm((PetscObject)ds));
594:   if (rank) {
595:     if (ds->compact) MPI_Unpack(ds->work,size,&off,T,ld3,MPIU_REAL,PetscObjectComm((PetscObject)ds));
596:     else MPI_Unpack(ds->work,size,&off,A+l*ld,ldn,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
597:     if (ds->state>DS_STATE_RAW) {
598:       MPI_Unpack(ds->work,size,&off,U+l*ld,ldn,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
599:       MPI_Unpack(ds->work,size,&off,V+l*ld,ldn,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
600:       MPI_Unpack(ds->work,size,&off,X+l*ld,ldn,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
601:     }
602:     if (eigr) MPI_Unpack(ds->work,size,&off,eigr+l,n,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
603:   }
604:   if (ds->compact) DSRestoreArrayReal(ds,DS_MAT_T,&T);
605:   else MatDenseRestoreArray(ds->omat[DS_MAT_A],&A);
606:   if (ds->state>DS_STATE_RAW) {
607:     MatDenseRestoreArray(ds->omat[DS_MAT_U],&U);
608:     MatDenseRestoreArray(ds->omat[DS_MAT_V],&V);
609:     MatDenseRestoreArray(ds->omat[DS_MAT_X],&X);
610:   }
611:   return 0;
612: }

614: PetscErrorCode DSMatGetSize_GSVD(DS ds,DSMatType t,PetscInt *rows,PetscInt *cols)
615: {
616:   DS_GSVD *ctx = (DS_GSVD*)ds->data;

619:   switch (t) {
620:     case DS_MAT_A:
621:       *rows = ds->n;
622:       *cols = ds->extrarow? ctx->m+1: ctx->m;
623:       break;
624:     case DS_MAT_B:
625:       *rows = ctx->p;
626:       *cols = ds->extrarow? ctx->m+1: ctx->m;
627:       break;
628:     case DS_MAT_T:
629:       *rows = ds->n;
630:       *cols = PetscDefined(USE_COMPLEX)? 2: 3;
631:       break;
632:     case DS_MAT_D:
633:       *rows = ctx->p;
634:       *cols = 1;
635:       break;
636:     case DS_MAT_U:
637:       *rows = ds->state==DS_STATE_TRUNCATED? ds->t: ds->n;
638:       *cols = ds->n;
639:       break;
640:     case DS_MAT_V:
641:       *rows = ds->state==DS_STATE_TRUNCATED? ctx->tp: ctx->p;
642:       *cols = ctx->p;
643:       break;
644:     case DS_MAT_X:
645:       *rows = ds->state==DS_STATE_TRUNCATED? ctx->tm: ctx->m;
646:       *cols = ctx->m;
647:       break;
648:     default:
649:       SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_ARG_OUTOFRANGE,"Invalid t parameter");
650:   }
651:   return 0;
652: }

654: static PetscErrorCode DSGSVDSetDimensions_GSVD(DS ds,PetscInt m,PetscInt p)
655: {
656:   DS_GSVD *ctx = (DS_GSVD*)ds->data;

658:   DSCheckAlloc(ds,1);
659:   if (m==PETSC_DECIDE || m==PETSC_DEFAULT) {
660:     ctx->m = ds->ld;
661:   } else {
663:     ctx->m = m;
664:   }
665:   if (p==PETSC_DECIDE || p==PETSC_DEFAULT) {
666:     ctx->p = ds->n;
667:   } else {
669:     ctx->p = p;
670:   }
671:   return 0;
672: }

674: /*@
675:    DSGSVDSetDimensions - Sets the number of columns and rows for a DSGSVD.

677:    Logically Collective on ds

679:    Input Parameters:
680: +  ds - the direct solver context
681: .  m  - the number of columns
682: -  p  - the number of rows for the second matrix (B)

684:    Notes:
685:    This call is complementary to DSSetDimensions(), to provide two dimensions
686:    that are specific to this DS type. The number of rows for the first matrix (A)
687:    is set by DSSetDimensions().

689:    Level: intermediate

691: .seealso: DSGSVDGetDimensions(), DSSetDimensions()
692: @*/
693: PetscErrorCode DSGSVDSetDimensions(DS ds,PetscInt m,PetscInt p)
694: {
698:   PetscTryMethod(ds,"DSGSVDSetDimensions_C",(DS,PetscInt,PetscInt),(ds,m,p));
699:   return 0;
700: }

702: static PetscErrorCode DSGSVDGetDimensions_GSVD(DS ds,PetscInt *m,PetscInt *p)
703: {
704:   DS_GSVD *ctx = (DS_GSVD*)ds->data;

706:   if (m) *m = ctx->m;
707:   if (p) *p = ctx->p;
708:   return 0;
709: }

711: /*@
712:    DSGSVDGetDimensions - Returns the number of columns and rows for a DSGSVD.

714:    Not collective

716:    Input Parameter:
717: .  ds - the direct solver context

719:    Output Parameters:
720: +  m - the number of columns
721: -  p - the number of rows for the second matrix (B)

723:    Level: intermediate

725: .seealso: DSGSVDSetDimensions()
726: @*/
727: PetscErrorCode DSGSVDGetDimensions(DS ds,PetscInt *m,PetscInt *p)
728: {
730:   PetscUseMethod(ds,"DSGSVDGetDimensions_C",(DS,PetscInt*,PetscInt*),(ds,m,p));
731:   return 0;
732: }

734: PetscErrorCode DSDestroy_GSVD(DS ds)
735: {
736:   PetscFree(ds->data);
737:   PetscObjectComposeFunction((PetscObject)ds,"DSGSVDSetDimensions_C",NULL);
738:   PetscObjectComposeFunction((PetscObject)ds,"DSGSVDGetDimensions_C",NULL);
739:   return 0;
740: }

742: /*MC
743:    DSGSVD - Dense Generalized Singular Value Decomposition.

745:    Level: beginner

747:    Notes:
748:    The problem is expressed as A*X = U*C, B*X = V*S, where A and B are
749:    matrices with the same number of columns, m, U and V are orthogonal
750:    (unitary), and X is an mxm invertible matrix. The DS object does not
751:    expose matrices C and S, instead the singular values sigma, which are
752:    the ratios c_i/s_i, are returned in the arguments of DSSolve().
753:    Note that the number of columns of the returned X, U, V may be smaller
754:    in the case that some c_i or s_i are zero.

756:    The number of rows of A (and U) is the value n passed with DSSetDimensions().
757:    The number of columns m and the number of rows of B (and V) must be
758:    set via DSGSVDSetDimensions().

760:    Internally, LAPACK's representation is used, U'*A*Q = C*[0 R], V'*B*Q = S*[0 R],
761:    where X = Q*inv(R) is computed at the end of DSSolve().

763:    If the compact storage format is selected, then a simplified problem is
764:    solved, where A and B are bidiagonal (possibly with an arrow), and [A;B]
765:    is assumed to have orthonormal columns. We consider two cases: (1) A and B
766:    are square mxm upper bidiagonal, and (2) A is lower bidiagonal with m+1
767:    rows and B is square upper bidiagonal. In these cases, R=I so it
768:    corresponds to the CS decomposition. The first matrix is stored in two
769:    diagonals of DS_MAT_T, while the second matrix is stored in DS_MAT_D
770:    and the remaining diagonal of DS_MAT_T.

772:    Allowed arguments of DSVectors() are DS_MAT_U, DS_MAT_V and DS_MAT_X.

774:    Used DS matrices:
775: +  DS_MAT_A - first problem matrix
776: .  DS_MAT_B - second problem matrix
777: .  DS_MAT_T - first upper bidiagonal matrix (if compact storage is selected)
778: -  DS_MAT_D - second upper bidiagonal matrix (if compact storage is selected)

780:    Implemented methods:
781: .  0 - Lapack (_ggsvd3 if available, or _ggsvd)

783: .seealso: DSCreate(), DSSetType(), DSType, DSGSVDSetDimensions()
784: M*/
785: SLEPC_EXTERN PetscErrorCode DSCreate_GSVD(DS ds)
786: {
787:   DS_GSVD        *ctx;

789:   PetscNew(&ctx);
790:   ds->data = (void*)ctx;

792:   ds->ops->allocate      = DSAllocate_GSVD;
793:   ds->ops->view          = DSView_GSVD;
794:   ds->ops->vectors       = DSVectors_GSVD;
795:   ds->ops->sort          = DSSort_GSVD;
796:   ds->ops->solve[0]      = DSSolve_GSVD;
797:   ds->ops->synchronize   = DSSynchronize_GSVD;
798:   ds->ops->truncate      = DSTruncate_GSVD;
799:   ds->ops->update        = DSUpdateExtraRow_GSVD;
800:   ds->ops->cond          = DSCond_GSVD;
801:   ds->ops->matgetsize    = DSMatGetSize_GSVD;
802:   ds->ops->destroy       = DSDestroy_GSVD;
803:   PetscObjectComposeFunction((PetscObject)ds,"DSGSVDSetDimensions_C",DSGSVDSetDimensions_GSVD);
804:   PetscObjectComposeFunction((PetscObject)ds,"DSGSVDGetDimensions_C",DSGSVDGetDimensions_GSVD);
805:   return 0;
806: }