Actual source code: invit.c
slepc-3.18.0 2022-10-01
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7: SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9: */
11: #include <slepc/private/dsimpl.h>
12: #include <slepcblaslapack.h>
14: struct HRtr
15: {
16: PetscScalar *data;
17: PetscInt m;
18: PetscInt idx[2];
19: PetscInt n[2];
20: PetscScalar tau[2];
21: PetscReal alpha;
22: PetscReal cs;
23: PetscReal sn;
24: PetscInt type;
25: };
27: /*
28: Generates a hyperbolic rotation
29: if x1*x1 - x2*x2 != 0
30: r = sqrt(|x1*x1 - x2*x2|)
31: c = x1/r s = x2/r
33: | c -s||x1| |d*r|
34: |-s c||x2| = | 0 |
35: where d = 1 for type==1 and -1 for type==2
36: Returns the condition number of the reduction
37: */
38: static PetscErrorCode HRGen(PetscReal x1,PetscReal x2,PetscInt *type,PetscReal *c,PetscReal *s,PetscReal *r,PetscReal *cond)
39: {
40: PetscReal t,n2,xa,xb;
41: PetscInt type_;
43: if (x2==0.0) {
44: *r = PetscAbsReal(x1); *c = (x1>=0.0)?1.0:-1.0; *s = 0.0;
45: if (type) *type = 1;
46: return 0;
47: }
48: if (PetscAbsReal(x1) == PetscAbsReal(x2)) {
49: /* hyperbolic rotation doesn't exist */
50: *c = *s = *r = 0.0;
51: if (type) *type = 0;
52: *cond = PETSC_MAX_REAL;
53: return 0;
54: }
56: if (PetscAbsReal(x1)>PetscAbsReal(x2)) {
57: xa = x1; xb = x2; type_ = 1;
58: } else {
59: xa = x2; xb = x1; type_ = 2;
60: }
61: t = xb/xa;
62: n2 = PetscAbsReal(1 - t*t);
63: *r = PetscSqrtReal(n2)*PetscAbsReal(xa);
64: *c = x1/(*r);
65: *s = x2/(*r);
66: if (type_ == 2) *r *= -1;
67: if (type) *type = type_;
68: if (cond) *cond = (PetscAbsReal(*c) + PetscAbsReal(*s))/PetscAbsReal(PetscAbsReal(*c) - PetscAbsReal(*s));
69: return 0;
70: }
72: /*
73: |c s|
74: Applies an hyperbolic rotator |s c|
75: |c s|
76: [x1 x2]|s c|
77: */
78: static PetscErrorCode HRApply(PetscInt n,PetscScalar *x1,PetscInt inc1,PetscScalar *x2,PetscInt inc2,PetscReal c,PetscReal s)
79: {
80: PetscInt i;
81: PetscReal t;
82: PetscScalar tmp;
84: if (PetscAbsReal(c)>PetscAbsReal(s)) { /* Type I */
85: t = s/c;
86: for (i=0;i<n;i++) {
87: x1[i*inc1] = c*x1[i*inc1] + s*x2[i*inc2];
88: x2[i*inc2] = t*x1[i*inc1] + x2[i*inc2]/c;
89: }
90: } else { /* Type II */
91: t = c/s;
92: for (i=0;i<n;i++) {
93: tmp = x1[i*inc1];
94: x1[i*inc1] = c*x1[i*inc1] + s*x2[i*inc2];
95: x2[i*inc2] = t*x1[i*inc1] + tmp/s;
96: }
97: }
98: return 0;
99: }
101: /*
102: Reduction to tridiagonal-diagonal form (see F. Tisseur, SIMAX 26(1), 2004).
104: Input:
105: A symmetric (only lower triangular part is referred)
106: s vector +1 and -1 (signature matrix)
107: Output:
108: d,e
109: s
110: Q s-orthogonal matrix with Q^T*A*Q = T (symmetric tridiagonal matrix)
111: */
112: static PetscErrorCode TridiagDiag_HHR(PetscInt n,PetscScalar *A,PetscInt lda,PetscReal *s,PetscScalar* Q,PetscInt ldq,PetscBool flip,PetscReal *d,PetscReal *e,PetscInt *perm_,PetscScalar *work,PetscReal *rwork,PetscBLASInt *iwork)
113: {
114: PetscInt i,j,k,*ii,*jj,i0=0,ik=0,tmp,type;
115: PetscInt nwu=0;
116: PetscReal *ss,cond=1.0,cs,sn,r;
117: PetscScalar tau,t,*AA;
118: PetscBLASInt n0,n1,ni,inc=1,m,n_,lda_,ldq_,*perm;
119: PetscBool breakdown = PETSC_TRUE;
121: if (n<3) {
122: if (n==1) Q[0]=1;
123: if (n==2) {
124: Q[0] = Q[1+ldq] = 1;
125: Q[1] = Q[ldq] = 0;
126: }
127: return 0;
128: }
129: PetscBLASIntCast(lda,&lda_);
130: PetscBLASIntCast(n,&n_);
131: PetscBLASIntCast(ldq,&ldq_);
132: ss = rwork;
133: perm = iwork;
134: AA = work;
135: for (i=0;i<n;i++) PetscArraycpy(AA+i*n,A+i*lda,n);
136: nwu += n*n;
137: k=0;
138: while (breakdown && k<n) {
139: breakdown = PETSC_FALSE;
140: /* Classify (and flip) A and s according to sign */
141: if (flip) {
142: for (i=0;i<n;i++) {
143: perm[i] = n-1-perm_[i];
144: if (perm[i]==0) i0 = i;
145: if (perm[i]==k) ik = i;
146: }
147: } else {
148: for (i=0;i<n;i++) {
149: perm[i] = perm_[i];
150: if (perm[i]==0) i0 = i;
151: if (perm[i]==k) ik = i;
152: }
153: }
154: perm[ik] = 0;
155: perm[i0] = k;
156: i=1;
157: while (i<n-1 && s[perm[i-1]]==s[perm[0]]) {
158: if (s[perm[i]]!=s[perm[0]]) {
159: j=i+1;
160: while (j<n-1 && s[perm[j]]!=s[perm[0]])j++;
161: tmp = perm[i]; perm[i] = perm[j]; perm[j] = tmp;
162: }
163: i++;
164: }
165: for (i=0;i<n;i++) {
166: ss[i] = s[perm[i]];
167: }
168: if (flip) {
169: ii = &j;
170: jj = &i;
171: } else {
172: ii = &i;
173: jj = &j;
174: }
175: for (i=0;i<n;i++)
176: for (j=0;j<n;j++)
177: A[i+j*lda] = AA[perm[*ii]+perm[*jj]*n];
178: /* Initialize Q */
179: for (i=0;i<n;i++) {
180: PetscArrayzero(Q+i*ldq,n);
181: Q[perm[i]+i*ldq] = 1.0;
182: }
183: for (ni=1;ni<n && ss[ni]==ss[0]; ni++);
184: n0 = ni-1;
185: n1 = n_-ni;
186: for (j=0;j<n-2;j++) {
187: PetscBLASIntCast(n-j-1,&m);
188: /* Forming and applying reflectors */
189: if (n0 > 1) {
190: PetscCallBLAS("LAPACKlarfg",LAPACKlarfg_(&n0,A+ni-n0+j*lda,A+ni-n0+j*lda+1,&inc,&tau));
191: /* Apply reflector */
192: if (PetscAbsScalar(tau) != 0.0) {
193: t=*(A+ni-n0+j*lda); *(A+ni-n0+j*lda)=1.0;
194: PetscCallBLAS("LAPACKlarf",LAPACKlarf_("R",&m,&n0,A+ni-n0+j*lda,&inc,&tau,A+j+1+(j+1)*lda,&lda_,work+nwu));
195: PetscCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n0,&m,A+ni-n0+j*lda,&inc,&tau,A+j+1+(j+1)*lda,&lda_,work+nwu));
196: /* Update Q */
197: PetscCallBLAS("LAPACKlarf",LAPACKlarf_("R",&n_,&n0,A+ni-n0+j*lda,&inc,&tau,Q+(j+1)*ldq,&ldq_,work+nwu));
198: *(A+ni-n0+j*lda) = t;
199: for (i=1;i<n0;i++) {
200: *(A+ni-n0+j*lda+i) = 0.0; *(A+j+(ni-n0+i)*lda) = 0.0;
201: }
202: *(A+j+(ni-n0)*lda) = *(A+ni-n0+j*lda);
203: }
204: }
205: if (n1 > 1) {
206: PetscCallBLAS("LAPACKlarfg",LAPACKlarfg_(&n1,A+n-n1+j*lda,A+n-n1+j*lda+1,&inc,&tau));
207: /* Apply reflector */
208: if (PetscAbsScalar(tau) != 0.0) {
209: t=*(A+n-n1+j*lda); *(A+n-n1+j*lda)=1.0;
210: PetscCallBLAS("LAPACKlarf",LAPACKlarf_("R",&m,&n1,A+n-n1+j*lda,&inc,&tau,A+j+1+(n-n1)*lda,&lda_,work+nwu));
211: PetscCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n1,&m,A+n-n1+j*lda,&inc,&tau,A+n-n1+(j+1)*lda,&lda_,work+nwu));
212: /* Update Q */
213: PetscCallBLAS("LAPACKlarf",LAPACKlarf_("R",&n_,&n1,A+n-n1+j*lda,&inc,&tau,Q+(n-n1)*ldq,&ldq_,work+nwu));
214: *(A+n-n1+j*lda) = t;
215: for (i=1;i<n1;i++) {
216: *(A+n-n1+i+j*lda) = 0.0; *(A+j+(n-n1+i)*lda) = 0.0;
217: }
218: *(A+j+(n-n1)*lda) = *(A+n-n1+j*lda);
219: }
220: }
221: /* Hyperbolic rotation */
222: if (n0 > 0 && n1 > 0) {
223: HRGen(PetscRealPart(A[ni-n0+j*lda]),PetscRealPart(A[n-n1+j*lda]),&type,&cs,&sn,&r,&cond);
224: /* Check condition number */
225: if (cond > 1.0/(10*PETSC_SQRT_MACHINE_EPSILON)) {
226: breakdown = PETSC_TRUE;
227: k++;
229: break;
230: }
231: A[ni-n0+j*lda] = r; A[n-n1+j*lda] = 0.0;
232: A[j+(ni-n0)*lda] = r; A[j+(n-n1)*lda] = 0.0;
233: /* Apply to A */
234: HRApply(m,A+j+1+(ni-n0)*lda,1,A+j+1+(n-n1)*lda,1,cs,-sn);
235: HRApply(m,A+ni-n0+(j+1)*lda,lda,A+n-n1+(j+1)*lda,lda,cs,-sn);
237: /* Update Q */
238: HRApply(n,Q+(ni-n0)*ldq,1,Q+(n-n1)*ldq,1,cs,-sn);
239: if (type==2) {
240: ss[ni-n0] = -ss[ni-n0]; ss[n-n1] = -ss[n-n1];
241: n0++;ni++;n1--;
242: }
243: }
244: if (n0>0) n0--;
245: else n1--;
246: }
247: }
249: /* flip matrices */
250: if (flip) {
251: for (i=0;i<n-1;i++) {
252: d[i] = PetscRealPart(A[n-i-1+(n-i-1)*lda]);
253: e[i] = PetscRealPart(A[n-i-1+(n-i-2)*lda]);
254: s[i] = ss[n-i-1];
255: }
256: s[n-1] = ss[0];
257: d[n-1] = PetscRealPart(A[0]);
258: for (i=0;i<n;i++) PetscArraycpy(work+i*n,Q+i*ldq,n);
259: for (i=0;i<n;i++)
260: for (j=0;j<n;j++)
261: Q[i+j*ldq] = work[i+(n-j-1)*n];
262: } else {
263: for (i=0;i<n-1;i++) {
264: d[i] = PetscRealPart(A[i+i*lda]);
265: e[i] = PetscRealPart(A[i+1+i*lda]);
266: s[i] = ss[i];
267: }
268: s[n-1] = ss[n-1];
269: d[n-1] = PetscRealPart(A[n-1 + (n-1)*lda]);
270: }
271: return 0;
272: }
274: static PetscErrorCode MadeHRtr(PetscInt sz,PetscInt n,PetscInt idx0,PetscInt n0,PetscInt idx1,PetscInt n1,struct HRtr *tr1,struct HRtr *tr2,PetscReal *ncond,PetscScalar *work)
275: {
276: PetscScalar *x,*y;
277: PetscReal ncond2=1.0;
278: PetscBLASInt n0_,n1_,inc=1;
280: /* Hyperbolic transformation to make zeros in x */
281: x = tr1->data;
282: tr1->n[0] = n0;
283: tr1->n[1] = n1;
284: tr1->idx[0] = idx0;
285: tr1->idx[1] = idx1;
286: PetscBLASIntCast(tr1->n[0],&n0_);
287: PetscBLASIntCast(tr1->n[1],&n1_);
288: if (tr1->n[0] > 1) PetscCallBLAS("LAPACKlarfg",LAPACKlarfg_(&n0_,x+tr1->idx[0],x+tr1->idx[0]+1,&inc,tr1->tau));
289: if (tr1->n[1]> 1) PetscCallBLAS("LAPACKlarfg",LAPACKlarfg_(&n1_,x+tr1->idx[1],x+tr1->idx[1]+1,&inc,tr1->tau+1));
290: if (tr1->idx[0]<tr1->idx[1]) HRGen(PetscRealPart(x[tr1->idx[0]]),PetscRealPart(x[tr1->idx[1]]),&(tr1->type),&(tr1->cs),&(tr1->sn),&(tr1->alpha),ncond);
291: else {
292: tr1->alpha = PetscRealPart(x[tr1->idx[0]]);
293: *ncond = 1.0;
294: }
295: if (sz==2) {
296: y = tr2->data;
297: /* Apply first transformation to second column */
298: if (tr1->n[0] > 1 && PetscAbsScalar(tr1->tau[0])!=0.0) {
299: x[tr1->idx[0]] = 1.0;
300: PetscCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n0_,&inc,x+tr1->idx[0],&inc,tr1->tau,y+tr1->idx[0],&n0_,work));
301: }
302: if (tr1->n[1] > 1 && PetscAbsScalar(tr1->tau[1])!=0.0) {
303: x[tr1->idx[1]] = 1.0;
304: PetscCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n1_,&inc,x+tr1->idx[1],&inc,tr1->tau+1,y+tr1->idx[1],&n1_,work));
305: }
306: if (tr1->idx[0]<tr1->idx[1]) HRApply(1,y+tr1->idx[0],1,y+tr1->idx[1],1,tr1->cs,-tr1->sn);
307: tr2->n[0] = tr1->n[0];
308: tr2->n[1] = tr1->n[1];
309: tr2->idx[0] = tr1->idx[0];
310: tr2->idx[1] = tr1->idx[1];
311: if (tr1->idx[0]<tr1->idx[1] && tr1->type==2) {
312: tr2->idx[1]++; tr2->n[1]--; tr2->n[0]++;
313: }
314: if (tr2->n[0]>0) {
315: tr2->n[0]--; tr2->idx[0]++;
316: if (tr2->n[1]==0) tr2->idx[1] = tr2->idx[0];
317: } else {
318: tr2->n[1]--; tr2->idx[1]++; tr2->idx[0] = tr2->idx[1];
319: }
320: /* Hyperbolic transformation to make zeros in y */
321: PetscBLASIntCast(tr2->n[0],&n0_);
322: PetscBLASIntCast(tr2->n[1],&n1_);
323: if (tr2->n[0] > 1) PetscCallBLAS("LAPACKlarfg",LAPACKlarfg_(&n0_,y+tr2->idx[0],y+tr2->idx[0]+1,&inc,tr2->tau));
324: if (tr2->n[1]> 1) PetscCallBLAS("LAPACKlarfg",LAPACKlarfg_(&n1_,y+tr2->idx[1],y+tr2->idx[1]+1,&inc,tr2->tau+1));
325: if (tr2->idx[0]<tr2->idx[1]) HRGen(PetscRealPart(y[tr2->idx[0]]),PetscRealPart(y[tr2->idx[1]]),&(tr2->type),&(tr2->cs),&(tr2->sn),&(tr2->alpha),&ncond2);
326: else {
327: tr2->alpha = PetscRealPart(y[tr2->idx[0]]);
328: ncond2 = 1.0;
329: }
330: if (ncond2>*ncond) *ncond = ncond2;
331: }
332: return 0;
333: }
335: /*
336: Auxiliary function to try perform one iteration of hr routine,
337: checking condition number. If it is < tolD, apply the
338: transformation to H and R, if not, ok=false and it do nothing
339: tolE, tolerance to exchange complex pairs to improve conditioning
340: */
341: static PetscErrorCode TryHRIt(PetscInt n,PetscInt j,PetscInt sz,PetscScalar *H,PetscInt ldh,PetscScalar *R,PetscInt ldr,PetscReal *s,PetscBool *exg,PetscBool *ok,PetscInt *n0,PetscInt *n1,PetscInt *idx0,PetscInt *idx1,PetscReal *cond,PetscScalar *work)
342: {
343: struct HRtr *tr1,*tr2,tr1_t,tr2_t,tr1_te,tr2_te;
344: PetscScalar *x,*y;
345: PetscReal ncond,ncond_e;
346: PetscInt nwu=0,i,d=1;
347: PetscBLASInt n0_,n1_,inc=1,mh,mr,n_,ldr_,ldh_;
348: PetscReal tolD = 1e+5;
350: if (cond) *cond = 1.0;
351: PetscBLASIntCast(n,&n_);
352: PetscBLASIntCast(ldr,&ldr_);
353: PetscBLASIntCast(ldh,&ldh_);
354: x = work+nwu;
355: nwu += n;
356: PetscArraycpy(x,R+j*ldr,n);
357: *exg = PETSC_FALSE;
358: *ok = PETSC_TRUE;
359: tr1_t.data = x;
360: if (sz==1) {
361: /* Hyperbolic transformation to make zeros in x */
362: MadeHRtr(sz,n,*idx0,*n0,*idx1,*n1,&tr1_t,NULL,&ncond,work+nwu);
363: /* Check condition number to single column*/
364: if (ncond>tolD) *ok = PETSC_FALSE;
365: tr1 = &tr1_t;
366: tr2 = &tr2_t;
367: } else {
368: y = work+nwu;
369: nwu += n;
370: PetscArraycpy(y,R+(j+1)*ldr,n);
371: tr2_t.data = y;
372: MadeHRtr(sz,n,*idx0,*n0,*idx1,*n1,&tr1_t,&tr2_t,&ncond,work+nwu);
373: /* Computing hyperbolic transformations also for exchanged vectors */
374: tr1_te.data = work+nwu;
375: nwu += n;
376: PetscArraycpy(tr1_te.data,R+(j+1)*ldr,n);
377: tr2_te.data = work+nwu;
378: nwu += n;
379: PetscArraycpy(tr2_te.data,R+j*ldr,n);
380: MadeHRtr(sz,n,*idx0,*n0,*idx1,*n1,&tr1_te,&tr2_te,&ncond_e,work+nwu);
381: if (ncond > d*ncond_e) {
382: *exg = PETSC_TRUE;
383: tr1 = &tr1_te;
384: tr2 = &tr2_te;
385: ncond = ncond_e;
386: } else {
387: tr1 = &tr1_t;
388: tr2 = &tr2_t;
389: }
390: if (ncond>tolD) *ok = PETSC_FALSE;
391: }
392: if (*ok) {
393: /* Everything is OK, apply transformations to R and H */
394: /* First column */
395: if (cond && *cond<ncond) *cond = ncond;
396: x = tr1->data;
397: PetscBLASIntCast(tr1->n[0],&n0_);
398: PetscBLASIntCast(tr1->n[1],&n1_);
399: PetscBLASIntCast(n-j-sz,&mr);
400: if (tr1->n[0] > 1 && PetscAbsScalar(tr1->tau[0])!=0.0) {
401: x[tr1->idx[0]] = 1.0;
402: PetscCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n0_,&mr,x+tr1->idx[0],&inc,tr1->tau,R+(j+sz)*ldr+tr1->idx[0],&ldr_,work+nwu));
403: PetscCallBLAS("LAPACKlarf",LAPACKlarf_("R",&n_,&n0_,x+tr1->idx[0],&inc,tr1->tau,H+(tr1->idx[0])*ldh,&ldh_,work+nwu));
404: }
405: if (tr1->n[1] > 1 && PetscAbsScalar(tr1->tau[1])!=0.0) {
406: x[tr1->idx[1]] = 1.0;
407: PetscCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n1_,&mr,x+tr1->idx[1],&inc,tr1->tau+1,R+(j+sz)*ldr+tr1->idx[1],&ldr_,work+nwu));
408: PetscCallBLAS("LAPACKlarf",LAPACKlarf_("R",&n_,&n1_,x+tr1->idx[1],&inc,tr1->tau+1,H+(tr1->idx[1])*ldh,&ldh_,work+nwu));
409: }
410: if (tr1->idx[0]<tr1->idx[1]) {
411: HRApply(mr,R+(j+sz)*ldr+tr1->idx[0],ldr,R+(j+sz)*ldr+tr1->idx[1],ldr,tr1->cs,-tr1->sn);
412: if (tr1->type==1) HRApply(n,H+(tr1->idx[0])*ldh,1,H+(tr1->idx[1])*ldh,1,tr1->cs,tr1->sn);
413: else {
414: HRApply(n,H+(tr1->idx[0])*ldh,1,H+(tr1->idx[1])*ldh,1,-tr1->cs,-tr1->sn);
415: s[tr1->idx[0]] = -s[tr1->idx[0]];
416: s[tr1->idx[1]] = -s[tr1->idx[1]];
417: }
418: }
419: for (i=0;i<tr1->idx[0];i++) *(R+j*ldr+i) = x[i];
420: for (i=tr1->idx[0]+1;i<n;i++) *(R+j*ldr+i) = 0.0;
421: *(R+j*ldr+tr1->idx[0]) = tr1->alpha;
422: if (sz==2) {
423: y = tr2->data;
424: /* Second column */
425: PetscBLASIntCast(tr2->n[0],&n0_);
426: PetscBLASIntCast(tr2->n[1],&n1_);
427: PetscBLASIntCast(n-j-sz,&mr);
428: PetscBLASIntCast(n-tr2->idx[0],&mh);
429: if (tr2->n[0] > 1 && PetscAbsScalar(tr2->tau[0])!=0.0) {
430: y[tr2->idx[0]] = 1.0;
431: PetscCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n0_,&mr,y+tr2->idx[0],&inc,tr2->tau,R+(j+2)*ldr+tr2->idx[0],&ldr_,work+nwu));
432: PetscCallBLAS("LAPACKlarf",LAPACKlarf_("R",&n_,&n0_,y+tr2->idx[0],&inc,tr2->tau,H+(tr2->idx[0])*ldh,&ldh_,work+nwu));
433: }
434: if (tr2->n[1] > 1 && PetscAbsScalar(tr2->tau[1])!=0.0) {
435: y[tr2->idx[1]] = 1.0;
436: PetscCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n1_,&mr,y+tr2->idx[1],&inc,tr2->tau+1,R+(j+2)*ldr+tr2->idx[1],&ldr_,work+nwu));
437: PetscCallBLAS("LAPACKlarf",LAPACKlarf_("R",&n_,&n1_,y+tr2->idx[1],&inc,tr2->tau+1,H+(tr2->idx[1])*ldh,&ldh_,work+nwu));
438: }
439: if (tr2->idx[0]<tr2->idx[1]) {
440: HRApply(mr,R+(j+2)*ldr+tr2->idx[0],ldr,R+(j+2)*ldr+tr2->idx[1],ldr,tr2->cs,-tr2->sn);
441: if (tr2->type==1) HRApply(n,H+(tr2->idx[0])*ldh,1,H+(tr2->idx[1])*ldh,1,tr2->cs,tr2->sn);
442: else {
443: HRApply(n,H+(tr2->idx[0])*ldh,1,H+(tr2->idx[1])*ldh,1,-tr2->cs,-tr2->sn);
444: s[tr2->idx[0]] = -s[tr2->idx[0]];
445: s[tr2->idx[1]] = -s[tr2->idx[1]];
446: }
447: }
448: for (i=0;i<tr2->idx[0]-1;i++) *(R+(j+1)*ldr+i) = y[i];
449: *(R+(j+1)*ldr+tr2->idx[0]-1) = y[tr2->idx[0]-1];
450: for (i=tr2->idx[0]+1;i<n;i++) *(R+(j+1)*ldr+i) = 0.0;
451: *(R+(j+1)*ldr+tr2->idx[0]) = tr2->alpha;
452: *n0 = tr2->n[0];
453: *n1 = tr2->n[1];
454: *idx0 = tr2->idx[0];
455: *idx1 = tr2->idx[1];
456: if (tr2->idx[0]<tr2->idx[1] && tr2->type==2) {
457: (*idx1)++; (*n1)--; (*n0)++;
458: }
459: } else {
460: *n0 = tr1->n[0];
461: *n1 = tr1->n[1];
462: *idx0 = tr1->idx[0];
463: *idx1 = tr1->idx[1];
464: if (tr1->idx[0]<tr1->idx[1] && tr1->type==2) {
465: (*idx1)++; (*n1)--; (*n0)++;
466: }
467: }
468: if (*n0>0) {
469: (*n0)--; (*idx0)++;
470: if (*n1==0) *idx1 = *idx0;
471: } else {
472: (*n1)--; (*idx1)++; *idx0 = *idx1;
473: }
474: }
475: return 0;
476: }
478: /*
479: compute V = HR whit H s-orthogonal and R upper triangular
480: */
481: static PetscErrorCode PseudoOrthog_HR(PetscInt *nv,PetscScalar *V,PetscInt ldv,PetscReal *s,PetscScalar *R,PetscInt ldr,PetscBLASInt *perm,PetscBLASInt *cmplxEig,PetscBool *breakdown,PetscScalar *work)
482: {
483: PetscInt i,j,n,n0,n1,np,idx0,idx1,sz=1,k=0,t1,t2,nwu=0;
484: PetscScalar *col1,*col2;
485: PetscBool exg=PETSC_FALSE,ok=PETSC_FALSE;
487: n = *nv;
488: col1 = work+nwu;
489: nwu += n;
490: col2 = work+nwu;
491: nwu += n;
492: /* Sort R and s according to sing(s) */
493: np = 0;
494: for (i=0;i<n;i++) if (s[i]>0) np++;
495: if (s[0]>0) n1 = np;
496: else n1 = n-np;
497: n0 = 0;
498: for (i=0;i<n;i++) {
499: if (s[i]==s[0]) {
500: s[n0] = s[0];
501: perm[n0++] = i;
502: } else perm[n1++] = i;
503: }
504: for (i=n0;i<n;i++) s[i] = -s[0];
505: n1 -= n0;
506: idx0 = 0;
507: idx1 = n0;
508: if (idx1==n) idx1=idx0;
509: for (i=0;i<n;i++) {
510: for (j=0;j<n;j++) R[j*ldr+i] = V[j*ldv+perm[i]];
511: }
512: /* Initialize H */
513: for (i=0;i<n;i++) {
514: PetscArrayzero(V+i*ldv,n);
515: V[perm[i]+i*ldv] = 1.0;
516: }
517: for (i=0;i<n;i++) perm[i] = i;
518: j = 0;
519: while (j<n-k) {
520: if (cmplxEig[j]==0) sz=1;
521: else sz=2;
522: TryHRIt(n,j,sz,V,ldv,R,ldr,s,&exg,&ok,&n0,&n1,&idx0,&idx1,NULL,work+nwu);
523: if (ok) {
524: if (exg) cmplxEig[j] = -cmplxEig[j];
525: j = j+sz;
526: } else { /* to be discarded */
527: k = k+1;
528: if (cmplxEig[j]==0) {
529: if (j<n) {
530: t1 = perm[j];
531: for (i=j;i<n-1;i++) perm[i] = perm[i+1];
532: perm[n-1] = t1;
533: t1 = cmplxEig[j];
534: for (i=j;i<n-1;i++) cmplxEig[i] = cmplxEig[i+1];
535: cmplxEig[n-1] = t1;
536: PetscArraycpy(col1,R+j*ldr,n);
537: for (i=j;i<n-1;i++) PetscArraycpy(R+i*ldr,R+(i+1)*ldr,n);
538: PetscArraycpy(R+(n-1)*ldr,col1,n);
539: }
540: } else {
541: k = k+1;
542: if (j<n-1) {
543: t1 = perm[j]; t2 = perm[j+1];
544: for (i=j;i<n-2;i++) perm[i] = perm[i+2];
545: perm[n-2] = t1; perm[n-1] = t2;
546: t1 = cmplxEig[j]; t2 = cmplxEig[j+1];
547: for (i=j;i<n-2;i++) cmplxEig[i] = cmplxEig[i+2];
548: cmplxEig[n-2] = t1; cmplxEig[n-1] = t2;
549: PetscArraycpy(col1,R+j*ldr,n);
550: PetscArraycpy(col2,R+(j+1)*ldr,n);
551: for (i=j;i<n-2;i++) PetscArraycpy(R+i*ldr,R+(i+2)*ldr,n);
552: PetscArraycpy(R+(n-2)*ldr,col1,n);
553: PetscArraycpy(R+(n-1)*ldr,col2,n);
554: }
555: }
556: }
557: }
558: if (k!=0) {
559: if (breakdown) *breakdown = PETSC_TRUE;
560: *nv = n-k;
561: }
562: return 0;
563: }
565: PetscErrorCode DSGHIEPOrthogEigenv(DS ds,DSMatType mat,PetscScalar *wr,PetscScalar *wi,PetscBool accum)
566: {
567: PetscInt lws,nwus=0,nwui=0,lwi,off,n,nv,ld,i,ldr,l;
568: const PetscScalar *B,*W;
569: PetscScalar *Q,*X,*R,*ts,szero=0.0,sone=1.0;
570: PetscReal *s,vi,vr,tr,*d,*e;
571: PetscBLASInt ld_,n_,nv_,*perm,*cmplxEig;
573: l = ds->l;
574: n = ds->n-l;
575: PetscBLASIntCast(n,&n_);
576: ld = ds->ld;
577: PetscBLASIntCast(ld,&ld_);
578: off = l*ld+l;
579: DSGetArrayReal(ds,DS_MAT_D,&s);
580: if (!ds->compact) {
581: MatDenseGetArrayRead(ds->omat[DS_MAT_B],&B);
582: for (i=l;i<ds->n;i++) s[i] = PetscRealPart(B[i*ld+i]);
583: MatDenseRestoreArrayRead(ds->omat[DS_MAT_B],&B);
584: }
585: lws = n*n+7*n;
586: lwi = 2*n;
587: DSAllocateWork_Private(ds,lws,0,lwi);
588: R = ds->work+nwus;
589: nwus += n*n;
590: ldr = n;
591: perm = ds->iwork + nwui;
592: nwui += n;
593: cmplxEig = ds->iwork+nwui;
594: MatDenseGetArray(ds->omat[mat],&X);
595: for (i=0;i<n;i++) {
596: #if defined(PETSC_USE_COMPLEX)
597: vi = PetscImaginaryPart(wr[l+i]);
598: #else
599: vi = PetscRealPart(wi[l+i]);
600: #endif
601: if (vi!=0) {
602: cmplxEig[i] = 1;
603: cmplxEig[i+1] = 2;
604: i++;
605: } else cmplxEig[i] = 0;
606: }
607: nv = n;
609: /* Perform HR decomposition */
610: /* Hyperbolic rotators */
611: PseudoOrthog_HR(&nv,X+off,ld,s+l,R,ldr,perm,cmplxEig,NULL,ds->work+nwus);
612: /* Sort wr,wi perm */
613: ts = ds->work+nwus;
614: PetscArraycpy(ts,wr+l,n);
615: for (i=0;i<n;i++) wr[i+l] = ts[perm[i]];
616: #if !defined(PETSC_USE_COMPLEX)
617: PetscArraycpy(ts,wi+l,n);
618: for (i=0;i<n;i++) wi[i+l] = ts[perm[i]];
619: #endif
620: /* Projected Matrix */
621: DSGetArrayReal(ds,DS_MAT_T,&d);
622: PetscArrayzero(d+2*ld,ld);
623: e = d+ld;
624: d[l+nv-1] = PetscRealPart(wr[l+nv-1]*s[l+nv-1]);
625: for (i=0;i<nv-1;i++) {
626: if (cmplxEig[i]==0) { /* Real */
627: d[l+i] = PetscRealPart(wr[l+i]*s[l+i]);
628: e[l+i] = 0.0;
629: } else {
630: vr = PetscRealPart(wr[l+i]);
631: #if defined(PETSC_USE_COMPLEX)
632: vi = PetscImaginaryPart(wr[l+i]);
633: #else
634: vi = PetscRealPart(wi[l+i]);
635: #endif
636: if (cmplxEig[i]==-1) vi = -vi;
637: tr = PetscRealPart((R[i+(i+1)*ldr]/R[i+i*ldr]))*vi;
638: d[l+i] = (vr-tr)*s[l+i];
639: d[l+i+1] = (vr+tr)*s[l+i+1];
640: e[l+i] = PetscRealPart(s[l+i]*(R[(i+1)+(i+1)*ldr]/R[i+i*ldr])*vi);
641: if (i<nv-2) e[l+i+1] = 0.0;
642: i++;
643: }
644: }
645: DSRestoreArrayReal(ds,DS_MAT_T,&d);
646: DSRestoreArrayReal(ds,DS_MAT_D,&s);
647: /* accumulate previous Q */
648: MatDenseGetArray(ds->omat[DS_MAT_Q],&Q);
649: if (accum) {
650: PetscBLASIntCast(nv,&nv_);
651: DSAllocateMat_Private(ds,DS_MAT_W);
652: MatCopy(ds->omat[DS_MAT_Q],ds->omat[DS_MAT_W],SAME_NONZERO_PATTERN);
653: MatDenseGetArrayRead(ds->omat[DS_MAT_W],&W);
654: PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&nv_,&n_,&sone,W+off,&ld_,X+off,&ld_,&szero,Q+off,&ld_));
655: MatDenseRestoreArrayRead(ds->omat[DS_MAT_W],&W);
656: } else {
657: PetscArrayzero(Q,ld*ld);
658: for (i=0;i<ds->l;i++) Q[i+i*ld] = 1.0;
659: for (i=0;i<n;i++) PetscArraycpy(Q+off+i*ld,X+off+i*ld,n);
660: }
661: MatDenseRestoreArray(ds->omat[DS_MAT_Q],&Q);
662: ds->t = nv+l;
663: MatDenseRestoreArray(ds->omat[mat],&X);
664: if (!ds->compact) DSSwitchFormat_GHIEP(ds,PETSC_FALSE);
665: return 0;
666: }
668: /*
669: Reduce to tridiagonal-diagonal pair by means of TridiagDiag_HHR.
670: */
671: PetscErrorCode DSIntermediate_GHIEP(DS ds)
672: {
673: PetscInt i,ld,off;
674: PetscInt nwall,nwallr,nwalli;
675: PetscScalar *A,*B,*Q;
676: PetscReal *d,*e,*s;
678: ld = ds->ld;
679: MatDenseGetArray(ds->omat[DS_MAT_A],&A);
680: MatDenseGetArray(ds->omat[DS_MAT_B],&B);
681: MatDenseGetArray(ds->omat[DS_MAT_Q],&Q);
682: DSGetArrayReal(ds,DS_MAT_T,&d);
683: DSGetArrayReal(ds,DS_MAT_D,&s);
684: e = d+ld;
685: off = ds->l+ds->l*ld;
686: PetscArrayzero(Q,ld*ld);
687: nwall = ld*ld+ld;
688: nwallr = ld;
689: nwalli = ld;
690: DSAllocateWork_Private(ds,nwall,nwallr,nwalli);
691: for (i=0;i<ds->n;i++) Q[i+i*ld]=1.0;
692: for (i=0;i<ds->n-ds->l;i++) *(ds->perm+i)=i;
693: if (ds->compact) {
694: if (ds->state < DS_STATE_INTERMEDIATE) {
695: DSSwitchFormat_GHIEP(ds,PETSC_FALSE);
696: TridiagDiag_HHR(ds->k-ds->l+1,A+off,ld,s+ds->l,Q+off,ld,PETSC_TRUE,d+ds->l,e+ds->l,ds->perm,ds->work,ds->rwork,ds->iwork);
697: ds->k = ds->l;
698: PetscArrayzero(d+2*ld+ds->l,ds->n-ds->l);
699: }
700: } else {
701: if (ds->state < DS_STATE_INTERMEDIATE) {
702: for (i=0;i<ds->n;i++) s[i] = PetscRealPart(B[i+i*ld]);
703: TridiagDiag_HHR(ds->n-ds->l,A+off,ld,s+ds->l,Q+off,ld,PETSC_FALSE,d+ds->l,e+ds->l,ds->perm,ds->work,ds->rwork,ds->iwork);
704: PetscArrayzero(d+2*ld,ds->n);
705: ds->k = ds->l;
706: DSSwitchFormat_GHIEP(ds,PETSC_FALSE);
707: } else DSSwitchFormat_GHIEP(ds,PETSC_TRUE);
708: }
709: MatDenseRestoreArray(ds->omat[DS_MAT_A],&A);
710: MatDenseRestoreArray(ds->omat[DS_MAT_B],&B);
711: MatDenseRestoreArray(ds->omat[DS_MAT_Q],&Q);
712: DSRestoreArrayReal(ds,DS_MAT_T,&d);
713: DSRestoreArrayReal(ds,DS_MAT_D,&s);
714: return 0;
715: }