Actual source code: contiguous.c
1: /*
2: Subroutines related to special Vecs that share a common contiguous storage.
4: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
5: SLEPc - Scalable Library for Eigenvalue Problem Computations
6: Copyright (c) 2002-2013, Universitat Politecnica de Valencia, Spain
8: This file is part of SLEPc.
10: SLEPc is free software: you can redistribute it and/or modify it under the
11: terms of version 3 of the GNU Lesser General Public License as published by
12: the Free Software Foundation.
14: SLEPc is distributed in the hope that it will be useful, but WITHOUT ANY
15: WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
16: FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
17: more details.
19: You should have received a copy of the GNU Lesser General Public License
20: along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
21: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
22: */
24: #include <slepc-private/vecimplslepc.h> /*I "slepcvec.h" I*/
25: #include <petsc-private/vecimpl.h> /*I "petscvec.h" I*/
26: #include <petscblaslapack.h>
28: PetscLogEvent SLEPC_UpdateVectors = 0,SLEPC_VecMAXPBY = 0;
32: /*
33: Frees the array of the contiguous vectors when all vectors have been destroyed.
34: */
35: static PetscErrorCode Vecs_ContiguousDestroy(void *ctx)
36: {
37: PetscErrorCode ierr;
38: Vecs_Contiguous *vc = (Vecs_Contiguous*)ctx;
41: PetscFree(vc->array);
42: PetscFree(vc);
43: return(0);
44: }
48: /*
49: Version of VecDuplicateVecs that sets contiguous storage.
50: */
51: static PetscErrorCode VecDuplicateVecs_Contiguous(Vec v,PetscInt m,Vec *V[])
52: {
53: PetscErrorCode ierr;
54: PetscInt i,nloc;
55: PetscScalar *pV;
56: PetscContainer container;
57: Vecs_Contiguous *vc;
60: /* Allocate array */
61: VecGetLocalSize(v,&nloc);
62: PetscMalloc(m*nloc*sizeof(PetscScalar),&pV);
63: /* Create container */
64: PetscNew(Vecs_Contiguous,&vc);
65: vc->nvecs = m;
66: vc->array = pV;
67: PetscContainerCreate(PetscObjectComm((PetscObject)v),&container);
68: PetscContainerSetPointer(container,vc);
69: PetscContainerSetUserDestroy(container,Vecs_ContiguousDestroy);
70: /* Create vectors */
71: PetscMalloc(m*sizeof(Vec),V);
72: for (i=0;i<m;i++) {
73: VecCreateMPIWithArray(PetscObjectComm((PetscObject)v),1,nloc,PETSC_DECIDE,pV+i*nloc,*V+i);
74: PetscObjectCompose((PetscObject)*(*V+i),"contiguous",(PetscObject)container);
75: }
76: PetscContainerDestroy(&container);
77: return(0);
78: }
82: /*@
83: SlepcVecSetTemplate - Sets a vector as a template for contiguous storage.
85: Collective on Vec
87: Input Parameters:
88: . v - the vector
90: Note:
91: Once this function is called, subsequent calls to VecDuplicateVecs()
92: with this vector will use a special version that generates vectors with
93: contiguous storage, that is, the array of values of V[1] immediately
94: follows the array of V[0], and so on.
96: Level: developer
97: @*/
98: PetscErrorCode SlepcVecSetTemplate(Vec v)
99: {
101: PetscBool flg;
105: PetscObjectTypeCompareAny((PetscObject)v,&flg,VECSEQ,VECMPI,"");
106: if (!flg) SETERRQ(PetscObjectComm((PetscObject)v),PETSC_ERR_SUP,"Only available for standard vectors (VECSEQ or VECMPI)");
107: v->ops->duplicatevecs = VecDuplicateVecs_Contiguous;
108: return(0);
109: }
113: /*@
114: SlepcMatGetVecsTemplate - Get vectors compatible with a matrix,
115: i.e. with the same parallel layout, and mark them as templates
116: for contiguous storage.
118: Collective on Mat
120: Input Parameter:
121: . mat - the matrix
123: Output Parameters:
124: + right - (optional) vector that the matrix can be multiplied against
125: - left - (optional) vector that the matrix vector product can be stored in
127: Options Database Keys:
128: . -slepc_non_contiguous - Disable contiguous vector storage
130: Notes:
131: Use -slepc_non_contiguous to disable contiguous storage throughout SLEPc.
132: Contiguous storage is currently also disabled in AIJCUSP matrices.
134: Level: developer
136: .seealso: SlepcVecSetTemplate()
137: @*/
138: PetscErrorCode SlepcMatGetVecsTemplate(Mat mat,Vec *right,Vec *left)
139: {
141: PetscBool flg;
142: Vec v;
147: MatGetVecs(mat,right,left);
148: v = right? *right: *left;
149: PetscObjectTypeCompareAny((PetscObject)v,&flg,VECSEQ,VECMPI,"");
150: if (!flg) return(0);
151: PetscOptionsHasName(NULL,"-slepc_non_contiguous",&flg);
152: if (!flg) {
153: if (right) { SlepcVecSetTemplate(*right); }
154: if (left) { SlepcVecSetTemplate(*left); }
155: }
156: return(0);
157: }
161: /*
162: SlepcUpdateVectors_Noncontiguous_Inplace - V = V*Q for regular vectors
163: (non-contiguous).
164: */
165: static PetscErrorCode SlepcUpdateVectors_Noncontiguous_Inplace(PetscInt m_,Vec *V,const PetscScalar *Q,PetscInt ldq_,PetscBool qtrans)
166: {
167: PetscInt l;
168: PetscBLASInt j,ls,bs=64,m,k,ldq;
169: PetscScalar *pv,*pq=(PetscScalar*)Q,*work,*out,one=1.0,zero=0.0;
173: PetscLogEventBegin(SLEPC_UpdateVectors,0,0,0,0);
174: VecGetLocalSize(V[0],&l);
175: PetscBLASIntCast(l,&ls);
176: PetscBLASIntCast(m_,&m);
177: PetscBLASIntCast(ldq_,&ldq);
178: PetscMalloc(sizeof(PetscScalar)*2*bs*m,&work);
179: out = work+m*bs;
180: k = ls % bs;
181: if (k) {
182: for (j=0;j<m;j++) {
183: VecGetArray(V[j],&pv);
184: PetscMemcpy(work+j*bs,pv,k*sizeof(PetscScalar));
185: VecRestoreArray(V[j],&pv);
186: }
187: PetscStackCallBLAS("BLASgemm",BLASgemm_("N",qtrans?"C":"N",&k,&m,&m,&one,work,&bs,pq,&ldq,&zero,out,&bs));
188: for (j=0;j<m;j++) {
189: VecGetArray(V[j],&pv);
190: PetscMemcpy(pv,out+j*bs,k*sizeof(PetscScalar));
191: VecRestoreArray(V[j],&pv);
192: }
193: }
194: for (;k<ls;k+=bs) {
195: for (j=0;j<m;j++) {
196: VecGetArray(V[j],&pv);
197: PetscMemcpy(work+j*bs,pv+k,bs*sizeof(PetscScalar));
198: VecRestoreArray(V[j],&pv);
199: }
200: PetscStackCallBLAS("BLASgemm",BLASgemm_("N",qtrans?"C":"N",&bs,&m,&m,&one,work,&bs,pq,&ldq,&zero,out,&bs));
201: for (j=0;j<m;j++) {
202: VecGetArray(V[j],&pv);
203: PetscMemcpy(pv+k,out+j*bs,bs*sizeof(PetscScalar));
204: VecRestoreArray(V[j],&pv);
205: }
206: }
207: PetscFree(work);
208: PetscLogFlops(m*m*2.0*ls);
209: PetscLogEventEnd(SLEPC_UpdateVectors,0,0,0,0);
210: return(0);
211: }
215: /*
216: SlepcUpdateVectors_Noncontiguous - V(:,s:e-1) = V*Q(:,s:e-1) for
217: regular vectors (non-contiguous).
219: Writing V = [ V1 V2 V3 ] and Q = [ Q1 Q2 Q3 ], where the V2 and Q2
220: correspond to the columns s:e-1, the computation is done as
221: V2 := V2*Q2 + V1*Q1 + V3*Q3
222: (the first term is computed with SlepcUpdateVectors_Noncontiguous_Inplace).
223: */
224: static PetscErrorCode SlepcUpdateVectors_Noncontiguous(PetscInt n,Vec *V,PetscInt s,PetscInt e,const PetscScalar *Q,PetscInt ldq,PetscBool qtrans)
225: {
226: PetscInt i,j,m,ln;
227: PetscScalar *pq,qt[100];
228: PetscBool allocated = PETSC_FALSE;
232: m = e-s;
233: if (qtrans) {
234: ln = PetscMax(s,n-e);
235: if (ln<=100) pq = qt;
236: else {
237: PetscMalloc(ln*sizeof(PetscScalar),&pq);
238: allocated = PETSC_TRUE;
239: }
240: }
241: /* V2 */
242: SlepcUpdateVectors_Noncontiguous_Inplace(m,V+s,Q+s*ldq+s,ldq,qtrans);
243: /* V1 */
244: if (s>0) {
245: for (i=s;i<e;i++) {
246: if (qtrans) {
247: for (j=0;j<s;j++) pq[j] = Q[i+j*ldq];
248: } else pq = (PetscScalar*)Q+i*ldq;
249: VecMAXPY(V[i],s,pq,V);
250: }
251: }
252: /* V3 */
253: if (n>e) {
254: for (i=s;i<e;i++) {
255: if (qtrans) {
256: for (j=0;j<n-e;j++) pq[j] = Q[i+(j+e)*ldq];
257: } else pq = (PetscScalar*)Q+i*ldq+e;
258: VecMAXPY(V[i],n-e,pq,V+e);
259: }
260: }
261: if (allocated) { PetscFree(pq); }
262: return(0);
263: }
267: /*@
268: SlepcUpdateVectors - Update a set of vectors V as V(:,s:e-1) = V*Q(:,s:e-1).
270: Not Collective
272: Input parameters:
273: + n - number of vectors in V
274: . s - first column of V to be overwritten
275: . e - first column of V not to be overwritten
276: . Q - matrix containing the coefficients of the update
277: . ldq - leading dimension of Q
278: - qtrans - flag indicating if Q is to be transposed
280: Input/Output parameter:
281: . V - set of vectors
283: Notes:
284: This function computes V(:,s:e-1) = V*Q(:,s:e-1), that is, given a set of
285: vectors V, columns from s to e-1 are overwritten with columns from s to
286: e-1 of the matrix-matrix product V*Q.
288: Matrix V is represented as an array of Vec, whereas Q is represented as
289: a column-major dense array of leading dimension ldq. Only columns s to e-1
290: of Q are referenced.
292: If qtrans=PETSC_TRUE, the operation is V*Q'.
294: This routine is implemented with a call to BLAS, therefore V is an array
295: of Vec which have the data stored contiguously in memory as a Fortran matrix.
296: PETSc does not create such arrays by default.
298: Level: developer
300: .seealso: SlepcUpdateStrideVectors()
301: @*/
302: PetscErrorCode SlepcUpdateVectors(PetscInt n,Vec *V,PetscInt s,PetscInt e,const PetscScalar *Q,PetscInt ldq,PetscBool qtrans)
303: {
304: PetscContainer container;
308: if (n<0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of vectors (given %D) cannot be negative",n);
309: if (!n || s>=e) return(0);
314: PetscObjectQuery((PetscObject)(V[0]),"contiguous",(PetscObject*)&container);
315: if (container) {
316: /* contiguous Vecs, use BLAS calls */
317: SlepcUpdateStrideVectors(n,V,s,1,e,Q,ldq,qtrans);
318: } else {
319: /* use regular Vec operations */
320: SlepcUpdateVectors_Noncontiguous(n,V,s,e,Q,ldq,qtrans);
321: }
322: return(0);
323: }
327: /*@
328: SlepcUpdateStrideVectors - Update a set of vectors V as
329: V(:,s:d:e-1) = V*Q(:,s:e-1).
331: Not Collective
333: Input parameters:
334: + n - number of vectors in V
335: . s - first column of V to be overwritten
336: . d - stride
337: . e - first column of V not to be overwritten
338: . Q - matrix containing the coefficients of the update
339: . ldq - leading dimension of Q
340: - qtrans - flag indicating if Q is to be transposed
342: Input/Output parameter:
343: . V - set of vectors
345: Notes:
346: This function computes V(:,s:d:e-1) = V*Q(:,s:e-1), that is, given a set
347: of vectors V, columns from s to e-1 are overwritten with columns from s to
348: e-1 of the matrix-matrix product V*Q.
350: Matrix V is represented as an array of Vec, whereas Q is represented as
351: a column-major dense array of leading dimension ldq. Only columns s to e-1
352: of Q are referenced.
354: If qtrans=PETSC_TRUE, the operation is V*Q'.
356: This routine is implemented with a call to BLAS, therefore V is an array
357: of Vec which have the data stored contiguously in memory as a Fortran matrix.
358: PETSc does not create such arrays by default.
360: Level: developer
362: .seealso: SlepcUpdateVectors()
363: @*/
364: PetscErrorCode SlepcUpdateStrideVectors(PetscInt n_,Vec *V,PetscInt s,PetscInt d,PetscInt e,const PetscScalar *Q,PetscInt ldq_,PetscBool qtrans)
365: {
367: PetscInt l;
368: PetscBLASInt i,j,k,bs=64,m,n,ldq,ls,ld;
369: PetscScalar *pv,*pw,*pq,*work,*pwork,one=1.0,zero=0.0;
370: const char *qt;
373: PetscBLASIntCast(n_/d,&n);
374: PetscBLASIntCast(ldq_,&ldq);
375: m = (e-s)/d;
376: if (!m) return(0);
378: if (m<0 || n<0 || s<0 || m>n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index argument out of range");
379: PetscLogEventBegin(SLEPC_UpdateVectors,0,0,0,0);
380: VecGetLocalSize(V[0],&l);
381: PetscBLASIntCast(l,&ls);
382: PetscBLASIntCast(ls*d,&ld);
383: VecGetArray(V[0],&pv);
384: if (qtrans) {
385: pq = (PetscScalar*)Q+s;
386: qt = "C";
387: } else {
388: pq = (PetscScalar*)Q+s*ldq;
389: qt = "N";
390: }
391: PetscMalloc(sizeof(PetscScalar)*bs*m,&work);
392: k = ls % bs;
393: if (k) {
394: PetscStackCallBLAS("BLASgemm",BLASgemm_("N",qt,&k,&m,&n,&one,pv,&ld,pq,&ldq,&zero,work,&k));
395: for (j=0;j<m;j++) {
396: pw = pv+(s+j)*ld;
397: pwork = work+j*k;
398: for (i=0;i<k;i++) {
399: *pw++ = *pwork++;
400: }
401: }
402: }
403: for (;k<ls;k+=bs) {
404: PetscStackCallBLAS("BLASgemm",BLASgemm_("N",qt,&bs,&m,&n,&one,pv+k,&ld,pq,&ldq,&zero,work,&bs));
405: for (j=0;j<m;j++) {
406: pw = pv+(s+j)*ld+k;
407: pwork = work+j*bs;
408: for (i=0;i<bs;i++) {
409: *pw++ = *pwork++;
410: }
411: }
412: }
413: VecRestoreArray(V[0],&pv);
414: PetscFree(work);
415: PetscLogFlops(m*n*2.0*ls);
416: PetscLogEventEnd(SLEPC_UpdateVectors,0,0,0,0);
417: return(0);
418: }
422: /*@
423: SlepcVecMAXPBY - Computes y = beta*y + sum alpha*a[j]*x[j]
425: Logically Collective on Vec
427: Input parameters:
428: + beta - scalar beta
429: . alpha - scalar alpha
430: . nv - number of vectors in x and scalars in a
431: . a - array of scalars
432: - x - set of vectors
434: Input/Output parameter:
435: . y - the vector to update
437: Notes:
438: If x are Vec's with contiguous storage, then the operation is done
439: through a call to BLAS. Otherwise, VecMAXPY() is called.
441: Level: developer
443: .seealso: SlepcVecSetTemplate()
444: @*/
445: PetscErrorCode SlepcVecMAXPBY(Vec y,PetscScalar beta,PetscScalar alpha,PetscInt nv,PetscScalar a[],Vec x[])
446: {
447: PetscErrorCode ierr;
448: PetscBLASInt i,n,m,one=1;
449: PetscScalar *py;
450: const PetscScalar *px;
451: PetscContainer container;
452: Vec z;
456: if (!nv) return(0);
457: if (nv < 0) SETERRQ1(PetscObjectComm((PetscObject)y),PETSC_ERR_ARG_OUTOFRANGE,"Number of vectors (given %D) cannot be negative",nv);
467: if ((*x)->map->N != (y)->map->N) SETERRQ(PetscObjectComm((PetscObject)y),PETSC_ERR_ARG_INCOMP,"Incompatible vector global lengths");
468: if ((*x)->map->n != (y)->map->n) SETERRQ(PetscObjectComm((PetscObject)y),PETSC_ERR_ARG_INCOMP,"Incompatible vector local lengths");
470: PetscObjectQuery((PetscObject)(x[0]),"contiguous",(PetscObject*)&container);
471: if (container) {
472: /* assume x Vecs are contiguous, use BLAS calls */
473: PetscLogEventBegin(SLEPC_VecMAXPBY,*x,y,0,0);
474: VecGetArray(y,&py);
475: VecGetArrayRead(*x,&px);
476: PetscBLASIntCast(nv,&n);
477: PetscBLASIntCast((y)->map->n,&m);
478: if (m>0) PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&m,&n,&alpha,px,&m,a,&one,&beta,py,&one));
479: VecRestoreArray(y,&py);
480: VecRestoreArrayRead(*x,&px);
481: PetscLogFlops(nv*2*(y)->map->n);
482: PetscLogEventEnd(SLEPC_VecMAXPBY,*x,y,0,0);
483: } else {
484: /* use regular Vec operations */
485: if (alpha==-beta) {
486: for (i=0;i<nv;i++) a[i] = -a[i];
487: VecMAXPY(y,nv,a,x);
488: for (i=0;i<nv;i++) a[i] = -a[i];
489: VecScale(y,beta);
490: } else {
491: VecDuplicate(y,&z);
492: VecCopy(y,z);
493: VecMAXPY(y,nv,a,x);
494: VecAXPBY(y,beta-alpha,alpha,z);
495: VecDestroy(&z);
496: }
497: }
498: return(0);
499: }