1 /*
2  * Copyright (C) 2015 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package android.renderscript;
18 
19 import android.annotation.IntDef;
20 import java.lang.annotation.Retention;
21 import java.lang.annotation.RetentionPolicy;
22 
23 /**
24  *
25  * ScriptIntrinsicBLAS class provides high performance RenderScript APIs to BLAS.
26  *
27  * The BLAS (Basic Linear Algebra Subprograms) are routines that provide standard
28  * building blocks for performing basic vector and matrix operations.
29  *
30  * For detailed description of BLAS, please refer to http://www.netlib.org/blas/
31  *
32  * @deprecated Renderscript has been deprecated in API level 31. Please refer to the <a
33  * href="https://developer.android.com/guide/topics/renderscript/migration-guide">migration
34  * guide</a> for the proposed alternatives.
35  **/
36 @Deprecated
37 public final class ScriptIntrinsicBLAS extends ScriptIntrinsic {
// NOTE(review): this field is not read or written anywhere in the visible part
// of this class; confirm whether it is still needed before relying on it.
private Allocation mLUT;
39 
/**
 * Wraps an already-created native intrinsic. Private: instances are obtained
 * through {@link #create(RenderScript)}.
 *
 * @param id The native object id, forwarded unchanged to the superclass.
 * @param rs The RenderScript context, forwarded unchanged to the superclass.
 */
private ScriptIntrinsicBLAS(long id, RenderScript rs) {
    super(id, rs);
}
43 
// Routine selectors. Each public BLAS entry point passes one of these as the
// second argument of the mRS.nScriptIntrinsicBLAS_* call (for example SGEMV
// passes RsBlas_sgemv).
// NOTE(review): the numeric values presumably have to stay in sync with the
// native RenderScript BLAS function enum; confirm before renumbering.
private static final int RsBlas_sdsdot = 1;
private static final int RsBlas_dsdot = 2;
private static final int RsBlas_sdot = 3;
private static final int RsBlas_ddot = 4;
private static final int RsBlas_cdotu_sub = 5;
private static final int RsBlas_cdotc_sub = 6;
private static final int RsBlas_zdotu_sub = 7;
private static final int RsBlas_zdotc_sub = 8;
private static final int RsBlas_snrm2 = 9;
private static final int RsBlas_sasum = 10;
private static final int RsBlas_dnrm2 = 11;
private static final int RsBlas_dasum = 12;
private static final int RsBlas_scnrm2 = 13;
private static final int RsBlas_scasum = 14;
private static final int RsBlas_dznrm2 = 15;
private static final int RsBlas_dzasum = 16;
private static final int RsBlas_isamax = 17;
private static final int RsBlas_idamax = 18;
private static final int RsBlas_icamax = 19;
private static final int RsBlas_izamax = 20;
private static final int RsBlas_sswap = 21;
private static final int RsBlas_scopy = 22;
private static final int RsBlas_saxpy = 23;
private static final int RsBlas_dswap = 24;
private static final int RsBlas_dcopy = 25;
private static final int RsBlas_daxpy = 26;
private static final int RsBlas_cswap = 27;
private static final int RsBlas_ccopy = 28;
private static final int RsBlas_caxpy = 29;
private static final int RsBlas_zswap = 30;
private static final int RsBlas_zcopy = 31;
private static final int RsBlas_zaxpy = 32;
private static final int RsBlas_srotg = 33;
private static final int RsBlas_srotmg = 34;
private static final int RsBlas_srot = 35;
private static final int RsBlas_srotm = 36;
private static final int RsBlas_drotg = 37;
private static final int RsBlas_drotmg = 38;
private static final int RsBlas_drot = 39;
private static final int RsBlas_drotm = 40;
private static final int RsBlas_sscal = 41;
private static final int RsBlas_dscal = 42;
private static final int RsBlas_cscal = 43;
private static final int RsBlas_zscal = 44;
private static final int RsBlas_csscal = 45;
private static final int RsBlas_zdscal = 46;
private static final int RsBlas_sgemv = 47;
private static final int RsBlas_sgbmv = 48;
private static final int RsBlas_strmv = 49;
private static final int RsBlas_stbmv = 50;
private static final int RsBlas_stpmv = 51;
private static final int RsBlas_strsv = 52;
private static final int RsBlas_stbsv = 53;
private static final int RsBlas_stpsv = 54;
private static final int RsBlas_dgemv = 55;
private static final int RsBlas_dgbmv = 56;
private static final int RsBlas_dtrmv = 57;
private static final int RsBlas_dtbmv = 58;
private static final int RsBlas_dtpmv = 59;
private static final int RsBlas_dtrsv = 60;
private static final int RsBlas_dtbsv = 61;
private static final int RsBlas_dtpsv = 62;
private static final int RsBlas_cgemv = 63;
private static final int RsBlas_cgbmv = 64;
private static final int RsBlas_ctrmv = 65;
private static final int RsBlas_ctbmv = 66;
private static final int RsBlas_ctpmv = 67;
private static final int RsBlas_ctrsv = 68;
private static final int RsBlas_ctbsv = 69;
private static final int RsBlas_ctpsv = 70;
private static final int RsBlas_zgemv = 71;
private static final int RsBlas_zgbmv = 72;
private static final int RsBlas_ztrmv = 73;
private static final int RsBlas_ztbmv = 74;
private static final int RsBlas_ztpmv = 75;
private static final int RsBlas_ztrsv = 76;
private static final int RsBlas_ztbsv = 77;
private static final int RsBlas_ztpsv = 78;
private static final int RsBlas_ssymv = 79;
private static final int RsBlas_ssbmv = 80;
private static final int RsBlas_sspmv = 81;
private static final int RsBlas_sger = 82;
private static final int RsBlas_ssyr = 83;
private static final int RsBlas_sspr = 84;
private static final int RsBlas_ssyr2 = 85;
private static final int RsBlas_sspr2 = 86;
private static final int RsBlas_dsymv = 87;
private static final int RsBlas_dsbmv = 88;
private static final int RsBlas_dspmv = 89;
private static final int RsBlas_dger = 90;
private static final int RsBlas_dsyr = 91;
private static final int RsBlas_dspr = 92;
private static final int RsBlas_dsyr2 = 93;
private static final int RsBlas_dspr2 = 94;
private static final int RsBlas_chemv = 95;
private static final int RsBlas_chbmv = 96;
private static final int RsBlas_chpmv = 97;
private static final int RsBlas_cgeru = 98;
private static final int RsBlas_cgerc = 99;
private static final int RsBlas_cher = 100;
private static final int RsBlas_chpr = 101;
private static final int RsBlas_cher2 = 102;
private static final int RsBlas_chpr2 = 103;
private static final int RsBlas_zhemv = 104;
private static final int RsBlas_zhbmv = 105;
private static final int RsBlas_zhpmv = 106;
private static final int RsBlas_zgeru = 107;
private static final int RsBlas_zgerc = 108;
private static final int RsBlas_zher = 109;
private static final int RsBlas_zhpr = 110;
private static final int RsBlas_zher2 = 111;
private static final int RsBlas_zhpr2 = 112;
private static final int RsBlas_sgemm = 113;
private static final int RsBlas_ssymm = 114;
private static final int RsBlas_ssyrk = 115;
private static final int RsBlas_ssyr2k = 116;
private static final int RsBlas_strmm = 117;
private static final int RsBlas_strsm = 118;
private static final int RsBlas_dgemm = 119;
private static final int RsBlas_dsymm = 120;
private static final int RsBlas_dsyrk = 121;
private static final int RsBlas_dsyr2k = 122;
private static final int RsBlas_dtrmm = 123;
private static final int RsBlas_dtrsm = 124;
private static final int RsBlas_cgemm = 125;
private static final int RsBlas_csymm = 126;
private static final int RsBlas_csyrk = 127;
private static final int RsBlas_csyr2k = 128;
private static final int RsBlas_ctrmm = 129;
private static final int RsBlas_ctrsm = 130;
private static final int RsBlas_zgemm = 131;
private static final int RsBlas_zsymm = 132;
private static final int RsBlas_zsyrk = 133;
private static final int RsBlas_zsyr2k = 134;
private static final int RsBlas_ztrmm = 135;
private static final int RsBlas_ztrsm = 136;
private static final int RsBlas_chemm = 137;
private static final int RsBlas_cherk = 138;
private static final int RsBlas_cher2k = 139;
private static final int RsBlas_zhemm = 140;
private static final int RsBlas_zherk = 141;
private static final int RsBlas_zher2k = 142;

// BLAS extensions start here
// (numbered from 1000, well apart from the 1..142 range used above)
private static final int RsBlas_bnnm = 1000;
189 
190     /**
191      * Create an intrinsic to access BLAS subroutines.
192      *
193      * @param rs The RenderScript context
194      * @return ScriptIntrinsicBLAS
195      */
create(RenderScript rs)196     public static ScriptIntrinsicBLAS create(RenderScript rs) {
197         long id = rs.nScriptIntrinsicCreate(13, Element.U32(rs).getID(rs));
198         return new ScriptIntrinsicBLAS(id, rs);
199     }
200 
/**
 * Source-only marker for int parameters that must be one of
 * {@link #NO_TRANSPOSE}, {@link #TRANSPOSE} or {@link #CONJ_TRANSPOSE}.
 *
 * @hide
 */
@IntDef({NO_TRANSPOSE, TRANSPOSE, CONJ_TRANSPOSE})
@Retention(RetentionPolicy.SOURCE)
public @interface Transpose {}
207 
/**
 * Source-only marker for int parameters that must be either
 * {@link #UPPER} or {@link #LOWER}.
 *
 * @hide
 */
@IntDef({UPPER, LOWER})
@Retention(RetentionPolicy.SOURCE)
public @interface Uplo {}
214 
/**
 * Source-only marker for int parameters that must be either
 * {@link #NON_UNIT} or {@link #UNIT}.
 *
 * @hide
 */
@IntDef({NON_UNIT, UNIT})
@Retention(RetentionPolicy.SOURCE)
public @interface Diag {}
221 
/**
 * Source-only marker for int parameters that must be either
 * {@link #LEFT} or {@link #RIGHT}.
 *
 * @hide
 */
@IntDef({LEFT, RIGHT})
@Retention(RetentionPolicy.SOURCE)
public @interface Side {}
228 
// Option values accepted by the public BLAS entry points and checked by the
// validate* helpers of this class.
// NOTE(review): the numeric values look like the CBLAS enum values
// (CblasNoTrans = 111, ...); confirm before changing any of them.

// Values allowed for @Transpose parameters (see validateTranspose).
public static final int NO_TRANSPOSE = 111;
public static final int TRANSPOSE = 112;
public static final int CONJ_TRANSPOSE = 113;

// Values allowed for @Uplo parameters (see validateUplo).
public static final int UPPER = 121;
public static final int LOWER = 122;

// Values allowed for @Diag parameters (see validateDiag).
public static final int NON_UNIT = 131;
public static final int UNIT = 132;

// Values allowed for @Side parameters (see validateSide).
public static final int LEFT = 141;
public static final int RIGHT = 142;
241 
validateSide(@ide int Side)242     static void validateSide(@Side int Side) {
243         if (Side != LEFT && Side != RIGHT) {
244             throw new RSRuntimeException("Invalid side passed to BLAS");
245         }
246     }
247 
validateTranspose(@ranspose int Trans)248     static void validateTranspose(@Transpose int Trans) {
249         if (Trans != NO_TRANSPOSE && Trans != TRANSPOSE &&
250             Trans != CONJ_TRANSPOSE) {
251             throw new RSRuntimeException("Invalid transpose passed to BLAS");
252         }
253     }
254 
validateConjTranspose(@ranspose int Trans)255     static void validateConjTranspose(@Transpose int Trans) {
256         if (Trans != NO_TRANSPOSE &&
257             Trans != CONJ_TRANSPOSE) {
258             throw new RSRuntimeException("Invalid transpose passed to BLAS");
259         }
260     }
261 
validateDiag(@iag int Diag)262     static void validateDiag(@Diag int Diag) {
263         if (Diag != NON_UNIT && Diag != UNIT) {
264             throw new RSRuntimeException("Invalid diag passed to BLAS");
265         }
266     }
267 
validateUplo(@plo int Uplo)268     static void validateUplo(@Uplo int Uplo) {
269         if (Uplo != UPPER && Uplo != LOWER) {
270             throw new RSRuntimeException("Invalid uplo passed to BLAS");
271         }
272     }
273 
274 
275     /**
276      * Level 2 BLAS
277      */
278 
validateGEMV(Element e, int TransA, Allocation A, Allocation X, int incX, Allocation Y, int incY)279     static void validateGEMV(Element e, int TransA, Allocation A, Allocation X, int incX, Allocation Y, int incY) {
280         validateTranspose(TransA);
281         int M = A.getType().getY();
282         int N = A.getType().getX();
283         if (!A.getType().getElement().isCompatible(e) ||
284             !X.getType().getElement().isCompatible(e) ||
285             !Y.getType().getElement().isCompatible(e)) {
286             throw new RSRuntimeException("Called BLAS with wrong Element type");
287         }
288         if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
289             throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
290         }
291 
292         if (incX <= 0 || incY <= 0) {
293             throw new RSRuntimeException("Vector increments must be greater than 0");
294         }
295         int expectedXDim = -1, expectedYDim = -1;
296         if (TransA == NO_TRANSPOSE) {
297             expectedXDim = 1 + (N - 1) * incX;
298             expectedYDim = 1 + (M - 1) * incY;
299         } else {
300             expectedXDim = 1 + (M - 1) * incX;
301             expectedYDim = 1 + (N - 1) * incY;
302         }
303         if (X.getType().getX() != expectedXDim ||
304             Y.getType().getX() != expectedYDim) {
305             throw new RSRuntimeException("Incorrect vector dimensions for GEMV");
306         }
307     }
308 
309     /**
310      * SGEMV performs one of the matrix-vector operations
311      * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y
312      *
313      * Details: http://www.netlib.org/lapack/explore-html/db/d58/sgemv_8f.html
314      *
315      * @param TransA The type of transpose applied to matrix A.
316      * @param alpha The scalar alpha.
317      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
318      * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
319      * @param incX The increment for the elements of vector x, must be larger than zero.
320      * @param beta The scalar beta.
321      * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
322      * @param incY The increment for the elements of vector y, must be larger than zero.
323      */
SGEMV(@ranspose int TransA, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY)324     public void SGEMV(@Transpose int TransA, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) {
325         validateGEMV(Element.F32(mRS), TransA, A, X, incX, Y, incY);
326         int M = A.getType().getY();
327         int N = A.getType().getX();
328         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0);
329     }
330 
331     /**
332      * DGEMV performs one of the matrix-vector operations
333      * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y
334      *
335      * Details: http://www.netlib.org/lapack/explore-html/dc/da8/dgemv_8f.html
336      *
337      * @param TransA The type of transpose applied to matrix A.
338      * @param alpha The scalar alpha.
339      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
340      * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
341      * @param incX The increment for the elements of vector x, must be larger than zero.
342      * @param beta The scalar beta.
343      * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
344      * @param incY The increment for the elements of vector y, must be larger than zero.
345      */
DGEMV(@ranspose int TransA, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY)346     public void DGEMV(@Transpose int TransA, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) {
347         validateGEMV(Element.F64(mRS), TransA, A, X, incX, Y, incY);
348         int M = A.getType().getY();
349         int N = A.getType().getX();
350         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0);
351     }
352 
353     /**
354      * CGEMV performs one of the matrix-vector operations
355      * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y   or   y := alpha*A**H*x + beta*y
356      *
357      * Details: http://www.netlib.org/lapack/explore-html/d4/d8a/cgemv_8f.html
358      *
359      * @param TransA The type of transpose applied to matrix A.
360      * @param alpha The scalar alpha.
361      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
362      * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
363      * @param incX The increment for the elements of vector x, must be larger than zero.
364      * @param beta The scalar beta.
365      * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
366      * @param incY The increment for the elements of vector y, must be larger than zero.
367      */
CGEMV(@ranspose int TransA, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY)368     public void CGEMV(@Transpose int TransA, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) {
369         validateGEMV(Element.F32_2(mRS), TransA, A, X, incX, Y, incY);
370         int M = A.getType().getY();
371         int N = A.getType().getX();
372         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0);
373     }
374 
375     /**
376      * ZGEMV performs one of the matrix-vector operations
377      * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y   or   y := alpha*A**H*x + beta*y
378      *
379      * Details: http://www.netlib.org/lapack/explore-html/db/d40/zgemv_8f.html
380      *
381      * @param TransA The type of transpose applied to matrix A.
382      * @param alpha The scalar alpha.
383      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
384      * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
385      * @param incX The increment for the elements of vector x, must be larger than zero.
386      * @param beta The scalar beta.
387      * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
388      * @param incY The increment for the elements of vector y, must be larger than zero.
389      */
ZGEMV(@ranspose int TransA, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY)390     public void ZGEMV(@Transpose int TransA, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) {
391         validateGEMV(Element.F64_2(mRS), TransA, A, X, incX, Y, incY);
392         int M = A.getType().getY();
393         int N = A.getType().getX();
394         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0);
395     }
396 
397     /**
398      * SGBMV performs one of the matrix-vector operations
399      * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y
400      *
401      * Details: http://www.netlib.org/lapack/explore-html/d6/d46/sgbmv_8f.html
402      *
403      * Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N),
404      *       but only the region M*(KL+KU+1) will be referenced. The following subroutine can is an
405      *       example showing how to convert the original matrix 'a' to row-based band matrix 'b'.
406      *           for i in range(0, m):
407      *              for j in range(max(0, i-kl), min(i+ku+1, n)):
408      *                  b[i, j-i+kl] = a[i, j]
409      *
410      * @param TransA The type of transpose applied to matrix A.
411      * @param KL The number of sub-diagonals of the matrix A.
412      * @param KU The number of super-diagonals of the matrix A.
413      * @param alpha The scalar alpha.
414      * @param A The input allocation contains the band matrix A, supported elements type {@link Element#F32}.
415      * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
416      * @param incX The increment for the elements of vector x, must be larger than zero.
417      * @param beta The scalar beta.
418      * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
419      * @param incY The increment for the elements of vector y, must be larger than zero.
420      */
SGBMV(@ranspose int TransA, int KL, int KU, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY)421     public void SGBMV(@Transpose int TransA, int KL, int KU, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) {
422         // GBMV has the same validation requirements as GEMV + KL and KU >= 0
423         validateGEMV(Element.F32(mRS), TransA, A, X, incX, Y, incY);
424         if (KL < 0 || KU < 0) {
425             throw new RSRuntimeException("KL and KU must be greater than or equal to 0");
426         }
427         int M = A.getType().getY();
428         int N = A.getType().getX();
429         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, KL, KU);
430     }
431 
432     /**
433      * DGBMV performs one of the matrix-vector operations
434      * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y
435      *
436      * Details: http://www.netlib.org/lapack/explore-html/d2/d3f/dgbmv_8f.html
437      *
438      * Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N),
439      *       but only the region M*(KL+KU+1) will be referenced. The following subroutine can is an
440      *       example showing how to convert the original matrix 'a' to row-based band matrix 'b'.
441      *           for i in range(0, m):
442      *              for j in range(max(0, i-kl), min(i+ku+1, n)):
443      *                  b[i, j-i+kl] = a[i, j]
444      *
445      * @param TransA The type of transpose applied to matrix A.
446      * @param KL The number of sub-diagonals of the matrix A.
447      * @param KU The number of super-diagonals of the matrix A.
448      * @param alpha The scalar alpha.
449      * @param A The input allocation contains the band matrix A, supported elements type {@link Element#F64}.
450      * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
451      * @param incX The increment for the elements of vector x, must be larger than zero.
452      * @param beta The scalar beta.
453      * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
454      * @param incY The increment for the elements of vector y, must be larger than zero.
455      */
DGBMV(@ranspose int TransA, int KL, int KU, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY)456     public void DGBMV(@Transpose int TransA, int KL, int KU, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) {
457         // GBMV has the same validation requirements as GEMV + KL and KU >= 0
458         validateGEMV(Element.F64(mRS), TransA, A, X, incX, Y, incY);
459         if (KL < 0 || KU < 0) {
460             throw new RSRuntimeException("KL and KU must be greater than or equal to 0");
461         }
462         int M = A.getType().getY();
463         int N = A.getType().getX();
464         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, KL, KU);
465     }
466 
467     /**
468      * CGBMV performs one of the matrix-vector operations
469      * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y   or   y := alpha*A**H*x + beta*y
470      *
471      * Details: http://www.netlib.org/lapack/explore-html/d0/d75/cgbmv_8f.html
472      *
473      * Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N),
474      *       but only the region M*(KL+KU+1) will be referenced. The following subroutine can is an
475      *       example showing how to convert the original matrix 'a' to row-based band matrix 'b'.
476      *           for i in range(0, m):
477      *              for j in range(max(0, i-kl), min(i+ku+1, n)):
478      *                  b[i, j-i+kl] = a[i, j]
479      *
480      * @param TransA The type of transpose applied to matrix A.
481      * @param KL The number of sub-diagonals of the matrix A.
482      * @param KU The number of super-diagonals of the matrix A.
483      * @param alpha The scalar alpha.
484      * @param A The input allocation contains the band matrix A, supported elements type {@link Element#F32_2}.
485      * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
486      * @param incX The increment for the elements of vector x, must be larger than zero.
487      * @param beta The scalar beta.
488      * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
489      * @param incY The increment for the elements of vector y, must be larger than zero.
490      */
CGBMV(@ranspose int TransA, int KL, int KU, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY)491     public void CGBMV(@Transpose int TransA, int KL, int KU, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) {
492         // GBMV has the same validation requirements as GEMV + KL and KU >= 0
493         validateGEMV(Element.F32_2(mRS), TransA, A, X, incX, Y, incY);
494         if (KL < 0 || KU < 0) {
495             throw new RSRuntimeException("KL and KU must be greater than or equal to 0");
496         }
497         int M = A.getType().getY();
498         int N = A.getType().getX();
499         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, KL, KU);
500     }
501 
502     /**
503      * ZGBMV performs one of the matrix-vector operations
504      * y := alpha*A*x + beta*y   or   y := alpha*A**T*x + beta*y   or   y := alpha*A**H*x + beta*y
505      *
506      * Details: http://www.netlib.org/lapack/explore-html/d9/d46/zgbmv_8f.html
507      *
508      * Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N),
509      *       but only the region M*(KL+KU+1) will be referenced. The following subroutine can is an
510      *       example showing how to convert the original matrix 'a' to row-based band matrix 'b'.
511      *           for i in range(0, m):
512      *              for j in range(max(0, i-kl), min(i+ku+1, n)):
513      *                  b[i, j-i+kl] = a[i, j]
514      *
515      * @param TransA The type of transpose applied to matrix A.
516      * @param KL The number of sub-diagonals of the matrix A.
517      * @param KU The number of super-diagonals of the matrix A.
518      * @param alpha The scalar alpha.
519      * @param A The input allocation contains the band matrix A, supported elements type {@link Element#F64_2}.
520      * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
521      * @param incX The increment for the elements of vector x, must be larger than zero.
522      * @param beta The scalar beta.
523      * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
524      * @param incY The increment for the elements of vector y, must be larger than zero.
525      */
ZGBMV(@ranspose int TransA, int KL, int KU, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY)526     public void ZGBMV(@Transpose int TransA, int KL, int KU, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) {
527         // GBMV has the same validation requirements as GEMV + KL and KU >= 0
528         validateGEMV(Element.F64_2(mRS), TransA, A, X, incX, Y, incY);
529         if (KL < 0 || KU < 0) {
530             throw new RSRuntimeException("KL and KU must be greater than or equal to 0");
531         }
532         int M = A.getType().getY();
533         int N = A.getType().getX();
534         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, KL, KU);
535     }
536 
validateTRMV(Element e, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX)537     static void validateTRMV(Element e, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {
538         validateTranspose(TransA);
539         validateUplo(Uplo);
540         validateDiag(Diag);
541         int N = A.getType().getY();
542         if (A.getType().getX() != N) {
543             throw new RSRuntimeException("A must be a square matrix for TRMV");
544         }
545         if (!A.getType().getElement().isCompatible(e) ||
546             !X.getType().getElement().isCompatible(e)) {
547             throw new RSRuntimeException("Called BLAS with wrong Element type");
548         }
549         if (X.getType().getY() > 1) {
550             throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
551         }
552 
553         if (incX <= 0) {
554             throw new RSRuntimeException("Vector increments must be greater than 0");
555         }
556         int expectedXDim = 1 + (N - 1) * incX;
557         if (X.getType().getX() != expectedXDim) {
558             throw new RSRuntimeException("Incorrect vector dimensions for TRMV");
559         }
560     }
561 
validateTPMV(Element e, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX)562     static int validateTPMV(Element e, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) {
563         validateTranspose(TransA);
564         validateUplo(Uplo);
565         validateDiag(Diag);
566         if (!Ap.getType().getElement().isCompatible(e) ||
567             !X.getType().getElement().isCompatible(e)) {
568             throw new RSRuntimeException("Called BLAS with wrong Element type");
569         }
570         if (X.getType().getY() > 1) {
571             throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
572         }
573 
574         if (Ap.getType().getY() > 1) {
575             throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1");
576         }
577 
578         int N = (int)Math.sqrt((double)Ap.getType().getX() * 2);
579         //is it really doing anything?
580         if (Ap.getType().getX() != ((N * (N+1)) / 2)) {
581             throw new RSRuntimeException("Invalid dimension for Ap");
582         }
583         if (incX <= 0) {
584             throw new RSRuntimeException("Vector increments must be greater than 0");
585         }
586         int expectedXDim = 1 + (N - 1) * incX;
587         if (X.getType().getX() != expectedXDim) {
588             throw new RSRuntimeException("Incorrect vector dimensions for TPMV");
589         }
590 
591         return N;
592     }
593 
594     /**
595      * STRMV performs one of the matrix-vector operations
596      * x := A*x   or   x := A**T*x
597      *
598      * Details: http://www.netlib.org/lapack/explore-html/de/d45/strmv_8f.html
599      *
600      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
601      * @param TransA The type of transpose applied to matrix A.
602      * @param Diag Specifies whether or not A is unit triangular.
603      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
604      * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
605      * @param incX The increment for the elements of vector x, must be larger than zero.
606      */
STRMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX)607     public void STRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {
608         validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX);
609         int N = A.getType().getY();
610         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);
611     }
612 
613     /**
614      * DTRMV performs one of the matrix-vector operations
615      * x := A*x   or   x := A**T*x
616      *
617      * Details: http://www.netlib.org/lapack/explore-html/dc/d7e/dtrmv_8f.html
618      *
619      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
620      * @param TransA The type of transpose applied to matrix A.
621      * @param Diag Specifies whether or not A is unit triangular.
622      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
623      * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
624      * @param incX The increment for the elements of vector x, must be larger than zero.
625      */
DTRMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX)626     public void DTRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {
627         validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX);
628         int N = A.getType().getY();
629         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);
630     }
631 
632     /**
633      * CTRMV performs one of the matrix-vector operations
634      * x := A*x   or   x := A**T*x   or   x := A**H*x
635      *
636      * Details: http://www.netlib.org/lapack/explore-html/df/d78/ctrmv_8f.html
637      *
638      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
639      * @param TransA The type of transpose applied to matrix A.
640      * @param Diag Specifies whether or not A is unit triangular.
641      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
642      * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
643      * @param incX The increment for the elements of vector x, must be larger than zero.
644      */
CTRMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX)645     public void CTRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {
646         validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX);
647         int N = A.getType().getY();
648         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);
649     }
650 
651     /**
652      * ZTRMV performs one of the matrix-vector operations
653      * x := A*x   or   x := A**T*x   or   x := A**H*x
654      *
655      * Details: http://www.netlib.org/lapack/explore-html/d0/dd1/ztrmv_8f.html
656      *
657      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
658      * @param TransA The type of transpose applied to matrix A.
659      * @param Diag Specifies whether or not A is unit triangular.
660      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
661      * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
662      * @param incX The increment for the elements of vector x, must be larger than zero.
663      */
ZTRMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX)664     public void ZTRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) {
665         validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX);
666         int N = A.getType().getY();
667         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);
668     }
669 
670     /**
671      * STBMV performs one of the matrix-vector operations
672      * x := A*x   or   x := A**T*x
673      *
674      * Details: http://www.netlib.org/lapack/explore-html/d6/d7d/stbmv_8f.html
675      *
676      * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
     *       but only the region N*(K+1) will be referenced. The following subroutine is an
     *       example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
679      *           for i in range(0, n):
680      *              for j in range(i, min(i+k+1, n)):
681      *                  b[i, j-i] = a[i, j]
682      *
683      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
684      * @param TransA The type of transpose applied to matrix A.
685      * @param Diag Specifies whether or not A is unit triangular.
686      * @param K The number of off-diagonals of the matrix A
687      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
688      * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
689      * @param incX The increment for the elements of vector x, must be larger than zero.
690      */
STBMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX)691     public void STBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  int K, Allocation A,  Allocation X,  int incX) {
692         // TBMV has the same requirements as TRMV + K >= 0
693         if (K < 0) {
694             throw new RSRuntimeException("K must be greater than or equal to 0");
695         }
696         validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX);
697         int N = A.getType().getY();
698         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);
699     }
700 
701     /**
702      * DTBMV performs one of the matrix-vector operations
703      * x := A*x   or   x := A**T*x
704      *
705      * Details: http://www.netlib.org/lapack/explore-html/df/d29/dtbmv_8f.html
706      *
707      * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
     *       but only the region N*(K+1) will be referenced. The following subroutine is an
     *       example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
710      *           for i in range(0, n):
711      *              for j in range(i, min(i+k+1, n)):
712      *                  b[i, j-i] = a[i, j]
713      *
714      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
715      * @param TransA The type of transpose applied to matrix A.
716      * @param Diag Specifies whether or not A is unit triangular.
717      * @param K The number of off-diagonals of the matrix A
718      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
719      * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
720      * @param incX The increment for the elements of vector x, must be larger than zero.
721      */
DTBMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX)722     public void DTBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  int K, Allocation A,  Allocation X,  int incX) {
723         // TBMV has the same requirements as TRMV + K >= 0
724         if (K < 0) {
725             throw new RSRuntimeException("K must be greater than or equal to 0");
726         }
727         validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX);
728         int N = A.getType().getY();
729         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);
730     }
731 
732     /**
733      * CTBMV performs one of the matrix-vector operations
734      * x := A*x   or   x := A**T*x   or   x := A**H*x
735      *
736      * Details: http://www.netlib.org/lapack/explore-html/d3/dcd/ctbmv_8f.html
737      *
738      * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
     *       but only the region N*(K+1) will be referenced. The following subroutine is an
     *       example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
741      *           for i in range(0, n):
742      *              for j in range(i, min(i+k+1, n)):
743      *                  b[i, j-i] = a[i, j]
744      *
745      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
746      * @param TransA The type of transpose applied to matrix A.
747      * @param Diag Specifies whether or not A is unit triangular.
748      * @param K The number of off-diagonals of the matrix A
749      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
750      * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
751      * @param incX The increment for the elements of vector x, must be larger than zero.
752      */
CTBMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX)753     public void CTBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  int K, Allocation A,  Allocation X,  int incX) {
754         // TBMV has the same requirements as TRMV + K >= 0
755         if (K < 0) {
756             throw new RSRuntimeException("K must be greater than or equal to 0");
757         }
758         validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX);
759         int N = A.getType().getY();
760         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);
761     }
762 
763     /**
764      * ZTBMV performs one of the matrix-vector operations
765      * x := A*x   or   x := A**T*x   or   x := A**H*x
766      *
767      * Details: http://www.netlib.org/lapack/explore-html/d3/d39/ztbmv_8f.html
768      *
769      * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
     *       but only the region N*(K+1) will be referenced. The following subroutine is an
     *       example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
772      *           for i in range(0, n):
773      *              for j in range(i, min(i+k+1, n)):
774      *                  b[i, j-i] = a[i, j]
775      *
776      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
777      * @param TransA The type of transpose applied to matrix A.
778      * @param Diag Specifies whether or not A is unit triangular.
779      * @param K The number of off-diagonals of the matrix A
780      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
781      * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
782      * @param incX The increment for the elements of vector x, must be larger than zero.
783      */
ZTBMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX)784     public void ZTBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  int K, Allocation A,  Allocation X,  int incX) {
785         // TBMV has the same requirements as TRMV + K >= 0
786         if (K < 0) {
787             throw new RSRuntimeException("K must be greater than or equal to 0");
788         }
789         validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX);
790         int N = A.getType().getY();
791         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);
792     }
793 
794     /**
795      * STPMV performs one of the matrix-vector operations
796      * x := A*x   or   x := A**T*x
797      *
798      * Details: http://www.netlib.org/lapack/explore-html/db/db1/stpmv_8f.html
799      *
800      * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
     *       'a' to packed matrix 'b'.
803      *           k = 0
804      *           for i in range(0, n):
805      *              for j in range(i, n):
806      *                  b[k++] = a[i, j]
807      *
808      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
809      * @param TransA The type of transpose applied to matrix A.
810      * @param Diag Specifies whether or not A is unit triangular.
811      * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32}.
812      * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
813      * @param incX The increment for the elements of vector x, must be larger than zero.
814      */
STPMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX)815     public void STPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation Ap,  Allocation X,  int incX) {
816         int N = validateTPMV(Element.F32(mRS), Uplo, TransA, Diag, Ap, X, incX);
817         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);
818     }
819 
820     /**
821      * DTPMV performs one of the matrix-vector operations
822      * x := A*x   or   x := A**T*x
823      *
824      * Details: http://www.netlib.org/lapack/explore-html/dc/dcd/dtpmv_8f.html
825      *
826      * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
     *       'a' to packed matrix 'b'.
829      *           k = 0
830      *           for i in range(0, n):
831      *              for j in range(i, n):
832      *                  b[k++] = a[i, j]
833      *
834      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
835      * @param TransA The type of transpose applied to matrix A.
836      * @param Diag Specifies whether or not A is unit triangular.
837      * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64}.
838      * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
839      * @param incX The increment for the elements of vector x, must be larger than zero.
840      */
DTPMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX)841     public void DTPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation Ap,  Allocation X,  int incX) {
842         int N = validateTPMV(Element.F64(mRS), Uplo, TransA, Diag, Ap, X, incX);
843         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);
844     }
845 
846     /**
847      * CTPMV performs one of the matrix-vector operations
848      * x := A*x   or   x := A**T*x   or   x := A**H*x
849      *
850      * Details: http://www.netlib.org/lapack/explore-html/d4/dbb/ctpmv_8f.html
851      *
852      * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
     *       'a' to packed matrix 'b'.
855      *           k = 0
856      *           for i in range(0, n):
857      *              for j in range(i, n):
858      *                  b[k++] = a[i, j]
859      *
860      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
861      * @param TransA The type of transpose applied to matrix A.
862      * @param Diag Specifies whether or not A is unit triangular.
863      * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32_2}.
864      * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
865      * @param incX The increment for the elements of vector x, must be larger than zero.
866      */
CTPMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX)867     public void CTPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation Ap,  Allocation X,  int incX) {
868         int N = validateTPMV(Element.F32_2(mRS), Uplo, TransA, Diag, Ap, X, incX);
869         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);
870     }
871 
872     /**
873      * ZTPMV performs one of the matrix-vector operations
874      * x := A*x   or   x := A**T*x   or   x := A**H*x
875      *
876      * Details: http://www.netlib.org/lapack/explore-html/d2/d9e/ztpmv_8f.html
877      *
878      * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
     *       The following subroutine is an example showing how to convert an UPPER triangular matrix
     *       'a' to packed matrix 'b'.
881      *           k = 0
882      *           for i in range(0, n):
883      *              for j in range(i, n):
884      *                  b[k++] = a[i, j]
885      *
886      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
887      * @param TransA The type of transpose applied to matrix A.
888      * @param Diag Specifies whether or not A is unit triangular.
889      * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64_2}.
890      * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
891      * @param incX The increment for the elements of vector x, must be larger than zero.
892      */
ZTPMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX)893     public void ZTPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation Ap,  Allocation X,  int incX) {
894         int N = validateTPMV(Element.F64_2(mRS), Uplo, TransA, Diag, Ap, X, incX);
895         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);
896     }
897 
898     /**
899      * STRSV solves one of the systems of equations
900      * A*x = b   or   A**T*x = b
901      *
902      * Details: http://www.netlib.org/lapack/explore-html/d0/d2a/strsv_8f.html
903      *
904      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
905      * @param TransA The type of transpose applied to matrix A.
906      * @param Diag Specifies whether or not A is unit triangular.
907      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
908      * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
909      * @param incX The increment for the elements of vector x, must be larger than zero.
910      */
STRSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX)911     public void STRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation A,  Allocation X,  int incX) {
912         // TRSV is the same as TRMV
913         validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX);
914         int N = A.getType().getY();
915         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);
916 
917     }
918 
919     /**
920      * DTRSV solves one of the systems of equations
921      * A*x = b   or   A**T*x = b
922      *
923      * Details: http://www.netlib.org/lapack/explore-html/d6/d96/dtrsv_8f.html
924      *
925      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
926      * @param TransA The type of transpose applied to matrix A.
927      * @param Diag Specifies whether or not A is unit triangular.
928      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
929      * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
930      * @param incX The increment for the elements of vector x, must be larger than zero.
931      */
DTRSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX)932     public void DTRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation A,  Allocation X,  int incX) {
933         // TRSV is the same as TRMV
934         validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX);
935         int N = A.getType().getY();
936         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);
937 
938     }
939 
940     /**
941      * CTRSV solves one of the systems of equations
942      * A*x = b   or   A**T*x = b   or   A**H*x = b
943      *
944      * Details: http://www.netlib.org/lapack/explore-html/d4/dc8/ctrsv_8f.html
945      *
946      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
947      * @param TransA The type of transpose applied to matrix A.
948      * @param Diag Specifies whether or not A is unit triangular.
949      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
950      * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
951      * @param incX The increment for the elements of vector x, must be larger than zero.
952      */
CTRSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX)953     public void CTRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation A,  Allocation X,  int incX) {
954         // TRSV is the same as TRMV
955         validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX);
956         int N = A.getType().getY();
957         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);
958 
959     }
960 
961     /**
962      * ZTRSV solves one of the systems of equations
963      * A*x = b   or   A**T*x = b   or   A**H*x = b
964      *
965      * Details: http://www.netlib.org/lapack/explore-html/d1/d2f/ztrsv_8f.html
966      *
967      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
968      * @param TransA The type of transpose applied to matrix A.
969      * @param Diag Specifies whether or not A is unit triangular.
970      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
971      * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
972      * @param incX The increment for the elements of vector x, must be larger than zero.
973      */
ZTRSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX)974     public void ZTRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation A,  Allocation X,  int incX) {
975         // TRSV is the same as TRMV
976         validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX);
977         int N = A.getType().getY();
978         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);
979 
980     }
981 
982     /**
983      * STBSV solves one of the systems of equations
984      * A*x = b   or   A**T*x = b
985      *
986      * Details: http://www.netlib.org/lapack/explore-html/d0/d1f/stbsv_8f.html
987      *
988      * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
     *       but only the region N*(K+1) will be referenced. The following subroutine is an
     *       example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
991      *           for i in range(0, n):
992      *              for j in range(i, min(i+k+1, n)):
993      *                  b[i, j-i] = a[i, j]
994      *
995      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
996      * @param TransA The type of transpose applied to matrix A.
997      * @param Diag Specifies whether or not A is unit triangular.
998      * @param K The number of off-diagonals of the matrix A
999      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
1000      * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1001      * @param incX The increment for the elements of vector x, must be larger than zero.
1002      */
STBSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX)1003     public void STBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  int K, Allocation A,  Allocation X,  int incX) {
1004         // TBSV is the same as TRMV + K >= 0
1005         validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX);
1006         int N = A.getType().getY();
1007         if (K < 0) {
1008             throw new RSRuntimeException("Number of diagonals must be positive");
1009         }
1010         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);
1011     }
1012 
1013     /**
1014      * DTBSV solves one of the systems of equations
1015      * A*x = b   or   A**T*x = b
1016      *
1017      * Details: http://www.netlib.org/lapack/explore-html/d4/dcf/dtbsv_8f.html
1018      *
1019      * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
     *       but only the region N*(K+1) will be referenced. The following subroutine is an
     *       example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
1022      *           for i in range(0, n):
1023      *              for j in range(i, min(i+k+1, n)):
1024      *                  b[i, j-i] = a[i, j]
1025      *
1026      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1027      * @param TransA The type of transpose applied to matrix A.
1028      * @param Diag Specifies whether or not A is unit triangular.
1029      * @param K The number of off-diagonals of the matrix A
1030      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
1031      * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
1032      * @param incX The increment for the elements of vector x, must be larger than zero.
1033      */
DTBSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX)1034     public void DTBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  int K, Allocation A,  Allocation X,  int incX) {
1035         // TBSV is the same as TRMV + K >= 0
1036         validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX);
1037         int N = A.getType().getY();
1038         if (K < 0) {
1039             throw new RSRuntimeException("Number of diagonals must be positive");
1040         }
1041         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);
1042     }
1043 
1044     /**
1045      * CTBSV solves one of the systems of equations
1046      * A*x = b   or   A**T*x = b   or   A**H*x = b
1047      *
1048      * Details: http://www.netlib.org/lapack/explore-html/d9/d5f/ctbsv_8f.html
1049      *
1050      * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
     *       but only the region N*(K+1) will be referenced. The following subroutine is an
     *       example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
1053      *           for i in range(0, n):
1054      *              for j in range(i, min(i+k+1, n)):
1055      *                  b[i, j-i] = a[i, j]
1056      *
1057      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1058      * @param TransA The type of transpose applied to matrix A.
1059      * @param Diag Specifies whether or not A is unit triangular.
1060      * @param K The number of off-diagonals of the matrix A
1061      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
1062      * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
1063      * @param incX The increment for the elements of vector x, must be larger than zero.
1064      */
CTBSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX)1065     public void CTBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  int K, Allocation A,  Allocation X,  int incX) {
1066         // TBSV is the same as TRMV + K >= 0
1067         validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX);
1068         int N = A.getType().getY();
1069         if (K < 0) {
1070             throw new RSRuntimeException("Number of diagonals must be positive");
1071         }
1072         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);
1073     }
1074 
1075     /**
1076      * ZTBSV solves one of the systems of equations
1077      * A*x = b   or   A**T*x = b   or   A**H*x = b
1078      *
1079      * Details: http://www.netlib.org/lapack/explore-html/d4/d5a/ztbsv_8f.html
1080      *
1081      * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
     *       but only the region N*(K+1) will be referenced. The following subroutine is an
     *       example showing how to convert an UPPER triangular matrix 'a' to row-based band matrix 'b'.
1084      *           for i in range(0, n):
1085      *              for j in range(i, min(i+k+1, n)):
1086      *                  b[i, j-i] = a[i, j]
1087      *
1088      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1089      * @param TransA The type of transpose applied to matrix A.
1090      * @param Diag Specifies whether or not A is unit triangular.
1091      * @param K The number of off-diagonals of the matrix A
1092      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
1093      * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
1094      * @param incX The increment for the elements of vector x, must be larger than zero.
1095      */
ZTBSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX)1096     public void ZTBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  int K, Allocation A,  Allocation X,  int incX) {
1097         // TBSV is the same as TRMV + K >= 0
1098         validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX);
1099         int N = A.getType().getY();
1100         if (K < 0) {
1101             throw new RSRuntimeException("Number of diagonals must be positive");
1102         }
1103         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);
1104     }
1105 
1106     /**
1107      * STPSV solves one of the systems of equations
1108      * A*x = b   or   A**T*x = b
1109      *
1110      * Details: http://www.netlib.org/lapack/explore-html/d0/d7c/stpsv_8f.html
1111      *
1112      * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
1113      *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
1114      *       'a' to packed matrix 'b'.
1115      *           k = 0
1116      *           for i in range(0, n):
1117      *              for j in range(i, n):
1118      *                  b[k++] = a[i, j]
1119      *
1120      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1121      * @param TransA The type of transpose applied to matrix A.
1122      * @param Diag Specifies whether or not A is unit triangular.
1123      * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32}.
1124      * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1125      * @param incX The increment for the elements of vector x, must be larger than zero.
1126      */
STPSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX)1127     public void STPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation Ap,  Allocation X,  int incX) {
1128         // TPSV is same as TPMV
1129         int N = validateTPMV(Element.F32(mRS), Uplo, TransA, Diag, Ap, X, incX);
1130         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);
1131     }
1132 
1133     /**
1134      * DTPSV solves one of the systems of equations
1135      * A*x = b   or   A**T*x = b
1136      *
1137      * Details: http://www.netlib.org/lapack/explore-html/d9/d84/dtpsv_8f.html
1138      *
1139      * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
1140      *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
1141      *       'a' to packed matrix 'b'.
1142      *           k = 0
1143      *           for i in range(0, n):
1144      *              for j in range(i, n):
1145      *                  b[k++] = a[i, j]
1146      *
1147      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1148      * @param TransA The type of transpose applied to matrix A.
1149      * @param Diag Specifies whether or not A is unit triangular.
1150      * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64}.
1151      * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
1152      * @param incX The increment for the elements of vector x, must be larger than zero.
1153      */
DTPSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX)1154     public void DTPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation Ap,  Allocation X,  int incX) {
1155         // TPSV is same as TPMV
1156         int N = validateTPMV(Element.F64(mRS), Uplo, TransA, Diag, Ap, X, incX);
1157         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0);
1158     }
1159 
1160     /**
1161      * CTPSV solves one of the systems of equations
1162      * A*x = b   or   A**T*x = b   or   A**H*x = b
1163      *
1164      * Details: http://www.netlib.org/lapack/explore-html/d8/d56/ctpsv_8f.html
1165      *
1166      * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
1167      *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
1168      *       'a' to packed matrix 'b'.
1169      *           k = 0
1170      *           for i in range(0, n):
1171      *              for j in range(i, n):
1172      *                  b[k++] = a[i, j]
1173      *
1174      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1175      * @param TransA The type of transpose applied to matrix A.
1176      * @param Diag Specifies whether or not A is unit triangular.
1177      * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32_2}.
1178      * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
1179      * @param incX The increment for the elements of vector x, must be larger than zero.
1180      */
CTPSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX)1181     public void CTPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation Ap,  Allocation X,  int incX) {
1182         // TPSV is same as TPMV
1183         int N = validateTPMV(Element.F32_2(mRS), Uplo, TransA, Diag, Ap, X, incX);
1184         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);
1185     }
1186 
1187     /**
1188      * ZTPSV solves one of the systems of equations
1189      * A*x = b   or   A**T*x = b   or   A**H*x = b
1190      *
1191      * Details: http://www.netlib.org/lapack/explore-html/da/d57/ztpsv_8f.html
1192      *
1193      * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
1194      *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
1195      *       'a' to packed matrix 'b'.
1196      *           k = 0
1197      *           for i in range(0, n):
1198      *              for j in range(i, n):
1199      *                  b[k++] = a[i, j]
1200      *
1201      * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix.
1202      * @param TransA The type of transpose applied to matrix A.
1203      * @param Diag Specifies whether or not A is unit triangular.
1204      * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64_2}.
1205      * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
1206      * @param incX The increment for the elements of vector x, must be larger than zero.
1207      */
ZTPSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX)1208     public void ZTPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag,  Allocation Ap,  Allocation X,  int incX) {
1209         // TPSV is same as TPMV
1210         int N = validateTPMV(Element.F64_2(mRS), Uplo, TransA, Diag, Ap, X, incX);
1211         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0);
1212     }
1213 
1214     /**
1215      * Level 2, S and D only
1216      */
validateSYMV(Element e, @Uplo int Uplo, Allocation A, Allocation X, Allocation Y, int incX, int incY)1217     static int validateSYMV(Element e, @Uplo int Uplo, Allocation A, Allocation X, Allocation Y, int incX, int incY) {
1218         validateUplo(Uplo);
1219         int N = A.getType().getY();
1220         if (A.getType().getX() != N) {
1221             throw new RSRuntimeException("A must be a square matrix for SYMV");
1222         }
1223         if (!A.getType().getElement().isCompatible(e) ||
1224             !X.getType().getElement().isCompatible(e) ||
1225             !Y.getType().getElement().isCompatible(e) ) {
1226             throw new RSRuntimeException("Called BLAS with wrong Element type");
1227         }
1228         if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
1229             throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
1230         }
1231 
1232         if (incX <= 0 || incY <= 0) {
1233             throw new RSRuntimeException("Vector increments must be greater than 0");
1234         }
1235         int expectedXDim = 1 + (N - 1) * incX;
1236         if (X.getType().getX() != expectedXDim) {
1237             throw new RSRuntimeException("Incorrect vector dimensions for SYMV");
1238         }
1239         int expectedYDim = 1 + (N - 1) * incY;
1240         if (Y.getType().getX() != expectedYDim) {
1241             throw new RSRuntimeException("Incorrect vector dimensions for SYMV");
1242         }
1243         return N;
1244     }
validateSPMV(Element e, @Uplo int Uplo, Allocation Ap, Allocation X, int incX, Allocation Y, int incY)1245     static int validateSPMV(Element e, @Uplo int Uplo, Allocation Ap, Allocation X, int incX, Allocation Y, int incY) {
1246         validateUplo(Uplo);
1247         if (!Ap.getType().getElement().isCompatible(e) ||
1248             !X.getType().getElement().isCompatible(e) ||
1249             !Y.getType().getElement().isCompatible(e)) {
1250             throw new RSRuntimeException("Called BLAS with wrong Element type");
1251         }
1252         if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
1253             throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
1254         }
1255 
1256         if (Ap.getType().getY() > 1) {
1257             throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1");
1258         }
1259 
1260         int N = (int)Math.sqrt((double)Ap.getType().getX() * 2);
1261         if (Ap.getType().getX() != ((N * (N+1)) / 2)) {
1262             throw new RSRuntimeException("Invalid dimension for Ap");
1263         }
1264         if (incX <= 0 || incY <= 0) {
1265             throw new RSRuntimeException("Vector increments must be greater than 0");
1266         }
1267         int expectedXDim = 1 + (N - 1) * incX;
1268         if (X.getType().getX() != expectedXDim) {
1269             throw new RSRuntimeException("Incorrect vector dimensions for SPMV");
1270         }
1271         int expectedYDim = 1 + (N - 1) * incY;
1272         if (Y.getType().getX() != expectedYDim) {
1273             throw new RSRuntimeException("Incorrect vector dimensions for SPMV");
1274         }
1275 
1276         return N;
1277     }
validateGER(Element e, Allocation X, int incX, Allocation Y, int incY, Allocation A)1278     static void validateGER(Element e, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
1279         if (!A.getType().getElement().isCompatible(e) ||
1280             !X.getType().getElement().isCompatible(e) ||
1281             !Y.getType().getElement().isCompatible(e) ) {
1282             throw new RSRuntimeException("Called BLAS with wrong Element type");
1283         }
1284 
1285         if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
1286             throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
1287         }
1288 
1289         int M = A.getType().getY();
1290         int N = A.getType().getX();
1291 
1292         if (N < 1 || M < 1) {
1293             throw new RSRuntimeException("M and N must be 1 or greater for GER");
1294         }
1295         if (incX <= 0 || incY <= 0) {
1296             throw new RSRuntimeException("Vector increments must be greater than 0");
1297         }
1298         int expectedXDim = 1 + (M - 1) * incX;
1299         if (X.getType().getX() != expectedXDim) {
1300             throw new RSRuntimeException("Incorrect vector dimensions for GER");
1301         }
1302         int expectedYDim = 1 + (N - 1) * incY;
1303         if (Y.getType().getX() != expectedYDim) {
1304             throw new RSRuntimeException("Incorrect vector dimensions for GER");
1305         }
1306 
1307 
1308     }
validateSYR(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation A)1309     static int validateSYR(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation A) {
1310         validateUplo(Uplo);
1311         if (!A.getType().getElement().isCompatible(e) ||
1312             !X.getType().getElement().isCompatible(e)) {
1313             throw new RSRuntimeException("Called BLAS with wrong Element type");
1314         }
1315 
1316         int N = A.getType().getX();
1317 
1318         if (X.getType().getY() > 1) {
1319             throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
1320         }
1321         if (N != A.getType().getY()) {
1322             throw new RSRuntimeException("A must be a symmetric matrix");
1323         }
1324         if (incX <= 0) {
1325             throw new RSRuntimeException("Vector increments must be greater than 0");
1326         }
1327         int expectedXDim = 1 + (N - 1) * incX;
1328         if (X.getType().getX() != expectedXDim) {
1329             throw new RSRuntimeException("Incorrect vector dimensions for SYR");
1330         }
1331         return N;
1332     }
validateSPR(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Ap)1333     static int validateSPR(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Ap) {
1334         validateUplo(Uplo);
1335         if (!Ap.getType().getElement().isCompatible(e) ||
1336             !X.getType().getElement().isCompatible(e)) {
1337             throw new RSRuntimeException("Called BLAS with wrong Element type");
1338         }
1339         if (X.getType().getY() > 1) {
1340             throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
1341         }
1342 
1343         if (Ap.getType().getY() > 1) {
1344             throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1");
1345         }
1346 
1347         int N = (int)Math.sqrt((double)Ap.getType().getX() * 2);
1348         if (Ap.getType().getX() != ((N * (N+1)) / 2)) {
1349             throw new RSRuntimeException("Invalid dimension for Ap");
1350         }
1351         if (incX <= 0) {
1352             throw new RSRuntimeException("Vector increments must be greater than 0");
1353         }
1354         int expectedXDim = 1 + (N - 1) * incX;
1355         if (X.getType().getX() != expectedXDim) {
1356             throw new RSRuntimeException("Incorrect vector dimensions for SPR");
1357         }
1358 
1359         return N;
1360     }
1361 
validateSYR2(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Y, int incY, Allocation A)1362     static int validateSYR2(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
1363         validateUplo(Uplo);
1364         if (!A.getType().getElement().isCompatible(e) ||
1365             !X.getType().getElement().isCompatible(e) ||
1366             !Y.getType().getElement().isCompatible(e)) {
1367             throw new RSRuntimeException("Called BLAS with wrong Element type");
1368         }
1369 
1370         if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
1371             throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
1372         }
1373 
1374         int N = A.getType().getX();
1375 
1376         if (N != A.getType().getY()) {
1377             throw new RSRuntimeException("A must be a symmetric matrix");
1378         }
1379         if (incX <= 0 || incY <= 0) {
1380             throw new RSRuntimeException("Vector increments must be greater than 0");
1381         }
1382         int expectedXDim = 1 + (N - 1) * incX;
1383         int expectedYDim = 1 + (N - 1) * incY;
1384         if (X.getType().getX() != expectedXDim || Y.getType().getX() != expectedYDim) {
1385             throw new RSRuntimeException("Incorrect vector dimensions for SYR");
1386         }
1387         return N;
1388 
1389     }
validateSPR2(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Y, int incY, Allocation Ap)1390     static int validateSPR2(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) {
1391         validateUplo(Uplo);
1392         if (!Ap.getType().getElement().isCompatible(e) ||
1393             !X.getType().getElement().isCompatible(e) ||
1394             !Y.getType().getElement().isCompatible(e)) {
1395             throw new RSRuntimeException("Called BLAS with wrong Element type");
1396         }
1397         if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
1398             throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
1399         }
1400 
1401         if (Ap.getType().getY() > 1) {
1402             throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1");
1403         }
1404 
1405         int N = (int)Math.sqrt((double)Ap.getType().getX() * 2);
1406         if (Ap.getType().getX() != ((N * (N+1)) / 2)) {
1407             throw new RSRuntimeException("Invalid dimension for Ap");
1408         }
1409         if (incX <= 0 || incY <= 0) {
1410             throw new RSRuntimeException("Vector increments must be greater than 0");
1411         }
1412         int expectedXDim = 1 + (N - 1) * incX;
1413         int expectedYDim = 1 + (N - 1) * incY;
1414         if (X.getType().getX() != expectedXDim || Y.getType().getX() != expectedYDim) {
1415             throw new RSRuntimeException("Incorrect vector dimensions for SPR2");
1416         }
1417 
1418         return N;
1419     }
1420 
1421     /**
1422      * SSYMV performs the matrix-vector operation
1423      * y := alpha*A*x + beta*y
1424      *
1425      * Details: http://www.netlib.org/lapack/explore-html/d2/d94/ssymv_8f.html
1426      *
1427      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
1428      * @param alpha The scalar alpha.
1429      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
1430      * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1431      * @param incX The increment for the elements of vector x, must be larger than zero.
1432      * @param beta The scalar beta.
1433      * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
1434      * @param incY The increment for the elements of vector y, must be larger than zero.
1435      */
SSYMV(@plo int Uplo, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY)1436     public void SSYMV(@Uplo int Uplo, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) {
1437         int N = validateSYMV(Element.F32(mRS), Uplo, A, X, Y, incX, incY);
1438         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssymv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0);
1439     }
1440 
1441     /**
1442      * SSBMV performs the matrix-vector operation
1443      * y := alpha*A*x + beta*y
1444      *
1445      * Details: http://www.netlib.org/lapack/explore-html/d3/da1/ssbmv_8f.html
1446      *
1447      * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
1448      *       but only the region N*(K+1) will be referenced. The following subroutine can is an
1449      *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
1450      *           for i in range(0, n):
1451      *              for j in range(i, min(i+k+1, n)):
1452      *                  b[i, j-i] = a[i, j]
1453      *
1454      * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied.
1455      * @param K The number of off-diagonals of the matrix A
1456      * @param alpha The scalar alpha.
1457      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
1458      * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1459      * @param incX The increment for the elements of vector x, must be larger than zero.
1460      * @param beta The scalar beta.
1461      * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
1462      * @param incY The increment for the elements of vector y, must be larger than zero.
1463      */
SSBMV(@plo int Uplo, int K, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY)1464     public void SSBMV(@Uplo int Uplo, int K, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) {
1465         // SBMV is the same as SYMV + K >= 0
1466         if (K < 0) {
1467             throw new RSRuntimeException("K must be greater than or equal to 0");
1468         }
1469         int N = validateSYMV(Element.F32(mRS), Uplo, A, X, Y, incX, incY);
1470         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0);
1471     }
1472 
1473     /**
1474      * SSPMV performs the matrix-vector operation
1475      * y := alpha*A*x + beta*y
1476      *
1477      * Details: http://www.netlib.org/lapack/explore-html/d8/d68/sspmv_8f.html
1478      *
1479      * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
1480      *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
1481      *       'a' to packed matrix 'b'.
1482      *           k = 0
1483      *           for i in range(0, n):
1484      *              for j in range(i, n):
1485      *                  b[k++] = a[i, j]
1486      *
1487      * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form.
1488      * @param alpha The scalar alpha.
1489      * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32}.
1490      * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1491      * @param incX The increment for the elements of vector x, must be larger than zero.
1492      * @param beta The scalar beta.
1493      * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
1494      * @param incY The increment for the elements of vector y, must be larger than zero.
1495      */
SSPMV(@plo int Uplo, float alpha, Allocation Ap, Allocation X, int incX, float beta, Allocation Y, int incY)1496     public void SSPMV(@Uplo int Uplo, float alpha, Allocation Ap, Allocation X, int incX, float beta, Allocation Y, int incY) {
1497         int N = validateSPMV(Element.F32(mRS), Uplo, Ap, X, incX, Y, incY);
1498         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sspmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, Ap.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0);
1499     }
1500 
1501     /**
1502      * SGER performs the rank 1 operation
1503      * A := alpha*x*y**T + A
1504      *
1505      * Details: http://www.netlib.org/lapack/explore-html/db/d5c/sger_8f.html
1506      *
1507      * @param alpha The scalar alpha.
1508      * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1509      * @param incX The increment for the elements of vector x, must be larger than zero.
1510      * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
1511      * @param incY The increment for the elements of vector y, must be larger than zero.
1512      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
1513      */
SGER(float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A)1514     public void SGER(float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
1515         int M = A.getType().getY();
1516         int N = A.getType().getX();
1517         validateGER(Element.F32(mRS), X, incX, Y, incY, A);
1518         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sger, 0, 0, 0, 0, 0, M, N, 0, alpha, X.getID(mRS), Y.getID(mRS), 0.f, A.getID(mRS), incX, incY, 0, 0);
1519     }
1520 
1521     /**
1522      * SSYR performs the rank 1 operation
1523      * A := alpha*x*x**T + A
1524      *
1525      * Details: http://www.netlib.org/lapack/explore-html/d6/dac/ssyr_8f.html
1526      *
1527      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
1528      * @param alpha The scalar alpha.
1529      * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1530      * @param incX The increment for the elements of vector x, must be larger than zero.
1531      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
1532      */
SSYR(@plo int Uplo, float alpha, Allocation X, int incX, Allocation A)1533     public void SSYR(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation A) {
1534         int N = validateSYR(Element.F32(mRS), Uplo, X, incX, A);
1535         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), A.getID(mRS), 0.f, 0, incX, 0, 0, 0);
1536     }
1537 
1538     /**
1539      * SSPR performs the rank 1 operation
1540      * A := alpha*x*x**T + A
1541      *
1542      * Details: http://www.netlib.org/lapack/explore-html/d2/d9b/sspr_8f.html
1543      *
1544      * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
1545      *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
1546      *       'a' to packed matrix 'b'.
1547      *           k = 0
1548      *           for i in range(0, n):
1549      *              for j in range(i, n):
1550      *                  b[k++] = a[i, j]
1551      *
1552      * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
1553      * @param alpha The scalar alpha.
1554      * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1555      * @param incX The increment for the elements of vector x, must be larger than zero.
1556      * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32}.
1557      */
SSPR(@plo int Uplo, float alpha, Allocation X, int incX, Allocation Ap)1558     public void SSPR(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Ap) {
1559         int N = validateSPR(Element.F32(mRS), Uplo, X, incX, Ap);
1560         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sspr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), Ap.getID(mRS), 0.f, 0, incX, 0, 0, 0);
1561     }
1562 
1563     /**
1564      * SSYR2 performs the symmetric rank 2 operation
1565      * A := alpha*x*y**T + alpha*y*x**T + A
1566      *
1567      * Details: http://www.netlib.org/lapack/explore-html/db/d99/ssyr2_8f.html
1568      *
1569      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
1570      * @param alpha The scalar alpha.
1571      * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1572      * @param incX The increment for the elements of vector x, must be larger than zero.
1573      * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
1574      * @param incY The increment for the elements of vector y, must be larger than zero.
1575      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
1576      */
SSYR2(@plo int Uplo, float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A)1577     public void SSYR2(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
1578         int N = validateSYR2(Element.F32(mRS), Uplo, X, incX, Y, incY, A);
1579         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), Y.getID(mRS), 0, A.getID(mRS), incX, incY, 0, 0);
1580     }
1581 
1582     /**
1583      * SSPR2 performs the symmetric rank 2 operation
1584      * A := alpha*x*y**T + alpha*y*x**T + A
1585      *
1586      * Details: http://www.netlib.org/lapack/explore-html/db/d3e/sspr2_8f.html
1587      *
1588      * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
1589      *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
1590      *       'a' to packed matrix 'b'.
1591      *           k = 0
1592      *           for i in range(0, n):
1593      *              for j in range(i, n):
1594      *                  b[k++] = a[i, j]
1595      *
1596      * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
1597      * @param alpha The scalar alpha.
1598      * @param X The input allocation contains vector x, supported elements type {@link Element#F32}.
1599      * @param incX The increment for the elements of vector x, must be larger than zero.
1600      * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}.
1601      * @param incY The increment for the elements of vector y, must be larger than zero.
1602      * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32}.
1603      */
SSPR2(@plo int Uplo, float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap)1604     public void SSPR2(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) {
1605         int N = validateSPR2(Element.F32(mRS), Uplo, X, incX, Y, incY, Ap);
1606         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sspr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), Y.getID(mRS), 0, Ap.getID(mRS), incX, incY, 0, 0);
1607     }
1608 
1609     /**
1610      * DSYMV performs the matrix-vector operation
1611      * y := alpha*A*x + beta*y
1612      *
1613      * Details: http://www.netlib.org/lapack/explore-html/d8/dbe/dsymv_8f.html
1614      *
1615      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
1616      * @param alpha The scalar alpha.
1617      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
1618      * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
1619      * @param incX The increment for the elements of vector x, must be larger than zero.
1620      * @param beta The scalar beta.
1621      * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
1622      * @param incY The increment for the elements of vector y, must be larger than zero.
1623      */
DSYMV(@plo int Uplo, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY)1624     public void DSYMV(@Uplo int Uplo, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) {
1625         int N = validateSYMV(Element.F64(mRS), Uplo, A, X, Y, incX, incY);
1626         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsymv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0);
1627     }
1628 
1629     /**
1630      * DSBMV performs the matrix-vector operation
1631      * y := alpha*A*x + beta*y
1632      *
1633      * Details: http://www.netlib.org/lapack/explore-html/d8/d1e/dsbmv_8f.html
1634      *
1635      * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
1636      *       but only the region N*(K+1) will be referenced. The following subroutine can is an
1637      *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
1638      *           for i in range(0, n):
1639      *              for j in range(i, min(i+k+1, n)):
1640      *                  b[i, j-i] = a[i, j]
1641      *
1642      * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied.
1643      * @param K The number of off-diagonals of the matrix A
1644      * @param alpha The scalar alpha.
1645      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
1646      * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
1647      * @param incX The increment for the elements of vector x, must be larger than zero.
1648      * @param beta The scalar beta.
1649      * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
1650      * @param incY The increment for the elements of vector y, must be larger than zero.
1651      */
DSBMV(@plo int Uplo, int K, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY)1652     public void DSBMV(@Uplo int Uplo, int K, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) {
1653         // SBMV is the same as SYMV + K >= 0
1654         if (K < 0) {
1655             throw new RSRuntimeException("K must be greater than or equal to 0");
1656         }
1657         int N = validateSYMV(Element.F64(mRS), Uplo, A, X, Y, incX, incY);
1658         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0);
1659     }
1660 
1661     /**
1662      * DSPMV performs the matrix-vector operation
1663      * y := alpha*A*x + beta*y
1664      *
1665      * Details: http://www.netlib.org/lapack/explore-html/d4/d85/dspmv_8f.html
1666      *
1667      * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
1668      *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
1669      *       'a' to packed matrix 'b'.
1670      *           k = 0
1671      *           for i in range(0, n):
1672      *              for j in range(i, n):
1673      *                  b[k++] = a[i, j]
1674      *
1675      * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form.
1676      * @param alpha The scalar alpha.
1677      * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64}.
1678      * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
1679      * @param incX The increment for the elements of vector x, must be larger than zero.
1680      * @param beta The scalar beta.
1681      * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
1682      * @param incY The increment for the elements of vector y, must be larger than zero.
1683      */
DSPMV(@plo int Uplo, double alpha, Allocation Ap, Allocation X, int incX, double beta, Allocation Y, int incY)1684     public void DSPMV(@Uplo int Uplo, double alpha, Allocation Ap, Allocation X, int incX, double beta, Allocation Y, int incY) {
1685         int N = validateSPMV(Element.F64(mRS), Uplo, Ap, X, incX, Y, incY);
1686         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dspmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, Ap.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0);
1687     }
1688 
1689     /**
1690      * DGER performs the rank 1 operation
1691      * A := alpha*x*y**T + A
1692      *
1693      * Details: http://www.netlib.org/lapack/explore-html/dc/da8/dger_8f.html
1694      *
1695      * @param alpha The scalar alpha.
1696      * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
1697      * @param incX The increment for the elements of vector x, must be larger than zero.
1698      * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
1699      * @param incY The increment for the elements of vector y, must be larger than zero.
1700      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
1701      */
DGER(double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A)1702     public void DGER(double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
1703         int M = A.getType().getY();
1704         int N = A.getType().getX();
1705         validateGER(Element.F64(mRS), X, incX, Y, incY, A);
1706         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dger, 0, 0, 0, 0, 0, M, N, 0, alpha, X.getID(mRS), Y.getID(mRS), 0.f, A.getID(mRS), incX, incY, 0, 0);
1707     }
1708 
1709     /**
1710      * DSYR performs the rank 1 operation
1711      * A := alpha*x*x**T + A
1712      *
1713      * Details: http://www.netlib.org/lapack/explore-html/d3/d60/dsyr_8f.html
1714      *
1715      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
1716      * @param alpha The scalar alpha.
1717      * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
1718      * @param incX The increment for the elements of vector x, must be larger than zero.
1719      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
1720      */
DSYR(@plo int Uplo, double alpha, Allocation X, int incX, Allocation A)1721     public void DSYR(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation A) {
1722         int N = validateSYR(Element.F64(mRS), Uplo, X, incX, A);
1723         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), A.getID(mRS), 0.f, 0, incX, 0, 0, 0);
1724     }
1725 
1726     /**
1727      * DSPR performs the rank 1 operation
1728      * A := alpha*x*x**T + A
1729      *
1730      * Details: http://www.netlib.org/lapack/explore-html/dd/dba/dspr_8f.html
1731      *
1732      * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
1733      *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
1734      *       'a' to packed matrix 'b'.
1735      *           k = 0
1736      *           for i in range(0, n):
1737      *              for j in range(i, n):
1738      *                  b[k++] = a[i, j]
1739      *
1740      * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
1741      * @param alpha The scalar alpha.
1742      * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
1743      * @param incX The increment for the elements of vector x, must be larger than zero.
1744      * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64}.
1745      */
DSPR(@plo int Uplo, double alpha, Allocation X, int incX, Allocation Ap)1746     public void DSPR(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Ap) {
1747         int N = validateSPR(Element.F64(mRS), Uplo, X, incX, Ap);
1748         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dspr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), Ap.getID(mRS), 0.f, 0, incX, 0, 0, 0);
1749     }
1750 
1751     /**
1752      * DSYR2 performs the symmetric rank 2 operation
1753      * A := alpha*x*y**T + alpha*y*x**T + A
1754      *
1755      * Details: http://www.netlib.org/lapack/explore-html/de/d41/dsyr2_8f.html
1756      *
1757      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
1758      * @param alpha The scalar alpha.
1759      * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
1760      * @param incX The increment for the elements of vector x, must be larger than zero.
1761      * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
1762      * @param incY The increment for the elements of vector y, must be larger than zero.
1763      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
1764      */
DSYR2(@plo int Uplo, double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A)1765     public void DSYR2(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
1766         int N = validateSYR2(Element.F64(mRS), Uplo, X, incX, Y, incY, A);
1767         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), Y.getID(mRS), 0, A.getID(mRS), incX, incY, 0, 0);
1768     }
1769 
1770     /**
1771      * DSPR2 performs the symmetric rank 2 operation
1772      * A := alpha*x*y**T + alpha*y*x**T + A
1773      *
1774      * Details: http://www.netlib.org/lapack/explore-html/dd/d9e/dspr2_8f.html
1775      *
1776      * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
1777      *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
1778      *       'a' to packed matrix 'b'.
1779      *           k = 0
1780      *           for i in range(0, n):
1781      *              for j in range(i, n):
1782      *                  b[k++] = a[i, j]
1783      *
1784      * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
1785      * @param alpha The scalar alpha.
1786      * @param X The input allocation contains vector x, supported elements type {@link Element#F64}.
1787      * @param incX The increment for the elements of vector x, must be larger than zero.
1788      * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}.
1789      * @param incY The increment for the elements of vector y, must be larger than zero.
1790      * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64}.
1791      */
DSPR2(@plo int Uplo, double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap)1792     public void DSPR2(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) {
1793         int N = validateSPR2(Element.F64(mRS), Uplo, X, incX, Y, incY, Ap);
1794         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dspr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), Y.getID(mRS), 0, Ap.getID(mRS), incX, incY, 0, 0);
1795     }
1796 
1797 
1798     /**
1799      * Level 2, C and Z only
1800      */
1801 
validateGERU(Element e, Allocation X, int incX, Allocation Y, int incY, Allocation A)1802     static void validateGERU(Element e, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
1803         if (!A.getType().getElement().isCompatible(e) ||
1804             !X.getType().getElement().isCompatible(e) ||
1805             !Y.getType().getElement().isCompatible(e)) {
1806             throw new RSRuntimeException("Called BLAS with wrong Element type");
1807         }
1808         if (X.getType().getY() > 1 || Y.getType().getY() > 1) {
1809             throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1");
1810         }
1811 
1812         int M = A.getType().getY();
1813         int N = A.getType().getX();
1814         if (incX <= 0 || incY <= 0) {
1815             throw new RSRuntimeException("Vector increments must be greater than 0");
1816         }
1817         int expectedXDim = 1 + (M - 1) * incX;
1818         if (X.getType().getX() != expectedXDim) {
1819             throw new RSRuntimeException("Incorrect vector dimensions for GERU");
1820         }
1821         int expectedYDim = 1 + (N - 1) * incY;
1822         if (Y.getType().getX() != expectedYDim) {
1823             throw new RSRuntimeException("Incorrect vector dimensions for GERU");
1824         }
1825 
1826     }
1827 
1828     /**
1829      * CHEMV performs the matrix-vector operation
1830      * y := alpha*A*x + beta*y
1831      *
1832      * Details: http://www.netlib.org/lapack/explore-html/d7/d51/chemv_8f.html
1833      *
1834      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
1835      * @param alpha The scalar alpha.
1836      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
1837      * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
1838      * @param incX The increment for the elements of vector x, must be larger than zero.
1839      * @param beta The scalar beta.
1840      * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
1841      * @param incY The increment for the elements of vector y, must be larger than zero.
1842      */
CHEMV(@plo int Uplo, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY)1843     public void CHEMV(@Uplo int Uplo, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) {
1844         // HEMV is the same as SYR2 validation-wise
1845         int N = validateSYR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, A);
1846         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chemv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0);
1847     }
1848 
1849     /**
1850      * CHBMV performs the matrix-vector operation
1851      * y := alpha*A*x + beta*y
1852      *
1853      * Details: http://www.netlib.org/lapack/explore-html/db/dc2/chbmv_8f.html
1854      *
1855      * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
1856      *       but only the region N*(K+1) will be referenced. The following subroutine can is an
1857      *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
1858      *           for i in range(0, n):
1859      *              for j in range(i, min(i+k+1, n)):
1860      *                  b[i, j-i] = a[i, j]
1861      *
1862      * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied.
1863      * @param K The number of off-diagonals of the matrix A
1864      * @param alpha The scalar alpha.
1865      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
1866      * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
1867      * @param incX The increment for the elements of vector x, must be larger than zero.
1868      * @param beta The scalar beta.
1869      * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
1870      * @param incY The increment for the elements of vector y, must be larger than zero.
1871      */
CHBMV(@plo int Uplo, int K, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY)1872     public void CHBMV(@Uplo int Uplo, int K, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) {
1873         // HBMV is the same as SYR2 validation-wise
1874         int N = validateSYR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, A);
1875         if (K < 0) {
1876             throw new RSRuntimeException("K must be 0 or greater for HBMV");
1877         }
1878         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0);
1879     }
1880 
1881     /**
1882      * CHPMV performs the matrix-vector operation
1883      * y := alpha*A*x + beta*y
1884      *
1885      * Details: http://www.netlib.org/lapack/explore-html/d2/d06/chpmv_8f.html
1886      *
1887      * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
1888      *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
1889      *       'a' to packed matrix 'b'.
1890      *           k = 0
1891      *           for i in range(0, n):
1892      *              for j in range(i, n):
1893      *                  b[k++] = a[i, j]
1894      *
1895      * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form.
1896      * @param alpha The scalar alpha.
1897      * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
1898      * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
1899      * @param incX The increment for the elements of vector x, must be larger than zero.
1900      * @param beta The scalar beta.
1901      * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
1902      * @param incY The increment for the elements of vector y, must be larger than zero.
1903      */
CHPMV(@plo int Uplo, Float2 alpha, Allocation Ap, Allocation X, int incX, Float2 beta, Allocation Y, int incY)1904     public void CHPMV(@Uplo int Uplo, Float2 alpha, Allocation Ap, Allocation X, int incX, Float2 beta, Allocation Y, int incY) {
1905         // HPMV is the same as SPR2
1906         int N = validateSPR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, Ap);
1907         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chpmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, Ap.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0);
1908     }
1909 
1910     /**
1911      * CGERU performs the rank 1 operation
1912      * A := alpha*x*y**T + A
1913      *
1914      * Details: http://www.netlib.org/lapack/explore-html/db/d5f/cgeru_8f.html
1915      *
1916      * @param alpha The scalar alpha.
1917      * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
1918      * @param incX The increment for the elements of vector x, must be larger than zero.
1919      * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
1920      * @param incY The increment for the elements of vector y, must be larger than zero.
1921      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
1922      */
CGERU(Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A)1923     public void CGERU(Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
1924         validateGERU(Element.F32_2(mRS), X, incX, Y, incY, A);
1925         int M = A.getType().getY();
1926         int N = A.getType().getX();
1927         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgeru, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, A.getID(mRS), incX, incY, 0, 0);
1928     }
1929 
1930     /**
1931      * CGERC performs the rank 1 operation
1932      * A := alpha*x*y**H + A
1933      *
1934      * Details: http://www.netlib.org/lapack/explore-html/dd/d84/cgerc_8f.html
1935      *
1936      * @param alpha The scalar alpha.
1937      * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
1938      * @param incX The increment for the elements of vector x, must be larger than zero.
1939      * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
1940      * @param incY The increment for the elements of vector y, must be larger than zero.
1941      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
1942      */
CGERC(Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A)1943     public void CGERC(Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
1944         // same as GERU
1945         validateGERU(Element.F32_2(mRS), X, incX, Y, incY, A);
1946         int M = A.getType().getY();
1947         int N = A.getType().getX();
1948         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgerc, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, A.getID(mRS), incX, incY, 0, 0);
1949     }
1950 
1951     /**
1952      * CHER performs the rank 1 operation
1953      * A := alpha*x*x**H + A
1954      *
1955      * Details: http://www.netlib.org/lapack/explore-html/d3/d6d/cher_8f.html
1956      *
1957      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
1958      * @param alpha The scalar alpha.
1959      * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
1960      * @param incX The increment for the elements of vector x, must be larger than zero.
1961      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
1962      */
CHER(@plo int Uplo, float alpha, Allocation X, int incX, Allocation A)1963     public void CHER(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation A) {
1964         // same as SYR
1965         int N = validateSYR(Element.F32_2(mRS), Uplo, X, incX, A);
1966         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cher, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, X.getID(mRS), 0, 0, 0, A.getID(mRS), incX, 0, 0, 0);
1967     }
1968 
1969     /**
1970      * CHPR performs the rank 1 operation
1971      * A := alpha*x*x**H + A
1972      *
1973      * Details: http://www.netlib.org/lapack/explore-html/db/dcd/chpr_8f.html
1974      *
1975      * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
1976      *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
1977      *       'a' to packed matrix 'b'.
1978      *           k = 0
1979      *           for i in range(0, n):
1980      *              for j in range(i, n):
1981      *                  b[k++] = a[i, j]
1982      *
1983      * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
1984      * @param alpha The scalar alpha.
1985      * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
1986      * @param incX The increment for the elements of vector x, must be larger than zero.
1987      * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
1988      */
CHPR(@plo int Uplo, float alpha, Allocation X, int incX, Allocation Ap)1989     public void CHPR(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Ap) {
1990         // equivalent to SPR for validation
1991         int N = validateSPR(Element.F32_2(mRS), Uplo, X, incX, Ap);
1992         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chpr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, X.getID(mRS), 0, 0, 0, Ap.getID(mRS), incX, 0, 0, 0);
1993     }
1994 
1995     /**
1996      * CHER2 performs the symmetric rank 2 operation
1997      * A := alpha*x*y**H + alpha*y*x**H + A
1998      *
1999      * Details: http://www.netlib.org/lapack/explore-html/db/d87/cher2_8f.html
2000      *
2001      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2002      * @param alpha The scalar alpha.
2003      * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
2004      * @param incX The increment for the elements of vector x, must be larger than zero.
2005      * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
2006      * @param incY The increment for the elements of vector y, must be larger than zero.
2007      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
2008      */
CHER2(@plo int Uplo, Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A)2009     public void CHER2(@Uplo int Uplo, Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
2010         // same as SYR2
2011         int N = validateSYR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, A);
2012         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cher2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, A.getID(mRS), incX, incY, 0, 0);
2013     }
2014 
2015     /**
2016      * CHPR2 performs the symmetric rank 2 operation
2017      * A := alpha*x*y**H + alpha*y*x**H + A
2018      *
2019      * Details: http://www.netlib.org/lapack/explore-html/d6/d44/chpr2_8f.html
2020      *
2021      * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
2022      *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
2023      *       'a' to packed matrix 'b'.
2024      *           k = 0
2025      *           for i in range(0, n):
2026      *              for j in range(i, n):
2027      *                  b[k++] = a[i, j]
2028      *
2029      * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
2030      * @param alpha The scalar alpha.
2031      * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}.
2032      * @param incX The increment for the elements of vector x, must be larger than zero.
2033      * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}.
2034      * @param incY The increment for the elements of vector y, must be larger than zero.
2035      * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
2036      */
CHPR2(@plo int Uplo, Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap)2037     public void CHPR2(@Uplo int Uplo, Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) {
2038         // same as SPR2
2039         int N = validateSPR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, Ap);
2040         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chpr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, Ap.getID(mRS), incX, incY, 0, 0);
2041     }
2042 
2043     /**
2044      * ZHEMV performs the matrix-vector operation
2045      * y := alpha*A*x + beta*y
2046      *
2047      * Details: http://www.netlib.org/lapack/explore-html/d0/ddd/zhemv_8f.html
2048      *
2049      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2050      * @param alpha The scalar alpha.
2051      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2052      * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
2053      * @param incX The increment for the elements of vector x, must be larger than zero.
2054      * @param beta The scalar beta.
2055      * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
2056      * @param incY The increment for the elements of vector y, must be larger than zero.
2057      */
ZHEMV(@plo int Uplo, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY)2058     public void ZHEMV(@Uplo int Uplo, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) {
2059         // HEMV is the same as SYR2 validation-wise
2060         int N = validateSYR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, A);
2061         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhemv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0);
2062     }
2063 
2064     /**
2065      * ZHBMV performs the matrix-vector operation
2066      * y := alpha*A*x + beta*y
2067      *
2068      * Details: http://www.netlib.org/lapack/explore-html/d3/d1a/zhbmv_8f.html
2069      *
2070      * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N),
2071      *       but only the region N*(K+1) will be referenced. The following subroutine can is an
2072      *       example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'.
2073      *           for i in range(0, n):
2074      *              for j in range(i, min(i+k+1, n)):
2075      *                  b[i, j-i] = a[i, j]
2076      *
2077      * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied.
2078      * @param K The number of off-diagonals of the matrix A
2079      * @param alpha The scalar alpha.
2080      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2081      * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
2082      * @param incX The increment for the elements of vector x, must be larger than zero.
2083      * @param beta The scalar beta.
2084      * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
2085      * @param incY The increment for the elements of vector y, must be larger than zero.
2086      */
ZHBMV(@plo int Uplo, int K, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY)2087     public void ZHBMV(@Uplo int Uplo, int K, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) {
2088         // HBMV is the same as SYR2 validation-wise
2089         int N = validateSYR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, A);
2090         if (K < 0) {
2091             throw new RSRuntimeException("K must be 0 or greater for HBMV");
2092         }
2093         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0);
2094     }
2095 
2096     /**
2097      * ZHPMV performs the matrix-vector operation
2098      * y := alpha*A*x + beta*y
2099      *
2100      * Details: http://www.netlib.org/lapack/explore-html/d0/d60/zhpmv_8f.html
2101      *
2102      * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
2103      *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
2104      *       'a' to packed matrix 'b'.
2105      *           k = 0
2106      *           for i in range(0, n):
2107      *              for j in range(i, n):
2108      *                  b[k++] = a[i, j]
2109      *
2110      * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form.
2111      * @param alpha The scalar alpha.
2112      * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2113      * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
2114      * @param incX The increment for the elements of vector x, must be larger than zero.
2115      * @param beta The scalar beta.
2116      * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
2117      * @param incY The increment for the elements of vector y, must be larger than zero.
2118      */
ZHPMV(@plo int Uplo, Double2 alpha, Allocation Ap, Allocation X, int incX, Double2 beta, Allocation Y, int incY)2119     public void ZHPMV(@Uplo int Uplo, Double2 alpha, Allocation Ap, Allocation X, int incX, Double2 beta, Allocation Y, int incY) {
2120         // HPMV is the same as SPR2
2121         int N = validateSPR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, Ap);
2122         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhpmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, Ap.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0);
2123     }
2124 
2125     /**
2126      * ZGERU performs the rank 1 operation
2127      * A := alpha*x*y**T + A
2128      *
2129      * Details: http://www.netlib.org/lapack/explore-html/d7/d12/zgeru_8f.html
2130      *
2131      * @param alpha The scalar alpha.
2132      * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
2133      * @param incX The increment for the elements of vector x, must be larger than zero.
2134      * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
2135      * @param incY The increment for the elements of vector y, must be larger than zero.
2136      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2137      */
ZGERU(Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A)2138     public void ZGERU(Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
2139         validateGERU(Element.F64_2(mRS), X, incX, Y, incY, A);
2140         int M = A.getType().getY();
2141         int N = A.getType().getX();
2142         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgeru, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, A.getID(mRS), incX, incY, 0, 0);
2143     }
2144 
2145     /**
2146      * ZGERC performs the rank 1 operation
2147      * A := alpha*x*y**H + A
2148      *
2149      * Details: http://www.netlib.org/lapack/explore-html/d3/dad/zgerc_8f.html
2150      *
2151      * @param alpha The scalar alpha.
2152      * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
2153      * @param incX The increment for the elements of vector x, must be larger than zero.
2154      * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
2155      * @param incY The increment for the elements of vector y, must be larger than zero.
2156      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2157      */
ZGERC(Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A)2158     public void ZGERC(Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
2159         // same as GERU
2160         validateGERU(Element.F64_2(mRS), X, incX, Y, incY, A);
2161         int M = A.getType().getY();
2162         int N = A.getType().getX();
2163         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgerc, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, A.getID(mRS), incX, incY, 0, 0);
2164     }
2165 
2166     /**
2167      * ZHER performs the rank 1 operation
2168      * A := alpha*x*x**H + A
2169      *
2170      * Details: http://www.netlib.org/lapack/explore-html/de/d0e/zher_8f.html
2171      *
2172      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2173      * @param alpha The scalar alpha.
2174      * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
2175      * @param incX The increment for the elements of vector x, must be larger than zero.
2176      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2177      */
ZHER(@plo int Uplo, double alpha, Allocation X, int incX, Allocation A)2178     public void ZHER(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation A) {
2179         // same as SYR
2180         int N = validateSYR(Element.F64_2(mRS), Uplo, X, incX, A);
2181         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zher, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, X.getID(mRS), 0, 0, 0, A.getID(mRS), incX, 0, 0, 0);
2182     }
2183 
2184     /**
2185      * ZHPR performs the rank 1 operation
2186      * A := alpha*x*x**H + A
2187      *
2188      * Details: http://www.netlib.org/lapack/explore-html/de/de1/zhpr_8f.html
2189      *
2190      * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
2191      *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
2192      *       'a' to packed matrix 'b'.
2193      *           k = 0
2194      *           for i in range(0, n):
2195      *              for j in range(i, n):
2196      *                  b[k++] = a[i, j]
2197      *
2198      * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
2199      * @param alpha The scalar alpha.
2200      * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
2201      * @param incX The increment for the elements of vector x, must be larger than zero.
2202      * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2203      */
ZHPR(@plo int Uplo, double alpha, Allocation X, int incX, Allocation Ap)2204     public void ZHPR(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Ap) {
2205         // equivalent to SPR for validation
2206         int N = validateSPR(Element.F64_2(mRS), Uplo, X, incX, Ap);
2207         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhpr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, X.getID(mRS), 0, 0, 0, Ap.getID(mRS), incX, 0, 0, 0);
2208     }
2209 
2210     /**
2211      * ZHER2 performs the symmetric rank 2 operation
2212      * A := alpha*x*y**H + alpha*y*x**H + A
2213      *
2214      * Details: http://www.netlib.org/lapack/explore-html/da/d8a/zher2_8f.html
2215      *
2216      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2217      * @param alpha The scalar alpha.
2218      * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
2219      * @param incX The increment for the elements of vector x, must be larger than zero.
2220      * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
2221      * @param incY The increment for the elements of vector y, must be larger than zero.
2222      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2223      */
ZHER2(@plo int Uplo, Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A)2224     public void ZHER2(@Uplo int Uplo, Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) {
2225         // same as SYR2
2226         int N = validateSYR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, A);
2227         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zher2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, A.getID(mRS), incX, incY, 0, 0);
2228     }
2229 
2230     /**
2231      * ZHPR2 performs the symmetric rank 2 operation
2232      * A := alpha*x*y**H + alpha*y*x**H + A
2233      *
2234      * Details: http://www.netlib.org/lapack/explore-html/d5/d52/zhpr2_8f.html
2235      *
2236      * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2,
2237      *       The following subroutine can is an example showing how to convert a UPPER trianglar matrix
2238      *       'a' to packed matrix 'b'.
2239      *           k = 0
2240      *           for i in range(0, n):
2241      *              for j in range(i, n):
2242      *                  b[k++] = a[i, j]
2243      *
2244      * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form.
2245      * @param alpha The scalar alpha.
2246      * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}.
2247      * @param incX The increment for the elements of vector x, must be larger than zero.
2248      * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}.
2249      * @param incY The increment for the elements of vector y, must be larger than zero.
2250      * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2251      */
ZHPR2(@plo int Uplo, Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap)2252     public void ZHPR2(@Uplo int Uplo, Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) {
2253         // same as SPR2
2254         int N = validateSPR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, Ap);
2255         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhpr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, Ap.getID(mRS), incX, incY, 0, 0);
2256     }
2257 
2258 
2259     /**
2260      * Level 3 BLAS
2261      */
2262 
validateL3(Element e, int TransA, int TransB, int Side, Allocation A, Allocation B, Allocation C)2263     static void validateL3(Element e, int TransA, int TransB, int Side, Allocation A, Allocation B, Allocation C) {
2264         int aM = -1, aN = -1, bM = -1, bN = -1, cM = -1, cN = -1;
2265         if ((A != null && !A.getType().getElement().isCompatible(e)) ||
2266             (B != null && !B.getType().getElement().isCompatible(e)) ||
2267             (C != null && !C.getType().getElement().isCompatible(e))) {
2268             throw new RSRuntimeException("Called BLAS with wrong Element type");
2269         }
2270         if (C == null) {
2271             //since matrix C is used to store the result, it cannot be null.
2272             throw new RSRuntimeException("Allocation C cannot be null");
2273         }
2274         cM = C.getType().getY();
2275         cN = C.getType().getX();
2276 
2277         if (Side == RIGHT) {
2278             if ((A == null && B != null) || (A != null && B == null)) {
2279                 throw new RSRuntimeException("Provided Matrix A without Matrix B, or vice versa");
2280             }
2281             if (B != null) {
2282                 bM = A.getType().getY();
2283                 bN = A.getType().getX();
2284             }
2285             if (A != null) {
2286                 aM = B.getType().getY();
2287                 aN = B.getType().getX();
2288             }
2289         } else {
2290             if (A != null) {
2291                 if (TransA == TRANSPOSE || TransA == CONJ_TRANSPOSE) {
2292                     aN = A.getType().getY();
2293                     aM = A.getType().getX();
2294                 } else {
2295                     aM = A.getType().getY();
2296                     aN = A.getType().getX();
2297                 }
2298             }
2299             if (B != null) {
2300                 if (TransB == TRANSPOSE || TransB == CONJ_TRANSPOSE) {
2301                     bN = B.getType().getY();
2302                     bM = B.getType().getX();
2303                 } else {
2304                     bM = B.getType().getY();
2305                     bN = B.getType().getX();
2306                 }
2307             }
2308         }
2309         if (A != null && B != null && C != null) {
2310             if (aN != bM || aM != cM || bN != cN) {
2311                 throw new RSRuntimeException("Called BLAS with invalid dimensions");
2312             }
2313         } else if (A != null && C != null) {
2314             // A and C only, for SYRK
2315             if (cM != cN) {
2316                 throw new RSRuntimeException("Matrix C is not symmetric");
2317             }
2318             if (aM != cM) {
2319                 throw new RSRuntimeException("Called BLAS with invalid dimensions");
2320             }
2321         } else if (A != null && B != null) {
2322             // A and B only
2323             if (aN != bM) {
2324                 throw new RSRuntimeException("Called BLAS with invalid dimensions");
2325             }
2326         }
2327 
2328     }
2329 
2330     /**
2331      * SGEMM performs one of the matrix-matrix operations
2332      * C := alpha*op(A)*op(B) + beta*C   where op(X) is one of op(X) = X  or  op(X) = X**T
2333      *
2334      * Details: http://www.netlib.org/lapack/explore-html/d4/de2/sgemm_8f.html
2335      *
2336      * @param TransA The type of transpose applied to matrix A.
2337      * @param TransB The type of transpose applied to matrix B.
2338      * @param alpha The scalar alpha.
2339      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
2340      * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}.
2341      * @param beta The scalar beta.
2342      * @param C The input allocation contains matrix C, supported elements type {@link Element#F32}.
2343      */
SGEMM(@ranspose int TransA, @Transpose int TransB, float alpha, Allocation A, Allocation B, float beta, Allocation C)2344     public void SGEMM(@Transpose int TransA, @Transpose int TransB, float alpha, Allocation A,
2345                       Allocation B, float beta, Allocation C) {
2346         validateTranspose(TransA);
2347         validateTranspose(TransB);
2348         validateL3(Element.F32(mRS), TransA, TransB, 0, A, B, C);
2349 
2350         int M = -1, N = -1, K = -1;
2351         if (TransA != NO_TRANSPOSE) {
2352             M = A.getType().getX();
2353             K = A.getType().getY();
2354         } else {
2355             M = A.getType().getY();
2356             K = A.getType().getX();
2357         }
2358         if (TransB != NO_TRANSPOSE) {
2359             N = B.getType().getY();
2360         } else {
2361             N = B.getType().getX();
2362         }
2363         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sgemm, TransA, TransB, 0, 0, 0, M, N, K,  alpha, A.getID(mRS), B.getID(mRS),
2364                                         beta, C.getID(mRS), 0, 0, 0, 0);
2365     }
2366 
2367     /**
2368      * DGEMM performs one of the matrix-matrix operations
2369      * C := alpha*op(A)*op(B) + beta*C   where op(X) is one of op(X) = X  or  op(X) = X**T
2370      *
2371      * Details: http://www.netlib.org/lapack/explore-html/d7/d2b/dgemm_8f.html
2372      *
2373      * @param TransA The type of transpose applied to matrix A.
2374      * @param TransB The type of transpose applied to matrix B.
2375      * @param alpha The scalar alpha.
2376      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
2377      * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}.
2378      * @param beta The scalar beta.
2379      * @param C The input allocation contains matrix C, supported elements type {@link Element#F64}.
2380      */
DGEMM(@ranspose int TransA, @Transpose int TransB, double alpha, Allocation A, Allocation B, double beta, Allocation C)2381     public void DGEMM(@Transpose int TransA, @Transpose int TransB, double alpha, Allocation A,
2382                       Allocation B, double beta, Allocation C) {
2383         validateTranspose(TransA);
2384         validateTranspose(TransB);
2385         validateL3(Element.F64(mRS), TransA, TransB, 0, A, B, C);
2386         int M = -1, N = -1, K = -1;
2387         if (TransA != NO_TRANSPOSE) {
2388             M = A.getType().getX();
2389             K = A.getType().getY();
2390         } else {
2391             M = A.getType().getY();
2392             K = A.getType().getX();
2393         }
2394         if (TransB != NO_TRANSPOSE) {
2395             N = B.getType().getY();
2396         } else {
2397             N = B.getType().getX();
2398         }
2399         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dgemm, TransA, TransB, 0, 0, 0, M, N, K,  alpha, A.getID(mRS), B.getID(mRS),
2400                                         beta, C.getID(mRS), 0, 0, 0, 0);
2401     }
2402 
2403     /**
2404      * CGEMM performs one of the matrix-matrix operations
2405      * C := alpha*op(A)*op(B) + beta*C   where op(X) is one of op(X) = X  or  op(X) = X**T  or  op(X) = X**H
2406      *
2407      * Details: http://www.netlib.org/lapack/explore-html/d6/d5b/cgemm_8f.html
2408      *
2409      * @param TransA The type of transpose applied to matrix A.
2410      * @param TransB The type of transpose applied to matrix B.
2411      * @param alpha The scalar alpha.
2412      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
2413      * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.
2414      * @param beta The scalar beta.
2415      * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.
2416      */
CGEMM(@ranspose int TransA, @Transpose int TransB, Float2 alpha, Allocation A, Allocation B, Float2 beta, Allocation C)2417     public void CGEMM(@Transpose int TransA, @Transpose int TransB, Float2 alpha, Allocation A,
2418                       Allocation B, Float2 beta, Allocation C) {
2419         validateTranspose(TransA);
2420         validateTranspose(TransB);
2421         validateL3(Element.F32_2(mRS), TransA, TransB, 0, A, B, C);
2422         int M = -1, N = -1, K = -1;
2423         if (TransA != NO_TRANSPOSE) {
2424             M = A.getType().getX();
2425             K = A.getType().getY();
2426         } else {
2427             M = A.getType().getY();
2428             K = A.getType().getX();
2429         }
2430         if (TransB != NO_TRANSPOSE) {
2431             N = B.getType().getY();
2432         } else {
2433             N = B.getType().getX();
2434         }
2435         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgemm, TransA, TransB, 0, 0, 0, M, N, K,  alpha.x, alpha.y, A.getID(mRS), B.getID(mRS),
2436                                          beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0);
2437     }
2438 
2439     /**
2440      * ZGEMM performs one of the matrix-matrix operations
2441      * C := alpha*op(A)*op(B) + beta*C   where op(X) is one of op(X) = X  or  op(X) = X**T  or  op(X) = X**H
2442      *
2443      * Details: http://www.netlib.org/lapack/explore-html/d7/d76/zgemm_8f.html
2444      *
2445      * @param TransA The type of transpose applied to matrix A.
2446      * @param TransB The type of transpose applied to matrix B.
2447      * @param alpha The scalar alpha.
2448      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2449      * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.
2450      * @param beta The scalar beta.
2451      * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.
2452      */
ZGEMM(@ranspose int TransA, @Transpose int TransB, Double2 alpha, Allocation A, Allocation B, Double2 beta, Allocation C)2453     public void ZGEMM(@Transpose int TransA, @Transpose int TransB, Double2 alpha, Allocation A,
2454                       Allocation B, Double2 beta, Allocation C) {
2455         validateTranspose(TransA);
2456         validateTranspose(TransB);
2457         validateL3(Element.F64_2(mRS), TransA, TransB, 0, A, B, C);
2458         int M = -1, N = -1, K = -1;
2459         if (TransA != NO_TRANSPOSE) {
2460             M = A.getType().getX();
2461             K = A.getType().getY();
2462         } else {
2463             M = A.getType().getY();
2464             K = A.getType().getX();
2465         }
2466         if (TransB != NO_TRANSPOSE) {
2467             N = B.getType().getY();
2468         } else {
2469             N = B.getType().getX();
2470         }
2471         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgemm, TransA, TransB, 0, 0, 0, M, N, K,  alpha.x, alpha.y, A.getID(mRS), B.getID(mRS),
2472                                    beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0);
2473     }
2474 
2475     /**
2476      * SSYMM performs one of the matrix-matrix operations
2477      * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
2478      *
2479      * Details: http://www.netlib.org/lapack/explore-html/d7/d42/ssymm_8f.html
2480      *
2481      * @param Side Specifies whether the symmetric matrix A appears on the left or right.
2482      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2483      * @param alpha The scalar alpha.
2484      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
2485      * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}.
2486      * @param beta The scalar beta.
2487      * @param C The input allocation contains matrix C, supported elements type {@link Element#F32}.
2488      */
SSYMM(@ide int Side, @Uplo int Uplo, float alpha, Allocation A, Allocation B, float beta, Allocation C)2489     public void SSYMM(@Side int Side, @Uplo int Uplo, float alpha, Allocation A,
2490                       Allocation B, float beta, Allocation C) {
2491         validateSide(Side);
2492         validateUplo(Uplo);
2493         //For SYMM, Matrix A should be symmetric
2494         if (A.getType().getX() != A.getType().getY()) {
2495             throw new RSRuntimeException("Matrix A is not symmetric");
2496         }
2497         validateL3(Element.F32(mRS), 0, 0, Side, A, B, C);
2498         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha, A.getID(mRS), B.getID(mRS),
2499                                         beta, C.getID(mRS), 0, 0, 0, 0);
2500     }
2501 
2502     /**
2503      * DSYMM performs one of the matrix-matrix operations
2504      * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
2505      *
2506      * Details: http://www.netlib.org/lapack/explore-html/d8/db0/dsymm_8f.html
2507      *
2508      * @param Side Specifies whether the symmetric matrix A appears on the left or right.
2509      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2510      * @param alpha The scalar alpha.
2511      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
2512      * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}.
2513      * @param beta The scalar beta.
2514      * @param C The input allocation contains matrix C, supported elements type {@link Element#F64}.
2515      */
DSYMM(@ide int Side, @Uplo int Uplo, double alpha, Allocation A, Allocation B, double beta, Allocation C)2516     public void DSYMM(@Side int Side, @Uplo int Uplo, double alpha, Allocation A,
2517                       Allocation B, double beta, Allocation C) {
2518         validateSide(Side);
2519         validateUplo(Uplo);
2520         if (A.getType().getX() != A.getType().getY()) {
2521             throw new RSRuntimeException("Matrix A is not symmetric");
2522         }
2523         validateL3(Element.F64(mRS), 0, 0, Side, A, B, C);
2524         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha, A.getID(mRS), B.getID(mRS),
2525                                         beta, C.getID(mRS), 0, 0, 0, 0);
2526     }
2527 
2528     /**
2529      * CSYMM performs one of the matrix-matrix operations
2530      * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
2531      *
2532      * Details: http://www.netlib.org/lapack/explore-html/db/d59/csymm_8f.html
2533      *
2534      * @param Side Specifies whether the symmetric matrix A appears on the left or right.
2535      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2536      * @param alpha The scalar alpha.
2537      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
2538      * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.
2539      * @param beta The scalar beta.
2540      * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.
2541      */
CSYMM(@ide int Side, @Uplo int Uplo, Float2 alpha, Allocation A, Allocation B, Float2 beta, Allocation C)2542     public void CSYMM(@Side int Side, @Uplo int Uplo, Float2 alpha, Allocation A,
2543                       Allocation B, Float2 beta, Allocation C) {
2544         validateSide(Side);
2545         validateUplo(Uplo);
2546         if (A.getType().getX() != A.getType().getY()) {
2547             throw new RSRuntimeException("Matrix A is not symmetric");
2548         }
2549         validateL3(Element.F32_2(mRS), 0, 0, Side, A, B, C);
2550         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_csymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha.x, alpha.y, A.getID(mRS), B.getID(mRS),
2551                                          beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0);
2552     }
2553 
2554     /**
2555      * ZSYMM performs one of the matrix-matrix operations
2556      * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
2557      *
2558      * Details: http://www.netlib.org/lapack/explore-html/df/d51/zsymm_8f.html
2559      *
2560      * @param Side Specifies whether the symmetric matrix A appears on the left or right.
2561      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
2562      * @param alpha The scalar alpha.
2563      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2564      * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.
2565      * @param beta The scalar beta.
2566      * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.
2567      */
ZSYMM(@ide int Side, @Uplo int Uplo, Double2 alpha, Allocation A, Allocation B, Double2 beta, Allocation C)2568     public void ZSYMM(@Side int Side, @Uplo int Uplo, Double2 alpha, Allocation A,
2569                       Allocation B, Double2 beta, Allocation C) {
2570         validateSide(Side);
2571         validateUplo(Uplo);
2572         if (A.getType().getX() != A.getType().getY()) {
2573             throw new RSRuntimeException("Matrix A is not symmetric");
2574         }
2575         validateL3(Element.F64_2(mRS), 0, 0, Side, A, B, C);
2576         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zsymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha.x, alpha.y, A.getID(mRS), B.getID(mRS),
2577                                    beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0);
2578     }
2579 
2580     /**
2581      * SSYRK performs one of the symmetric rank k operations
2582      * C := alpha*A*A**T + beta*C   or   C := alpha*A**T*A + beta*C
2583      *
2584      * Details: http://www.netlib.org/lapack/explore-html/d0/d40/ssyrk_8f.html
2585      *
2586      * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
2587      * @param Trans The type of transpose applied to the operation.
2588      * @param alpha The scalar alpha.
2589      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
2590      * @param beta The scalar beta.
2591      * @param C The input allocation contains matrix C, supported elements type {@link Element#F32}.
2592      */
SSYRK(@plo int Uplo, @Transpose int Trans, float alpha, Allocation A, float beta, Allocation C)2593     public void SSYRK(@Uplo int Uplo, @Transpose int Trans, float alpha, Allocation A, float beta, Allocation C) {
2594         validateTranspose(Trans);
2595         validateUplo(Uplo);
2596         validateL3(Element.F32(mRS), Trans, 0, 0, A, null, C);
2597         int K = -1;
2598         if (Trans != NO_TRANSPOSE) {
2599             K = A.getType().getY();
2600         } else {
2601             K = A.getType().getX();
2602         }
2603 
2604         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, A.getID(mRS), 0, beta, C.getID(mRS), 0, 0, 0, 0);
2605     }
2606 
2607     /**
2608      * DSYRK performs one of the symmetric rank k operations
2609      * C := alpha*A*A**T + beta*C   or   C := alpha*A**T*A + beta*C
2610      *
2611      * Details: http://www.netlib.org/lapack/explore-html/dc/d05/dsyrk_8f.html
2612      *
2613      * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
2614      * @param Trans The type of transpose applied to the operation.
2615      * @param alpha The scalar alpha.
2616      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
2617      * @param beta The scalar beta.
2618      * @param C The input allocation contains matrix C, supported elements type {@link Element#F64}.
2619      */
DSYRK(@plo int Uplo, @Transpose int Trans, double alpha, Allocation A, double beta, Allocation C)2620     public void DSYRK(@Uplo int Uplo, @Transpose int Trans, double alpha, Allocation A, double beta, Allocation C) {
2621         validateTranspose(Trans);
2622         validateUplo(Uplo);
2623         validateL3(Element.F64(mRS), Trans, 0, 0, A, null, C);
2624         int K = -1;
2625         if (Trans != NO_TRANSPOSE) {
2626             K = A.getType().getY();
2627         } else {
2628             K = A.getType().getX();
2629         }
2630         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, A.getID(mRS), 0, beta, C.getID(mRS), 0, 0, 0, 0);
2631     }
2632 
2633     /**
2634      * CSYRK performs one of the symmetric rank k operations
2635      * C := alpha*A*A**T + beta*C   or   C := alpha*A**T*A + beta*C
2636      *
2637      * Details: http://www.netlib.org/lapack/explore-html/d3/d6a/csyrk_8f.html
2638      *
2639      * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
2640      * @param Trans The type of transpose applied to the operation.
2641      * @param alpha The scalar alpha.
2642      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
2643      * @param beta The scalar beta.
2644      * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.
2645      */
CSYRK(@plo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Float2 beta, Allocation C)2646     public void CSYRK(@Uplo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Float2 beta, Allocation C) {
2647         validateTranspose(Trans);
2648         validateUplo(Uplo);
2649         validateL3(Element.F32_2(mRS), Trans, 0, 0, A, null, C);
2650         int K = -1;
2651         if (Trans != NO_TRANSPOSE) {
2652             K = A.getType().getY();
2653         } else {
2654             K = A.getType().getX();
2655         }
2656         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_csyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, A.getID(mRS), 0, beta.x, beta.y,
2657                                          C.getID(mRS), 0, 0, 0, 0);
2658     }
2659 
2660     /**
2661      * ZSYRK performs one of the symmetric rank k operations
2662      * C := alpha*A*A**T + beta*C   or   C := alpha*A**T*A + beta*C
2663      *
2664      * Details: http://www.netlib.org/lapack/explore-html/de/d54/zsyrk_8f.html
2665      *
2666      * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
2667      * @param Trans The type of transpose applied to the operation.
2668      * @param alpha The scalar alpha.
2669      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2670      * @param beta The scalar beta.
2671      * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.
2672      */
ZSYRK(@plo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Double2 beta, Allocation C)2673     public void ZSYRK(@Uplo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Double2 beta, Allocation C) {
2674         validateTranspose(Trans);
2675         validateUplo(Uplo);
2676         validateL3(Element.F64_2(mRS), Trans, 0, 0, A, null, C);
2677         int K = -1;
2678         if (Trans != NO_TRANSPOSE) {
2679             K = A.getType().getY();
2680         } else {
2681             K = A.getType().getX();
2682         }
2683         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zsyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, A.getID(mRS), 0, beta.x, beta.y,
2684                                    C.getID(mRS), 0, 0, 0, 0);
2685     }
2686 
validateSYR2K(Element e, @Transpose int Trans, Allocation A, Allocation B, Allocation C)2687     static void validateSYR2K(Element e, @Transpose int Trans, Allocation A, Allocation B, Allocation C) {
2688         validateTranspose(Trans);
2689         if (!A.getType().getElement().isCompatible(e) ||
2690             !B.getType().getElement().isCompatible(e) ||
2691             !C.getType().getElement().isCompatible(e)) {
2692             throw new RSRuntimeException("Called BLAS with wrong Element type");
2693         }
2694         int Cdim = -1;
2695         // A is n x k if no transpose, k x n if transpose
2696         // C is n x n
2697         if (Trans == TRANSPOSE) {
2698             // check columns versus C
2699             Cdim = A.getType().getX();
2700         } else {
2701             // check rows versus C
2702             Cdim = A.getType().getY();
2703         }
2704         if (C.getType().getX() != Cdim || C.getType().getY() != Cdim) {
2705             throw new RSRuntimeException("Invalid symmetric matrix in SYR2K");
2706         }
2707         // A dims == B dims
2708         if (A.getType().getX() != B.getType().getX() || A.getType().getY() != B.getType().getY()) {
2709             throw new RSRuntimeException("Invalid A and B in SYR2K");
2710         }
2711     }
2712 
2713     /**
2714      * SSYR2K performs one of the symmetric rank 2k operations
2715      * C := alpha*A*B**T + alpha*B*A**T + beta*C   or   C := alpha*A**T*B + alpha*B**T*A + beta*C
2716      *
2717      * Details: http://www.netlib.org/lapack/explore-html/df/d3d/ssyr2k_8f.html
2718      *
2719      * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
2720      * @param Trans The type of transpose applied to the operation.
2721      * @param alpha The scalar alpha.
2722      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
2723      * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}.
2724      * @param beta The scalar beta.
2725      * @param C The input allocation contains matrix C, supported elements type {@link Element#F32}.
2726      */
SSYR2K(@plo int Uplo, @Transpose int Trans, float alpha, Allocation A, Allocation B, float beta, Allocation C)2727     public void SSYR2K(@Uplo int Uplo, @Transpose int Trans, float alpha, Allocation A, Allocation B, float beta, Allocation C) {
2728         validateUplo(Uplo);
2729         validateSYR2K(Element.F32(mRS), Trans, A, B, C);
2730         int K = -1;
2731         if (Trans != NO_TRANSPOSE) {
2732             K = A.getType().getY();
2733         } else {
2734             K = A.getType().getX();
2735         }
2736         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, A.getID(mRS), B.getID(mRS), beta, C.getID(mRS), 0, 0, 0, 0);
2737     }
2738 
2739     /**
2740      * DSYR2K performs one of the symmetric rank 2k operations
2741      * C := alpha*A*B**T + alpha*B*A**T + beta*C   or   C := alpha*A**T*B + alpha*B**T*A + beta*C
2742      *
2743      * Details: http://www.netlib.org/lapack/explore-html/d1/dec/dsyr2k_8f.html
2744      *
2745      * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
2746      * @param Trans The type of transpose applied to the operation.
2747      * @param alpha The scalar alpha.
2748      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
2749      * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}.
2750      * @param beta The scalar beta.
2751      * @param C The input allocation contains matrix C, supported elements type {@link Element#F64}.
2752      */
DSYR2K(@plo int Uplo, @Transpose int Trans, double alpha, Allocation A, Allocation B, double beta, Allocation C)2753     public void DSYR2K(@Uplo int Uplo, @Transpose int Trans, double alpha, Allocation A, Allocation B, double beta, Allocation C) {
2754         validateUplo(Uplo);
2755         validateSYR2K(Element.F64(mRS), Trans, A, B, C);
2756         int K = -1;
2757         if (Trans != NO_TRANSPOSE) {
2758             K = A.getType().getY();
2759         } else {
2760             K = A.getType().getX();
2761         }
2762         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, A.getID(mRS), B.getID(mRS), beta, C.getID(mRS), 0, 0, 0, 0);
2763     }
2764 
2765     /**
2766      * CSYR2K performs one of the symmetric rank 2k operations
2767      * C := alpha*A*B**T + alpha*B*A**T + beta*C   or   C := alpha*A**T*B + alpha*B**T*A + beta*C
2768      *
2769      * Details: http://www.netlib.org/lapack/explore-html/de/d7e/csyr2k_8f.html
2770      *
2771      * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
2772      * @param Trans The type of transpose applied to the operation.
2773      * @param alpha The scalar alpha.
2774      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
2775      * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.
2776      * @param beta The scalar beta.
2777      * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.
2778      */
CSYR2K(@plo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Allocation B, Float2 beta, Allocation C)2779     public void CSYR2K(@Uplo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Allocation B, Float2 beta, Allocation C) {
2780         validateUplo(Uplo);
2781         validateSYR2K(Element.F32_2(mRS), Trans, A, B, C);
2782         int K = -1;
2783         if (Trans != NO_TRANSPOSE) {
2784             K = A.getType().getY();
2785         } else {
2786             K = A.getType().getX();
2787         }
2788         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_csyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0);
2789     }
2790 
2791     /**
2792      * ZSYR2K performs one of the symmetric rank 2k operations
2793      * C := alpha*A*B**T + alpha*B*A**T + beta*C   or   C := alpha*A**T*B + alpha*B**T*A + beta*C
2794      *
2795      * Details: http://www.netlib.org/lapack/explore-html/df/d20/zsyr2k_8f.html
2796      *
2797      * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
2798      * @param Trans The type of transpose applied to the operation.
2799      * @param alpha The scalar alpha.
2800      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2801      * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.
2802      * @param beta The scalar beta.
2803      * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.
2804      */
ZSYR2K(@plo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Allocation B, Double2 beta, Allocation C)2805     public void ZSYR2K(@Uplo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Allocation B, Double2 beta, Allocation C) {
2806         validateUplo(Uplo);
2807         validateSYR2K(Element.F64_2(mRS), Trans, A, B, C);
2808         int K = -1;
2809         if (Trans != NO_TRANSPOSE) {
2810             K = A.getType().getY();
2811         } else {
2812             K = A.getType().getX();
2813         }
2814         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zsyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0);
2815     }
2816 
validateTRMM(Element e, @Side int Side, @Transpose int TransA, Allocation A, Allocation B)2817     static void validateTRMM(Element e, @Side int Side, @Transpose int TransA, Allocation A, Allocation B) {
2818         validateSide(Side);
2819         validateTranspose(TransA);
2820         int aM = -1, aN = -1, bM = -1, bN = -1;
2821         if (!A.getType().getElement().isCompatible(e) ||
2822             !B.getType().getElement().isCompatible(e)) {
2823             throw new RSRuntimeException("Called BLAS with wrong Element type");
2824         }
2825 
2826         aM = A.getType().getY();
2827         aN = A.getType().getX();
2828         if (aM != aN) {
2829             throw new RSRuntimeException("Called TRMM with a non-symmetric matrix A");
2830         }
2831 
2832         bM = B.getType().getY();
2833         bN = B.getType().getX();
2834         if (Side == LEFT) {
2835             if (aN != bM) {
2836                 throw new RSRuntimeException("Called TRMM with invalid matrices");
2837             }
2838         } else {
2839             if (bN != aM) {
2840                 throw new RSRuntimeException("Called TRMM with invalid matrices");
2841             }
2842         }
2843     }
2844 
2845     /**
2846      * STRMM performs one of the matrix-matrix operations
2847      * B := alpha*op(A)*B   or   B := alpha*B*op(A)
2848      * op(A) is one of  op(A) = A  or  op(A) = A**T
2849      *
2850      * Details: http://www.netlib.org/lapack/explore-html/df/d01/strmm_8f.html
2851      *
2852      * @param Side Specifies whether the symmetric matrix A appears on the left or right.
2853      * @param Uplo Specifies whether matrix A is upper or lower triangular.
2854      * @param TransA The type of transpose applied to matrix A.
2855      * @param Diag Specifies whether or not A is unit triangular.
2856      * @param alpha The scalar alpha.
2857      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
2858      * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}.
2859      */
STRMM(@ide int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, float alpha, Allocation A, Allocation B)2860     public void STRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, float alpha, Allocation A, Allocation B) {
2861         validateUplo(Uplo);
2862         validateDiag(Diag);
2863         validateTRMM(Element.F32(mRS), Side, TransA, A, B);
2864         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
2865                                         alpha, A.getID(mRS), B.getID(mRS), 0.f, 0, 0, 0, 0, 0);
2866     }
2867 
2868     /**
2869      * DTRMM performs one of the matrix-matrix operations
2870      * B := alpha*op(A)*B   or   B := alpha*B*op(A)
2871      * op(A) is one of  op(A) = A  or  op(A) = A**T
2872      *
2873      * Details: http://www.netlib.org/lapack/explore-html/dd/d19/dtrmm_8f.html
2874      *
2875      * @param Side Specifies whether the symmetric matrix A appears on the left or right.
2876      * @param Uplo Specifies whether matrix A is upper or lower triangular.
2877      * @param TransA The type of transpose applied to matrix A.
2878      * @param Diag Specifies whether or not A is unit triangular.
2879      * @param alpha The scalar alpha.
2880      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
2881      * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}.
2882      */
DTRMM(@ide int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, double alpha, Allocation A, Allocation B)2883     public void DTRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, double alpha, Allocation A, Allocation B) {
2884         validateUplo(Uplo);
2885         validateDiag(Diag);
2886         validateTRMM(Element.F64(mRS), Side, TransA, A, B);
2887         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
2888                                         alpha, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0);
2889     }
2890 
2891     /**
2892      * CTRMM performs one of the matrix-matrix operations
2893      * B := alpha*op(A)*B   or   B := alpha*B*op(A)
2894      * op(A) is one of  op(A) = A  or  op(A) = A**T  or  op(A) = A**H
2895      *
2896      * Details: http://www.netlib.org/lapack/explore-html/d4/d9b/ctrmm_8f.html
2897      *
2898      * @param Side Specifies whether the symmetric matrix A appears on the left or right.
2899      * @param Uplo Specifies whether matrix A is upper or lower triangular.
2900      * @param TransA The type of transpose applied to matrix A.
2901      * @param Diag Specifies whether or not A is unit triangular.
2902      * @param alpha The scalar alpha.
2903      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
2904      * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.
2905      */
CTRMM(@ide int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Float2 alpha, Allocation A, Allocation B)2906     public void CTRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Float2 alpha, Allocation A, Allocation B) {
2907         validateUplo(Uplo);
2908         validateDiag(Diag);
2909         validateTRMM(Element.F32_2(mRS), Side, TransA, A, B);
2910         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
2911                                          alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0, 0);
2912     }
2913 
2914     /**
2915      * ZTRMM performs one of the matrix-matrix operations
2916      * B := alpha*op(A)*B   or   B := alpha*B*op(A)
2917      * op(A) is one of  op(A) = A  or  op(A) = A**T  or  op(A) = A**H
2918      *
2919      * Details: http://www.netlib.org/lapack/explore-html/d8/de1/ztrmm_8f.html
2920      *
2921      * @param Side Specifies whether the symmetric matrix A appears on the left or right.
2922      * @param Uplo Specifies whether matrix A is upper or lower triangular.
2923      * @param TransA The type of transpose applied to matrix A.
2924      * @param Diag Specifies whether or not A is unit triangular.
2925      * @param alpha The scalar alpha.
2926      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
2927      * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.
2928      */
ZTRMM(@ide int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Double2 alpha, Allocation A, Allocation B)2929     public void ZTRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Double2 alpha, Allocation A, Allocation B) {
2930         validateUplo(Uplo);
2931         validateDiag(Diag);
2932         validateTRMM(Element.F64_2(mRS), Side, TransA, A, B);
2933         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
2934                                    alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0, 0);
2935     }
2936 
validateTRSM(Element e, @Side int Side, @Transpose int TransA, Allocation A, Allocation B)2937     static void validateTRSM(Element e, @Side int Side, @Transpose int TransA, Allocation A, Allocation B) {
2938         int adim = -1, bM = -1, bN = -1;
2939         validateSide(Side);
2940         validateTranspose(TransA);
2941         if (!A.getType().getElement().isCompatible(e) ||
2942             !B.getType().getElement().isCompatible(e)) {
2943             throw new RSRuntimeException("Called BLAS with wrong Element type");
2944         }
2945         adim = A.getType().getX();
2946         if (adim != A.getType().getY()) {
2947             // this may be unnecessary, the restriction could potentially be relaxed
2948             // A needs to contain at least that symmetric matrix but could theoretically be larger
2949             // for now we assume adapters are sufficient, will reevaluate in the future
2950             throw new RSRuntimeException("Called TRSM with a non-symmetric matrix A");
2951         }
2952         bM = B.getType().getY();
2953         bN = B.getType().getX();
2954         if (Side == LEFT) {
2955             // A is M*M
2956             if (adim != bM) {
2957                 throw new RSRuntimeException("Called TRSM with invalid matrix dimensions");
2958             }
2959         } else {
2960             // A is N*N
2961             if (adim != bN) {
2962                 throw new RSRuntimeException("Called TRSM with invalid matrix dimensions");
2963             }
2964         }
2965     }
2966 
2967     /**
2968      * STRSM solves one of the matrix equations
2969      * op(A)*X := alpha*B   or   X*op(A) := alpha*B
2970      * op(A) is one of  op(A) = A  or  op(A) = A**T
2971      *
2972      * Details: http://www.netlib.org/lapack/explore-html/d2/d8b/strsm_8f.html
2973      *
2974      * @param Side Specifies whether the symmetric matrix A appears on the left or right.
2975      * @param Uplo Specifies whether matrix A is upper or lower triangular.
2976      * @param TransA The type of transpose applied to matrix A.
2977      * @param Diag Specifies whether or not A is unit triangular.
2978      * @param alpha The scalar alpha.
2979      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}.
2980      * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}.
2981      */
STRSM(@ide int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, float alpha, Allocation A, Allocation B)2982     public void STRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, float alpha, Allocation A, Allocation B) {
2983         validateUplo(Uplo);
2984         validateDiag(Diag);
2985         validateTRSM(Element.F32(mRS), Side, TransA, A, B);
2986         mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
2987                                         alpha, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0);
2988     }
2989 
2990     /**
2991      * DTRSM solves one of the matrix equations
2992      * op(A)*X := alpha*B   or   X*op(A) := alpha*B
2993      * op(A) is one of  op(A) = A  or  op(A) = A**T
2994      *
2995      * Details: http://www.netlib.org/lapack/explore-html/de/da7/dtrsm_8f.html
2996      *
2997      * @param Side Specifies whether the symmetric matrix A appears on the left or right.
2998      * @param Uplo Specifies whether matrix A is upper or lower triangular.
2999      * @param TransA The type of transpose applied to matrix A.
3000      * @param Diag Specifies whether or not A is unit triangular.
3001      * @param alpha The scalar alpha.
3002      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}.
3003      * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}.
3004      */
DTRSM(@ide int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, double alpha, Allocation A, Allocation B)3005     public void DTRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, double alpha, Allocation A, Allocation B) {
3006         validateUplo(Uplo);
3007         validateDiag(Diag);
3008         validateTRSM(Element.F64(mRS), Side, TransA, A, B);
3009         mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
3010                                         alpha, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0);
3011     }
3012 
3013     /**
3014      * CTRSM solves one of the matrix equations
3015      * op(A)*X := alpha*B   or   X*op(A) := alpha*B
3016      * op(A) is one of  op(A) = A  or  op(A) = A**T  or  op(A) = A**H
3017      *
3018      * Details: http://www.netlib.org/lapack/explore-html/de/d30/ctrsm_8f.html
3019      *
3020      * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3021      * @param Uplo Specifies whether matrix A is upper or lower triangular.
3022      * @param TransA The type of transpose applied to matrix A.
3023      * @param Diag Specifies whether or not A is unit triangular.
3024      * @param alpha The scalar alpha.
3025      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
3026      * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.
3027      */
CTRSM(@ide int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Float2 alpha, Allocation A, Allocation B)3028     public void CTRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Float2 alpha, Allocation A, Allocation B) {
3029         validateUplo(Uplo);
3030         validateDiag(Diag);
3031         validateTRSM(Element.F32_2(mRS), Side, TransA, A, B);
3032         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
3033                                          alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0, 0);
3034     }
3035 
3036     /**
3037      * ZTRSM solves one of the matrix equations
3038      * op(A)*X := alpha*B   or   X*op(A) := alpha*B
3039      * op(A) is one of  op(A) = A  or  op(A) = A**T  or  op(A) = A**H
3040      *
3041      * Details: http://www.netlib.org/lapack/explore-html/d1/d39/ztrsm_8f.html
3042      *
3043      * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3044      * @param Uplo Specifies whether matrix A is upper or lower triangular.
3045      * @param TransA The type of transpose applied to matrix A.
3046      * @param Diag Specifies whether or not A is unit triangular.
3047      * @param alpha The scalar alpha.
3048      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
3049      * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.
3050      */
ZTRSM(@ide int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Double2 alpha, Allocation A, Allocation B)3051     public void ZTRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Double2 alpha, Allocation A, Allocation B) {
3052         validateUplo(Uplo);
3053         validateDiag(Diag);
3054         validateTRSM(Element.F64_2(mRS), Side, TransA, A, B);
3055         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0,
3056                                    alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0, 0);
3057     }
3058 
validateHEMM(Element e, @Side int Side, Allocation A, Allocation B, Allocation C)3059     static void validateHEMM(Element e, @Side int Side, Allocation A, Allocation B, Allocation C) {
3060         validateSide(Side);
3061 
3062         if (!A.getType().getElement().isCompatible(e) ||
3063             !B.getType().getElement().isCompatible(e) ||
3064             !C.getType().getElement().isCompatible(e)) {
3065             throw new RSRuntimeException("Called BLAS with wrong Element type");
3066         }
3067 
3068         // A must be square; can potentially be relaxed similar to TRSM
3069         int adim = A.getType().getX();
3070         if (adim != A.getType().getY()) {
3071             throw new RSRuntimeException("Called HEMM with non-square A");
3072         }
3073         if ((Side == LEFT && adim != B.getType().getY()) ||
3074             (Side == RIGHT && adim != B.getType().getX())) {
3075             throw new RSRuntimeException("Called HEMM with invalid B");
3076         }
3077         if (B.getType().getX() != C.getType().getX() ||
3078             B.getType().getY() != C.getType().getY()) {
3079             throw new RSRuntimeException("Called HEMM with mismatched B and C");
3080         }
3081     }
3082 
3083     /**
3084      * CHEMM performs one of the matrix-matrix operations
3085      * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
3086      *
3087      * Details: http://www.netlib.org/lapack/explore-html/d3/d66/chemm_8f.html
3088      *
3089      * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3090      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
3091      * @param alpha The scalar alpha.
3092      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
3093      * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.
3094      * @param beta The scalar beta.
3095      * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.
3096      */
CHEMM(@ide int Side, @Uplo int Uplo, Float2 alpha, Allocation A, Allocation B, Float2 beta, Allocation C)3097     public void CHEMM(@Side int Side, @Uplo int Uplo, Float2 alpha, Allocation A, Allocation B, Float2 beta, Allocation C) {
3098         validateUplo(Uplo);
3099         validateHEMM(Element.F32_2(mRS), Side, A, B, C);
3100         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chemm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0,
3101                                          alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0);
3102     }
3103 
3104     /**
3105      * ZHEMM performs one of the matrix-matrix operations
3106      * C := alpha*A*B + beta*C   or   C := alpha*B*A + beta*C
3107      *
3108      * Details: http://www.netlib.org/lapack/explore-html/d6/d3e/zhemm_8f.html
3109      *
3110      * @param Side Specifies whether the symmetric matrix A appears on the left or right.
3111      * @param Uplo Specifies whether the upper or lower triangular part is to be referenced.
3112      * @param alpha The scalar alpha.
3113      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
3114      * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.
3115      * @param beta The scalar beta.
3116      * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.
3117      */
ZHEMM(@ide int Side, @Uplo int Uplo, Double2 alpha, Allocation A, Allocation B, Double2 beta, Allocation C)3118     public void ZHEMM(@Side int Side, @Uplo int Uplo, Double2 alpha, Allocation A, Allocation B, Double2 beta, Allocation C) {
3119         validateUplo(Uplo);
3120         validateHEMM(Element.F64_2(mRS), Side, A, B, C);
3121         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhemm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0,
3122                                    alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0);
3123     }
3124 
validateHERK(Element e, @Transpose int Trans, Allocation A, Allocation C)3125     static void validateHERK(Element e, @Transpose int Trans, Allocation A, Allocation C) {
3126         if (!A.getType().getElement().isCompatible(e) ||
3127             !C.getType().getElement().isCompatible(e)) {
3128             throw new RSRuntimeException("Called BLAS with wrong Element type");
3129         }
3130         validateConjTranspose(Trans);
3131         int cdim = C.getType().getX();
3132         if (cdim != C.getType().getY()) {
3133             throw new RSRuntimeException("Called HERK with non-square C");
3134         }
3135         if (Trans == NO_TRANSPOSE) {
3136             if (cdim != A.getType().getY()) {
3137                 throw new RSRuntimeException("Called HERK with invalid A");
3138             }
3139         } else {
3140             if (cdim != A.getType().getX()) {
3141                 throw new RSRuntimeException("Called HERK with invalid A");
3142             }
3143         }
3144     }
3145 
3146     /**
3147      * CHERK performs one of the hermitian rank k operations
3148      * C := alpha*A*A**H + beta*C   or   C := alpha*A**H*A + beta*C
3149      *
3150      * Details: http://www.netlib.org/lapack/explore-html/d8/d52/cherk_8f.html
3151      *
3152      * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3153      * @param Trans The type of transpose applied to the operation.
3154      * @param alpha The scalar alpha.
3155      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
3156      * @param beta The scalar beta.
3157      * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.
3158      */
CHERK(@plo int Uplo, @Transpose int Trans, float alpha, Allocation A, float beta, Allocation C)3159     public void CHERK(@Uplo int Uplo, @Transpose int Trans, float alpha, Allocation A, float beta, Allocation C) {
3160         validateUplo(Uplo);
3161         validateHERK(Element.F32_2(mRS), Trans, A, C);
3162         int k = 0;
3163         if (Trans == CONJ_TRANSPOSE) {
3164             k = A.getType().getY();
3165         } else {
3166             k = A.getType().getX();
3167         }
3168         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cherk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k,
3169                                          alpha, 0, A.getID(mRS), 0, beta, 0, C.getID(mRS), 0, 0, 0, 0);
3170     }
3171 
3172     /**
3173      * ZHERK performs one of the hermitian rank k operations
3174      * C := alpha*A*A**H + beta*C   or   C := alpha*A**H*A + beta*C
3175      *
3176      * Details: http://www.netlib.org/lapack/explore-html/d1/db1/zherk_8f.html
3177      *
3178      * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3179      * @param Trans The type of transpose applied to the operation.
3180      * @param alpha The scalar alpha.
3181      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
3182      * @param beta The scalar beta.
3183      * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.
3184      */
ZHERK(@plo int Uplo, @Transpose int Trans, double alpha, Allocation A, double beta, Allocation C)3185     public void ZHERK(@Uplo int Uplo, @Transpose int Trans, double alpha, Allocation A, double beta, Allocation C) {
3186         validateUplo(Uplo);
3187         validateHERK(Element.F64_2(mRS), Trans, A, C);
3188         int k = 0;
3189         if (Trans == CONJ_TRANSPOSE) {
3190             k = A.getType().getY();
3191         } else {
3192             k = A.getType().getX();
3193         }
3194         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zherk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k,
3195                                    alpha, 0, A.getID(mRS), 0, beta, 0, C.getID(mRS), 0, 0, 0, 0);
3196     }
3197 
validateHER2K(Element e, @Transpose int Trans, Allocation A, Allocation B, Allocation C)3198     static void validateHER2K(Element e, @Transpose int Trans, Allocation A, Allocation B, Allocation C) {
3199         if (!A.getType().getElement().isCompatible(e) ||
3200             !B.getType().getElement().isCompatible(e) ||
3201             !C.getType().getElement().isCompatible(e)) {
3202             throw new RSRuntimeException("Called BLAS with wrong Element type");
3203         }
3204         validateConjTranspose(Trans);
3205         int cdim = C.getType().getX();
3206         if (cdim != C.getType().getY()) {
3207             throw new RSRuntimeException("Called HER2K with non-square C");
3208         }
3209         if (Trans == NO_TRANSPOSE) {
3210             if (A.getType().getY() != cdim) {
3211                 throw new RSRuntimeException("Called HER2K with invalid matrices");
3212             }
3213         } else {
3214             if (A.getType().getX() != cdim) {
3215                 throw new RSRuntimeException("Called HER2K with invalid matrices");
3216             }
3217         }
3218         if (A.getType().getX() != B.getType().getX() || A.getType().getY() != B.getType().getY()) {
3219             throw new RSRuntimeException("Called HER2K with invalid A and B matrices");
3220         }
3221     }
3222 
3223     /**
3224      * CHER2K performs one of the hermitian rank 2k operations
3225      * C := alpha*A*B**H + conjg( alpha )*B*A**H + beta*C   or   C := alpha*A**H*B + conjg( alpha )*B**H*A + beta*C
3226      *
3227      * Details: http://www.netlib.org/lapack/explore-html/d1/d82/cher2k_8f.html
3228      *
3229      * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3230      * @param Trans The type of transpose applied to the operation.
3231      * @param alpha The scalar alpha.
3232      * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}.
3233      * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}.
3234      * @param beta The scalar beta.
3235      * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}.
3236      */
CHER2K(@plo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Allocation B, float beta, Allocation C)3237     public void CHER2K(@Uplo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Allocation B, float beta, Allocation C) {
3238         validateUplo(Uplo);
3239         validateHER2K(Element.F32_2(mRS), Trans, A, B, C);
3240         int k = 0;
3241         if (Trans == NO_TRANSPOSE) {
3242             k = A.getType().getX();
3243         } else {
3244             k = A.getType().getY();
3245         }
3246         mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cher2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k, alpha.x, alpha.y,
3247                                          A.getID(mRS), B.getID(mRS), beta, 0, C.getID(mRS), 0, 0, 0, 0);
3248     }
3249 
3250     /**
3251      * ZHER2K performs one of the hermitian rank 2k operations
3252      * C := alpha*A*B**H + conjg( alpha )*B*A**H + beta*C   or   C := alpha*A**H*B + conjg( alpha )*B**H*A + beta*C
3253      *
3254      * Details: http://www.netlib.org/lapack/explore-html/d7/dfa/zher2k_8f.html
3255      *
3256      * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced.
3257      * @param Trans The type of transpose applied to the operation.
3258      * @param alpha The scalar alpha.
3259      * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}.
3260      * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}.
3261      * @param beta The scalar beta.
3262      * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}.
3263      */
ZHER2K(@plo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Allocation B, double beta, Allocation C)3264     public void ZHER2K(@Uplo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Allocation B, double beta, Allocation C) {
3265         validateUplo(Uplo);
3266         validateHER2K(Element.F64_2(mRS), Trans, A, B, C);
3267         int k = 0;
3268         if (Trans == NO_TRANSPOSE) {
3269             k = A.getType().getX();
3270         } else {
3271             k = A.getType().getY();
3272         }
3273         mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zher2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k, alpha.x, alpha.y,
3274                                    A.getID(mRS), B.getID(mRS), beta, 0, C.getID(mRS), 0, 0, 0, 0);
3275     }
3276 
3277 
3278     /**
3279      * 8-bit GEMM-like operation for neural networks: C = A * Transpose(B)
3280      * Calculations are done in 1.10.21 fixed-point format for the final output,
3281      * just before there's a shift down to drop the fractional parts. The output
3282      * values are gated to 0 to 255 to fit in a byte, but the 10-bit format
3283      * gives some headroom to avoid wrapping around on small overflows.
3284      *
3285      * @param A The input allocation contains matrix A, supported elements type {@link Element#U8}.
3286      * @param a_offset The offset for all values in matrix A, e.g A[i,j] = A[i,j] - a_offset. Value should be from 0 to 255.
3287      * @param B The input allocation contains matrix B, supported elements type {@link Element#U8}.
3288      * @param b_offset The offset for all values in matrix B, e.g B[i,j] = B[i,j] - b_offset. Value should be from 0 to 255.
3289      * @param C The input allocation contains matrix C, supported elements type {@link Element#U8}.
3290      * @param c_offset The offset for all values in matrix C.
3291      * @param c_mult The multiplier for all values in matrix C, e.g C[i,j] = (C[i,j] + c_offset) * c_mult.
3292      **/
BNNM(Allocation A, int a_offset, Allocation B, int b_offset, Allocation C, int c_offset, int c_mult)3293     public void BNNM(Allocation A, int a_offset, Allocation B, int b_offset, Allocation C, int c_offset, int c_mult) {
3294         validateL3(Element.U8(mRS), NO_TRANSPOSE, TRANSPOSE, 0, A, B, C);
3295 
3296         if (a_offset < 0 || a_offset > 255) {
3297             throw new RSRuntimeException("Invalid a_offset passed to BNNM");
3298         }
3299         if (b_offset < 0 || b_offset > 255) {
3300             throw new RSRuntimeException("Invalid b_offset passed to BNNM");
3301         }
3302         int M = -1, N = -1, K = -1;
3303         M = A.getType().getY();
3304         N = B.getType().getY();
3305         K = A.getType().getX();
3306 
3307 
3308         mRS.nScriptIntrinsicBLAS_BNNM(getID(mRS), M, N, K, A.getID(mRS), a_offset, B.getID(mRS), b_offset, C.getID(mRS), c_offset, c_mult);
3309 
3310     }
3311 
3312 }
3313