1 /* 2 * Copyright (C) 2015 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package android.renderscript; 18 19 import android.annotation.IntDef; 20 import java.lang.annotation.Retention; 21 import java.lang.annotation.RetentionPolicy; 22 23 /** 24 * 25 * ScriptIntrinsicBLAS class provides high performance RenderScript APIs to BLAS. 26 * 27 * The BLAS (Basic Linear Algebra Subprograms) are routines that provide standard 28 * building blocks for performing basic vector and matrix operations. 29 * 30 * For detailed description of BLAS, please refer to http://www.netlib.org/blas/ 31 * 32 * @deprecated Renderscript has been deprecated in API level 31. Please refer to the <a 33 * href="https://developer.android.com/guide/topics/renderscript/migration-guide">migration 34 * guide</a> for the proposed alternatives. 35 **/ 36 @Deprecated 37 public final class ScriptIntrinsicBLAS extends ScriptIntrinsic { 38 private Allocation mLUT; 39 ScriptIntrinsicBLAS(long id, RenderScript rs)40 private ScriptIntrinsicBLAS(long id, RenderScript rs) { 41 super(id, rs); 42 } 43 44 private static final int RsBlas_sdsdot = 1; 45 private static final int RsBlas_dsdot = 2; 46 private static final int RsBlas_sdot = 3; 47 private static final int RsBlas_ddot = 4; 48 private static final int RsBlas_cdotu_sub = 5; 49 private static final int RsBlas_cdotc_sub = 6; 50 private static final int RsBlas_zdotu_sub = 7; 51 private static final int RsBlas_zdotc_sub = 8; 52 private static final int RsBlas_snrm2 = 9; 53 private static final int RsBlas_sasum = 10; 54 private static final int RsBlas_dnrm2 = 11; 55 private static final int RsBlas_dasum = 12; 56 private static final int RsBlas_scnrm2 = 13; 57 private static final int RsBlas_scasum = 14; 58 private static final int RsBlas_dznrm2 = 15; 59 private static final int RsBlas_dzasum = 16; 60 private static final int RsBlas_isamax = 17; 61 private static final int RsBlas_idamax = 18; 62 private static final int RsBlas_icamax = 19; 63 private static final int RsBlas_izamax = 20; 64 private static final int RsBlas_sswap = 21; 65 private static final int RsBlas_scopy = 22; 66 private static final int RsBlas_saxpy = 23; 67 private static final int RsBlas_dswap = 24; 68 private static final int RsBlas_dcopy = 25; 69 private static final int RsBlas_daxpy = 26; 70 private static final int RsBlas_cswap = 27; 71 private static final int RsBlas_ccopy = 28; 72 private static final int RsBlas_caxpy = 29; 73 private static final int RsBlas_zswap = 30; 74 private static final int RsBlas_zcopy = 31; 75 private static final int RsBlas_zaxpy = 32; 76 private static final int RsBlas_srotg = 33; 77 private static final int RsBlas_srotmg = 34; 78 private static final int RsBlas_srot = 35; 79 private static final int RsBlas_srotm = 36; 80 private static final int RsBlas_drotg = 37; 81 private static final int RsBlas_drotmg = 38; 82 private static final int RsBlas_drot = 39; 83 private static final int RsBlas_drotm = 40; 84 private static final int RsBlas_sscal = 41; 85 private static final int RsBlas_dscal = 42; 86 private static final int RsBlas_cscal = 43; 87 private static final int RsBlas_zscal = 44; 88 private static final int RsBlas_csscal = 45; 89 private static final int RsBlas_zdscal = 46; 90 private static final int RsBlas_sgemv = 47; 91 private static final int RsBlas_sgbmv = 48; 92 private static final int RsBlas_strmv = 49; 93 private static final int RsBlas_stbmv = 50; 94 private static final int RsBlas_stpmv = 51; 95 private static final int RsBlas_strsv = 52; 96 private static final int RsBlas_stbsv = 53; 97 private static final int RsBlas_stpsv = 54; 98 private static final int RsBlas_dgemv = 55; 99 private static final int RsBlas_dgbmv = 56; 100 private static final int RsBlas_dtrmv = 57; 101 private static final int RsBlas_dtbmv = 58; 102 private static final int RsBlas_dtpmv = 59; 103 private static final int RsBlas_dtrsv = 60; 104 private static final int RsBlas_dtbsv = 61; 105 private static final int RsBlas_dtpsv = 62; 106 private static final int RsBlas_cgemv = 63; 107 private static final int RsBlas_cgbmv = 64; 108 private static final int RsBlas_ctrmv = 65; 109 private static final int RsBlas_ctbmv = 66; 110 private static final int RsBlas_ctpmv = 67; 111 private static final int RsBlas_ctrsv = 68; 112 private static final int RsBlas_ctbsv = 69; 113 private static final int RsBlas_ctpsv = 70; 114 private static final int RsBlas_zgemv = 71; 115 private static final int RsBlas_zgbmv = 72; 116 private static final int RsBlas_ztrmv = 73; 117 private static final int RsBlas_ztbmv = 74; 118 private static final int RsBlas_ztpmv = 75; 119 private static final int RsBlas_ztrsv = 76; 120 private static final int RsBlas_ztbsv = 77; 121 private static final int RsBlas_ztpsv = 78; 122 private static final int RsBlas_ssymv = 79; 123 private static final int RsBlas_ssbmv = 80; 124 private static final int RsBlas_sspmv = 81; 125 private static final int RsBlas_sger = 82; 126 private static final int RsBlas_ssyr = 83; 127 private static final int RsBlas_sspr = 84; 128 private static final int RsBlas_ssyr2 = 85; 129 private static final int RsBlas_sspr2 = 86; 130 private static final int RsBlas_dsymv = 87; 131 private static final int RsBlas_dsbmv = 88; 132 private static final int RsBlas_dspmv = 89; 133 private static final int RsBlas_dger = 90; 134 private static final int RsBlas_dsyr = 91; 135 private static final int RsBlas_dspr = 92; 136 private static final int RsBlas_dsyr2 = 93; 137 private static final int RsBlas_dspr2 = 94; 138 private static final int RsBlas_chemv = 95; 139 private static final int RsBlas_chbmv = 96; 140 private static final int RsBlas_chpmv = 97; 141 private static final int RsBlas_cgeru = 98; 142 private static final int RsBlas_cgerc = 99; 143 private static final int RsBlas_cher = 100; 144 private static final int RsBlas_chpr = 101; 145 private static final int RsBlas_cher2 = 102; 146 private static final int RsBlas_chpr2 = 103; 147 private static final int RsBlas_zhemv = 104; 148 private static final int RsBlas_zhbmv = 105; 149 private static final int RsBlas_zhpmv = 106; 150 private static final int RsBlas_zgeru = 107; 151 private static final int RsBlas_zgerc = 108; 152 private static final int RsBlas_zher = 109; 153 private static final int RsBlas_zhpr = 110; 154 private static final int RsBlas_zher2 = 111; 155 private static final int RsBlas_zhpr2 = 112; 156 private static final int RsBlas_sgemm = 113; 157 private static final int RsBlas_ssymm = 114; 158 private static final int RsBlas_ssyrk = 115; 159 private static final int RsBlas_ssyr2k = 116; 160 private static final int RsBlas_strmm = 117; 161 private static final int RsBlas_strsm = 118; 162 private static final int RsBlas_dgemm = 119; 163 private static final int RsBlas_dsymm = 120; 164 private static final int RsBlas_dsyrk = 121; 165 private static final int RsBlas_dsyr2k = 122; 166 private static final int RsBlas_dtrmm = 123; 167 private static final int RsBlas_dtrsm = 124; 168 private static final int RsBlas_cgemm = 125; 169 private static final int RsBlas_csymm = 126; 170 private static final int RsBlas_csyrk = 127; 171 private static final int RsBlas_csyr2k = 128; 172 private static final int RsBlas_ctrmm = 129; 173 private static final int RsBlas_ctrsm = 130; 174 private static final int RsBlas_zgemm = 131; 175 private static final int RsBlas_zsymm = 132; 176 private static final int RsBlas_zsyrk = 133; 177 private static final int RsBlas_zsyr2k = 134; 178 private static final int RsBlas_ztrmm = 135; 179 private static final int RsBlas_ztrsm = 136; 180 private static final int RsBlas_chemm = 137; 181 private static final int RsBlas_cherk = 138; 182 private static final int RsBlas_cher2k = 139; 183 private static final int RsBlas_zhemm = 140; 184 private static final int RsBlas_zherk = 141; 185 private static final int RsBlas_zher2k = 142; 186 187 // BLAS extensions start here 188 private static final int RsBlas_bnnm = 1000; 189 190 /** 191 * Create an intrinsic to access BLAS subroutines. 192 * 193 * @param rs The RenderScript context 194 * @return ScriptIntrinsicBLAS 195 */ create(RenderScript rs)196 public static ScriptIntrinsicBLAS create(RenderScript rs) { 197 long id = rs.nScriptIntrinsicCreate(13, Element.U32(rs).getID(rs)); 198 return new ScriptIntrinsicBLAS(id, rs); 199 } 200 201 /** 202 * @hide 203 */ 204 @IntDef({NO_TRANSPOSE, TRANSPOSE, CONJ_TRANSPOSE}) 205 @Retention(RetentionPolicy.SOURCE) 206 public @interface Transpose {} 207 208 /** 209 * @hide 210 */ 211 @IntDef({UPPER, LOWER}) 212 @Retention(RetentionPolicy.SOURCE) 213 public @interface Uplo {} 214 215 /** 216 * @hide 217 */ 218 @IntDef({NON_UNIT, UNIT}) 219 @Retention(RetentionPolicy.SOURCE) 220 public @interface Diag {} 221 222 /** 223 * @hide 224 */ 225 @IntDef({LEFT, RIGHT}) 226 @Retention(RetentionPolicy.SOURCE) 227 public @interface Side {} 228 229 public static final int NO_TRANSPOSE = 111; 230 public static final int TRANSPOSE = 112; 231 public static final int CONJ_TRANSPOSE = 113; 232 233 public static final int UPPER = 121; 234 public static final int LOWER = 122; 235 236 public static final int NON_UNIT = 131; 237 public static final int UNIT = 132; 238 239 public static final int LEFT = 141; 240 public static final int RIGHT = 142; 241 validateSide(@ide int Side)242 static void validateSide(@Side int Side) { 243 if (Side != LEFT && Side != RIGHT) { 244 throw new RSRuntimeException("Invalid side passed to BLAS"); 245 } 246 } 247 validateTranspose(@ranspose int Trans)248 static void validateTranspose(@Transpose int Trans) { 249 if (Trans != NO_TRANSPOSE && Trans != TRANSPOSE && 250 Trans != CONJ_TRANSPOSE) { 251 throw new RSRuntimeException("Invalid transpose passed to BLAS"); 252 } 253 } 254 validateConjTranspose(@ranspose int Trans)255 static void validateConjTranspose(@Transpose int Trans) { 256 if (Trans != NO_TRANSPOSE && 257 Trans != CONJ_TRANSPOSE) { 258 throw new RSRuntimeException("Invalid transpose passed to BLAS"); 259 } 260 } 261 validateDiag(@iag int Diag)262 static void validateDiag(@Diag int Diag) { 263 if (Diag != NON_UNIT && Diag != UNIT) { 264 throw new RSRuntimeException("Invalid diag passed to BLAS"); 265 } 266 } 267 validateUplo(@plo int Uplo)268 static void validateUplo(@Uplo int Uplo) { 269 if (Uplo != UPPER && Uplo != LOWER) { 270 throw new RSRuntimeException("Invalid uplo passed to BLAS"); 271 } 272 } 273 274 275 /** 276 * Level 2 BLAS 277 */ 278 validateGEMV(Element e, int TransA, Allocation A, Allocation X, int incX, Allocation Y, int incY)279 static void validateGEMV(Element e, int TransA, Allocation A, Allocation X, int incX, Allocation Y, int incY) { 280 validateTranspose(TransA); 281 int M = A.getType().getY(); 282 int N = A.getType().getX(); 283 if (!A.getType().getElement().isCompatible(e) || 284 !X.getType().getElement().isCompatible(e) || 285 !Y.getType().getElement().isCompatible(e)) { 286 throw new RSRuntimeException("Called BLAS with wrong Element type"); 287 } 288 if (X.getType().getY() > 1 || Y.getType().getY() > 1) { 289 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 290 } 291 292 if (incX <= 0 || incY <= 0) { 293 throw new RSRuntimeException("Vector increments must be greater than 0"); 294 } 295 int expectedXDim = -1, expectedYDim = -1; 296 if (TransA == NO_TRANSPOSE) { 297 expectedXDim = 1 + (N - 1) * incX; 298 expectedYDim = 1 + (M - 1) * incY; 299 } else { 300 expectedXDim = 1 + (M - 1) * incX; 301 expectedYDim = 1 + (N - 1) * incY; 302 } 303 if (X.getType().getX() != expectedXDim || 304 Y.getType().getX() != expectedYDim) { 305 throw new RSRuntimeException("Incorrect vector dimensions for GEMV"); 306 } 307 } 308 309 /** 310 * SGEMV performs one of the matrix-vector operations 311 * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y 312 * 313 * Details: http://www.netlib.org/lapack/explore-html/db/d58/sgemv_8f.html 314 * 315 * @param TransA The type of transpose applied to matrix A. 316 * @param alpha The scalar alpha. 317 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 318 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 319 * @param incX The increment for the elements of vector x, must be larger than zero. 320 * @param beta The scalar beta. 321 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}. 322 * @param incY The increment for the elements of vector y, must be larger than zero. 323 */ SGEMV(@ranspose int TransA, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY)324 public void SGEMV(@Transpose int TransA, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) { 325 validateGEMV(Element.F32(mRS), TransA, A, X, incX, Y, incY); 326 int M = A.getType().getY(); 327 int N = A.getType().getX(); 328 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0); 329 } 330 331 /** 332 * DGEMV performs one of the matrix-vector operations 333 * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y 334 * 335 * Details: http://www.netlib.org/lapack/explore-html/dc/da8/dgemv_8f.html 336 * 337 * @param TransA The type of transpose applied to matrix A. 338 * @param alpha The scalar alpha. 339 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 340 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 341 * @param incX The increment for the elements of vector x, must be larger than zero. 342 * @param beta The scalar beta. 343 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}. 344 * @param incY The increment for the elements of vector y, must be larger than zero. 345 */ DGEMV(@ranspose int TransA, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY)346 public void DGEMV(@Transpose int TransA, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) { 347 validateGEMV(Element.F64(mRS), TransA, A, X, incX, Y, incY); 348 int M = A.getType().getY(); 349 int N = A.getType().getX(); 350 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0); 351 } 352 353 /** 354 * CGEMV performs one of the matrix-vector operations 355 * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y or y := alpha*A**H*x + beta*y 356 * 357 * Details: http://www.netlib.org/lapack/explore-html/d4/d8a/cgemv_8f.html 358 * 359 * @param TransA The type of transpose applied to matrix A. 360 * @param alpha The scalar alpha. 361 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 362 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 363 * @param incX The increment for the elements of vector x, must be larger than zero. 364 * @param beta The scalar beta. 365 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. 366 * @param incY The increment for the elements of vector y, must be larger than zero. 367 */ CGEMV(@ranspose int TransA, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY)368 public void CGEMV(@Transpose int TransA, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) { 369 validateGEMV(Element.F32_2(mRS), TransA, A, X, incX, Y, incY); 370 int M = A.getType().getY(); 371 int N = A.getType().getX(); 372 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0); 373 } 374 375 /** 376 * ZGEMV performs one of the matrix-vector operations 377 * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y or y := alpha*A**H*x + beta*y 378 * 379 * Details: http://www.netlib.org/lapack/explore-html/db/d40/zgemv_8f.html 380 * 381 * @param TransA The type of transpose applied to matrix A. 382 * @param alpha The scalar alpha. 383 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 384 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 385 * @param incX The increment for the elements of vector x, must be larger than zero. 386 * @param beta The scalar beta. 387 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. 388 * @param incY The increment for the elements of vector y, must be larger than zero. 389 */ ZGEMV(@ranspose int TransA, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY)390 public void ZGEMV(@Transpose int TransA, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) { 391 validateGEMV(Element.F64_2(mRS), TransA, A, X, incX, Y, incY); 392 int M = A.getType().getY(); 393 int N = A.getType().getX(); 394 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgemv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0); 395 } 396 397 /** 398 * SGBMV performs one of the matrix-vector operations 399 * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y 400 * 401 * Details: http://www.netlib.org/lapack/explore-html/d6/d46/sgbmv_8f.html 402 * 403 * Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N), 404 * but only the region M*(KL+KU+1) will be referenced. The following subroutine can is an 405 * example showing how to convert the original matrix 'a' to row-based band matrix 'b'. 406 * for i in range(0, m): 407 * for j in range(max(0, i-kl), min(i+ku+1, n)): 408 * b[i, j-i+kl] = a[i, j] 409 * 410 * @param TransA The type of transpose applied to matrix A. 411 * @param KL The number of sub-diagonals of the matrix A. 412 * @param KU The number of super-diagonals of the matrix A. 413 * @param alpha The scalar alpha. 414 * @param A The input allocation contains the band matrix A, supported elements type {@link Element#F32}. 415 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 416 * @param incX The increment for the elements of vector x, must be larger than zero. 417 * @param beta The scalar beta. 418 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}. 419 * @param incY The increment for the elements of vector y, must be larger than zero. 420 */ SGBMV(@ranspose int TransA, int KL, int KU, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY)421 public void SGBMV(@Transpose int TransA, int KL, int KU, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) { 422 // GBMV has the same validation requirements as GEMV + KL and KU >= 0 423 validateGEMV(Element.F32(mRS), TransA, A, X, incX, Y, incY); 424 if (KL < 0 || KU < 0) { 425 throw new RSRuntimeException("KL and KU must be greater than or equal to 0"); 426 } 427 int M = A.getType().getY(); 428 int N = A.getType().getX(); 429 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, KL, KU); 430 } 431 432 /** 433 * DGBMV performs one of the matrix-vector operations 434 * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y 435 * 436 * Details: http://www.netlib.org/lapack/explore-html/d2/d3f/dgbmv_8f.html 437 * 438 * Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N), 439 * but only the region M*(KL+KU+1) will be referenced. The following subroutine can is an 440 * example showing how to convert the original matrix 'a' to row-based band matrix 'b'. 441 * for i in range(0, m): 442 * for j in range(max(0, i-kl), min(i+ku+1, n)): 443 * b[i, j-i+kl] = a[i, j] 444 * 445 * @param TransA The type of transpose applied to matrix A. 446 * @param KL The number of sub-diagonals of the matrix A. 447 * @param KU The number of super-diagonals of the matrix A. 448 * @param alpha The scalar alpha. 449 * @param A The input allocation contains the band matrix A, supported elements type {@link Element#F64}. 450 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 451 * @param incX The increment for the elements of vector x, must be larger than zero. 452 * @param beta The scalar beta. 453 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}. 454 * @param incY The increment for the elements of vector y, must be larger than zero. 455 */ DGBMV(@ranspose int TransA, int KL, int KU, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY)456 public void DGBMV(@Transpose int TransA, int KL, int KU, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) { 457 // GBMV has the same validation requirements as GEMV + KL and KU >= 0 458 validateGEMV(Element.F64(mRS), TransA, A, X, incX, Y, incY); 459 if (KL < 0 || KU < 0) { 460 throw new RSRuntimeException("KL and KU must be greater than or equal to 0"); 461 } 462 int M = A.getType().getY(); 463 int N = A.getType().getX(); 464 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, KL, KU); 465 } 466 467 /** 468 * CGBMV performs one of the matrix-vector operations 469 * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y or y := alpha*A**H*x + beta*y 470 * 471 * Details: http://www.netlib.org/lapack/explore-html/d0/d75/cgbmv_8f.html 472 * 473 * Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N), 474 * but only the region M*(KL+KU+1) will be referenced. The following subroutine can is an 475 * example showing how to convert the original matrix 'a' to row-based band matrix 'b'. 476 * for i in range(0, m): 477 * for j in range(max(0, i-kl), min(i+ku+1, n)): 478 * b[i, j-i+kl] = a[i, j] 479 * 480 * @param TransA The type of transpose applied to matrix A. 481 * @param KL The number of sub-diagonals of the matrix A. 482 * @param KU The number of super-diagonals of the matrix A. 483 * @param alpha The scalar alpha. 484 * @param A The input allocation contains the band matrix A, supported elements type {@link Element#F32_2}. 485 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 486 * @param incX The increment for the elements of vector x, must be larger than zero. 487 * @param beta The scalar beta. 488 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. 489 * @param incY The increment for the elements of vector y, must be larger than zero. 490 */ CGBMV(@ranspose int TransA, int KL, int KU, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY)491 public void CGBMV(@Transpose int TransA, int KL, int KU, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) { 492 // GBMV has the same validation requirements as GEMV + KL and KU >= 0 493 validateGEMV(Element.F32_2(mRS), TransA, A, X, incX, Y, incY); 494 if (KL < 0 || KU < 0) { 495 throw new RSRuntimeException("KL and KU must be greater than or equal to 0"); 496 } 497 int M = A.getType().getY(); 498 int N = A.getType().getX(); 499 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, KL, KU); 500 } 501 502 /** 503 * ZGBMV performs one of the matrix-vector operations 504 * y := alpha*A*x + beta*y or y := alpha*A**T*x + beta*y or y := alpha*A**H*x + beta*y 505 * 506 * Details: http://www.netlib.org/lapack/explore-html/d9/d46/zgbmv_8f.html 507 * 508 * Note: For a M*N matrix, the input Allocation should also be of size M*N (dimY = M, dimX = N), 509 * but only the region M*(KL+KU+1) will be referenced. The following subroutine can is an 510 * example showing how to convert the original matrix 'a' to row-based band matrix 'b'. 511 * for i in range(0, m): 512 * for j in range(max(0, i-kl), min(i+ku+1, n)): 513 * b[i, j-i+kl] = a[i, j] 514 * 515 * @param TransA The type of transpose applied to matrix A. 516 * @param KL The number of sub-diagonals of the matrix A. 517 * @param KU The number of super-diagonals of the matrix A. 518 * @param alpha The scalar alpha. 519 * @param A The input allocation contains the band matrix A, supported elements type {@link Element#F64_2}. 520 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 521 * @param incX The increment for the elements of vector x, must be larger than zero. 522 * @param beta The scalar beta. 523 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. 524 * @param incY The increment for the elements of vector y, must be larger than zero. 525 */ ZGBMV(@ranspose int TransA, int KL, int KU, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY)526 public void ZGBMV(@Transpose int TransA, int KL, int KU, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) { 527 // GBMV has the same validation requirements as GEMV + KL and KU >= 0 528 validateGEMV(Element.F64_2(mRS), TransA, A, X, incX, Y, incY); 529 if (KL < 0 || KU < 0) { 530 throw new RSRuntimeException("KL and KU must be greater than or equal to 0"); 531 } 532 int M = A.getType().getY(); 533 int N = A.getType().getX(); 534 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgbmv, TransA, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, KL, KU); 535 } 536 validateTRMV(Element e, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX)537 static void validateTRMV(Element e, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { 538 validateTranspose(TransA); 539 validateUplo(Uplo); 540 validateDiag(Diag); 541 int N = A.getType().getY(); 542 if (A.getType().getX() != N) { 543 throw new RSRuntimeException("A must be a square matrix for TRMV"); 544 } 545 if (!A.getType().getElement().isCompatible(e) || 546 !X.getType().getElement().isCompatible(e)) { 547 throw new RSRuntimeException("Called BLAS with wrong Element type"); 548 } 549 if (X.getType().getY() > 1) { 550 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 551 } 552 553 if (incX <= 0) { 554 throw new RSRuntimeException("Vector increments must be greater than 0"); 555 } 556 int expectedXDim = 1 + (N - 1) * incX; 557 if (X.getType().getX() != expectedXDim) { 558 throw new RSRuntimeException("Incorrect vector dimensions for TRMV"); 559 } 560 } 561 validateTPMV(Element e, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX)562 static int validateTPMV(Element e, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { 563 validateTranspose(TransA); 564 validateUplo(Uplo); 565 validateDiag(Diag); 566 if (!Ap.getType().getElement().isCompatible(e) || 567 !X.getType().getElement().isCompatible(e)) { 568 throw new RSRuntimeException("Called BLAS with wrong Element type"); 569 } 570 if (X.getType().getY() > 1) { 571 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 572 } 573 574 if (Ap.getType().getY() > 1) { 575 throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1"); 576 } 577 578 int N = (int)Math.sqrt((double)Ap.getType().getX() * 2); 579 //is it really doing anything? 580 if (Ap.getType().getX() != ((N * (N+1)) / 2)) { 581 throw new RSRuntimeException("Invalid dimension for Ap"); 582 } 583 if (incX <= 0) { 584 throw new RSRuntimeException("Vector increments must be greater than 0"); 585 } 586 int expectedXDim = 1 + (N - 1) * incX; 587 if (X.getType().getX() != expectedXDim) { 588 throw new RSRuntimeException("Incorrect vector dimensions for TPMV"); 589 } 590 591 return N; 592 } 593 594 /** 595 * STRMV performs one of the matrix-vector operations 596 * x := A*x or x := A**T*x 597 * 598 * Details: http://www.netlib.org/lapack/explore-html/de/d45/strmv_8f.html 599 * 600 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 601 * @param TransA The type of transpose applied to matrix A. 602 * @param Diag Specifies whether or not A is unit triangular. 603 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 604 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 605 * @param incX The increment for the elements of vector x, must be larger than zero. 606 */ STRMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX)607 public void STRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { 608 validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX); 609 int N = A.getType().getY(); 610 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0); 611 } 612 613 /** 614 * DTRMV performs one of the matrix-vector operations 615 * x := A*x or x := A**T*x 616 * 617 * Details: http://www.netlib.org/lapack/explore-html/dc/d7e/dtrmv_8f.html 618 * 619 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 620 * @param TransA The type of transpose applied to matrix A. 621 * @param Diag Specifies whether or not A is unit triangular. 622 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 623 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 624 * @param incX The increment for the elements of vector x, must be larger than zero. 625 */ DTRMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX)626 public void DTRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { 627 validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX); 628 int N = A.getType().getY(); 629 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0); 630 } 631 632 /** 633 * CTRMV performs one of the matrix-vector operations 634 * x := A*x or x := A**T*x or x := A**H*x 635 * 636 * Details: http://www.netlib.org/lapack/explore-html/df/d78/ctrmv_8f.html 637 * 638 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 639 * @param TransA The type of transpose applied to matrix A. 640 * @param Diag Specifies whether or not A is unit triangular. 641 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 642 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 643 * @param incX The increment for the elements of vector x, must be larger than zero. 644 */ CTRMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX)645 public void CTRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { 646 validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX); 647 int N = A.getType().getY(); 648 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0); 649 } 650 651 /** 652 * ZTRMV performs one of the matrix-vector operations 653 * x := A*x or x := A**T*x or x := A**H*x 654 * 655 * Details: http://www.netlib.org/lapack/explore-html/d0/dd1/ztrmv_8f.html 656 * 657 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 658 * @param TransA The type of transpose applied to matrix A. 659 * @param Diag Specifies whether or not A is unit triangular. 660 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 661 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 662 * @param incX The increment for the elements of vector x, must be larger than zero. 663 */ ZTRMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX)664 public void ZTRMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { 665 validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX); 666 int N = A.getType().getY(); 667 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0); 668 } 669 670 /** 671 * STBMV performs one of the matrix-vector operations 672 * x := A*x or x := A**T*x 673 * 674 * Details: http://www.netlib.org/lapack/explore-html/d6/d7d/stbmv_8f.html 675 * 676 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 677 * but only the region N*(K+1) will be referenced. The following subroutine can is an 678 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 679 * for i in range(0, n): 680 * for j in range(i, min(i+k+1, n)): 681 * b[i, j-i] = a[i, j] 682 * 683 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 684 * @param TransA The type of transpose applied to matrix A. 685 * @param Diag Specifies whether or not A is unit triangular. 686 * @param K The number of off-diagonals of the matrix A 687 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 688 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 689 * @param incX The increment for the elements of vector x, must be larger than zero. 690 */ STBMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX)691 public void STBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { 692 // TBMV has the same requirements as TRMV + K >= 0 693 if (K < 0) { 694 throw new RSRuntimeException("K must be greater than or equal to 0"); 695 } 696 validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX); 697 int N = A.getType().getY(); 698 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0); 699 } 700 701 /** 702 * DTBMV performs one of the matrix-vector operations 703 * x := A*x or x := A**T*x 704 * 705 * Details: http://www.netlib.org/lapack/explore-html/df/d29/dtbmv_8f.html 706 * 707 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 708 * but only the region N*(K+1) will be referenced. The following subroutine can is an 709 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 710 * for i in range(0, n): 711 * for j in range(i, min(i+k+1, n)): 712 * b[i, j-i] = a[i, j] 713 * 714 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 715 * @param TransA The type of transpose applied to matrix A. 716 * @param Diag Specifies whether or not A is unit triangular. 717 * @param K The number of off-diagonals of the matrix A 718 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 719 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 720 * @param incX The increment for the elements of vector x, must be larger than zero. 721 */ DTBMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX)722 public void DTBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { 723 // TBMV has the same requirements as TRMV + K >= 0 724 if (K < 0) { 725 throw new RSRuntimeException("K must be greater than or equal to 0"); 726 } 727 validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX); 728 int N = A.getType().getY(); 729 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0); 730 } 731 732 /** 733 * CTBMV performs one of the matrix-vector operations 734 * x := A*x or x := A**T*x or x := A**H*x 735 * 736 * Details: http://www.netlib.org/lapack/explore-html/d3/dcd/ctbmv_8f.html 737 * 738 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 739 * but only the region N*(K+1) will be referenced. The following subroutine can is an 740 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 741 * for i in range(0, n): 742 * for j in range(i, min(i+k+1, n)): 743 * b[i, j-i] = a[i, j] 744 * 745 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 746 * @param TransA The type of transpose applied to matrix A. 747 * @param Diag Specifies whether or not A is unit triangular. 748 * @param K The number of off-diagonals of the matrix A 749 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 750 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 751 * @param incX The increment for the elements of vector x, must be larger than zero. 752 */ CTBMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX)753 public void CTBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { 754 // TBMV has the same requirements as TRMV + K >= 0 755 if (K < 0) { 756 throw new RSRuntimeException("K must be greater than or equal to 0"); 757 } 758 validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX); 759 int N = A.getType().getY(); 760 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0); 761 } 762 763 /** 764 * ZTBMV performs one of the matrix-vector operations 765 * x := A*x or x := A**T*x or x := A**H*x 766 * 767 * Details: http://www.netlib.org/lapack/explore-html/d3/d39/ztbmv_8f.html 768 * 769 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 770 * but only the region N*(K+1) will be referenced. The following subroutine can is an 771 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 772 * for i in range(0, n): 773 * for j in range(i, min(i+k+1, n)): 774 * b[i, j-i] = a[i, j] 775 * 776 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 777 * @param TransA The type of transpose applied to matrix A. 778 * @param Diag Specifies whether or not A is unit triangular. 779 * @param K The number of off-diagonals of the matrix A 780 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 781 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 782 * @param incX The increment for the elements of vector x, must be larger than zero. 783 */ ZTBMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX)784 public void ZTBMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { 785 // TBMV has the same requirements as TRMV + K >= 0 786 if (K < 0) { 787 throw new RSRuntimeException("K must be greater than or equal to 0"); 788 } 789 validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX); 790 int N = A.getType().getY(); 791 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztbmv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0); 792 } 793 794 /** 795 * STPMV performs one of the matrix-vector operations 796 * x := A*x or x := A**T*x 797 * 798 * Details: http://www.netlib.org/lapack/explore-html/db/db1/stpmv_8f.html 799 * 800 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 801 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 802 * 'a' to packed matrix 'b'. 803 * k = 0 804 * for i in range(0, n): 805 * for j in range(i, n): 806 * b[k++] = a[i, j] 807 * 808 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 809 * @param TransA The type of transpose applied to matrix A. 810 * @param Diag Specifies whether or not A is unit triangular. 811 * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32}. 812 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 813 * @param incX The increment for the elements of vector x, must be larger than zero. 814 */ STPMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX)815 public void STPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { 816 int N = validateTPMV(Element.F32(mRS), Uplo, TransA, Diag, Ap, X, incX); 817 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0); 818 } 819 820 /** 821 * DTPMV performs one of the matrix-vector operations 822 * x := A*x or x := A**T*x 823 * 824 * Details: http://www.netlib.org/lapack/explore-html/dc/dcd/dtpmv_8f.html 825 * 826 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 827 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 828 * 'a' to packed matrix 'b'. 829 * k = 0 830 * for i in range(0, n): 831 * for j in range(i, n): 832 * b[k++] = a[i, j] 833 * 834 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 835 * @param TransA The type of transpose applied to matrix A. 836 * @param Diag Specifies whether or not A is unit triangular. 837 * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64}. 838 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 839 * @param incX The increment for the elements of vector x, must be larger than zero. 840 */ DTPMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX)841 public void DTPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { 842 int N = validateTPMV(Element.F64(mRS), Uplo, TransA, Diag, Ap, X, incX); 843 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0); 844 } 845 846 /** 847 * CTPMV performs one of the matrix-vector operations 848 * x := A*x or x := A**T*x or x := A**H*x 849 * 850 * Details: http://www.netlib.org/lapack/explore-html/d4/dbb/ctpmv_8f.html 851 * 852 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 853 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 854 * 'a' to packed matrix 'b'. 855 * k = 0 856 * for i in range(0, n): 857 * for j in range(i, n): 858 * b[k++] = a[i, j] 859 * 860 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 861 * @param TransA The type of transpose applied to matrix A. 862 * @param Diag Specifies whether or not A is unit triangular. 863 * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32_2}. 864 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 865 * @param incX The increment for the elements of vector x, must be larger than zero. 866 */ CTPMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX)867 public void CTPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { 868 int N = validateTPMV(Element.F32_2(mRS), Uplo, TransA, Diag, Ap, X, incX); 869 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0); 870 } 871 872 /** 873 * ZTPMV performs one of the matrix-vector operations 874 * x := A*x or x := A**T*x or x := A**H*x 875 * 876 * Details: http://www.netlib.org/lapack/explore-html/d2/d9e/ztpmv_8f.html 877 * 878 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 879 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 880 * 'a' to packed matrix 'b'. 881 * k = 0 882 * for i in range(0, n): 883 * for j in range(i, n): 884 * b[k++] = a[i, j] 885 * 886 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 887 * @param TransA The type of transpose applied to matrix A. 888 * @param Diag Specifies whether or not A is unit triangular. 889 * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64_2}. 890 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 891 * @param incX The increment for the elements of vector x, must be larger than zero. 892 */ ZTPMV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX)893 public void ZTPMV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { 894 int N = validateTPMV(Element.F64_2(mRS), Uplo, TransA, Diag, Ap, X, incX); 895 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztpmv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0); 896 } 897 898 /** 899 * STRSV solves one of the systems of equations 900 * A*x = b or A**T*x = b 901 * 902 * Details: http://www.netlib.org/lapack/explore-html/d0/d2a/strsv_8f.html 903 * 904 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 905 * @param TransA The type of transpose applied to matrix A. 906 * @param Diag Specifies whether or not A is unit triangular. 907 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 908 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 909 * @param incX The increment for the elements of vector x, must be larger than zero. 910 */ STRSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX)911 public void STRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { 912 // TRSV is the same as TRMV 913 validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX); 914 int N = A.getType().getY(); 915 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0); 916 917 } 918 919 /** 920 * DTRSV solves one of the systems of equations 921 * A*x = b or A**T*x = b 922 * 923 * Details: http://www.netlib.org/lapack/explore-html/d6/d96/dtrsv_8f.html 924 * 925 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 926 * @param TransA The type of transpose applied to matrix A. 927 * @param Diag Specifies whether or not A is unit triangular. 928 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 929 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 930 * @param incX The increment for the elements of vector x, must be larger than zero. 931 */ DTRSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX)932 public void DTRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { 933 // TRSV is the same as TRMV 934 validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX); 935 int N = A.getType().getY(); 936 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0); 937 938 } 939 940 /** 941 * CTRSV solves one of the systems of equations 942 * A*x = b or A**T*x = b or A**H*x = b 943 * 944 * Details: http://www.netlib.org/lapack/explore-html/d4/dc8/ctrsv_8f.html 945 * 946 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 947 * @param TransA The type of transpose applied to matrix A. 948 * @param Diag Specifies whether or not A is unit triangular. 949 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 950 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 951 * @param incX The increment for the elements of vector x, must be larger than zero. 952 */ CTRSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX)953 public void CTRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { 954 // TRSV is the same as TRMV 955 validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX); 956 int N = A.getType().getY(); 957 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0); 958 959 } 960 961 /** 962 * ZTRSV solves one of the systems of equations 963 * A*x = b or A**T*x = b or A**H*x = b 964 * 965 * Details: http://www.netlib.org/lapack/explore-html/d1/d2f/ztrsv_8f.html 966 * 967 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 968 * @param TransA The type of transpose applied to matrix A. 969 * @param Diag Specifies whether or not A is unit triangular. 970 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 971 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 972 * @param incX The increment for the elements of vector x, must be larger than zero. 973 */ ZTRSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX)974 public void ZTRSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation A, Allocation X, int incX) { 975 // TRSV is the same as TRMV 976 validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX); 977 int N = A.getType().getY(); 978 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0); 979 980 } 981 982 /** 983 * STBSV solves one of the systems of equations 984 * A*x = b or A**T*x = b 985 * 986 * Details: http://www.netlib.org/lapack/explore-html/d0/d1f/stbsv_8f.html 987 * 988 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 989 * but only the region N*(K+1) will be referenced. The following subroutine can is an 990 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 991 * for i in range(0, n): 992 * for j in range(i, min(i+k+1, n)): 993 * b[i, j-i] = a[i, j] 994 * 995 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 996 * @param TransA The type of transpose applied to matrix A. 997 * @param Diag Specifies whether or not A is unit triangular. 998 * @param K The number of off-diagonals of the matrix A 999 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 1000 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1001 * @param incX The increment for the elements of vector x, must be larger than zero. 1002 */ STBSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX)1003 public void STBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { 1004 // TBSV is the same as TRMV + K >= 0 1005 validateTRMV(Element.F32(mRS), Uplo, TransA, Diag, A, X, incX); 1006 int N = A.getType().getY(); 1007 if (K < 0) { 1008 throw new RSRuntimeException("Number of diagonals must be positive"); 1009 } 1010 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0); 1011 } 1012 1013 /** 1014 * DTBSV solves one of the systems of equations 1015 * A*x = b or A**T*x = b 1016 * 1017 * Details: http://www.netlib.org/lapack/explore-html/d4/dcf/dtbsv_8f.html 1018 * 1019 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 1020 * but only the region N*(K+1) will be referenced. The following subroutine can is an 1021 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 1022 * for i in range(0, n): 1023 * for j in range(i, min(i+k+1, n)): 1024 * b[i, j-i] = a[i, j] 1025 * 1026 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1027 * @param TransA The type of transpose applied to matrix A. 1028 * @param Diag Specifies whether or not A is unit triangular. 1029 * @param K The number of off-diagonals of the matrix A 1030 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 1031 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 1032 * @param incX The increment for the elements of vector x, must be larger than zero. 1033 */ DTBSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX)1034 public void DTBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { 1035 // TBSV is the same as TRMV + K >= 0 1036 validateTRMV(Element.F64(mRS), Uplo, TransA, Diag, A, X, incX); 1037 int N = A.getType().getY(); 1038 if (K < 0) { 1039 throw new RSRuntimeException("Number of diagonals must be positive"); 1040 } 1041 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, A.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0); 1042 } 1043 1044 /** 1045 * CTBSV solves one of the systems of equations 1046 * A*x = b or A**T*x = b or A**H*x = b 1047 * 1048 * Details: http://www.netlib.org/lapack/explore-html/d9/d5f/ctbsv_8f.html 1049 * 1050 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 1051 * but only the region N*(K+1) will be referenced. The following subroutine can is an 1052 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 1053 * for i in range(0, n): 1054 * for j in range(i, min(i+k+1, n)): 1055 * b[i, j-i] = a[i, j] 1056 * 1057 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1058 * @param TransA The type of transpose applied to matrix A. 1059 * @param Diag Specifies whether or not A is unit triangular. 1060 * @param K The number of off-diagonals of the matrix A 1061 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 1062 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 1063 * @param incX The increment for the elements of vector x, must be larger than zero. 1064 */ CTBSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX)1065 public void CTBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { 1066 // TBSV is the same as TRMV + K >= 0 1067 validateTRMV(Element.F32_2(mRS), Uplo, TransA, Diag, A, X, incX); 1068 int N = A.getType().getY(); 1069 if (K < 0) { 1070 throw new RSRuntimeException("Number of diagonals must be positive"); 1071 } 1072 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0); 1073 } 1074 1075 /** 1076 * ZTBSV solves one of the systems of equations 1077 * A*x = b or A**T*x = b or A**H*x = b 1078 * 1079 * Details: http://www.netlib.org/lapack/explore-html/d4/d5a/ztbsv_8f.html 1080 * 1081 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 1082 * but only the region N*(K+1) will be referenced. The following subroutine can is an 1083 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 1084 * for i in range(0, n): 1085 * for j in range(i, min(i+k+1, n)): 1086 * b[i, j-i] = a[i, j] 1087 * 1088 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1089 * @param TransA The type of transpose applied to matrix A. 1090 * @param Diag Specifies whether or not A is unit triangular. 1091 * @param K The number of off-diagonals of the matrix A 1092 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 1093 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 1094 * @param incX The increment for the elements of vector x, must be larger than zero. 1095 */ ZTBSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX)1096 public void ZTBSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, int K, Allocation A, Allocation X, int incX) { 1097 // TBSV is the same as TRMV + K >= 0 1098 validateTRMV(Element.F64_2(mRS), Uplo, TransA, Diag, A, X, incX); 1099 int N = A.getType().getY(); 1100 if (K < 0) { 1101 throw new RSRuntimeException("Number of diagonals must be positive"); 1102 } 1103 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztbsv, TransA, 0, 0, Uplo, Diag, 0, N, K, 0, 0, A.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0); 1104 } 1105 1106 /** 1107 * STPSV solves one of the systems of equations 1108 * A*x = b or A**T*x = b 1109 * 1110 * Details: http://www.netlib.org/lapack/explore-html/d0/d7c/stpsv_8f.html 1111 * 1112 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 1113 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 1114 * 'a' to packed matrix 'b'. 1115 * k = 0 1116 * for i in range(0, n): 1117 * for j in range(i, n): 1118 * b[k++] = a[i, j] 1119 * 1120 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1121 * @param TransA The type of transpose applied to matrix A. 1122 * @param Diag Specifies whether or not A is unit triangular. 1123 * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32}. 1124 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1125 * @param incX The increment for the elements of vector x, must be larger than zero. 1126 */ STPSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX)1127 public void STPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { 1128 // TPSV is same as TPMV 1129 int N = validateTPMV(Element.F32(mRS), Uplo, TransA, Diag, Ap, X, incX); 1130 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_stpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0); 1131 } 1132 1133 /** 1134 * DTPSV solves one of the systems of equations 1135 * A*x = b or A**T*x = b 1136 * 1137 * Details: http://www.netlib.org/lapack/explore-html/d9/d84/dtpsv_8f.html 1138 * 1139 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 1140 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 1141 * 'a' to packed matrix 'b'. 1142 * k = 0 1143 * for i in range(0, n): 1144 * for j in range(i, n): 1145 * b[k++] = a[i, j] 1146 * 1147 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1148 * @param TransA The type of transpose applied to matrix A. 1149 * @param Diag Specifies whether or not A is unit triangular. 1150 * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64}. 1151 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 1152 * @param incX The increment for the elements of vector x, must be larger than zero. 1153 */ DTPSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX)1154 public void DTPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { 1155 // TPSV is same as TPMV 1156 int N = validateTPMV(Element.F64(mRS), Uplo, TransA, Diag, Ap, X, incX); 1157 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, incX, 0, 0, 0); 1158 } 1159 1160 /** 1161 * CTPSV solves one of the systems of equations 1162 * A*x = b or A**T*x = b or A**H*x = b 1163 * 1164 * Details: http://www.netlib.org/lapack/explore-html/d8/d56/ctpsv_8f.html 1165 * 1166 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 1167 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 1168 * 'a' to packed matrix 'b'. 1169 * k = 0 1170 * for i in range(0, n): 1171 * for j in range(i, n): 1172 * b[k++] = a[i, j] 1173 * 1174 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1175 * @param TransA The type of transpose applied to matrix A. 1176 * @param Diag Specifies whether or not A is unit triangular. 1177 * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F32_2}. 1178 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 1179 * @param incX The increment for the elements of vector x, must be larger than zero. 1180 */ CTPSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX)1181 public void CTPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { 1182 // TPSV is same as TPMV 1183 int N = validateTPMV(Element.F32_2(mRS), Uplo, TransA, Diag, Ap, X, incX); 1184 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0); 1185 } 1186 1187 /** 1188 * ZTPSV solves one of the systems of equations 1189 * A*x = b or A**T*x = b or A**H*x = b 1190 * 1191 * Details: http://www.netlib.org/lapack/explore-html/da/d57/ztpsv_8f.html 1192 * 1193 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 1194 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 1195 * 'a' to packed matrix 'b'. 1196 * k = 0 1197 * for i in range(0, n): 1198 * for j in range(i, n): 1199 * b[k++] = a[i, j] 1200 * 1201 * @param Uplo Specifies whether the matrix is an upper or lower triangular matrix. 1202 * @param TransA The type of transpose applied to matrix A. 1203 * @param Diag Specifies whether or not A is unit triangular. 1204 * @param Ap The input allocation contains packed matrix A, supported elements type {@link Element#F64_2}. 1205 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 1206 * @param incX The increment for the elements of vector x, must be larger than zero. 1207 */ ZTPSV(@plo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX)1208 public void ZTPSV(@Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Allocation Ap, Allocation X, int incX) { 1209 // TPSV is same as TPMV 1210 int N = validateTPMV(Element.F64_2(mRS), Uplo, TransA, Diag, Ap, X, incX); 1211 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztpsv, TransA, 0, 0, Uplo, Diag, 0, N, 0, 0, 0, Ap.getID(mRS), X.getID(mRS), 0, 0, 0, incX, 0, 0, 0); 1212 } 1213 1214 /** 1215 * Level 2, S and D only 1216 */ validateSYMV(Element e, @Uplo int Uplo, Allocation A, Allocation X, Allocation Y, int incX, int incY)1217 static int validateSYMV(Element e, @Uplo int Uplo, Allocation A, Allocation X, Allocation Y, int incX, int incY) { 1218 validateUplo(Uplo); 1219 int N = A.getType().getY(); 1220 if (A.getType().getX() != N) { 1221 throw new RSRuntimeException("A must be a square matrix for SYMV"); 1222 } 1223 if (!A.getType().getElement().isCompatible(e) || 1224 !X.getType().getElement().isCompatible(e) || 1225 !Y.getType().getElement().isCompatible(e) ) { 1226 throw new RSRuntimeException("Called BLAS with wrong Element type"); 1227 } 1228 if (X.getType().getY() > 1 || Y.getType().getY() > 1) { 1229 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 1230 } 1231 1232 if (incX <= 0 || incY <= 0) { 1233 throw new RSRuntimeException("Vector increments must be greater than 0"); 1234 } 1235 int expectedXDim = 1 + (N - 1) * incX; 1236 if (X.getType().getX() != expectedXDim) { 1237 throw new RSRuntimeException("Incorrect vector dimensions for SYMV"); 1238 } 1239 int expectedYDim = 1 + (N - 1) * incY; 1240 if (Y.getType().getX() != expectedYDim) { 1241 throw new RSRuntimeException("Incorrect vector dimensions for SYMV"); 1242 } 1243 return N; 1244 } validateSPMV(Element e, @Uplo int Uplo, Allocation Ap, Allocation X, int incX, Allocation Y, int incY)1245 static int validateSPMV(Element e, @Uplo int Uplo, Allocation Ap, Allocation X, int incX, Allocation Y, int incY) { 1246 validateUplo(Uplo); 1247 if (!Ap.getType().getElement().isCompatible(e) || 1248 !X.getType().getElement().isCompatible(e) || 1249 !Y.getType().getElement().isCompatible(e)) { 1250 throw new RSRuntimeException("Called BLAS with wrong Element type"); 1251 } 1252 if (X.getType().getY() > 1 || Y.getType().getY() > 1) { 1253 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 1254 } 1255 1256 if (Ap.getType().getY() > 1) { 1257 throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1"); 1258 } 1259 1260 int N = (int)Math.sqrt((double)Ap.getType().getX() * 2); 1261 if (Ap.getType().getX() != ((N * (N+1)) / 2)) { 1262 throw new RSRuntimeException("Invalid dimension for Ap"); 1263 } 1264 if (incX <= 0 || incY <= 0) { 1265 throw new RSRuntimeException("Vector increments must be greater than 0"); 1266 } 1267 int expectedXDim = 1 + (N - 1) * incX; 1268 if (X.getType().getX() != expectedXDim) { 1269 throw new RSRuntimeException("Incorrect vector dimensions for SPMV"); 1270 } 1271 int expectedYDim = 1 + (N - 1) * incY; 1272 if (Y.getType().getX() != expectedYDim) { 1273 throw new RSRuntimeException("Incorrect vector dimensions for SPMV"); 1274 } 1275 1276 return N; 1277 } validateGER(Element e, Allocation X, int incX, Allocation Y, int incY, Allocation A)1278 static void validateGER(Element e, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 1279 if (!A.getType().getElement().isCompatible(e) || 1280 !X.getType().getElement().isCompatible(e) || 1281 !Y.getType().getElement().isCompatible(e) ) { 1282 throw new RSRuntimeException("Called BLAS with wrong Element type"); 1283 } 1284 1285 if (X.getType().getY() > 1 || Y.getType().getY() > 1) { 1286 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 1287 } 1288 1289 int M = A.getType().getY(); 1290 int N = A.getType().getX(); 1291 1292 if (N < 1 || M < 1) { 1293 throw new RSRuntimeException("M and N must be 1 or greater for GER"); 1294 } 1295 if (incX <= 0 || incY <= 0) { 1296 throw new RSRuntimeException("Vector increments must be greater than 0"); 1297 } 1298 int expectedXDim = 1 + (M - 1) * incX; 1299 if (X.getType().getX() != expectedXDim) { 1300 throw new RSRuntimeException("Incorrect vector dimensions for GER"); 1301 } 1302 int expectedYDim = 1 + (N - 1) * incY; 1303 if (Y.getType().getX() != expectedYDim) { 1304 throw new RSRuntimeException("Incorrect vector dimensions for GER"); 1305 } 1306 1307 1308 } validateSYR(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation A)1309 static int validateSYR(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation A) { 1310 validateUplo(Uplo); 1311 if (!A.getType().getElement().isCompatible(e) || 1312 !X.getType().getElement().isCompatible(e)) { 1313 throw new RSRuntimeException("Called BLAS with wrong Element type"); 1314 } 1315 1316 int N = A.getType().getX(); 1317 1318 if (X.getType().getY() > 1) { 1319 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 1320 } 1321 if (N != A.getType().getY()) { 1322 throw new RSRuntimeException("A must be a symmetric matrix"); 1323 } 1324 if (incX <= 0) { 1325 throw new RSRuntimeException("Vector increments must be greater than 0"); 1326 } 1327 int expectedXDim = 1 + (N - 1) * incX; 1328 if (X.getType().getX() != expectedXDim) { 1329 throw new RSRuntimeException("Incorrect vector dimensions for SYR"); 1330 } 1331 return N; 1332 } validateSPR(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Ap)1333 static int validateSPR(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Ap) { 1334 validateUplo(Uplo); 1335 if (!Ap.getType().getElement().isCompatible(e) || 1336 !X.getType().getElement().isCompatible(e)) { 1337 throw new RSRuntimeException("Called BLAS with wrong Element type"); 1338 } 1339 if (X.getType().getY() > 1) { 1340 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 1341 } 1342 1343 if (Ap.getType().getY() > 1) { 1344 throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1"); 1345 } 1346 1347 int N = (int)Math.sqrt((double)Ap.getType().getX() * 2); 1348 if (Ap.getType().getX() != ((N * (N+1)) / 2)) { 1349 throw new RSRuntimeException("Invalid dimension for Ap"); 1350 } 1351 if (incX <= 0) { 1352 throw new RSRuntimeException("Vector increments must be greater than 0"); 1353 } 1354 int expectedXDim = 1 + (N - 1) * incX; 1355 if (X.getType().getX() != expectedXDim) { 1356 throw new RSRuntimeException("Incorrect vector dimensions for SPR"); 1357 } 1358 1359 return N; 1360 } 1361 validateSYR2(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Y, int incY, Allocation A)1362 static int validateSYR2(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 1363 validateUplo(Uplo); 1364 if (!A.getType().getElement().isCompatible(e) || 1365 !X.getType().getElement().isCompatible(e) || 1366 !Y.getType().getElement().isCompatible(e)) { 1367 throw new RSRuntimeException("Called BLAS with wrong Element type"); 1368 } 1369 1370 if (X.getType().getY() > 1 || Y.getType().getY() > 1) { 1371 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 1372 } 1373 1374 int N = A.getType().getX(); 1375 1376 if (N != A.getType().getY()) { 1377 throw new RSRuntimeException("A must be a symmetric matrix"); 1378 } 1379 if (incX <= 0 || incY <= 0) { 1380 throw new RSRuntimeException("Vector increments must be greater than 0"); 1381 } 1382 int expectedXDim = 1 + (N - 1) * incX; 1383 int expectedYDim = 1 + (N - 1) * incY; 1384 if (X.getType().getX() != expectedXDim || Y.getType().getX() != expectedYDim) { 1385 throw new RSRuntimeException("Incorrect vector dimensions for SYR"); 1386 } 1387 return N; 1388 1389 } validateSPR2(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Y, int incY, Allocation Ap)1390 static int validateSPR2(Element e, @Uplo int Uplo, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) { 1391 validateUplo(Uplo); 1392 if (!Ap.getType().getElement().isCompatible(e) || 1393 !X.getType().getElement().isCompatible(e) || 1394 !Y.getType().getElement().isCompatible(e)) { 1395 throw new RSRuntimeException("Called BLAS with wrong Element type"); 1396 } 1397 if (X.getType().getY() > 1 || Y.getType().getY() > 1) { 1398 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 1399 } 1400 1401 if (Ap.getType().getY() > 1) { 1402 throw new RSRuntimeException("Ap must have a Y dimension of 0 or 1"); 1403 } 1404 1405 int N = (int)Math.sqrt((double)Ap.getType().getX() * 2); 1406 if (Ap.getType().getX() != ((N * (N+1)) / 2)) { 1407 throw new RSRuntimeException("Invalid dimension for Ap"); 1408 } 1409 if (incX <= 0 || incY <= 0) { 1410 throw new RSRuntimeException("Vector increments must be greater than 0"); 1411 } 1412 int expectedXDim = 1 + (N - 1) * incX; 1413 int expectedYDim = 1 + (N - 1) * incY; 1414 if (X.getType().getX() != expectedXDim || Y.getType().getX() != expectedYDim) { 1415 throw new RSRuntimeException("Incorrect vector dimensions for SPR2"); 1416 } 1417 1418 return N; 1419 } 1420 1421 /** 1422 * SSYMV performs the matrix-vector operation 1423 * y := alpha*A*x + beta*y 1424 * 1425 * Details: http://www.netlib.org/lapack/explore-html/d2/d94/ssymv_8f.html 1426 * 1427 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 1428 * @param alpha The scalar alpha. 1429 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 1430 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1431 * @param incX The increment for the elements of vector x, must be larger than zero. 1432 * @param beta The scalar beta. 1433 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}. 1434 * @param incY The increment for the elements of vector y, must be larger than zero. 1435 */ SSYMV(@plo int Uplo, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY)1436 public void SSYMV(@Uplo int Uplo, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) { 1437 int N = validateSYMV(Element.F32(mRS), Uplo, A, X, Y, incX, incY); 1438 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssymv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0); 1439 } 1440 1441 /** 1442 * SSBMV performs the matrix-vector operation 1443 * y := alpha*A*x + beta*y 1444 * 1445 * Details: http://www.netlib.org/lapack/explore-html/d3/da1/ssbmv_8f.html 1446 * 1447 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 1448 * but only the region N*(K+1) will be referenced. The following subroutine can is an 1449 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 1450 * for i in range(0, n): 1451 * for j in range(i, min(i+k+1, n)): 1452 * b[i, j-i] = a[i, j] 1453 * 1454 * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied. 1455 * @param K The number of off-diagonals of the matrix A 1456 * @param alpha The scalar alpha. 1457 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 1458 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1459 * @param incX The increment for the elements of vector x, must be larger than zero. 1460 * @param beta The scalar beta. 1461 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}. 1462 * @param incY The increment for the elements of vector y, must be larger than zero. 1463 */ SSBMV(@plo int Uplo, int K, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY)1464 public void SSBMV(@Uplo int Uplo, int K, float alpha, Allocation A, Allocation X, int incX, float beta, Allocation Y, int incY) { 1465 // SBMV is the same as SYMV + K >= 0 1466 if (K < 0) { 1467 throw new RSRuntimeException("K must be greater than or equal to 0"); 1468 } 1469 int N = validateSYMV(Element.F32(mRS), Uplo, A, X, Y, incX, incY); 1470 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0); 1471 } 1472 1473 /** 1474 * SSPMV performs the matrix-vector operation 1475 * y := alpha*A*x + beta*y 1476 * 1477 * Details: http://www.netlib.org/lapack/explore-html/d8/d68/sspmv_8f.html 1478 * 1479 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 1480 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 1481 * 'a' to packed matrix 'b'. 1482 * k = 0 1483 * for i in range(0, n): 1484 * for j in range(i, n): 1485 * b[k++] = a[i, j] 1486 * 1487 * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form. 1488 * @param alpha The scalar alpha. 1489 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32}. 1490 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1491 * @param incX The increment for the elements of vector x, must be larger than zero. 1492 * @param beta The scalar beta. 1493 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}. 1494 * @param incY The increment for the elements of vector y, must be larger than zero. 1495 */ SSPMV(@plo int Uplo, float alpha, Allocation Ap, Allocation X, int incX, float beta, Allocation Y, int incY)1496 public void SSPMV(@Uplo int Uplo, float alpha, Allocation Ap, Allocation X, int incX, float beta, Allocation Y, int incY) { 1497 int N = validateSPMV(Element.F32(mRS), Uplo, Ap, X, incX, Y, incY); 1498 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sspmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, Ap.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0); 1499 } 1500 1501 /** 1502 * SGER performs the rank 1 operation 1503 * A := alpha*x*y**T + A 1504 * 1505 * Details: http://www.netlib.org/lapack/explore-html/db/d5c/sger_8f.html 1506 * 1507 * @param alpha The scalar alpha. 1508 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1509 * @param incX The increment for the elements of vector x, must be larger than zero. 1510 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}. 1511 * @param incY The increment for the elements of vector y, must be larger than zero. 1512 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 1513 */ SGER(float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A)1514 public void SGER(float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 1515 int M = A.getType().getY(); 1516 int N = A.getType().getX(); 1517 validateGER(Element.F32(mRS), X, incX, Y, incY, A); 1518 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sger, 0, 0, 0, 0, 0, M, N, 0, alpha, X.getID(mRS), Y.getID(mRS), 0.f, A.getID(mRS), incX, incY, 0, 0); 1519 } 1520 1521 /** 1522 * SSYR performs the rank 1 operation 1523 * A := alpha*x*x**T + A 1524 * 1525 * Details: http://www.netlib.org/lapack/explore-html/d6/dac/ssyr_8f.html 1526 * 1527 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 1528 * @param alpha The scalar alpha. 1529 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1530 * @param incX The increment for the elements of vector x, must be larger than zero. 1531 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 1532 */ SSYR(@plo int Uplo, float alpha, Allocation X, int incX, Allocation A)1533 public void SSYR(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation A) { 1534 int N = validateSYR(Element.F32(mRS), Uplo, X, incX, A); 1535 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), A.getID(mRS), 0.f, 0, incX, 0, 0, 0); 1536 } 1537 1538 /** 1539 * SSPR performs the rank 1 operation 1540 * A := alpha*x*x**T + A 1541 * 1542 * Details: http://www.netlib.org/lapack/explore-html/d2/d9b/sspr_8f.html 1543 * 1544 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 1545 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 1546 * 'a' to packed matrix 'b'. 1547 * k = 0 1548 * for i in range(0, n): 1549 * for j in range(i, n): 1550 * b[k++] = a[i, j] 1551 * 1552 * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. 1553 * @param alpha The scalar alpha. 1554 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1555 * @param incX The increment for the elements of vector x, must be larger than zero. 1556 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32}. 1557 */ SSPR(@plo int Uplo, float alpha, Allocation X, int incX, Allocation Ap)1558 public void SSPR(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Ap) { 1559 int N = validateSPR(Element.F32(mRS), Uplo, X, incX, Ap); 1560 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sspr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), Ap.getID(mRS), 0.f, 0, incX, 0, 0, 0); 1561 } 1562 1563 /** 1564 * SSYR2 performs the symmetric rank 2 operation 1565 * A := alpha*x*y**T + alpha*y*x**T + A 1566 * 1567 * Details: http://www.netlib.org/lapack/explore-html/db/d99/ssyr2_8f.html 1568 * 1569 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 1570 * @param alpha The scalar alpha. 1571 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1572 * @param incX The increment for the elements of vector x, must be larger than zero. 1573 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}. 1574 * @param incY The increment for the elements of vector y, must be larger than zero. 1575 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 1576 */ SSYR2(@plo int Uplo, float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A)1577 public void SSYR2(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 1578 int N = validateSYR2(Element.F32(mRS), Uplo, X, incX, Y, incY, A); 1579 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), Y.getID(mRS), 0, A.getID(mRS), incX, incY, 0, 0); 1580 } 1581 1582 /** 1583 * SSPR2 performs the symmetric rank 2 operation 1584 * A := alpha*x*y**T + alpha*y*x**T + A 1585 * 1586 * Details: http://www.netlib.org/lapack/explore-html/db/d3e/sspr2_8f.html 1587 * 1588 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 1589 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 1590 * 'a' to packed matrix 'b'. 1591 * k = 0 1592 * for i in range(0, n): 1593 * for j in range(i, n): 1594 * b[k++] = a[i, j] 1595 * 1596 * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. 1597 * @param alpha The scalar alpha. 1598 * @param X The input allocation contains vector x, supported elements type {@link Element#F32}. 1599 * @param incX The increment for the elements of vector x, must be larger than zero. 1600 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32}. 1601 * @param incY The increment for the elements of vector y, must be larger than zero. 1602 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32}. 1603 */ SSPR2(@plo int Uplo, float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap)1604 public void SSPR2(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) { 1605 int N = validateSPR2(Element.F32(mRS), Uplo, X, incX, Y, incY, Ap); 1606 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sspr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), Y.getID(mRS), 0, Ap.getID(mRS), incX, incY, 0, 0); 1607 } 1608 1609 /** 1610 * DSYMV performs the matrix-vector operation 1611 * y := alpha*A*x + beta*y 1612 * 1613 * Details: http://www.netlib.org/lapack/explore-html/d8/dbe/dsymv_8f.html 1614 * 1615 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 1616 * @param alpha The scalar alpha. 1617 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 1618 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 1619 * @param incX The increment for the elements of vector x, must be larger than zero. 1620 * @param beta The scalar beta. 1621 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}. 1622 * @param incY The increment for the elements of vector y, must be larger than zero. 1623 */ DSYMV(@plo int Uplo, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY)1624 public void DSYMV(@Uplo int Uplo, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) { 1625 int N = validateSYMV(Element.F64(mRS), Uplo, A, X, Y, incX, incY); 1626 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsymv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0); 1627 } 1628 1629 /** 1630 * DSBMV performs the matrix-vector operation 1631 * y := alpha*A*x + beta*y 1632 * 1633 * Details: http://www.netlib.org/lapack/explore-html/d8/d1e/dsbmv_8f.html 1634 * 1635 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 1636 * but only the region N*(K+1) will be referenced. The following subroutine can is an 1637 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 1638 * for i in range(0, n): 1639 * for j in range(i, min(i+k+1, n)): 1640 * b[i, j-i] = a[i, j] 1641 * 1642 * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied. 1643 * @param K The number of off-diagonals of the matrix A 1644 * @param alpha The scalar alpha. 1645 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 1646 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 1647 * @param incX The increment for the elements of vector x, must be larger than zero. 1648 * @param beta The scalar beta. 1649 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}. 1650 * @param incY The increment for the elements of vector y, must be larger than zero. 1651 */ DSBMV(@plo int Uplo, int K, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY)1652 public void DSBMV(@Uplo int Uplo, int K, double alpha, Allocation A, Allocation X, int incX, double beta, Allocation Y, int incY) { 1653 // SBMV is the same as SYMV + K >= 0 1654 if (K < 0) { 1655 throw new RSRuntimeException("K must be greater than or equal to 0"); 1656 } 1657 int N = validateSYMV(Element.F64(mRS), Uplo, A, X, Y, incX, incY); 1658 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha, A.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0); 1659 } 1660 1661 /** 1662 * DSPMV performs the matrix-vector operation 1663 * y := alpha*A*x + beta*y 1664 * 1665 * Details: http://www.netlib.org/lapack/explore-html/d4/d85/dspmv_8f.html 1666 * 1667 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 1668 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 1669 * 'a' to packed matrix 'b'. 1670 * k = 0 1671 * for i in range(0, n): 1672 * for j in range(i, n): 1673 * b[k++] = a[i, j] 1674 * 1675 * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form. 1676 * @param alpha The scalar alpha. 1677 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64}. 1678 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 1679 * @param incX The increment for the elements of vector x, must be larger than zero. 1680 * @param beta The scalar beta. 1681 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}. 1682 * @param incY The increment for the elements of vector y, must be larger than zero. 1683 */ DSPMV(@plo int Uplo, double alpha, Allocation Ap, Allocation X, int incX, double beta, Allocation Y, int incY)1684 public void DSPMV(@Uplo int Uplo, double alpha, Allocation Ap, Allocation X, int incX, double beta, Allocation Y, int incY) { 1685 int N = validateSPMV(Element.F64(mRS), Uplo, Ap, X, incX, Y, incY); 1686 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dspmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, Ap.getID(mRS), X.getID(mRS), beta, Y.getID(mRS), incX, incY, 0, 0); 1687 } 1688 1689 /** 1690 * DGER performs the rank 1 operation 1691 * A := alpha*x*y**T + A 1692 * 1693 * Details: http://www.netlib.org/lapack/explore-html/dc/da8/dger_8f.html 1694 * 1695 * @param alpha The scalar alpha. 1696 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 1697 * @param incX The increment for the elements of vector x, must be larger than zero. 1698 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}. 1699 * @param incY The increment for the elements of vector y, must be larger than zero. 1700 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 1701 */ DGER(double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A)1702 public void DGER(double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 1703 int M = A.getType().getY(); 1704 int N = A.getType().getX(); 1705 validateGER(Element.F64(mRS), X, incX, Y, incY, A); 1706 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dger, 0, 0, 0, 0, 0, M, N, 0, alpha, X.getID(mRS), Y.getID(mRS), 0.f, A.getID(mRS), incX, incY, 0, 0); 1707 } 1708 1709 /** 1710 * DSYR performs the rank 1 operation 1711 * A := alpha*x*x**T + A 1712 * 1713 * Details: http://www.netlib.org/lapack/explore-html/d3/d60/dsyr_8f.html 1714 * 1715 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 1716 * @param alpha The scalar alpha. 1717 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 1718 * @param incX The increment for the elements of vector x, must be larger than zero. 1719 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 1720 */ DSYR(@plo int Uplo, double alpha, Allocation X, int incX, Allocation A)1721 public void DSYR(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation A) { 1722 int N = validateSYR(Element.F64(mRS), Uplo, X, incX, A); 1723 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), A.getID(mRS), 0.f, 0, incX, 0, 0, 0); 1724 } 1725 1726 /** 1727 * DSPR performs the rank 1 operation 1728 * A := alpha*x*x**T + A 1729 * 1730 * Details: http://www.netlib.org/lapack/explore-html/dd/dba/dspr_8f.html 1731 * 1732 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 1733 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 1734 * 'a' to packed matrix 'b'. 1735 * k = 0 1736 * for i in range(0, n): 1737 * for j in range(i, n): 1738 * b[k++] = a[i, j] 1739 * 1740 * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. 1741 * @param alpha The scalar alpha. 1742 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 1743 * @param incX The increment for the elements of vector x, must be larger than zero. 1744 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64}. 1745 */ DSPR(@plo int Uplo, double alpha, Allocation X, int incX, Allocation Ap)1746 public void DSPR(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Ap) { 1747 int N = validateSPR(Element.F64(mRS), Uplo, X, incX, Ap); 1748 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dspr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), Ap.getID(mRS), 0.f, 0, incX, 0, 0, 0); 1749 } 1750 1751 /** 1752 * DSYR2 performs the symmetric rank 2 operation 1753 * A := alpha*x*y**T + alpha*y*x**T + A 1754 * 1755 * Details: http://www.netlib.org/lapack/explore-html/de/d41/dsyr2_8f.html 1756 * 1757 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 1758 * @param alpha The scalar alpha. 1759 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 1760 * @param incX The increment for the elements of vector x, must be larger than zero. 1761 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}. 1762 * @param incY The increment for the elements of vector y, must be larger than zero. 1763 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 1764 */ DSYR2(@plo int Uplo, double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A)1765 public void DSYR2(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 1766 int N = validateSYR2(Element.F64(mRS), Uplo, X, incX, Y, incY, A); 1767 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), Y.getID(mRS), 0, A.getID(mRS), incX, incY, 0, 0); 1768 } 1769 1770 /** 1771 * DSPR2 performs the symmetric rank 2 operation 1772 * A := alpha*x*y**T + alpha*y*x**T + A 1773 * 1774 * Details: http://www.netlib.org/lapack/explore-html/dd/d9e/dspr2_8f.html 1775 * 1776 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 1777 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 1778 * 'a' to packed matrix 'b'. 1779 * k = 0 1780 * for i in range(0, n): 1781 * for j in range(i, n): 1782 * b[k++] = a[i, j] 1783 * 1784 * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. 1785 * @param alpha The scalar alpha. 1786 * @param X The input allocation contains vector x, supported elements type {@link Element#F64}. 1787 * @param incX The increment for the elements of vector x, must be larger than zero. 1788 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64}. 1789 * @param incY The increment for the elements of vector y, must be larger than zero. 1790 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64}. 1791 */ DSPR2(@plo int Uplo, double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap)1792 public void DSPR2(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) { 1793 int N = validateSPR2(Element.F64(mRS), Uplo, X, incX, Y, incY, Ap); 1794 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dspr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, X.getID(mRS), Y.getID(mRS), 0, Ap.getID(mRS), incX, incY, 0, 0); 1795 } 1796 1797 1798 /** 1799 * Level 2, C and Z only 1800 */ 1801 validateGERU(Element e, Allocation X, int incX, Allocation Y, int incY, Allocation A)1802 static void validateGERU(Element e, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 1803 if (!A.getType().getElement().isCompatible(e) || 1804 !X.getType().getElement().isCompatible(e) || 1805 !Y.getType().getElement().isCompatible(e)) { 1806 throw new RSRuntimeException("Called BLAS with wrong Element type"); 1807 } 1808 if (X.getType().getY() > 1 || Y.getType().getY() > 1) { 1809 throw new RSRuntimeException("BLAS vectors must have Y dimension of 0 or 1"); 1810 } 1811 1812 int M = A.getType().getY(); 1813 int N = A.getType().getX(); 1814 if (incX <= 0 || incY <= 0) { 1815 throw new RSRuntimeException("Vector increments must be greater than 0"); 1816 } 1817 int expectedXDim = 1 + (M - 1) * incX; 1818 if (X.getType().getX() != expectedXDim) { 1819 throw new RSRuntimeException("Incorrect vector dimensions for GERU"); 1820 } 1821 int expectedYDim = 1 + (N - 1) * incY; 1822 if (Y.getType().getX() != expectedYDim) { 1823 throw new RSRuntimeException("Incorrect vector dimensions for GERU"); 1824 } 1825 1826 } 1827 1828 /** 1829 * CHEMV performs the matrix-vector operation 1830 * y := alpha*A*x + beta*y 1831 * 1832 * Details: http://www.netlib.org/lapack/explore-html/d7/d51/chemv_8f.html 1833 * 1834 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 1835 * @param alpha The scalar alpha. 1836 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 1837 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 1838 * @param incX The increment for the elements of vector x, must be larger than zero. 1839 * @param beta The scalar beta. 1840 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. 1841 * @param incY The increment for the elements of vector y, must be larger than zero. 1842 */ CHEMV(@plo int Uplo, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY)1843 public void CHEMV(@Uplo int Uplo, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) { 1844 // HEMV is the same as SYR2 validation-wise 1845 int N = validateSYR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, A); 1846 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chemv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0); 1847 } 1848 1849 /** 1850 * CHBMV performs the matrix-vector operation 1851 * y := alpha*A*x + beta*y 1852 * 1853 * Details: http://www.netlib.org/lapack/explore-html/db/dc2/chbmv_8f.html 1854 * 1855 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 1856 * but only the region N*(K+1) will be referenced. The following subroutine can is an 1857 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 1858 * for i in range(0, n): 1859 * for j in range(i, min(i+k+1, n)): 1860 * b[i, j-i] = a[i, j] 1861 * 1862 * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied. 1863 * @param K The number of off-diagonals of the matrix A 1864 * @param alpha The scalar alpha. 1865 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 1866 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 1867 * @param incX The increment for the elements of vector x, must be larger than zero. 1868 * @param beta The scalar beta. 1869 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. 1870 * @param incY The increment for the elements of vector y, must be larger than zero. 1871 */ CHBMV(@plo int Uplo, int K, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY)1872 public void CHBMV(@Uplo int Uplo, int K, Float2 alpha, Allocation A, Allocation X, int incX, Float2 beta, Allocation Y, int incY) { 1873 // HBMV is the same as SYR2 validation-wise 1874 int N = validateSYR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, A); 1875 if (K < 0) { 1876 throw new RSRuntimeException("K must be 0 or greater for HBMV"); 1877 } 1878 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0); 1879 } 1880 1881 /** 1882 * CHPMV performs the matrix-vector operation 1883 * y := alpha*A*x + beta*y 1884 * 1885 * Details: http://www.netlib.org/lapack/explore-html/d2/d06/chpmv_8f.html 1886 * 1887 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 1888 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 1889 * 'a' to packed matrix 'b'. 1890 * k = 0 1891 * for i in range(0, n): 1892 * for j in range(i, n): 1893 * b[k++] = a[i, j] 1894 * 1895 * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form. 1896 * @param alpha The scalar alpha. 1897 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 1898 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 1899 * @param incX The increment for the elements of vector x, must be larger than zero. 1900 * @param beta The scalar beta. 1901 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. 1902 * @param incY The increment for the elements of vector y, must be larger than zero. 1903 */ CHPMV(@plo int Uplo, Float2 alpha, Allocation Ap, Allocation X, int incX, Float2 beta, Allocation Y, int incY)1904 public void CHPMV(@Uplo int Uplo, Float2 alpha, Allocation Ap, Allocation X, int incX, Float2 beta, Allocation Y, int incY) { 1905 // HPMV is the same as SPR2 1906 int N = validateSPR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, Ap); 1907 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chpmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, Ap.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0); 1908 } 1909 1910 /** 1911 * CGERU performs the rank 1 operation 1912 * A := alpha*x*y**T + A 1913 * 1914 * Details: http://www.netlib.org/lapack/explore-html/db/d5f/cgeru_8f.html 1915 * 1916 * @param alpha The scalar alpha. 1917 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 1918 * @param incX The increment for the elements of vector x, must be larger than zero. 1919 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. 1920 * @param incY The increment for the elements of vector y, must be larger than zero. 1921 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 1922 */ CGERU(Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A)1923 public void CGERU(Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 1924 validateGERU(Element.F32_2(mRS), X, incX, Y, incY, A); 1925 int M = A.getType().getY(); 1926 int N = A.getType().getX(); 1927 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgeru, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, A.getID(mRS), incX, incY, 0, 0); 1928 } 1929 1930 /** 1931 * CGERC performs the rank 1 operation 1932 * A := alpha*x*y**H + A 1933 * 1934 * Details: http://www.netlib.org/lapack/explore-html/dd/d84/cgerc_8f.html 1935 * 1936 * @param alpha The scalar alpha. 1937 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 1938 * @param incX The increment for the elements of vector x, must be larger than zero. 1939 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. 1940 * @param incY The increment for the elements of vector y, must be larger than zero. 1941 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 1942 */ CGERC(Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A)1943 public void CGERC(Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 1944 // same as GERU 1945 validateGERU(Element.F32_2(mRS), X, incX, Y, incY, A); 1946 int M = A.getType().getY(); 1947 int N = A.getType().getX(); 1948 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgerc, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, A.getID(mRS), incX, incY, 0, 0); 1949 } 1950 1951 /** 1952 * CHER performs the rank 1 operation 1953 * A := alpha*x*x**H + A 1954 * 1955 * Details: http://www.netlib.org/lapack/explore-html/d3/d6d/cher_8f.html 1956 * 1957 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 1958 * @param alpha The scalar alpha. 1959 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 1960 * @param incX The increment for the elements of vector x, must be larger than zero. 1961 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 1962 */ CHER(@plo int Uplo, float alpha, Allocation X, int incX, Allocation A)1963 public void CHER(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation A) { 1964 // same as SYR 1965 int N = validateSYR(Element.F32_2(mRS), Uplo, X, incX, A); 1966 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cher, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, X.getID(mRS), 0, 0, 0, A.getID(mRS), incX, 0, 0, 0); 1967 } 1968 1969 /** 1970 * CHPR performs the rank 1 operation 1971 * A := alpha*x*x**H + A 1972 * 1973 * Details: http://www.netlib.org/lapack/explore-html/db/dcd/chpr_8f.html 1974 * 1975 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 1976 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 1977 * 'a' to packed matrix 'b'. 1978 * k = 0 1979 * for i in range(0, n): 1980 * for j in range(i, n): 1981 * b[k++] = a[i, j] 1982 * 1983 * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. 1984 * @param alpha The scalar alpha. 1985 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 1986 * @param incX The increment for the elements of vector x, must be larger than zero. 1987 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 1988 */ CHPR(@plo int Uplo, float alpha, Allocation X, int incX, Allocation Ap)1989 public void CHPR(@Uplo int Uplo, float alpha, Allocation X, int incX, Allocation Ap) { 1990 // equivalent to SPR for validation 1991 int N = validateSPR(Element.F32_2(mRS), Uplo, X, incX, Ap); 1992 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chpr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, X.getID(mRS), 0, 0, 0, Ap.getID(mRS), incX, 0, 0, 0); 1993 } 1994 1995 /** 1996 * CHER2 performs the symmetric rank 2 operation 1997 * A := alpha*x*y**H + alpha*y*x**H + A 1998 * 1999 * Details: http://www.netlib.org/lapack/explore-html/db/d87/cher2_8f.html 2000 * 2001 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 2002 * @param alpha The scalar alpha. 2003 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 2004 * @param incX The increment for the elements of vector x, must be larger than zero. 2005 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. 2006 * @param incY The increment for the elements of vector y, must be larger than zero. 2007 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 2008 */ CHER2(@plo int Uplo, Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A)2009 public void CHER2(@Uplo int Uplo, Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 2010 // same as SYR2 2011 int N = validateSYR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, A); 2012 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cher2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, A.getID(mRS), incX, incY, 0, 0); 2013 } 2014 2015 /** 2016 * CHPR2 performs the symmetric rank 2 operation 2017 * A := alpha*x*y**H + alpha*y*x**H + A 2018 * 2019 * Details: http://www.netlib.org/lapack/explore-html/d6/d44/chpr2_8f.html 2020 * 2021 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 2022 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 2023 * 'a' to packed matrix 'b'. 2024 * k = 0 2025 * for i in range(0, n): 2026 * for j in range(i, n): 2027 * b[k++] = a[i, j] 2028 * 2029 * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. 2030 * @param alpha The scalar alpha. 2031 * @param X The input allocation contains vector x, supported elements type {@link Element#F32_2}. 2032 * @param incX The increment for the elements of vector x, must be larger than zero. 2033 * @param Y The input allocation contains vector y, supported elements type {@link Element#F32_2}. 2034 * @param incY The increment for the elements of vector y, must be larger than zero. 2035 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 2036 */ CHPR2(@plo int Uplo, Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap)2037 public void CHPR2(@Uplo int Uplo, Float2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) { 2038 // same as SPR2 2039 int N = validateSPR2(Element.F32_2(mRS), Uplo, X, incX, Y, incY, Ap); 2040 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chpr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, Ap.getID(mRS), incX, incY, 0, 0); 2041 } 2042 2043 /** 2044 * ZHEMV performs the matrix-vector operation 2045 * y := alpha*A*x + beta*y 2046 * 2047 * Details: http://www.netlib.org/lapack/explore-html/d0/ddd/zhemv_8f.html 2048 * 2049 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 2050 * @param alpha The scalar alpha. 2051 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2052 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 2053 * @param incX The increment for the elements of vector x, must be larger than zero. 2054 * @param beta The scalar beta. 2055 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. 2056 * @param incY The increment for the elements of vector y, must be larger than zero. 2057 */ ZHEMV(@plo int Uplo, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY)2058 public void ZHEMV(@Uplo int Uplo, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) { 2059 // HEMV is the same as SYR2 validation-wise 2060 int N = validateSYR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, A); 2061 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhemv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0); 2062 } 2063 2064 /** 2065 * ZHBMV performs the matrix-vector operation 2066 * y := alpha*A*x + beta*y 2067 * 2068 * Details: http://www.netlib.org/lapack/explore-html/d3/d1a/zhbmv_8f.html 2069 * 2070 * Note: For a N*N matrix, the input Allocation should also be of size N*N (dimY = N, dimX = N), 2071 * but only the region N*(K+1) will be referenced. The following subroutine can is an 2072 * example showing how to convert a UPPER trianglar matrix 'a' to row-based band matrix 'b'. 2073 * for i in range(0, n): 2074 * for j in range(i, min(i+k+1, n)): 2075 * b[i, j-i] = a[i, j] 2076 * 2077 * @param Uplo Specifies whether the upper or lower triangular part of the band matrix A is being supplied. 2078 * @param K The number of off-diagonals of the matrix A 2079 * @param alpha The scalar alpha. 2080 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2081 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 2082 * @param incX The increment for the elements of vector x, must be larger than zero. 2083 * @param beta The scalar beta. 2084 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. 2085 * @param incY The increment for the elements of vector y, must be larger than zero. 2086 */ ZHBMV(@plo int Uplo, int K, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY)2087 public void ZHBMV(@Uplo int Uplo, int K, Double2 alpha, Allocation A, Allocation X, int incX, Double2 beta, Allocation Y, int incY) { 2088 // HBMV is the same as SYR2 validation-wise 2089 int N = validateSYR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, A); 2090 if (K < 0) { 2091 throw new RSRuntimeException("K must be 0 or greater for HBMV"); 2092 } 2093 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhbmv, 0, 0, 0, Uplo, 0, 0, N, K, alpha.x, alpha.y, A.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0); 2094 } 2095 2096 /** 2097 * ZHPMV performs the matrix-vector operation 2098 * y := alpha*A*x + beta*y 2099 * 2100 * Details: http://www.netlib.org/lapack/explore-html/d0/d60/zhpmv_8f.html 2101 * 2102 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 2103 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 2104 * 'a' to packed matrix 'b'. 2105 * k = 0 2106 * for i in range(0, n): 2107 * for j in range(i, n): 2108 * b[k++] = a[i, j] 2109 * 2110 * @param Uplo Specifies whether the upper or lower triangular part of the matrix A is supplied in packed form. 2111 * @param alpha The scalar alpha. 2112 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2113 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 2114 * @param incX The increment for the elements of vector x, must be larger than zero. 2115 * @param beta The scalar beta. 2116 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. 2117 * @param incY The increment for the elements of vector y, must be larger than zero. 2118 */ ZHPMV(@plo int Uplo, Double2 alpha, Allocation Ap, Allocation X, int incX, Double2 beta, Allocation Y, int incY)2119 public void ZHPMV(@Uplo int Uplo, Double2 alpha, Allocation Ap, Allocation X, int incX, Double2 beta, Allocation Y, int incY) { 2120 // HPMV is the same as SPR2 2121 int N = validateSPR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, Ap); 2122 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhpmv, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, Ap.getID(mRS), X.getID(mRS), beta.x, beta.y, Y.getID(mRS), incX, incY, 0, 0); 2123 } 2124 2125 /** 2126 * ZGERU performs the rank 1 operation 2127 * A := alpha*x*y**T + A 2128 * 2129 * Details: http://www.netlib.org/lapack/explore-html/d7/d12/zgeru_8f.html 2130 * 2131 * @param alpha The scalar alpha. 2132 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 2133 * @param incX The increment for the elements of vector x, must be larger than zero. 2134 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. 2135 * @param incY The increment for the elements of vector y, must be larger than zero. 2136 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2137 */ ZGERU(Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A)2138 public void ZGERU(Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 2139 validateGERU(Element.F64_2(mRS), X, incX, Y, incY, A); 2140 int M = A.getType().getY(); 2141 int N = A.getType().getX(); 2142 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgeru, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, A.getID(mRS), incX, incY, 0, 0); 2143 } 2144 2145 /** 2146 * ZGERC performs the rank 1 operation 2147 * A := alpha*x*y**H + A 2148 * 2149 * Details: http://www.netlib.org/lapack/explore-html/d3/dad/zgerc_8f.html 2150 * 2151 * @param alpha The scalar alpha. 2152 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 2153 * @param incX The increment for the elements of vector x, must be larger than zero. 2154 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. 2155 * @param incY The increment for the elements of vector y, must be larger than zero. 2156 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2157 */ ZGERC(Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A)2158 public void ZGERC(Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 2159 // same as GERU 2160 validateGERU(Element.F64_2(mRS), X, incX, Y, incY, A); 2161 int M = A.getType().getY(); 2162 int N = A.getType().getX(); 2163 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgerc, 0, 0, 0, 0, 0, M, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, A.getID(mRS), incX, incY, 0, 0); 2164 } 2165 2166 /** 2167 * ZHER performs the rank 1 operation 2168 * A := alpha*x*x**H + A 2169 * 2170 * Details: http://www.netlib.org/lapack/explore-html/de/d0e/zher_8f.html 2171 * 2172 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 2173 * @param alpha The scalar alpha. 2174 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 2175 * @param incX The increment for the elements of vector x, must be larger than zero. 2176 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2177 */ ZHER(@plo int Uplo, double alpha, Allocation X, int incX, Allocation A)2178 public void ZHER(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation A) { 2179 // same as SYR 2180 int N = validateSYR(Element.F64_2(mRS), Uplo, X, incX, A); 2181 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zher, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, X.getID(mRS), 0, 0, 0, A.getID(mRS), incX, 0, 0, 0); 2182 } 2183 2184 /** 2185 * ZHPR performs the rank 1 operation 2186 * A := alpha*x*x**H + A 2187 * 2188 * Details: http://www.netlib.org/lapack/explore-html/de/de1/zhpr_8f.html 2189 * 2190 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 2191 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 2192 * 'a' to packed matrix 'b'. 2193 * k = 0 2194 * for i in range(0, n): 2195 * for j in range(i, n): 2196 * b[k++] = a[i, j] 2197 * 2198 * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. 2199 * @param alpha The scalar alpha. 2200 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 2201 * @param incX The increment for the elements of vector x, must be larger than zero. 2202 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2203 */ ZHPR(@plo int Uplo, double alpha, Allocation X, int incX, Allocation Ap)2204 public void ZHPR(@Uplo int Uplo, double alpha, Allocation X, int incX, Allocation Ap) { 2205 // equivalent to SPR for validation 2206 int N = validateSPR(Element.F64_2(mRS), Uplo, X, incX, Ap); 2207 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhpr, 0, 0, 0, Uplo, 0, 0, N, 0, alpha, 0, X.getID(mRS), 0, 0, 0, Ap.getID(mRS), incX, 0, 0, 0); 2208 } 2209 2210 /** 2211 * ZHER2 performs the symmetric rank 2 operation 2212 * A := alpha*x*y**H + alpha*y*x**H + A 2213 * 2214 * Details: http://www.netlib.org/lapack/explore-html/da/d8a/zher2_8f.html 2215 * 2216 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 2217 * @param alpha The scalar alpha. 2218 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 2219 * @param incX The increment for the elements of vector x, must be larger than zero. 2220 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. 2221 * @param incY The increment for the elements of vector y, must be larger than zero. 2222 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2223 */ ZHER2(@plo int Uplo, Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A)2224 public void ZHER2(@Uplo int Uplo, Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation A) { 2225 // same as SYR2 2226 int N = validateSYR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, A); 2227 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zher2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, A.getID(mRS), incX, incY, 0, 0); 2228 } 2229 2230 /** 2231 * ZHPR2 performs the symmetric rank 2 operation 2232 * A := alpha*x*y**H + alpha*y*x**H + A 2233 * 2234 * Details: http://www.netlib.org/lapack/explore-html/d5/d52/zhpr2_8f.html 2235 * 2236 * Note: For a N*N matrix, the input Allocation should be a 1D allocation of size dimX = N*(N+1)/2, 2237 * The following subroutine can is an example showing how to convert a UPPER trianglar matrix 2238 * 'a' to packed matrix 'b'. 2239 * k = 0 2240 * for i in range(0, n): 2241 * for j in range(i, n): 2242 * b[k++] = a[i, j] 2243 * 2244 * @param Uplo Specifies whether the upper or lower triangular part is to be supplied in the packed form. 2245 * @param alpha The scalar alpha. 2246 * @param X The input allocation contains vector x, supported elements type {@link Element#F64_2}. 2247 * @param incX The increment for the elements of vector x, must be larger than zero. 2248 * @param Y The input allocation contains vector y, supported elements type {@link Element#F64_2}. 2249 * @param incY The increment for the elements of vector y, must be larger than zero. 2250 * @param Ap The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2251 */ ZHPR2(@plo int Uplo, Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap)2252 public void ZHPR2(@Uplo int Uplo, Double2 alpha, Allocation X, int incX, Allocation Y, int incY, Allocation Ap) { 2253 // same as SPR2 2254 int N = validateSPR2(Element.F64_2(mRS), Uplo, X, incX, Y, incY, Ap); 2255 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhpr2, 0, 0, 0, Uplo, 0, 0, N, 0, alpha.x, alpha.y, X.getID(mRS), Y.getID(mRS), 0, 0, Ap.getID(mRS), incX, incY, 0, 0); 2256 } 2257 2258 2259 /** 2260 * Level 3 BLAS 2261 */ 2262 validateL3(Element e, int TransA, int TransB, int Side, Allocation A, Allocation B, Allocation C)2263 static void validateL3(Element e, int TransA, int TransB, int Side, Allocation A, Allocation B, Allocation C) { 2264 int aM = -1, aN = -1, bM = -1, bN = -1, cM = -1, cN = -1; 2265 if ((A != null && !A.getType().getElement().isCompatible(e)) || 2266 (B != null && !B.getType().getElement().isCompatible(e)) || 2267 (C != null && !C.getType().getElement().isCompatible(e))) { 2268 throw new RSRuntimeException("Called BLAS with wrong Element type"); 2269 } 2270 if (C == null) { 2271 //since matrix C is used to store the result, it cannot be null. 2272 throw new RSRuntimeException("Allocation C cannot be null"); 2273 } 2274 cM = C.getType().getY(); 2275 cN = C.getType().getX(); 2276 2277 if (Side == RIGHT) { 2278 if ((A == null && B != null) || (A != null && B == null)) { 2279 throw new RSRuntimeException("Provided Matrix A without Matrix B, or vice versa"); 2280 } 2281 if (B != null) { 2282 bM = A.getType().getY(); 2283 bN = A.getType().getX(); 2284 } 2285 if (A != null) { 2286 aM = B.getType().getY(); 2287 aN = B.getType().getX(); 2288 } 2289 } else { 2290 if (A != null) { 2291 if (TransA == TRANSPOSE || TransA == CONJ_TRANSPOSE) { 2292 aN = A.getType().getY(); 2293 aM = A.getType().getX(); 2294 } else { 2295 aM = A.getType().getY(); 2296 aN = A.getType().getX(); 2297 } 2298 } 2299 if (B != null) { 2300 if (TransB == TRANSPOSE || TransB == CONJ_TRANSPOSE) { 2301 bN = B.getType().getY(); 2302 bM = B.getType().getX(); 2303 } else { 2304 bM = B.getType().getY(); 2305 bN = B.getType().getX(); 2306 } 2307 } 2308 } 2309 if (A != null && B != null && C != null) { 2310 if (aN != bM || aM != cM || bN != cN) { 2311 throw new RSRuntimeException("Called BLAS with invalid dimensions"); 2312 } 2313 } else if (A != null && C != null) { 2314 // A and C only, for SYRK 2315 if (cM != cN) { 2316 throw new RSRuntimeException("Matrix C is not symmetric"); 2317 } 2318 if (aM != cM) { 2319 throw new RSRuntimeException("Called BLAS with invalid dimensions"); 2320 } 2321 } else if (A != null && B != null) { 2322 // A and B only 2323 if (aN != bM) { 2324 throw new RSRuntimeException("Called BLAS with invalid dimensions"); 2325 } 2326 } 2327 2328 } 2329 2330 /** 2331 * SGEMM performs one of the matrix-matrix operations 2332 * C := alpha*op(A)*op(B) + beta*C where op(X) is one of op(X) = X or op(X) = X**T 2333 * 2334 * Details: http://www.netlib.org/lapack/explore-html/d4/de2/sgemm_8f.html 2335 * 2336 * @param TransA The type of transpose applied to matrix A. 2337 * @param TransB The type of transpose applied to matrix B. 2338 * @param alpha The scalar alpha. 2339 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 2340 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}. 2341 * @param beta The scalar beta. 2342 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32}. 2343 */ SGEMM(@ranspose int TransA, @Transpose int TransB, float alpha, Allocation A, Allocation B, float beta, Allocation C)2344 public void SGEMM(@Transpose int TransA, @Transpose int TransB, float alpha, Allocation A, 2345 Allocation B, float beta, Allocation C) { 2346 validateTranspose(TransA); 2347 validateTranspose(TransB); 2348 validateL3(Element.F32(mRS), TransA, TransB, 0, A, B, C); 2349 2350 int M = -1, N = -1, K = -1; 2351 if (TransA != NO_TRANSPOSE) { 2352 M = A.getType().getX(); 2353 K = A.getType().getY(); 2354 } else { 2355 M = A.getType().getY(); 2356 K = A.getType().getX(); 2357 } 2358 if (TransB != NO_TRANSPOSE) { 2359 N = B.getType().getY(); 2360 } else { 2361 N = B.getType().getX(); 2362 } 2363 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_sgemm, TransA, TransB, 0, 0, 0, M, N, K, alpha, A.getID(mRS), B.getID(mRS), 2364 beta, C.getID(mRS), 0, 0, 0, 0); 2365 } 2366 2367 /** 2368 * DGEMM performs one of the matrix-matrix operations 2369 * C := alpha*op(A)*op(B) + beta*C where op(X) is one of op(X) = X or op(X) = X**T 2370 * 2371 * Details: http://www.netlib.org/lapack/explore-html/d7/d2b/dgemm_8f.html 2372 * 2373 * @param TransA The type of transpose applied to matrix A. 2374 * @param TransB The type of transpose applied to matrix B. 2375 * @param alpha The scalar alpha. 2376 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 2377 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}. 2378 * @param beta The scalar beta. 2379 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64}. 2380 */ DGEMM(@ranspose int TransA, @Transpose int TransB, double alpha, Allocation A, Allocation B, double beta, Allocation C)2381 public void DGEMM(@Transpose int TransA, @Transpose int TransB, double alpha, Allocation A, 2382 Allocation B, double beta, Allocation C) { 2383 validateTranspose(TransA); 2384 validateTranspose(TransB); 2385 validateL3(Element.F64(mRS), TransA, TransB, 0, A, B, C); 2386 int M = -1, N = -1, K = -1; 2387 if (TransA != NO_TRANSPOSE) { 2388 M = A.getType().getX(); 2389 K = A.getType().getY(); 2390 } else { 2391 M = A.getType().getY(); 2392 K = A.getType().getX(); 2393 } 2394 if (TransB != NO_TRANSPOSE) { 2395 N = B.getType().getY(); 2396 } else { 2397 N = B.getType().getX(); 2398 } 2399 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dgemm, TransA, TransB, 0, 0, 0, M, N, K, alpha, A.getID(mRS), B.getID(mRS), 2400 beta, C.getID(mRS), 0, 0, 0, 0); 2401 } 2402 2403 /** 2404 * CGEMM performs one of the matrix-matrix operations 2405 * C := alpha*op(A)*op(B) + beta*C where op(X) is one of op(X) = X or op(X) = X**T or op(X) = X**H 2406 * 2407 * Details: http://www.netlib.org/lapack/explore-html/d6/d5b/cgemm_8f.html 2408 * 2409 * @param TransA The type of transpose applied to matrix A. 2410 * @param TransB The type of transpose applied to matrix B. 2411 * @param alpha The scalar alpha. 2412 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 2413 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}. 2414 * @param beta The scalar beta. 2415 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}. 2416 */ CGEMM(@ranspose int TransA, @Transpose int TransB, Float2 alpha, Allocation A, Allocation B, Float2 beta, Allocation C)2417 public void CGEMM(@Transpose int TransA, @Transpose int TransB, Float2 alpha, Allocation A, 2418 Allocation B, Float2 beta, Allocation C) { 2419 validateTranspose(TransA); 2420 validateTranspose(TransB); 2421 validateL3(Element.F32_2(mRS), TransA, TransB, 0, A, B, C); 2422 int M = -1, N = -1, K = -1; 2423 if (TransA != NO_TRANSPOSE) { 2424 M = A.getType().getX(); 2425 K = A.getType().getY(); 2426 } else { 2427 M = A.getType().getY(); 2428 K = A.getType().getX(); 2429 } 2430 if (TransB != NO_TRANSPOSE) { 2431 N = B.getType().getY(); 2432 } else { 2433 N = B.getType().getX(); 2434 } 2435 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cgemm, TransA, TransB, 0, 0, 0, M, N, K, alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), 2436 beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0); 2437 } 2438 2439 /** 2440 * ZGEMM performs one of the matrix-matrix operations 2441 * C := alpha*op(A)*op(B) + beta*C where op(X) is one of op(X) = X or op(X) = X**T or op(X) = X**H 2442 * 2443 * Details: http://www.netlib.org/lapack/explore-html/d7/d76/zgemm_8f.html 2444 * 2445 * @param TransA The type of transpose applied to matrix A. 2446 * @param TransB The type of transpose applied to matrix B. 2447 * @param alpha The scalar alpha. 2448 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2449 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}. 2450 * @param beta The scalar beta. 2451 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}. 2452 */ ZGEMM(@ranspose int TransA, @Transpose int TransB, Double2 alpha, Allocation A, Allocation B, Double2 beta, Allocation C)2453 public void ZGEMM(@Transpose int TransA, @Transpose int TransB, Double2 alpha, Allocation A, 2454 Allocation B, Double2 beta, Allocation C) { 2455 validateTranspose(TransA); 2456 validateTranspose(TransB); 2457 validateL3(Element.F64_2(mRS), TransA, TransB, 0, A, B, C); 2458 int M = -1, N = -1, K = -1; 2459 if (TransA != NO_TRANSPOSE) { 2460 M = A.getType().getX(); 2461 K = A.getType().getY(); 2462 } else { 2463 M = A.getType().getY(); 2464 K = A.getType().getX(); 2465 } 2466 if (TransB != NO_TRANSPOSE) { 2467 N = B.getType().getY(); 2468 } else { 2469 N = B.getType().getX(); 2470 } 2471 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zgemm, TransA, TransB, 0, 0, 0, M, N, K, alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), 2472 beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0); 2473 } 2474 2475 /** 2476 * SSYMM performs one of the matrix-matrix operations 2477 * C := alpha*A*B + beta*C or C := alpha*B*A + beta*C 2478 * 2479 * Details: http://www.netlib.org/lapack/explore-html/d7/d42/ssymm_8f.html 2480 * 2481 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 2482 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 2483 * @param alpha The scalar alpha. 2484 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 2485 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}. 2486 * @param beta The scalar beta. 2487 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32}. 2488 */ SSYMM(@ide int Side, @Uplo int Uplo, float alpha, Allocation A, Allocation B, float beta, Allocation C)2489 public void SSYMM(@Side int Side, @Uplo int Uplo, float alpha, Allocation A, 2490 Allocation B, float beta, Allocation C) { 2491 validateSide(Side); 2492 validateUplo(Uplo); 2493 //For SYMM, Matrix A should be symmetric 2494 if (A.getType().getX() != A.getType().getY()) { 2495 throw new RSRuntimeException("Matrix A is not symmetric"); 2496 } 2497 validateL3(Element.F32(mRS), 0, 0, Side, A, B, C); 2498 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha, A.getID(mRS), B.getID(mRS), 2499 beta, C.getID(mRS), 0, 0, 0, 0); 2500 } 2501 2502 /** 2503 * DSYMM performs one of the matrix-matrix operations 2504 * C := alpha*A*B + beta*C or C := alpha*B*A + beta*C 2505 * 2506 * Details: http://www.netlib.org/lapack/explore-html/d8/db0/dsymm_8f.html 2507 * 2508 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 2509 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 2510 * @param alpha The scalar alpha. 2511 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 2512 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}. 2513 * @param beta The scalar beta. 2514 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64}. 2515 */ DSYMM(@ide int Side, @Uplo int Uplo, double alpha, Allocation A, Allocation B, double beta, Allocation C)2516 public void DSYMM(@Side int Side, @Uplo int Uplo, double alpha, Allocation A, 2517 Allocation B, double beta, Allocation C) { 2518 validateSide(Side); 2519 validateUplo(Uplo); 2520 if (A.getType().getX() != A.getType().getY()) { 2521 throw new RSRuntimeException("Matrix A is not symmetric"); 2522 } 2523 validateL3(Element.F64(mRS), 0, 0, Side, A, B, C); 2524 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha, A.getID(mRS), B.getID(mRS), 2525 beta, C.getID(mRS), 0, 0, 0, 0); 2526 } 2527 2528 /** 2529 * CSYMM performs one of the matrix-matrix operations 2530 * C := alpha*A*B + beta*C or C := alpha*B*A + beta*C 2531 * 2532 * Details: http://www.netlib.org/lapack/explore-html/db/d59/csymm_8f.html 2533 * 2534 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 2535 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 2536 * @param alpha The scalar alpha. 2537 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 2538 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}. 2539 * @param beta The scalar beta. 2540 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}. 2541 */ CSYMM(@ide int Side, @Uplo int Uplo, Float2 alpha, Allocation A, Allocation B, Float2 beta, Allocation C)2542 public void CSYMM(@Side int Side, @Uplo int Uplo, Float2 alpha, Allocation A, 2543 Allocation B, Float2 beta, Allocation C) { 2544 validateSide(Side); 2545 validateUplo(Uplo); 2546 if (A.getType().getX() != A.getType().getY()) { 2547 throw new RSRuntimeException("Matrix A is not symmetric"); 2548 } 2549 validateL3(Element.F32_2(mRS), 0, 0, Side, A, B, C); 2550 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_csymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), 2551 beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0); 2552 } 2553 2554 /** 2555 * ZSYMM performs one of the matrix-matrix operations 2556 * C := alpha*A*B + beta*C or C := alpha*B*A + beta*C 2557 * 2558 * Details: http://www.netlib.org/lapack/explore-html/df/d51/zsymm_8f.html 2559 * 2560 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 2561 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 2562 * @param alpha The scalar alpha. 2563 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2564 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}. 2565 * @param beta The scalar beta. 2566 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}. 2567 */ ZSYMM(@ide int Side, @Uplo int Uplo, Double2 alpha, Allocation A, Allocation B, Double2 beta, Allocation C)2568 public void ZSYMM(@Side int Side, @Uplo int Uplo, Double2 alpha, Allocation A, 2569 Allocation B, Double2 beta, Allocation C) { 2570 validateSide(Side); 2571 validateUplo(Uplo); 2572 if (A.getType().getX() != A.getType().getY()) { 2573 throw new RSRuntimeException("Matrix A is not symmetric"); 2574 } 2575 validateL3(Element.F64_2(mRS), 0, 0, Side, A, B, C); 2576 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zsymm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), 2577 beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0); 2578 } 2579 2580 /** 2581 * SSYRK performs one of the symmetric rank k operations 2582 * C := alpha*A*A**T + beta*C or C := alpha*A**T*A + beta*C 2583 * 2584 * Details: http://www.netlib.org/lapack/explore-html/d0/d40/ssyrk_8f.html 2585 * 2586 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 2587 * @param Trans The type of transpose applied to the operation. 2588 * @param alpha The scalar alpha. 2589 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 2590 * @param beta The scalar beta. 2591 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32}. 2592 */ SSYRK(@plo int Uplo, @Transpose int Trans, float alpha, Allocation A, float beta, Allocation C)2593 public void SSYRK(@Uplo int Uplo, @Transpose int Trans, float alpha, Allocation A, float beta, Allocation C) { 2594 validateTranspose(Trans); 2595 validateUplo(Uplo); 2596 validateL3(Element.F32(mRS), Trans, 0, 0, A, null, C); 2597 int K = -1; 2598 if (Trans != NO_TRANSPOSE) { 2599 K = A.getType().getY(); 2600 } else { 2601 K = A.getType().getX(); 2602 } 2603 2604 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, A.getID(mRS), 0, beta, C.getID(mRS), 0, 0, 0, 0); 2605 } 2606 2607 /** 2608 * DSYRK performs one of the symmetric rank k operations 2609 * C := alpha*A*A**T + beta*C or C := alpha*A**T*A + beta*C 2610 * 2611 * Details: http://www.netlib.org/lapack/explore-html/dc/d05/dsyrk_8f.html 2612 * 2613 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 2614 * @param Trans The type of transpose applied to the operation. 2615 * @param alpha The scalar alpha. 2616 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 2617 * @param beta The scalar beta. 2618 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64}. 2619 */ DSYRK(@plo int Uplo, @Transpose int Trans, double alpha, Allocation A, double beta, Allocation C)2620 public void DSYRK(@Uplo int Uplo, @Transpose int Trans, double alpha, Allocation A, double beta, Allocation C) { 2621 validateTranspose(Trans); 2622 validateUplo(Uplo); 2623 validateL3(Element.F64(mRS), Trans, 0, 0, A, null, C); 2624 int K = -1; 2625 if (Trans != NO_TRANSPOSE) { 2626 K = A.getType().getY(); 2627 } else { 2628 K = A.getType().getX(); 2629 } 2630 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, A.getID(mRS), 0, beta, C.getID(mRS), 0, 0, 0, 0); 2631 } 2632 2633 /** 2634 * CSYRK performs one of the symmetric rank k operations 2635 * C := alpha*A*A**T + beta*C or C := alpha*A**T*A + beta*C 2636 * 2637 * Details: http://www.netlib.org/lapack/explore-html/d3/d6a/csyrk_8f.html 2638 * 2639 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 2640 * @param Trans The type of transpose applied to the operation. 2641 * @param alpha The scalar alpha. 2642 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 2643 * @param beta The scalar beta. 2644 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}. 2645 */ CSYRK(@plo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Float2 beta, Allocation C)2646 public void CSYRK(@Uplo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Float2 beta, Allocation C) { 2647 validateTranspose(Trans); 2648 validateUplo(Uplo); 2649 validateL3(Element.F32_2(mRS), Trans, 0, 0, A, null, C); 2650 int K = -1; 2651 if (Trans != NO_TRANSPOSE) { 2652 K = A.getType().getY(); 2653 } else { 2654 K = A.getType().getX(); 2655 } 2656 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_csyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, A.getID(mRS), 0, beta.x, beta.y, 2657 C.getID(mRS), 0, 0, 0, 0); 2658 } 2659 2660 /** 2661 * ZSYRK performs one of the symmetric rank k operations 2662 * C := alpha*A*A**T + beta*C or C := alpha*A**T*A + beta*C 2663 * 2664 * Details: http://www.netlib.org/lapack/explore-html/de/d54/zsyrk_8f.html 2665 * 2666 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 2667 * @param Trans The type of transpose applied to the operation. 2668 * @param alpha The scalar alpha. 2669 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2670 * @param beta The scalar beta. 2671 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}. 2672 */ ZSYRK(@plo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Double2 beta, Allocation C)2673 public void ZSYRK(@Uplo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Double2 beta, Allocation C) { 2674 validateTranspose(Trans); 2675 validateUplo(Uplo); 2676 validateL3(Element.F64_2(mRS), Trans, 0, 0, A, null, C); 2677 int K = -1; 2678 if (Trans != NO_TRANSPOSE) { 2679 K = A.getType().getY(); 2680 } else { 2681 K = A.getType().getX(); 2682 } 2683 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zsyrk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, A.getID(mRS), 0, beta.x, beta.y, 2684 C.getID(mRS), 0, 0, 0, 0); 2685 } 2686 validateSYR2K(Element e, @Transpose int Trans, Allocation A, Allocation B, Allocation C)2687 static void validateSYR2K(Element e, @Transpose int Trans, Allocation A, Allocation B, Allocation C) { 2688 validateTranspose(Trans); 2689 if (!A.getType().getElement().isCompatible(e) || 2690 !B.getType().getElement().isCompatible(e) || 2691 !C.getType().getElement().isCompatible(e)) { 2692 throw new RSRuntimeException("Called BLAS with wrong Element type"); 2693 } 2694 int Cdim = -1; 2695 // A is n x k if no transpose, k x n if transpose 2696 // C is n x n 2697 if (Trans == TRANSPOSE) { 2698 // check columns versus C 2699 Cdim = A.getType().getX(); 2700 } else { 2701 // check rows versus C 2702 Cdim = A.getType().getY(); 2703 } 2704 if (C.getType().getX() != Cdim || C.getType().getY() != Cdim) { 2705 throw new RSRuntimeException("Invalid symmetric matrix in SYR2K"); 2706 } 2707 // A dims == B dims 2708 if (A.getType().getX() != B.getType().getX() || A.getType().getY() != B.getType().getY()) { 2709 throw new RSRuntimeException("Invalid A and B in SYR2K"); 2710 } 2711 } 2712 2713 /** 2714 * SSYR2K performs one of the symmetric rank 2k operations 2715 * C := alpha*A*B**T + alpha*B*A**T + beta*C or C := alpha*A**T*B + alpha*B**T*A + beta*C 2716 * 2717 * Details: http://www.netlib.org/lapack/explore-html/df/d3d/ssyr2k_8f.html 2718 * 2719 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 2720 * @param Trans The type of transpose applied to the operation. 2721 * @param alpha The scalar alpha. 2722 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 2723 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}. 2724 * @param beta The scalar beta. 2725 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32}. 2726 */ SSYR2K(@plo int Uplo, @Transpose int Trans, float alpha, Allocation A, Allocation B, float beta, Allocation C)2727 public void SSYR2K(@Uplo int Uplo, @Transpose int Trans, float alpha, Allocation A, Allocation B, float beta, Allocation C) { 2728 validateUplo(Uplo); 2729 validateSYR2K(Element.F32(mRS), Trans, A, B, C); 2730 int K = -1; 2731 if (Trans != NO_TRANSPOSE) { 2732 K = A.getType().getY(); 2733 } else { 2734 K = A.getType().getX(); 2735 } 2736 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_ssyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, A.getID(mRS), B.getID(mRS), beta, C.getID(mRS), 0, 0, 0, 0); 2737 } 2738 2739 /** 2740 * DSYR2K performs one of the symmetric rank 2k operations 2741 * C := alpha*A*B**T + alpha*B*A**T + beta*C or C := alpha*A**T*B + alpha*B**T*A + beta*C 2742 * 2743 * Details: http://www.netlib.org/lapack/explore-html/d1/dec/dsyr2k_8f.html 2744 * 2745 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 2746 * @param Trans The type of transpose applied to the operation. 2747 * @param alpha The scalar alpha. 2748 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 2749 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}. 2750 * @param beta The scalar beta. 2751 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64}. 2752 */ DSYR2K(@plo int Uplo, @Transpose int Trans, double alpha, Allocation A, Allocation B, double beta, Allocation C)2753 public void DSYR2K(@Uplo int Uplo, @Transpose int Trans, double alpha, Allocation A, Allocation B, double beta, Allocation C) { 2754 validateUplo(Uplo); 2755 validateSYR2K(Element.F64(mRS), Trans, A, B, C); 2756 int K = -1; 2757 if (Trans != NO_TRANSPOSE) { 2758 K = A.getType().getY(); 2759 } else { 2760 K = A.getType().getX(); 2761 } 2762 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dsyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha, A.getID(mRS), B.getID(mRS), beta, C.getID(mRS), 0, 0, 0, 0); 2763 } 2764 2765 /** 2766 * CSYR2K performs one of the symmetric rank 2k operations 2767 * C := alpha*A*B**T + alpha*B*A**T + beta*C or C := alpha*A**T*B + alpha*B**T*A + beta*C 2768 * 2769 * Details: http://www.netlib.org/lapack/explore-html/de/d7e/csyr2k_8f.html 2770 * 2771 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 2772 * @param Trans The type of transpose applied to the operation. 2773 * @param alpha The scalar alpha. 2774 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 2775 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}. 2776 * @param beta The scalar beta. 2777 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}. 2778 */ CSYR2K(@plo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Allocation B, Float2 beta, Allocation C)2779 public void CSYR2K(@Uplo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Allocation B, Float2 beta, Allocation C) { 2780 validateUplo(Uplo); 2781 validateSYR2K(Element.F32_2(mRS), Trans, A, B, C); 2782 int K = -1; 2783 if (Trans != NO_TRANSPOSE) { 2784 K = A.getType().getY(); 2785 } else { 2786 K = A.getType().getX(); 2787 } 2788 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_csyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0); 2789 } 2790 2791 /** 2792 * ZSYR2K performs one of the symmetric rank 2k operations 2793 * C := alpha*A*B**T + alpha*B*A**T + beta*C or C := alpha*A**T*B + alpha*B**T*A + beta*C 2794 * 2795 * Details: http://www.netlib.org/lapack/explore-html/df/d20/zsyr2k_8f.html 2796 * 2797 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 2798 * @param Trans The type of transpose applied to the operation. 2799 * @param alpha The scalar alpha. 2800 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2801 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}. 2802 * @param beta The scalar beta. 2803 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}. 2804 */ ZSYR2K(@plo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Allocation B, Double2 beta, Allocation C)2805 public void ZSYR2K(@Uplo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Allocation B, Double2 beta, Allocation C) { 2806 validateUplo(Uplo); 2807 validateSYR2K(Element.F64_2(mRS), Trans, A, B, C); 2808 int K = -1; 2809 if (Trans != NO_TRANSPOSE) { 2810 K = A.getType().getY(); 2811 } else { 2812 K = A.getType().getX(); 2813 } 2814 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zsyr2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), K, alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0); 2815 } 2816 validateTRMM(Element e, @Side int Side, @Transpose int TransA, Allocation A, Allocation B)2817 static void validateTRMM(Element e, @Side int Side, @Transpose int TransA, Allocation A, Allocation B) { 2818 validateSide(Side); 2819 validateTranspose(TransA); 2820 int aM = -1, aN = -1, bM = -1, bN = -1; 2821 if (!A.getType().getElement().isCompatible(e) || 2822 !B.getType().getElement().isCompatible(e)) { 2823 throw new RSRuntimeException("Called BLAS with wrong Element type"); 2824 } 2825 2826 aM = A.getType().getY(); 2827 aN = A.getType().getX(); 2828 if (aM != aN) { 2829 throw new RSRuntimeException("Called TRMM with a non-symmetric matrix A"); 2830 } 2831 2832 bM = B.getType().getY(); 2833 bN = B.getType().getX(); 2834 if (Side == LEFT) { 2835 if (aN != bM) { 2836 throw new RSRuntimeException("Called TRMM with invalid matrices"); 2837 } 2838 } else { 2839 if (bN != aM) { 2840 throw new RSRuntimeException("Called TRMM with invalid matrices"); 2841 } 2842 } 2843 } 2844 2845 /** 2846 * STRMM performs one of the matrix-matrix operations 2847 * B := alpha*op(A)*B or B := alpha*B*op(A) 2848 * op(A) is one of op(A) = A or op(A) = A**T 2849 * 2850 * Details: http://www.netlib.org/lapack/explore-html/df/d01/strmm_8f.html 2851 * 2852 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 2853 * @param Uplo Specifies whether matrix A is upper or lower triangular. 2854 * @param TransA The type of transpose applied to matrix A. 2855 * @param Diag Specifies whether or not A is unit triangular. 2856 * @param alpha The scalar alpha. 2857 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 2858 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}. 2859 */ STRMM(@ide int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, float alpha, Allocation A, Allocation B)2860 public void STRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, float alpha, Allocation A, Allocation B) { 2861 validateUplo(Uplo); 2862 validateDiag(Diag); 2863 validateTRMM(Element.F32(mRS), Side, TransA, A, B); 2864 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, 2865 alpha, A.getID(mRS), B.getID(mRS), 0.f, 0, 0, 0, 0, 0); 2866 } 2867 2868 /** 2869 * DTRMM performs one of the matrix-matrix operations 2870 * B := alpha*op(A)*B or B := alpha*B*op(A) 2871 * op(A) is one of op(A) = A or op(A) = A**T 2872 * 2873 * Details: http://www.netlib.org/lapack/explore-html/dd/d19/dtrmm_8f.html 2874 * 2875 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 2876 * @param Uplo Specifies whether matrix A is upper or lower triangular. 2877 * @param TransA The type of transpose applied to matrix A. 2878 * @param Diag Specifies whether or not A is unit triangular. 2879 * @param alpha The scalar alpha. 2880 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 2881 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}. 2882 */ DTRMM(@ide int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, double alpha, Allocation A, Allocation B)2883 public void DTRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, double alpha, Allocation A, Allocation B) { 2884 validateUplo(Uplo); 2885 validateDiag(Diag); 2886 validateTRMM(Element.F64(mRS), Side, TransA, A, B); 2887 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, 2888 alpha, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0); 2889 } 2890 2891 /** 2892 * CTRMM performs one of the matrix-matrix operations 2893 * B := alpha*op(A)*B or B := alpha*B*op(A) 2894 * op(A) is one of op(A) = A or op(A) = A**T or op(A) = A**H 2895 * 2896 * Details: http://www.netlib.org/lapack/explore-html/d4/d9b/ctrmm_8f.html 2897 * 2898 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 2899 * @param Uplo Specifies whether matrix A is upper or lower triangular. 2900 * @param TransA The type of transpose applied to matrix A. 2901 * @param Diag Specifies whether or not A is unit triangular. 2902 * @param alpha The scalar alpha. 2903 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 2904 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}. 2905 */ CTRMM(@ide int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Float2 alpha, Allocation A, Allocation B)2906 public void CTRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Float2 alpha, Allocation A, Allocation B) { 2907 validateUplo(Uplo); 2908 validateDiag(Diag); 2909 validateTRMM(Element.F32_2(mRS), Side, TransA, A, B); 2910 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, 2911 alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0, 0); 2912 } 2913 2914 /** 2915 * ZTRMM performs one of the matrix-matrix operations 2916 * B := alpha*op(A)*B or B := alpha*B*op(A) 2917 * op(A) is one of op(A) = A or op(A) = A**T or op(A) = A**H 2918 * 2919 * Details: http://www.netlib.org/lapack/explore-html/d8/de1/ztrmm_8f.html 2920 * 2921 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 2922 * @param Uplo Specifies whether matrix A is upper or lower triangular. 2923 * @param TransA The type of transpose applied to matrix A. 2924 * @param Diag Specifies whether or not A is unit triangular. 2925 * @param alpha The scalar alpha. 2926 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 2927 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}. 2928 */ ZTRMM(@ide int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Double2 alpha, Allocation A, Allocation B)2929 public void ZTRMM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Double2 alpha, Allocation A, Allocation B) { 2930 validateUplo(Uplo); 2931 validateDiag(Diag); 2932 validateTRMM(Element.F64_2(mRS), Side, TransA, A, B); 2933 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrmm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, 2934 alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0, 0); 2935 } 2936 validateTRSM(Element e, @Side int Side, @Transpose int TransA, Allocation A, Allocation B)2937 static void validateTRSM(Element e, @Side int Side, @Transpose int TransA, Allocation A, Allocation B) { 2938 int adim = -1, bM = -1, bN = -1; 2939 validateSide(Side); 2940 validateTranspose(TransA); 2941 if (!A.getType().getElement().isCompatible(e) || 2942 !B.getType().getElement().isCompatible(e)) { 2943 throw new RSRuntimeException("Called BLAS with wrong Element type"); 2944 } 2945 adim = A.getType().getX(); 2946 if (adim != A.getType().getY()) { 2947 // this may be unnecessary, the restriction could potentially be relaxed 2948 // A needs to contain at least that symmetric matrix but could theoretically be larger 2949 // for now we assume adapters are sufficient, will reevaluate in the future 2950 throw new RSRuntimeException("Called TRSM with a non-symmetric matrix A"); 2951 } 2952 bM = B.getType().getY(); 2953 bN = B.getType().getX(); 2954 if (Side == LEFT) { 2955 // A is M*M 2956 if (adim != bM) { 2957 throw new RSRuntimeException("Called TRSM with invalid matrix dimensions"); 2958 } 2959 } else { 2960 // A is N*N 2961 if (adim != bN) { 2962 throw new RSRuntimeException("Called TRSM with invalid matrix dimensions"); 2963 } 2964 } 2965 } 2966 2967 /** 2968 * STRSM solves one of the matrix equations 2969 * op(A)*X := alpha*B or X*op(A) := alpha*B 2970 * op(A) is one of op(A) = A or op(A) = A**T 2971 * 2972 * Details: http://www.netlib.org/lapack/explore-html/d2/d8b/strsm_8f.html 2973 * 2974 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 2975 * @param Uplo Specifies whether matrix A is upper or lower triangular. 2976 * @param TransA The type of transpose applied to matrix A. 2977 * @param Diag Specifies whether or not A is unit triangular. 2978 * @param alpha The scalar alpha. 2979 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32}. 2980 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32}. 2981 */ STRSM(@ide int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, float alpha, Allocation A, Allocation B)2982 public void STRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, float alpha, Allocation A, Allocation B) { 2983 validateUplo(Uplo); 2984 validateDiag(Diag); 2985 validateTRSM(Element.F32(mRS), Side, TransA, A, B); 2986 mRS.nScriptIntrinsicBLAS_Single(getID(mRS), RsBlas_strsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, 2987 alpha, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0); 2988 } 2989 2990 /** 2991 * DTRSM solves one of the matrix equations 2992 * op(A)*X := alpha*B or X*op(A) := alpha*B 2993 * op(A) is one of op(A) = A or op(A) = A**T 2994 * 2995 * Details: http://www.netlib.org/lapack/explore-html/de/da7/dtrsm_8f.html 2996 * 2997 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 2998 * @param Uplo Specifies whether matrix A is upper or lower triangular. 2999 * @param TransA The type of transpose applied to matrix A. 3000 * @param Diag Specifies whether or not A is unit triangular. 3001 * @param alpha The scalar alpha. 3002 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64}. 3003 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64}. 3004 */ DTRSM(@ide int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, double alpha, Allocation A, Allocation B)3005 public void DTRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, double alpha, Allocation A, Allocation B) { 3006 validateUplo(Uplo); 3007 validateDiag(Diag); 3008 validateTRSM(Element.F64(mRS), Side, TransA, A, B); 3009 mRS.nScriptIntrinsicBLAS_Double(getID(mRS), RsBlas_dtrsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, 3010 alpha, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0); 3011 } 3012 3013 /** 3014 * CTRSM solves one of the matrix equations 3015 * op(A)*X := alpha*B or X*op(A) := alpha*B 3016 * op(A) is one of op(A) = A or op(A) = A**T or op(A) = A**H 3017 * 3018 * Details: http://www.netlib.org/lapack/explore-html/de/d30/ctrsm_8f.html 3019 * 3020 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3021 * @param Uplo Specifies whether matrix A is upper or lower triangular. 3022 * @param TransA The type of transpose applied to matrix A. 3023 * @param Diag Specifies whether or not A is unit triangular. 3024 * @param alpha The scalar alpha. 3025 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 3026 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}. 3027 */ CTRSM(@ide int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Float2 alpha, Allocation A, Allocation B)3028 public void CTRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Float2 alpha, Allocation A, Allocation B) { 3029 validateUplo(Uplo); 3030 validateDiag(Diag); 3031 validateTRSM(Element.F32_2(mRS), Side, TransA, A, B); 3032 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_ctrsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, 3033 alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0, 0); 3034 } 3035 3036 /** 3037 * ZTRSM solves one of the matrix equations 3038 * op(A)*X := alpha*B or X*op(A) := alpha*B 3039 * op(A) is one of op(A) = A or op(A) = A**T or op(A) = A**H 3040 * 3041 * Details: http://www.netlib.org/lapack/explore-html/d1/d39/ztrsm_8f.html 3042 * 3043 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3044 * @param Uplo Specifies whether matrix A is upper or lower triangular. 3045 * @param TransA The type of transpose applied to matrix A. 3046 * @param Diag Specifies whether or not A is unit triangular. 3047 * @param alpha The scalar alpha. 3048 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 3049 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}. 3050 */ ZTRSM(@ide int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Double2 alpha, Allocation A, Allocation B)3051 public void ZTRSM(@Side int Side, @Uplo int Uplo, @Transpose int TransA, @Diag int Diag, Double2 alpha, Allocation A, Allocation B) { 3052 validateUplo(Uplo); 3053 validateDiag(Diag); 3054 validateTRSM(Element.F64_2(mRS), Side, TransA, A, B); 3055 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_ztrsm, TransA, 0, Side, Uplo, Diag, B.getType().getY(), B.getType().getX(), 0, 3056 alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), 0, 0, 0, 0, 0, 0, 0); 3057 } 3058 validateHEMM(Element e, @Side int Side, Allocation A, Allocation B, Allocation C)3059 static void validateHEMM(Element e, @Side int Side, Allocation A, Allocation B, Allocation C) { 3060 validateSide(Side); 3061 3062 if (!A.getType().getElement().isCompatible(e) || 3063 !B.getType().getElement().isCompatible(e) || 3064 !C.getType().getElement().isCompatible(e)) { 3065 throw new RSRuntimeException("Called BLAS with wrong Element type"); 3066 } 3067 3068 // A must be square; can potentially be relaxed similar to TRSM 3069 int adim = A.getType().getX(); 3070 if (adim != A.getType().getY()) { 3071 throw new RSRuntimeException("Called HEMM with non-square A"); 3072 } 3073 if ((Side == LEFT && adim != B.getType().getY()) || 3074 (Side == RIGHT && adim != B.getType().getX())) { 3075 throw new RSRuntimeException("Called HEMM with invalid B"); 3076 } 3077 if (B.getType().getX() != C.getType().getX() || 3078 B.getType().getY() != C.getType().getY()) { 3079 throw new RSRuntimeException("Called HEMM with mismatched B and C"); 3080 } 3081 } 3082 3083 /** 3084 * CHEMM performs one of the matrix-matrix operations 3085 * C := alpha*A*B + beta*C or C := alpha*B*A + beta*C 3086 * 3087 * Details: http://www.netlib.org/lapack/explore-html/d3/d66/chemm_8f.html 3088 * 3089 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3090 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 3091 * @param alpha The scalar alpha. 3092 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 3093 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}. 3094 * @param beta The scalar beta. 3095 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}. 3096 */ CHEMM(@ide int Side, @Uplo int Uplo, Float2 alpha, Allocation A, Allocation B, Float2 beta, Allocation C)3097 public void CHEMM(@Side int Side, @Uplo int Uplo, Float2 alpha, Allocation A, Allocation B, Float2 beta, Allocation C) { 3098 validateUplo(Uplo); 3099 validateHEMM(Element.F32_2(mRS), Side, A, B, C); 3100 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_chemm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, 3101 alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0); 3102 } 3103 3104 /** 3105 * ZHEMM performs one of the matrix-matrix operations 3106 * C := alpha*A*B + beta*C or C := alpha*B*A + beta*C 3107 * 3108 * Details: http://www.netlib.org/lapack/explore-html/d6/d3e/zhemm_8f.html 3109 * 3110 * @param Side Specifies whether the symmetric matrix A appears on the left or right. 3111 * @param Uplo Specifies whether the upper or lower triangular part is to be referenced. 3112 * @param alpha The scalar alpha. 3113 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 3114 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}. 3115 * @param beta The scalar beta. 3116 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}. 3117 */ ZHEMM(@ide int Side, @Uplo int Uplo, Double2 alpha, Allocation A, Allocation B, Double2 beta, Allocation C)3118 public void ZHEMM(@Side int Side, @Uplo int Uplo, Double2 alpha, Allocation A, Allocation B, Double2 beta, Allocation C) { 3119 validateUplo(Uplo); 3120 validateHEMM(Element.F64_2(mRS), Side, A, B, C); 3121 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zhemm, 0, 0, Side, Uplo, 0, C.getType().getY(), C.getType().getX(), 0, 3122 alpha.x, alpha.y, A.getID(mRS), B.getID(mRS), beta.x, beta.y, C.getID(mRS), 0, 0, 0, 0); 3123 } 3124 validateHERK(Element e, @Transpose int Trans, Allocation A, Allocation C)3125 static void validateHERK(Element e, @Transpose int Trans, Allocation A, Allocation C) { 3126 if (!A.getType().getElement().isCompatible(e) || 3127 !C.getType().getElement().isCompatible(e)) { 3128 throw new RSRuntimeException("Called BLAS with wrong Element type"); 3129 } 3130 validateConjTranspose(Trans); 3131 int cdim = C.getType().getX(); 3132 if (cdim != C.getType().getY()) { 3133 throw new RSRuntimeException("Called HERK with non-square C"); 3134 } 3135 if (Trans == NO_TRANSPOSE) { 3136 if (cdim != A.getType().getY()) { 3137 throw new RSRuntimeException("Called HERK with invalid A"); 3138 } 3139 } else { 3140 if (cdim != A.getType().getX()) { 3141 throw new RSRuntimeException("Called HERK with invalid A"); 3142 } 3143 } 3144 } 3145 3146 /** 3147 * CHERK performs one of the hermitian rank k operations 3148 * C := alpha*A*A**H + beta*C or C := alpha*A**H*A + beta*C 3149 * 3150 * Details: http://www.netlib.org/lapack/explore-html/d8/d52/cherk_8f.html 3151 * 3152 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 3153 * @param Trans The type of transpose applied to the operation. 3154 * @param alpha The scalar alpha. 3155 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 3156 * @param beta The scalar beta. 3157 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}. 3158 */ CHERK(@plo int Uplo, @Transpose int Trans, float alpha, Allocation A, float beta, Allocation C)3159 public void CHERK(@Uplo int Uplo, @Transpose int Trans, float alpha, Allocation A, float beta, Allocation C) { 3160 validateUplo(Uplo); 3161 validateHERK(Element.F32_2(mRS), Trans, A, C); 3162 int k = 0; 3163 if (Trans == CONJ_TRANSPOSE) { 3164 k = A.getType().getY(); 3165 } else { 3166 k = A.getType().getX(); 3167 } 3168 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cherk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k, 3169 alpha, 0, A.getID(mRS), 0, beta, 0, C.getID(mRS), 0, 0, 0, 0); 3170 } 3171 3172 /** 3173 * ZHERK performs one of the hermitian rank k operations 3174 * C := alpha*A*A**H + beta*C or C := alpha*A**H*A + beta*C 3175 * 3176 * Details: http://www.netlib.org/lapack/explore-html/d1/db1/zherk_8f.html 3177 * 3178 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 3179 * @param Trans The type of transpose applied to the operation. 3180 * @param alpha The scalar alpha. 3181 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 3182 * @param beta The scalar beta. 3183 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}. 3184 */ ZHERK(@plo int Uplo, @Transpose int Trans, double alpha, Allocation A, double beta, Allocation C)3185 public void ZHERK(@Uplo int Uplo, @Transpose int Trans, double alpha, Allocation A, double beta, Allocation C) { 3186 validateUplo(Uplo); 3187 validateHERK(Element.F64_2(mRS), Trans, A, C); 3188 int k = 0; 3189 if (Trans == CONJ_TRANSPOSE) { 3190 k = A.getType().getY(); 3191 } else { 3192 k = A.getType().getX(); 3193 } 3194 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zherk, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k, 3195 alpha, 0, A.getID(mRS), 0, beta, 0, C.getID(mRS), 0, 0, 0, 0); 3196 } 3197 validateHER2K(Element e, @Transpose int Trans, Allocation A, Allocation B, Allocation C)3198 static void validateHER2K(Element e, @Transpose int Trans, Allocation A, Allocation B, Allocation C) { 3199 if (!A.getType().getElement().isCompatible(e) || 3200 !B.getType().getElement().isCompatible(e) || 3201 !C.getType().getElement().isCompatible(e)) { 3202 throw new RSRuntimeException("Called BLAS with wrong Element type"); 3203 } 3204 validateConjTranspose(Trans); 3205 int cdim = C.getType().getX(); 3206 if (cdim != C.getType().getY()) { 3207 throw new RSRuntimeException("Called HER2K with non-square C"); 3208 } 3209 if (Trans == NO_TRANSPOSE) { 3210 if (A.getType().getY() != cdim) { 3211 throw new RSRuntimeException("Called HER2K with invalid matrices"); 3212 } 3213 } else { 3214 if (A.getType().getX() != cdim) { 3215 throw new RSRuntimeException("Called HER2K with invalid matrices"); 3216 } 3217 } 3218 if (A.getType().getX() != B.getType().getX() || A.getType().getY() != B.getType().getY()) { 3219 throw new RSRuntimeException("Called HER2K with invalid A and B matrices"); 3220 } 3221 } 3222 3223 /** 3224 * CHER2K performs one of the hermitian rank 2k operations 3225 * C := alpha*A*B**H + conjg( alpha )*B*A**H + beta*C or C := alpha*A**H*B + conjg( alpha )*B**H*A + beta*C 3226 * 3227 * Details: http://www.netlib.org/lapack/explore-html/d1/d82/cher2k_8f.html 3228 * 3229 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 3230 * @param Trans The type of transpose applied to the operation. 3231 * @param alpha The scalar alpha. 3232 * @param A The input allocation contains matrix A, supported elements type {@link Element#F32_2}. 3233 * @param B The input allocation contains matrix B, supported elements type {@link Element#F32_2}. 3234 * @param beta The scalar beta. 3235 * @param C The input allocation contains matrix C, supported elements type {@link Element#F32_2}. 3236 */ CHER2K(@plo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Allocation B, float beta, Allocation C)3237 public void CHER2K(@Uplo int Uplo, @Transpose int Trans, Float2 alpha, Allocation A, Allocation B, float beta, Allocation C) { 3238 validateUplo(Uplo); 3239 validateHER2K(Element.F32_2(mRS), Trans, A, B, C); 3240 int k = 0; 3241 if (Trans == NO_TRANSPOSE) { 3242 k = A.getType().getX(); 3243 } else { 3244 k = A.getType().getY(); 3245 } 3246 mRS.nScriptIntrinsicBLAS_Complex(getID(mRS), RsBlas_cher2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k, alpha.x, alpha.y, 3247 A.getID(mRS), B.getID(mRS), beta, 0, C.getID(mRS), 0, 0, 0, 0); 3248 } 3249 3250 /** 3251 * ZHER2K performs one of the hermitian rank 2k operations 3252 * C := alpha*A*B**H + conjg( alpha )*B*A**H + beta*C or C := alpha*A**H*B + conjg( alpha )*B**H*A + beta*C 3253 * 3254 * Details: http://www.netlib.org/lapack/explore-html/d7/dfa/zher2k_8f.html 3255 * 3256 * @param Uplo Specifies whether the upper or lower triangular part of C is to be referenced. 3257 * @param Trans The type of transpose applied to the operation. 3258 * @param alpha The scalar alpha. 3259 * @param A The input allocation contains matrix A, supported elements type {@link Element#F64_2}. 3260 * @param B The input allocation contains matrix B, supported elements type {@link Element#F64_2}. 3261 * @param beta The scalar beta. 3262 * @param C The input allocation contains matrix C, supported elements type {@link Element#F64_2}. 3263 */ ZHER2K(@plo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Allocation B, double beta, Allocation C)3264 public void ZHER2K(@Uplo int Uplo, @Transpose int Trans, Double2 alpha, Allocation A, Allocation B, double beta, Allocation C) { 3265 validateUplo(Uplo); 3266 validateHER2K(Element.F64_2(mRS), Trans, A, B, C); 3267 int k = 0; 3268 if (Trans == NO_TRANSPOSE) { 3269 k = A.getType().getX(); 3270 } else { 3271 k = A.getType().getY(); 3272 } 3273 mRS.nScriptIntrinsicBLAS_Z(getID(mRS), RsBlas_zher2k, Trans, 0, 0, Uplo, 0, 0, C.getType().getX(), k, alpha.x, alpha.y, 3274 A.getID(mRS), B.getID(mRS), beta, 0, C.getID(mRS), 0, 0, 0, 0); 3275 } 3276 3277 3278 /** 3279 * 8-bit GEMM-like operation for neural networks: C = A * Transpose(B) 3280 * Calculations are done in 1.10.21 fixed-point format for the final output, 3281 * just before there's a shift down to drop the fractional parts. The output 3282 * values are gated to 0 to 255 to fit in a byte, but the 10-bit format 3283 * gives some headroom to avoid wrapping around on small overflows. 3284 * 3285 * @param A The input allocation contains matrix A, supported elements type {@link Element#U8}. 3286 * @param a_offset The offset for all values in matrix A, e.g A[i,j] = A[i,j] - a_offset. Value should be from 0 to 255. 3287 * @param B The input allocation contains matrix B, supported elements type {@link Element#U8}. 3288 * @param b_offset The offset for all values in matrix B, e.g B[i,j] = B[i,j] - b_offset. Value should be from 0 to 255. 3289 * @param C The input allocation contains matrix C, supported elements type {@link Element#U8}. 3290 * @param c_offset The offset for all values in matrix C. 3291 * @param c_mult The multiplier for all values in matrix C, e.g C[i,j] = (C[i,j] + c_offset) * c_mult. 3292 **/ BNNM(Allocation A, int a_offset, Allocation B, int b_offset, Allocation C, int c_offset, int c_mult)3293 public void BNNM(Allocation A, int a_offset, Allocation B, int b_offset, Allocation C, int c_offset, int c_mult) { 3294 validateL3(Element.U8(mRS), NO_TRANSPOSE, TRANSPOSE, 0, A, B, C); 3295 3296 if (a_offset < 0 || a_offset > 255) { 3297 throw new RSRuntimeException("Invalid a_offset passed to BNNM"); 3298 } 3299 if (b_offset < 0 || b_offset > 255) { 3300 throw new RSRuntimeException("Invalid b_offset passed to BNNM"); 3301 } 3302 int M = -1, N = -1, K = -1; 3303 M = A.getType().getY(); 3304 N = B.getType().getY(); 3305 K = A.getType().getX(); 3306 3307 3308 mRS.nScriptIntrinsicBLAS_BNNM(getID(mRS), M, N, K, A.getID(mRS), a_offset, B.getID(mRS), b_offset, C.getID(mRS), c_offset, c_mult); 3309 3310 } 3311 3312 } 3313