/**
 * @brief Accumulates ALPHA * A * B into C (neither operand transposed).
 * @details For every i < M, k < K and j < N, adds
 * ALPHA * A[i * lda + k] * B[k * ldb + j] to C[i * ldc + j].
 * C is only accumulated into; it is not cleared or scaled here.
 * @param [in] M Bound of the i loop (first index of A and C).
 * @param [in] N Bound of the j loop (second index of B and C).
 * @param [in] K Bound of the k loop (the summed dimension).
 * @param [in] ALPHA Scalar multiplied into every element of A that is read.
 * @param [in] A Read at A[i * lda + k].
 * @param [in] lda Stride applied to the first index of A.
 * @param [in] B Read at B[k * ldb + j].
 * @param [in] ldb Stride applied to the first index of B.
 * @param [in,out] C Updated in place at C[i * ldc + j].
 * @param [in] ldc Stride applied to the first index of C.
 */
27 gemm_nn(
const int M,
const int N,
const int K,
const double ALPHA,
28 const double *A,
const int lda,
const double *B,
const int ldb,
29 double *C,
const int ldc)
31 for (
int i = 0; i < M; ++i) {
32 for (
int k = 0; k < K; ++k) {
/* ALPHA * A[i * lda + k] does not depend on j: computed once per (i, k). */
33 const double A_PART = ALPHA * A[i * lda + k];
34 for (
int j = 0; j < N; ++j) {
35 C[i * ldc + j] += A_PART * B[k * ldb + j];
/**
 * @brief Accumulates ALPHA * A * B^T into C (B is read transposed).
 * @details For every i < M and j < N, sums
 * ALPHA * A[i * lda + k] * B[j * ldb + k] over k < K and adds the result
 * to C[i * ldc + j].
 * @param [in] M Bound of the i loop (first index of A and C).
 * @param [in] N Bound of the j loop (first index of B, second index of C).
 * @param [in] K Bound of the k loop (the summed dimension).
 * @param [in] ALPHA Scalar multiplied into every product term.
 * @param [in] A Read at A[i * lda + k].
 * @param [in] lda Stride applied to the first index of A.
 * @param [in] B Read at B[j * ldb + k].
 * @param [in] ldb Stride applied to the first index of B.
 * @param [in,out] C Updated in place at C[i * ldc + j].
 * @param [in] ldc Stride applied to the first index of C.
 */
42 gemm_nt(
const int M,
const int N,
const int K,
const double ALPHA,
43 const double *A,
const int lda,
const double *B,
const int ldb,
44 double *C,
const int ldc)
46 for (
int i = 0; i < M; ++i) {
47 for (
int j = 0; j < N; ++j) {
/* NOTE(review): the declaration of `sum` is not visible in this listing;
 * presumably it is reset to zero for each (i, j) pair -- confirm. */
49 for (
int k = 0; k < K; ++k) {
50 sum += ALPHA * A[i * lda + k] * B[j * ldb + k];
52 C[i * ldc + j] += sum;
/**
 * @brief Accumulates ALPHA * A^T * B into C (A is read transposed).
 * @details For every i < M, k < K and j < N, adds
 * ALPHA * A[k * lda + i] * B[k * ldb + j] to C[i * ldc + j].
 * C is only accumulated into; it is not cleared or scaled here.
 * @param [in] M Bound of the i loop (second index of A, first index of C).
 * @param [in] N Bound of the j loop (second index of B and C).
 * @param [in] K Bound of the k loop (the summed dimension).
 * @param [in] ALPHA Scalar multiplied into every element of A that is read.
 * @param [in] A Read at A[k * lda + i].
 * @param [in] lda Stride applied to the first index of A.
 * @param [in] B Read at B[k * ldb + j].
 * @param [in] ldb Stride applied to the first index of B.
 * @param [in,out] C Updated in place at C[i * ldc + j].
 * @param [in] ldc Stride applied to the first index of C.
 */
58 gemm_tn(
const int M,
const int N,
const int K,
const double ALPHA,
59 const double *A,
const int lda,
const double *B,
const int ldb,
60 double *C,
const int ldc)
62 for (
int i = 0; i < M; ++i) {
63 for (
int k = 0; k < K; ++k) {
/* ALPHA * A[k * lda + i] does not depend on j: computed once per (i, k). */
64 const double A_PART = ALPHA * A[k * lda + i];
65 for (
int j = 0; j < N; ++j) {
66 C[i * ldc + j] += A_PART * B[k * ldb + j];
/**
 * @brief Accumulates ALPHA * A^T * B^T into C (both operands read transposed).
 * @details For every i < M and j < N, sums
 * ALPHA * A[i + k * lda] * B[k + j * ldb] over k < K and adds the result
 * to C[i * ldc + j].
 * @param [in] M Bound of the i loop (second index of A, first index of C).
 * @param [in] N Bound of the j loop (first index of B, second index of C).
 * @param [in] K Bound of the k loop (the summed dimension).
 * @param [in] ALPHA Scalar multiplied into every product term.
 * @param [in] A Read at A[i + k * lda].
 * @param [in] lda Stride applied to the k index of A.
 * @param [in] B Read at B[k + j * ldb].
 * @param [in] ldb Stride applied to the j index of B.
 * @param [in,out] C Updated in place at C[i * ldc + j].
 * @param [in] ldc Stride applied to the first index of C.
 */
73 gemm_tt(
const int M,
const int N,
const int K,
const double ALPHA,
74 const double *A,
const int lda,
const double *B,
const int ldb,
75 double *C,
const int ldc)
77 for (
int i = 0; i < M; ++i) {
78 for (
int j = 0; j < N; ++j) {
/* NOTE(review): the declaration of `sum` is not visible in this listing;
 * presumably it is reset to zero for each (i, j) pair -- confirm. */
80 for (
int k = 0; k < K; ++k) {
81 sum += ALPHA * A[i + k * lda] * B[k + j * ldb];
83 C[i * ldc + j] += sum;
/**
 * @brief Performs the matrix-matrix multiplication
 * C = ALPHA * op(A) * op(B) + BETA * C.
 * @details First multiplies every C[i * ldc + j] (i < M, j < N) by BETA, then
 * hands off to one of the gemm_nn / gemm_tn / gemm_nt / gemm_tt kernels, each
 * of which accumulates its ALPHA-scaled product into C.
 * @param [in] TA Non-zero requests the transposed-A indexing.
 * @param [in] TB Non-zero requests the transposed-B indexing.
 * @param [in] M Bound of the first index of C.
 * @param [in] N Bound of the second index of C.
 * @param [in] K Length of the summed dimension, forwarded to the kernel.
 * @param [in] ALPHA Scalar applied to the product, forwarded to the kernel.
 * @param [in] A First operand, forwarded to the kernel.
 * @param [in] lda Stride of A, forwarded to the kernel.
 * @param [in] B Second operand, forwarded to the kernel.
 * @param [in] ldb Stride of B, forwarded to the kernel.
 * @param [in] BETA Scalar applied to the existing contents of C.
 * @param [in,out] C Scaled by BETA, then accumulated into by the kernel.
 * @param [in] ldc Stride applied to the first index of C.
 */
108 blas_gemm(
const int TA,
const int TB,
const int M,
const int N,
const int K,
109 const double ALPHA,
const double *A,
const int lda,
const double *B,
110 const int ldb,
const double BETA,
double *C,
const int ldc)
/* Scale the existing contents of C before any product is accumulated. */
112 for (
int i = 0; i < M; ++i) {
113 for (
int j = 0; j < N; ++j) {
114 C[i * ldc + j] *= BETA;
/* NOTE(review): the condition guarding this first branch is not visible in
 * this listing; presumably (!TA && !TB) -- confirm. */
118 gemm_nn(M, N, K, ALPHA, A, lda, B, ldb, C, ldc);
119 }
else if (TA && !TB) {
120 gemm_tn(M, N, K, ALPHA, A, lda, B, ldb, C, ldc);
121 }
else if (!TA && TB) {
122 gemm_nt(M, N, K, ALPHA, A, lda, B, ldb, C, ldc);
/* NOTE(review): the `else` introducing this call is not visible in this
 * listing; presumably it is the remaining (TA && TB) case -- confirm. */
124 gemm_tt(M, N, K, ALPHA, A, lda, B, ldb, C, ldc);
/**
 * @brief Multiplies vector X by the scalar ALPHA and adds it to the vector Y.
 * @details Two loops are visible: one adds ALPHA * X[i * INCX] to
 * Y[i * INCY], the other adds X[i * INCX] without the multiplication.
 * @param [in] N Number of elements processed.
 * @param [in] ALPHA Scalar applied to the elements of X.
 * @param [in] X Read at X[i * INCX].
 * @param [in] INCX Index step between consecutive elements of X.
 * @param [in,out] Y Updated in place at Y[i * INCY].
 * @param [in] INCY Index step between consecutive elements of Y.
 */
138 blas_axpy(
const int N,
const double ALPHA,
const double *X,
const int INCX,
139 double *Y,
const int INCY)
/* NOTE(review): the condition selecting between the two loops is not visible
 * in this listing; presumably the second loop handles ALPHA == 1 -- confirm. */
142 for (
int i = 0; i < N; ++i) {
143 Y[i * INCY] += ALPHA * X[i * INCX];
146 for (
int i = 0; i < N; ++i) {
147 Y[i * INCY] += X[i * INCX];
/**
 * @brief Scales vector X by the scalar ALPHA and overwrites it with the result.
 * @param [in] N Number of elements processed.
 * @param [in] ALPHA Scalar each element is multiplied by.
 * @param [in,out] X Updated in place at X[i * INCX].
 * @param [in] INCX Index step between consecutive elements of X.
 */
160 blas_scal(
const int N,
const double ALPHA,
double *X,
const int INCX)
163 for (
int i = 0; i < N; ++i) {
164 X[i * INCX] *= ALPHA;
/* NOTE(review): neither the condition selecting this second loop nor its body
 * is visible in this listing; presumably a special case for a particular
 * ALPHA value -- confirm against the full source. */
167 for (
int i = 0; i < N; ++i) {
/**
 * @brief Fills the vector X with the value ALPHA.
 * @param [in] N Number of loop iterations.
 * @param [in] ALPHA Value to fill with.
 * @param [out] X Vector to be filled.
 * @param [in] INCX Presumably the index step between consecutive elements of
 * X, as in the sibling routines -- confirm.
 */
181 blas_fill(
const int N,
const double ALPHA,
double *X,
const int INCX)
/* NOTE(review): the loop body is not visible in this listing. */
183 for (
int i = 0; i < N; ++i) {
/**
 * @brief Computes the dot product of two vectors.
 * @details Adds X[i * INCX] * Y[i * INCY] to `dot` for every i < N.
 * @param [in] N Number of elements processed.
 * @param [in] X Read at X[i * INCX].
 * @param [in] INCX Index step between consecutive elements of X.
 * @param [in] Y Read at Y[i * INCY].
 */
198 blas_dot(
const int N,
const double *X,
const int INCX,
const double *Y,
/* NOTE(review): the INCY parameter, the declaration of `dot` and the return
 * statement are not visible in this listing. */
202 for (
int i = 0; i < N; ++i) {
203 dot += X[i * INCX] * Y[i * INCY];
/**
 * @brief Multiplies vector X by the vector Y and stores the result in vector Y.
 * @details Multiplies Y[i * INCY] by X[i * INCX] in place for every i < N.
 * @param [in] N Number of elements processed.
 * @param [in] X Read at X[i * INCX].
 * @param [in] INCX Index step between consecutive elements of X.
 * @param [in,out] Y Updated in place at Y[i * INCY].
 */
217 blas_mul(
const int N,
const double *X,
const int INCX,
double *Y,
/* NOTE(review): the INCY parameter line is not visible in this listing. */
220 for (
int i = 0; i < N; ++i) {
221 Y[i * INCY] *= X[i * INCX];
235 for (
int i = 0; i < N; ++i) {
static void gemm_nn(const int M, const int N, const int K, const double ALPHA, const double *A, const int lda, const double *B, const int ldb, double *C, const int ldc)
static void gemm_tn(const int M, const int N, const int K, const double ALPHA, const double *A, const int lda, const double *B, const int ldb, double *C, const int ldc)
void blas_fill(const int N, const double ALPHA, double *X, const int INCX)
Fills the vector X with the value ALPHA.
void blas_scal(const int N, const double ALPHA, double *X, const int INCX)
Scales vector X by the scalar ALPHA and overwrites it with the result.
double blas_sum(const double *X, const int N)
Returns the sum of the vector X.
double blas_dot(const int N, const double *X, const int INCX, const double *Y, const int INCY)
Computes the dot product of two vectors.
static void gemm_nt(const int M, const int N, const int K, const double ALPHA, const double *A, const int lda, const double *B, const int ldb, double *C, const int ldc)
static void gemm_tt(const int M, const int N, const int K, const double ALPHA, const double *A, const int lda, const double *B, const int ldb, double *C, const int ldc)
void blas_axpy(const int N, const double ALPHA, const double *X, const int INCX, double *Y, const int INCY)
Multiplies vector X by the scalar ALPHA and adds it to the vector Y.
void blas_mul(const int N, const double *X, const int INCX, double *Y, const int INCY)
Multiplies vector X by the vector Y and stores the result in vector Y.
void blas_gemm(const int TA, const int TB, const int M, const int N, const int K, const double ALPHA, const double *A, const int lda, const double *B, const int ldb, const double BETA, double *C, const int ldc)
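Performs the matrix-matrix multiplication: $C = \alpha \, \mathrm{op}(A) \, \mathrm{op}(B) + \beta C$, where $\mathrm{op}(A) = A^{T}$ if TA is non-zero and $A$ otherwise (likewise for $B$ and TB).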
Basic linear algebra functions.