Changes
Page history
Update BLAS
authored
Mar 23, 2023
by
Alfredo Correa
Show whitespace changes
Inline
Side-by-side
BLAS.md
View page @
7b58ee5f
...
...
@@ -11,7 +11,7 @@ All these operations are now supported for CPU and GPU memory, real and complex.
| DOT |
`blas::dot(x, y, res)`
| $
`r = \sum_i x_i y_i`
$ |
`res = (x, y);`
|
`res = blas::dot(x, y)`
|
`inner_product(begin(x), end(x), begin(y), T{});`
|
| |
`blas::dot(blas::C(x), y, res)`
| $
`r = \sum_i \bar x_i y_i`
$ |
`res = (*x, y);`
|
`res = blas::dot(blas::C(x), y)`
|
`inner_product(begin(x), end(x), begin(y), T{}, plus<>{}, [](T const& t1, T const& t2) {return conj(t1)*t2;});`
|
| |
`blas::dot(x, blas::C(y), res)`
| $
`r = \sum_i x_i \bar y_i`
$ |
`res = (x, *y);`
|
`res = blas::dot(x, blas::C(y));`
|
`inner_product(x.begin(), x.end(), y.begin(), T{}, plus<>{}, [](T const& t1, T const& t2) {return t1*conj(t2);});`
|
| | ~~
`blas::dot(
x
, blas::C(y), res)`
~~ | $
`r = \sum_i \bar x_i \bar y_i`
$ not implemented in BLAS, conjugate result | | |
`auto res = conj(inner_product(x.begin(), x.end(), y.begin(), T{});`
|
| | ~~
`blas::dot(
blas::C(x)
, blas::C(y), res)`
~~ | $
`r = \sum_i \bar x_i \bar y_i`
$ (not implemented in BLAS; compute the unconjugated dot product and conjugate the result) | | |
`auto res = conj(inner_product(x.begin(), x.end(), y.begin(), T{}));`
|
| GEMV |
`blas::gemv(aa, A, x, bb, y)`
| $
`y_i \leftarrow \alpha\sum_j A_{ij}x_j + \beta y_i`
$ |
`y=A%x`
`y=aa*A%x`
`y+=A%x`
`y+=aa*A%x`
|
`y=blas::gemv(aa, A, x)`
`y+=blas::gemv(aa, A, x)`
|
`transform(begin(A), end(A), begin(y), [&x, aa] (auto const& Ac) {return aa*blas::dot(Ac, x);})`
|
| |
`blas::gemv(aa, blas::T(A), x, bb, y)`
| $
`y_i \leftarrow \alpha\sum_j A_{ji}x_j + \beta y_i`
$ |
`y= ~A % x`
`y=aa*(~A)%x`
`y+=(~A)%x`
`y+=aa*(~A)%x`
|
`y=blas::gemv(aa, blas::T(A), x)`
`y+=blas::gemv(aa, blas::T(A), x)`
|
`transform(begin(transposed(A)), end(transposed(A)), begin(y), [&x, aa] (auto const& Ac) {return aa*blas::dot(Ac, x);})`
|
| |
`blas::gemv(aa, blas::J(A), x, bb, y)`
| $
`y_i \leftarrow \alpha\sum_j A_{ij}^*x_j + \beta y_i`
$ |
`y= *A % x`
`y=aa*(*A)%x`
`y+=(*A)%x`
`y+=aa*(*A)%x`
|
`y=blas::gemv(aa, blas::J(A), x)`
`y+=blas::gemv(aa, blas::J(A), x)`
|
`transform(begin(A), end(A), begin(y), [&x, aa] (auto const& Ac) {return aa*blas::dot(*Ac, x);})`
|
...
...
@@ -21,7 +21,7 @@ All these operations are now supported for CPU and GPU memory, real and complex.
| |
`blas::gemm(aa, blas::T(A), B, bb, C)`
| $
`C_{ij} \leftarrow \alpha \sum_k A_{ki} B_{kj} + \beta C_{ij}`
$ |
`C =~A * B`
`C = aa*(~A * B)`
`C+=~A * B`
`C+=aa*(~A * B)`
|
`C = blas::gemm(aa, blas::T(A), B, bb, C)`
(or
`+=`
) |
`transform(begin(transposed(A)), end(transposed(A)), begin(C), begin(C), [&B, aa, bb] (auto const& Ar, auto&& Cr) {return blas::gemv(aa, blas::T(B), Ar, bb, std::move(Cr));})`
|
| |
`blas::gemm(aa, blas::T(A), blas::T(B), bb, C)`
| $
`C_{ij} \leftarrow \alpha \sum_k A_{ki} B_{jk} + \beta C_{ij}`
$ |
`C =~A * ~B`
`C = aa*(~A * ~B)`
`C+=~A * ~B`
`C+=aa*(~A * ~B)`
|
`C = blas::gemm(aa, blas::T(A), blas::T(B), bb, C)`
(or
`+=`
) |
`transform(begin(transposed(A)), end(transposed(A)), begin(C), begin(C), [&B, aa, bb] (auto const& Ar, auto&& Cr) {return blas::gemv(aa, B, Ar, bb, std::move(Cr));})`
|
| |
<s>
`blas::gemm(aa, A, blas::J(B), bb, C)`
</s>
(use
`blas::gemm(..., blas::T(B), blas::H(A), ..., HC)`
and conjugate-transpose the result) | $
`C_{ij} \leftarrow \alpha \sum_k A_{ik} B_{kj}^* + \beta C_{ij}`
$ (not BLAS-implemented) | | |
`transform(begin(A), end(A), begin(C), begin(C), [BT=transposed(B), aa, bb](auto const& Ar, auto&& Cr) {transform(begin(BT), end(BT), begin(Cr), begin(Cr), [&Ar, aa, bb](auto const& Bc, auto&& c) {return aa*blas::dot(Ar, blas::C(Bc)) + bb*c;}); return std::move(Cr);});`
|
| |
<s>
`blas::gemm(aa, blas::J(A), B, bb, C)`
</s>
| $
`C_{ij} \leftarrow \alpha \sum_k A_{ik}^* B_{kj} + \beta C_{ij}`
$ (not BLAS-implemented) | | |
`transform(begin(A), end(A), begin(C), begin(C), [BT=transposed(B)](auto const& Ar, auto&& Cr) {transform(begin(BT), end(BT), begin(Cr), begin(Cr), [&Ar](auto const& Bc, auto&& c) {return aa*blas::dot(blas::C(Ar), Bc) + bb*c;}); return std::move(Cr);});`
|
| |
~~
`blas::gemm(aa, blas::J(A), B, bb, C)`
~~
| $
`C_{ij} \leftarrow \alpha \sum_k A_{ik}^* B_{kj} + \beta C_{ij}`
$ (not BLAS-implemented) | | |
`transform(begin(A), end(A), begin(C), begin(C), [BT=transposed(B), aa, bb](auto const& Ar, auto&& Cr) {transform(begin(BT), end(BT), begin(Cr), begin(Cr), [&Ar, aa, bb](auto const& Bc, auto&& c) {return aa*blas::dot(blas::C(Ar), Bc) + bb*c;}); return std::move(Cr);});`
|
| |
<s>
`blas::gemm(aa, blas::J(A), blas::J(B), bb, C)`
</s>
| $
`C_{ij} \leftarrow \alpha \sum_k \bar{A_{ik}} \bar{B_{kj}} + \beta C_{ij}`
$ (not BLAS-implemented) | | |
`transform(begin(A), end(A), begin(C), begin(C), [BT=transposed(B), aa, bb](auto const& Ar, auto&& Cr) {transform(begin(BT), end(BT), begin(Cr), begin(Cr), [&Ar, aa, bb](auto const& Bc, auto&& c) {return aa*blas::dot(blas::C(Ar), blas::C(Bc)) + bb*c;}); return std::move(Cr);});`
|
| |
`blas::gemm(aa, A, blas::H(B), bb, C)`
| $
`C_{ij} \leftarrow \alpha \sum_k A_{ik} \bar B_{jk} + \beta C_{ij}`
$ |
`C = aa*(A* ~*B)`
(or
`+=`
) |
`C = blas::gemm(aa, A, blas::H(B))`
`C += blas::gemm(aa, A, blas::H(B))`
|
`transform(begin(A), end(A), begin(CC), begin(CC), [&](auto const& Ar, auto&& Cr){return blas::gemv(aa, blas::J(B), Ar, bb, move(Cr));})`
|
| |
`blas::gemm(aa, blas::H(A), B, bb, C)`
| $
`C_{ij} \leftarrow \alpha \sum_k \bar A_{ki} B_{kj} + \beta C_{ij}`
$ |
`CC=~*A *B`
|
`C=blas::gemm(aa, blas::H(A), B)`
|
`transform(begin(transposed(A)), end(transposed(A)), begin(CC), begin(CC), [BT=transposed(B), aa, bb](auto const& Ac, auto&& Cr) {transform(begin(BT), end(BT), begin(Cr), begin(Cr), [&Ac, aa, bb](auto const& Bc, auto&& c){return aa*blas::dot(blas::C(Ac), Bc) + bb*c;}); return move(Cr);})`
|