Changes
Page history
Update BLAS
authored
Mar 22, 2023
by
Alfredo Correa
Show whitespace changes
Inline
Side-by-side
BLAS.md
View page @
471de97a
...
...
@@ -4,8 +4,9 @@ All these operations are now supported for CPU and GPU memory, real and complex.
|--- |--- | --- | --- | --- | --- |
| SWAP |
`blas::swap(x, y)`
| $
`x_i \leftrightarrow y_i`
$ |
`(x^y)`
| |
`swap_ranges(begin(x), end(x), begin(y))`
|
| COPY |
`blas::copy(x, y)`
| $
`y_i \leftarrow x_i`
$ |
`y << x`
|
`y = blas::copy(x)`
|
`copy(begin(x), end(x), begin(y))`
|
| ASUM |
`blas::asum(x, res)`
| $
`r \leftarrow \sum_i \|\Re x_i\| + \|\Im x_i\|`
$ |
`x == 0`
or
`x != 0`
|
`res = blas::asum(x)`
|
`transform_reduce(begin(x), end(x), 0.0, plus<>{}, [](auto const& e){return abs(e.real()) + abs(e.imag());})`
|
| NRM2 |
`blas::nrm2(x, res)`
| $
`r \leftarrow \sqrt{\sum_i \|x_i\|^2}`
$ |
`abs(x)`
|
`res = blas::nrm2(x);`
|
`sqrt(transform_reduce(begin(x), end(x), 0.0, plus<>{}, [](auto const& e){return norm(e);}));`
|
| ASUM |
`blas::asum(x, res)`
| $
`r \leftarrow \sum_i \|\Re x_i\| + \|\Im x_i\|`
$ |
`x==0`
/
`x!=0`
/
`isinf(x)`
/
`isnan(x)`
|
`res = blas::asum(x)`
|
`transform_reduce(begin(x), end(x), 0.0, plus<>{}, [](auto const& e){return abs(e.real()) + abs(e.imag());})`
|
| NRM2 |
`blas::nrm2(x, res)`
| $
`r \leftarrow \sqrt{\sum_i \|x_i\|^2}`
$ |
`abs(x)`
|
`res = blas::nrm2(x);`
|
`sqrt(transform_reduce(begin(x), end(x), 0.0, plus<>{}, [](auto const& e){return norm(e);}));`
|
| SCAL |
`blas::scal(aa, x);`
| $
`x_i \leftarrow \alpha x_i`
$ |
`x*=aa;`
| |
`for_each(begin(x), end(x), [aa](auto& e){return e*=aa;})`
|
| AXPY |
`blas::axpy(aa, x, y)`
| $
`y_i \leftarrow \alpha x_i + y_i`
$ |
`y+=x`
`y-=x`
`y+=aa*x`
`y-=aa*x`
| |
`transform(x.begin(), x.end(), y.begin(), y.begin(), [aa](auto ex, auto ey) {return aa*ex + ey;})`
|
| DOT |
`blas::dot(x, y, res)`
| $
`r \leftarrow \sum_i x_i y_i`
$ |
`res = (x, y);`
|
`res = blas::dot(x, y)`
|
`inner_product(begin(x), end(x), begin(y), T{});`
|
...
...
@@ -20,4 +21,4 @@ All these operations are now supported for CPU and GPU memory, real and complex.
| |
`blas::gemm(aa, A, blas::T(B), bb, C)`
| $
`C_{ij} \leftarrow \alpha \sum_k A_{ik} B_{jk} + \beta C_{ij}`
$ |
`C = aa*(A* ~B)`
|
`C = blas::gemm(aa, A, blas::T(B))`
`C += blas::gemm(aa, A, blas::T(B))`
|
`transform(begin(A), end(A), begin(C), begin(C), [&B, aa, bb] (auto const& Ar, auto&& Cr) {return blas::gemv(aa, B, Ar, bb, move(Cr));})`
|
| |
`blas::gemm(aa, blas::T(A), B, bb, C)`
| $
`C_{ij} \leftarrow \alpha \sum_k A_{ki} B_{kj} + \beta C_{ij}`
$ |
`C =~A * B`
`C = aa*(~A * B)`
`C+=~A * B`
`C+=aa*(~A * B)`
|
`C = blas::gemm(aa, blas::T(A), B, bb, C)`
(or
`+=`
) |
`transform(begin(transposed(A)), end(transposed(A)), begin(C), begin(C), [&B, aa, bb] (auto const& Ar, auto&& Cr) {return blas::gemv(aa, blas::T(B), Ar, bb, std::move(Cr));})`
|
| |
`blas::gemm(aa, blas::T(A), blas::T(B), bb, C)`
| $
`C_{ij} \leftarrow \alpha \sum_k A_{ki} B_{jk} + \beta C_{ij}`
$ |
`C =~A * ~B`
`C = aa*(~A * ~B)`
`C+=~A * ~B`
`C+=aa*(~A * ~B)`
|
`C = blas::gemm(aa, blas::T(A), blas::T(B), bb, C)`
(or
`+=`
) |
`transform(begin(transposed(A)), end(transposed(A)), begin(C), begin(C), [&B, aa, bb] (auto const& Ar, auto&& Cr) {return blas::gemv(aa, B, Ar, bb, std::move(Cr));})`
|
| |
<s>
`blas::gemm(aa, A, blas::J(B), bb, C)`
</s>
(use
`blas::gemm(..., blas::T(B), blas::H(A), ..., C_copy)`
and conjtranspose result) | $
`C_{ij} \leftarrow \alpha \sum_k A_{ik} B_{kj}^* + \beta C_{ij}`
$ (not BLAS-implemented) | | |
`transform(begin(A), end(A), begin(C), begin(C), [BT=transposed(B), aa, bb](auto const& Ar, auto&& Cr) {transform(begin(BT), end(BT), begin(Cr), begin(Cr), [&Ar, aa, bb](auto const& BCr, auto&& Ce) {return aa*blas::dot(Ar, blas::C(BCr)) + bb*Ce;}); return std::move(Cr);});`
|
| |
<s>
`blas::gemm(aa, A, blas::J(B), bb, C)`
</s>
(use
`blas::gemm(..., blas::T(B), blas::H(A), ..., HC)`
and conjtranspose result) | $
`C_{ij} \leftarrow \alpha \sum_k A_{ik} B_{kj}^* + \beta C_{ij}`
$ (not BLAS-implemented) | | |
`transform(begin(A), end(A), begin(C), begin(C), [BT=transposed(B), aa, bb](auto const& Ar, auto&& Cr) {transform(begin(BT), end(BT), begin(Cr), begin(Cr), [&Ar, aa, bb](auto const& BCr, auto&& Ce) {return aa*blas::dot(Ar, blas::C(BCr)) + bb*Ce;}); return std::move(Cr);});`
|