Skip to content
GitLab
Menu
Why GitLab
Pricing
Contact Sales
Explore
Why GitLab
Pricing
Contact Sales
Explore
Sign in
Get free trial
Changes
Page history
Update BLAS
authored
Mar 22, 2023
by
Alfredo Correa
Show whitespace changes
Inline
Side-by-side
BLAS.md
View page @
471de97a
...
...
@@ -4,8 +4,9 @@ All these operations are now supported for CPU and GPU memory, real and complex.
|--- |--- | --- | --- | --- | --- |
| SWAP |
`blas::swap(x, y)`
| $
`x_i \leftrightarrow y_i`
$ |
`(x^y)`
| |
`swap_ranges(begin(x), end(x), begin(y))`
|
| COPY |
`blas::copy(x, y)`
| $
`y_i \leftarrow x_i`
$ |
`y << x`
|
`y = blas::copy(x)`
|
`copy(begin(x), end(x), begin(y))`
|
| ASUM |
`blas::asum(x, res)`
| $
`r \leftarrow \sum_i \|\Re x_i\| + \|\Im x_i\|`
$ |
`x == 0`
or
`x != 0`
|
`res = blas::asum(x)`
|
`transform_reduce(begin(x), end(x), 0.0, plus<>{}, [](auto const& e){return abs(e.real()) + abs(e.imag());})`
|
| NRM2 |
`blas::nrm2(x, res)`
| $
`r \leftarrow \sqrt{\sum_i \|x_i\|^2}`
$ |
`abs(x)`
|
`res = blas::nrm2(x);`
|
`sqrt(transform_reduce(begin(x), end(x), 0.0, plus<>{}, [](auto const& e){return norm(e);}));`
|
| ASUM |
`blas::asum(x, res)`
| $
`r \leftarrow \sum_i \|\Re x_i\| + \|\Im x_i\|`
$ |
`x==0`
/
`x!=0`
/
`isinf(x)`
/
`isnan(x)`
|
`res = blas::asum(x)`
|
`transform_reduce(begin(x), end(x), 0.0, plus<>{}, [](auto const& e){return abs(e.real()) + abs(e.imag());})`
|
| NRM2 |
`blas::nrm2(x, res)`
| $
`r \leftarrow \sqrt{\sum_i \|x_i\|^2}`
$ |
`abs(x)`
|
`res = blas::nrm2(x);`
|
`sqrt(transform_reduce(begin(x), end(x), 0.0, plus<>{}, [](auto const& e){return norm(e);}));`
|
| SCAL |
`blas::scal(aa, x);`
| $
`x_i \leftarrow \alpha x_i`
$ |
`x*=aa;`
| |
`for_each(begin(x), end(x), [aa](auto& e){return e*=aa;})`
|
| AXPY |
`blas::axpy(aa, x, y)`
| $
`y_i \leftarrow \alpha x_i + y_i`
$ |
`y+=x`
`y-=x`
`y+=aa*x`
`y-=aa*x`
| |
`transform(x.begin(), x.end(), y.begin(), y.begin(), [aa](auto ex, auto ey) {return aa*ex + ey;})`
|
| DOT |
`blas::dot(x, y, res)`
| $
`r = \sum_i x_i y_i`
$ |
`res = (x, y);`
|
`res = blas::dot(x, y)`
|
`inner_product(begin(x), end(x), begin(y), T{});`
|
...
...
@@ -20,4 +21,4 @@ All these operations are now supported for CPU and GPU memory, real and complex.
| |
`blas::gemm(aa, A, blas::T(B), bb, C)`
| $
`C_{ij} \leftarrow \alpha \sum_k A_{ik} B_{jk} + \beta C_{ij}`
$ |
`C = aa*(A* ~B)`
|
`C = blas::gemm(aa, A, blas::T(B))`
`C += blas::gemm(aa, A, blas::T(B))`
|
`transform(begin(A), end(A), begin(C), begin(C), [&B, aa, bb] (auto const& Ar, auto&& Cr) {return blas::gemv(aa, B, Ar, bb, std::move(Cr));})`
|
| |
`blas::gemm(aa, blas::T(A), B, bb, C)`
| $
`C_{ij} \leftarrow \alpha \sum_k A_{ki} B_{kj} + \beta C_{ij}`
$ |
`C =~A * B`
`C = aa*(~A * B)`
`C+=~A * B`
`C+=aa*(~A * B)`
|
`C = blas::gemm(aa, blas::T(A), B, bb, C)`
(or
`+=`
) |
`transform(begin(transposed(A)), end(transposed(A)), begin(C), begin(C), [&B, aa, bb] (auto const& Ar, auto&& Cr) {return blas::gemv(aa, blas::T(B), Ar, bb, std::move(Cr));})`
|
| |
`blas::gemm(aa, blas::T(A), blas::T(B), bb, C)`
| $
`C_{ij} \leftarrow \alpha \sum_k A_{ki} B_{jk} + \beta C_{ij}`
$ |
`C =~A * ~B`
`C = aa*(~A * ~B)`
`C+=~A * ~B`
`C+=aa*(~A * ~B)`
|
`C = blas::gemm(aa, blas::T(A), blas::T(B), bb, C)`
(or
`+=`
) |
`transform(begin(transposed(A)), end(transposed(A)), begin(C), begin(C), [&B, aa, bb] (auto const& Ar, auto&& Cr) {return blas::gemv(aa, B, Ar, bb, std::move(Cr));})`
|
| |
<s>
`blas::gemm(aa, A, blas::J(B), bb, C)`
</s>
(use
`blas::gemm(..., blas::T(B), blas::H(A), ..., C_copy)`
and conjugate-transpose the result) | $
`C_{ij} \leftarrow \alpha \sum_k A_{ik} B_{kj}^* + \beta C_{ij}`
$ (not BLAS-implemented) | | |
`transform(begin(A), end(A), begin(C), begin(C), [BT=transposed(B), aa, bb](auto const& Ar, auto&& Cr) {transform(begin(BT), end(BT), begin(Cr), begin(Cr), [&Ar, aa, bb](auto const& BCr, auto&& Ce) {return aa*blas::dot(Ar, blas::C(BCr)) + bb*Ce;}); return std::move(Cr);});`
|
| |
<s>
`blas::gemm(aa, A, blas::J(B), bb, C)`
</s>
(use
`blas::gemm(..., blas::T(B), blas::H(A), ..., HC)`
and conjugate-transpose the result) | $
`C_{ij} \leftarrow \alpha \sum_k A_{ik} B_{kj}^* + \beta C_{ij}`
$ (not BLAS-implemented) | | |
`transform(begin(A), end(A), begin(C), begin(C), [BT=transposed(B), aa, bb](auto const& Ar, auto&& Cr) {transform(begin(BT), end(BT), begin(Cr), begin(Cr), [&Ar, aa, bb](auto const& BCr, auto&& Ce) {return aa*blas::dot(Ar, blas::C(BCr)) + bb*Ce;}); return std::move(Cr);});`
|