Update BLAS authored by Alfredo Correa's avatar Alfredo Correa
......@@ -11,7 +11,7 @@ All these operations are now supported for CPU and GPU memory, real and complex.
| DOT | `blas::dot(x, y, res)` | $`r = \sum_i x_i y_i`$ | `res = (x, y);` | `res = blas::dot(x, y)` | `inner_product(begin(x), end(x), begin(y), T{});` |
| | `blas::dot(blas::C(x), y, res)` | $`r = \sum_i \bar x_i y_i`$ | `res = (*x, y);` | `res = blas::dot(blas::C(x), y)` | `inner_product(begin(x), end(x), begin(y), T{}, plus<>{}, [](T const& t1, T const& t2) {return conj(t1)*t2;});` |
| | `blas::dot(x, blas::C(y), res)` | $`r = \sum_i x_i \bar y_i`$ | `res = (x, *y);` | `res = blas::dot(x, blas::C(y));` | `inner_product(x.begin(), x.end(), y.begin(), T{}, plus<>{}, [](T const& t1, T const& t2) {return t1*conj(t2);});` |
| | ~~`blas::dot(x, blas::C(y), res)`~~ | $`r = \sum_i \bar x_i \bar y_i`$ not implemented in BLAS, conjugate result | | | `auto res = conj(inner_product(x.begin(), x.end(), y.begin(), T{}));` |
| | ~~`blas::dot(blas::C(x), blas::C(y), res)`~~ | $`r = \sum_i \bar x_i \bar y_i`$ not implemented in BLAS, conjugate result | | | `auto res = conj(inner_product(x.begin(), x.end(), y.begin(), T{}));` |
| GEMV | `blas::gemv(aa, A, x, bb, y)` | $`y_i \leftarrow \alpha\sum_j A_{ij}x_j + \beta y_i`$ | `y=A%x` `y=aa*A%x` `y+=A%x` `y+=aa*A%x` | `y=blas::gemv(aa, A, x)` `y+=blas::gemv(aa, A, x)` | `transform(begin(A), end(A), begin(y), [&x, aa] (auto const& Ac) {return aa*blas::dot(Ac, x);})` |
| | `blas::gemv(aa, blas::T(A), x, bb, y)` | $`y_i \leftarrow \alpha\sum_j A_{ji}x_j + \beta y_i`$ | `y= ~A % x` `y=aa*(~A)%x` `y+=(~A)%x` `y+=aa*(~A)%x` | `y=blas::gemv(aa, blas::T(A), x)` `y+=blas::gemv(aa, blas::T(A), x)` | `transform(begin(transposed(A)), end(transposed(A)), begin(y), [&x, aa] (auto const& Ac) {return aa*blas::dot(Ac, x);})` |
| | `blas::gemv(aa, blas::J(A), x, bb, y)` | $`y_i \leftarrow \alpha\sum_j A_{ij}^*x_j + \beta y_i`$ | `y= *A % x` `y=aa*(*A)%x` `y+=(*A)%x` `y+=aa*(*A)%x` | `y=blas::gemv(aa, blas::J(A), x)` `y+=blas::gemv(aa, blas::J(A), x)` | `transform(begin(A), end(A), begin(y), [&x, aa] (auto const& Ac) {return aa*blas::dot(*Ac, x);})` |
......@@ -21,7 +21,7 @@ All these operations are now supported for CPU and GPU memory, real and complex.
| | `blas::gemm(aa, blas::T(A), B, bb, C)` | $`C_{ij} \leftarrow \alpha \sum_k A_{ki} B_{kj} + \beta C_{ij}`$ | `C =~A * B` `C = aa*(~A * B)` `C+=~A * B` `C+=aa*(~A * B)` | `C = blas::gemm(aa, blas::T(A), B, bb, C)` (or `+=`) | `transform(begin(transposed(A)), end(transposed(A)), begin(C), begin(C), [&B, aa, bb] (auto const& Ar, auto&& Cr) {return blas::gemv(aa, blas::T(B), Ar, bb, std::move(Cr));})` |
| | `blas::gemm(aa, blas::T(A), blas::T(B), bb, C)` | $`C_{ij} \leftarrow \alpha \sum_k A_{ki} B_{jk} + \beta C_{ij}`$ | `C =~A * ~B` `C = aa*(~A * ~B)` `C+=~A * ~B` `C+=aa*(~A * ~B)` | `C = blas::gemm(aa, blas::T(A), blas::T(B), bb, C)` (or `+=`) | `transform(begin(transposed(A)), end(transposed(A)), begin(C), begin(C), [&B, aa, bb] (auto const& Ar, auto&& Cr) {return blas::gemv(aa, B, Ar, bb, std::move(Cr));})` |
| | <s>`blas::gemm(aa, A, blas::J(B), bb, C)`</s> (use `blas::gemm(..., blas::T(B), blas::H(A), ..., HC)` and conjugate-transpose the result) | $`C_{ij} \leftarrow \alpha \sum_k A_{ik} B_{kj}^* + \beta C_{ij}`$ (not BLAS-implemented) | | | `transform(begin(A), end(A), begin(C), begin(C), [BT=transposed(B), aa, bb](auto const& Ar, auto&& Cr) {transform(begin(BT), end(BT), begin(Cr), begin(Cr), [&Ar, aa, bb](auto const& Bc, auto&& c) {return aa*blas::dot(Ar, blas::C(Bc)) + bb*c;}); return std::move(Cr);});` |
| | <s>`blas::gemm(aa, blas::J(A), B, bb, C)`</s> | $`C_{ij} \leftarrow \alpha \sum_k A_{ik}^* B_{kj} + \beta C_{ij}`$ (not BLAS-implemented) | | | `transform(begin(A), end(A), begin(C), begin(C), [BT=transposed(B), aa, bb](auto const& Ar, auto&& Cr) {transform(begin(BT), end(BT), begin(Cr), begin(Cr), [&Ar, aa, bb](auto const& Bc, auto&& c) {return aa*blas::dot(blas::C(Ar), Bc) + bb*c;}); return std::move(Cr);});` |
| | ~~`blas::gemm(aa, blas::J(A), B, bb, C)`~~ | $`C_{ij} \leftarrow \alpha \sum_k A_{ik}^* B_{kj} + \beta C_{ij}`$ (not BLAS-implemented) | | | `transform(begin(A), end(A), begin(C), begin(C), [BT=transposed(B), aa, bb](auto const& Ar, auto&& Cr) {transform(begin(BT), end(BT), begin(Cr), begin(Cr), [&Ar, aa, bb](auto const& Bc, auto&& c) {return aa*blas::dot(blas::C(Ar), Bc) + bb*c;}); return std::move(Cr);});` |
| | <s>`blas::gemm(aa, blas::J(A), blas::J(B), bb, C)`</s> | $`C_{ij} \leftarrow \alpha \sum_k \bar{A_{ik}} \bar{B_{kj}} + \beta C_{ij}`$ (not BLAS-implemented) | | | `transform(begin(A), end(A), begin(C), begin(C), [BT=transposed(B), aa, bb](auto const& Ar, auto&& Cr) {transform(begin(BT), end(BT), begin(Cr), begin(Cr), [&Ar, aa, bb](auto const& Bc, auto&& c) {return aa*blas::dot(blas::C(Ar), blas::C(Bc)) + bb*c;}); return std::move(Cr);});` |
| | `blas::gemm(aa, A, blas::H(B), bb, C)` | $`C_{ij} \leftarrow \alpha \sum_k A_{ik} \bar B_{jk} + \beta C_{ij}`$ | `C = aa*(A* ~*B)` (or `+=`) | `C = blas::gemm(aa, A, blas::H(B))` `C += blas::gemm(aa, A, blas::H(B))` | `transform(begin(A), end(A), begin(CC), begin(CC), [&](auto const& Ar, auto&& Cr){return blas::gemv(aa, blas::J(B), Ar, bb, move(Cr));})` |
| | `blas::gemm(aa, blas::H(A), B, bb, C)` | $`C_{ij} \leftarrow \alpha \sum_k \bar A_{ki} B_{kj} + \beta C_{ij}`$ | `CC=~*A *B` | `C=blas::gemm(aa, blas::H(A), B)` | `transform(begin(transposed(A)), end(transposed(A)), begin(CC), begin(CC), [BT=transposed(B), aa, bb](auto const& Ac, auto&& Cr) {transform(begin(BT), end(BT), begin(Cr), begin(Cr), [&Ac, aa, bb](auto const& Bc, auto&& c){return aa*blas::dot(blas::C(Ac), Bc) + bb*c;}); return move(Cr);})` |