...
 
Commits (2)
#include "CDecoder_MS_fixed_flooded.h"
#include <xmmintrin.h>
#include <emmintrin.h>
#include <smmintrin.h>
CDecoder_MS_fixed_flooded::CDecoder_MS_fixed_flooded()
{
const size_t nb_edges = _M;
......@@ -91,14 +95,80 @@ void CDecoder_MS_fixed_flooded::decode(
}
}
/* TODO: do not allocate this buffer on the stack */
char tmpVn[cn_deg_max][nb_cn];
/* SSE constants */
const __m128i vzero = _mm_set1_epi8(0);
const __m128i vone = _mm_set1_epi8(0xff);
/* Loop a fixed number of iterations */
while(nombre_iterations--) {
/* Gather step: for each of the DEG_1_COMPUTATIONS check nodes, copy the
 * value pointed to by each of its DEG_1 p_vn_addr entries into tmpVn.
 * tmpVn is indexed [edge][check node], so the values for one edge index
 * across consecutive check nodes end up contiguous in memory.
 */
for(size_t cn_idx = 0; cn_idx < DEG_1_COMPUTATIONS; cn_idx++) {
/* First p_vn_addr entry of this check node (stride is cn_deg_max). */
int cn_offset = cn_idx*cn_deg_max;
for(size_t edge_idx = 0; edge_idx < DEG_1; edge_idx++)
tmpVn[edge_idx][cn_idx] = *p_vn_addr[cn_offset + edge_idx];
}
/* Check node updates.
* For each check node, send a message containing a new estimate
* to each of its neighbouring variable nodes.
*/
for(size_t cn_idx = 0; cn_idx < DEG_1_COMPUTATIONS; cn_idx++) {
/* Vectorised check node update for the DEG_1 check nodes.
 * Each iteration handles 16 consecutive check nodes, one per 8-bit lane.
 * Only complete groups of 16 are processed here; the remaining
 * DEG_1_COMPUTATIONS % 16 check nodes are left to the scalar loop that
 * starts at (DEG_1_COMPUTATIONS/16)*16.
 */
for(size_t cn_idx = 0; cn_idx + 16 <= DEG_1_COMPUTATIONS; cn_idx += 16) {
    __m128i vsign_global = vzero;
    __m128i vmin_llr1 = _mm_set1_epi8(vSAT_POS_MSG);
    __m128i vmin_llr2 = _mm_set1_epi8(vSAT_POS_MSG);
    __m128i vabs[DEG_1];

    /* First pass over the edges: accumulate the XOR of all incoming
     * values (its top bit is the combined sign) and track, per lane, the
     * smallest and second smallest magnitudes. */
    for(size_t edge_idx = 0; edge_idx < DEG_1; edge_idx++) {
        /* tmpVn is a plain char array, so nothing guarantees 16-byte
         * alignment of this address: use the unaligned load. */
        const __m128i vn = _mm_loadu_si128(
            reinterpret_cast<const __m128i*>(&tmpVn[edge_idx][cn_idx]));

        vsign_global = _mm_xor_si128(vsign_global, vn);
        vabs[edge_idx] = _mm_abs_epi8(vn);

        /* Second minimum, lane-wise equivalent of
         *   min2 = (min2 < abs) ? min2 : max(abs, min1)
         * vkeep selects the lanes where the current min2 must be kept. */
        const __m128i vmax_abs_min_llr1 = _mm_max_epi8(vabs[edge_idx], vmin_llr1);
        const __m128i vkeep = _mm_cmpgt_epi8(vabs[edge_idx], vmin_llr2);
        vmin_llr2 = _mm_or_si128(_mm_and_si128(vkeep, vmin_llr2),
                                 _mm_andnot_si128(vkeep, vmax_abs_min_llr1));

        /* First minimum is updated last: the min2 update above needs the
         * previous value of min1. */
        vmin_llr1 = _mm_min_epi8(vmin_llr1, vabs[edge_idx]);
    }

#if (DEG_1 & 0x1)
    /* Odd degree: flip every bit of the accumulated sign word. */
    vsign_global = _mm_xor_si128(vone, vsign_global);
#endif

    /* Second pass: build the message sent on each edge and scatter the 16
     * lanes back to _cn_msgs. */
    for(size_t edge_idx = 0; edge_idx < DEG_1; edge_idx++) {
        const __m128i vn = _mm_loadu_si128(
            reinterpret_cast<const __m128i*>(&tmpVn[edge_idx][cn_idx]));

        /* Sign of the message: XOR of the edge's own value with the
         * accumulated sign word. */
        const __m128i vsign = _mm_xor_si128(vn, vsign_global);

        /* Magnitude: min2 on lanes where this edge's magnitude equals
         * min1, min1 on all other lanes. */
        const __m128i vis_min1 = _mm_cmpeq_epi8(vabs[edge_idx], vmin_llr1);
        const __m128i vmin = _mm_or_si128(_mm_and_si128(vis_min1, vmin_llr2),
                                          _mm_andnot_si128(vis_min1, vmin_llr1));
        const __m128i vmin_neg = _mm_sub_epi8(vzero, vmin);

        /* Negate the magnitude on lanes whose sign byte is negative. */
        const __m128i vis_neg = _mm_cmpgt_epi8(vzero, vsign);
        const __m128i vcn_msg = _mm_or_si128(_mm_and_si128(vis_neg, vmin_neg),
                                             _mm_andnot_si128(vis_neg, vmin));

        /* The 16 results belong to 16 different check nodes, whose
         * messages are cn_deg_max entries apart in _cn_msgs, so they are
         * spilled to a small buffer and written one by one. The buffer
         * has no alignment guarantee, hence the unaligned store. */
        char cn_msgs_tmp[16];
        _mm_storeu_si128(reinterpret_cast<__m128i*>(cn_msgs_tmp), vcn_msg);
        for (int i = 0 ; i < 16 ; i++) {
            const int cn_offset = (cn_idx+i)*cn_deg_max + edge_idx;
            _cn_msgs[cn_offset] = cn_msgs_tmp[i];
        }
    }
}
/* Leftovers */
for(size_t cn_idx = (DEG_1_COMPUTATIONS/16)*16; cn_idx < DEG_1_COMPUTATIONS; cn_idx++) {
int cn_offset = cn_idx*cn_deg_max;
......@@ -111,12 +181,12 @@ void CDecoder_MS_fixed_flooded::decode(
/* We compute a different estimate (the message sent), for each
* neighbouring VN */
for(size_t edge_idx = 0; edge_idx < DEG_1; edge_idx++) {
char vn = *p_vn_addr[cn_offset + edge_idx];
char vn = tmpVn[edge_idx][cn_idx];
signGlobal ^= vn;
abs[edge_idx] = (vn >= 0 ? vn : -vn);
char tmp = minLLR1;
char maxAbsMinLLR1 = abs[edge_idx] > minLLR1 ? abs[edge_idx] : minLLR1;
minLLR2 = minLLR2 < abs[edge_idx] ? minLLR2 : maxAbsMinLLR1;
minLLR1 = minLLR1 < abs[edge_idx] ? minLLR1 : abs[edge_idx];
minLLR2 = minLLR2 < abs[edge_idx] ? minLLR2 : abs[edge_idx] > tmp ? abs[edge_idx] : tmp;
}
#if (DEG_1 & 0x1)
......@@ -124,7 +194,7 @@ void CDecoder_MS_fixed_flooded::decode(
#endif
for(size_t edge_idx = 0; edge_idx < DEG_1; edge_idx++) {
char sign = *p_vn_addr[cn_offset + edge_idx] ^ signGlobal;
char sign = tmpVn[edge_idx][cn_idx] ^ signGlobal;
char min = abs[edge_idx] == minLLR1 ? minLLR2 : minLLR1;
_cn_msgs[cn_offset + edge_idx] = sign < 0 ? -min : min;
}
......@@ -149,9 +219,8 @@ void CDecoder_MS_fixed_flooded::decode(
char vn = *p_vn_addr[cn_offset + edge_idx];
signGlobal ^= vn;
abs[edge_idx] = (vn >= 0 ? vn : -vn);
char tmp = minLLR1;
minLLR2 = minLLR2 < abs[edge_idx] ? minLLR2 : abs[edge_idx] > minLLR1 ? abs[edge_idx] : minLLR1;
minLLR1 = minLLR1 < abs[edge_idx] ? minLLR1 : abs[edge_idx];
minLLR2 = minLLR2 < abs[edge_idx] ? minLLR2 : abs[edge_idx] > tmp ? abs[edge_idx] : tmp;
}
#if (DEG_2 & 0x1)
......