...
 
Commits (17)
......@@ -24,8 +24,10 @@ SRC= \
../src/CDecoder/template/CDecoder_fixed.cpp \
../src/CDecoder/template/CDecoder_fixed_SSE.cpp \
../src/CDecoder/template/CDecoder_fixed_reds.cpp \
../src/CDecoder/template/CDecoder_fixed_layered.cpp \
../src/CDecoder/OMS/CDecoder_OMS_fixed_SSE.cpp \
../src/CDecoder/MS/CDecoder_MS_fixed_reds.cpp \
../src/CDecoder/MS/CDecoder_MS_fixed_layered.cpp \
../src/CDecoder/NMS/CDecoder_NMS_fixed_SSE.cpp \
../src/CEncoder/CFakeEncoder.cpp \
../src/CEncoder/Encoder.cpp \
......
......@@ -35,6 +35,7 @@ using namespace std;
#include "./NMS/CDecoder_NMS_fixed_AVX.h"
#include "./MS/CDecoder_MS_fixed_reds.h"
#include "./MS/CDecoder_MS_fixed_layered.h"
#define UNAVAILABLE { cout << "(EE) Error, decoder unavailable" << endl; \
cout << "(EE) - decoder type : " << type << endl; \
......@@ -126,6 +127,8 @@ CDecoder* CreateDecoder(
if(format.compare("fixed") == 0 && arch.compare("reds") == 0) {
CDecoder_MS_fixed_reds *dec = new CDecoder_MS_fixed_reds();
return dec;
} else if (format.compare("fixed") == 0 && arch.compare("layered") == 0) {
return new CDecoder_MS_fixed_layered();
}
else {
UNAVAILABLE;
......
#include "CDecoder_MS_fixed_layered.h"
#include <emmintrin.h>
#include <tmmintrin.h>
#include <smmintrin.h>
/*
 * Builds the per-edge address table used by the check-node kernels.
 * Entry i of the table points at the var_nodes slot selected by
 * PosNoeudsVariable[i], so the kernels can reach their neighbouring
 * variable nodes through a single indirection.
 */
CDecoder_MS_fixed_layered::CDecoder_MS_fixed_layered()
{
    int16_t **edge_table = new int16_t *[MESSAGE];
    for (size_t edge = 0; edge < MESSAGE; ++edge) {
        edge_table[edge] = var_nodes + PosNoeudsVariable[edge];
    }
    p_vn_addr = edge_table;
}
/*
 * Releases the edge address table allocated in the constructor.
 * The table comes from new[], so it must be released with delete[];
 * a scalar delete on an array allocation is undefined behaviour.
 */
CDecoder_MS_fixed_layered::~CDecoder_MS_fixed_layered()
{
    delete[] p_vn_addr;
}
/*
 * Adds two char values in int precision and clamps the result to the
 * symmetric range [(char)-127, (char)127].
 */
char sat_add(char a, char b) {
    const char upper_bound = (char)127;
    const char lower_bound = (char)-127;
    const int total = (int)a + (int)b;
    if (total > upper_bound) {
        return upper_bound;
    }
    if (total < lower_bound) {
        return lower_bound;
    }
    return (char)total;
}
/*
 * Subtracts b from a in int precision and clamps the result to the
 * symmetric range [(char)-127, (char)127].
 */
char sat_sub(char a, char b) {
    const char upper_bound = (char)127;
    const char lower_bound = (char)-127;
    const int difference = a - b;
    if (difference > upper_bound) {
        return upper_bound;
    }
    if (difference < lower_bound) {
        return lower_bound;
    }
    return (char)difference;
}
/*
 * Per-lane conditional move on 16-bit lanes: wherever cmp0 == cmp1 the
 * result lane is taken from new_val, otherwise it keeps the lane of dest.
 */
inline __m128i CMOV_EPI16(__m128i dest, __m128i new_val, __m128i cmp0, __m128i cmp1) {
    const __m128i select_new = _mm_cmpeq_epi16(cmp0, cmp1);
    const __m128i kept = _mm_andnot_si128(select_new, dest);
    const __m128i taken = _mm_and_si128(select_new, new_val);
    return _mm_or_si128(kept, taken);
}
/*
 * Branch-free integer minimum: the mask -((x) < (y)) is all ones when x < y,
 * which turns y ^ (x ^ y) into x, and zero otherwise, which leaves y.
 * The whole expansion is parenthesised so the macro composes safely inside
 * larger expressions. Both arguments are evaluated more than once, so they
 * must be free of side effects.
 */
#define MIN(x, y) ((y) ^ (((x) ^ (y)) & -((x) < (y))))
void inline
CDecoder_MS_fixed_layered::cn_kernel7(
size_t cn_idx,
int16_t **p_vn_addr,
int16_t *cv_msgs)
{
const size_t cn_deg = 7;
int16_t v_to_c_msgs[SSE_16BIT_ELEM];
int16_t abs_msgs[SSE_16BIT_ELEM];
int16_t new_msgs[SSE_16BIT_ELEM];
size_t cn_offset;
int16_t global_sign, sign;
int16_t min1_LLR;
int16_t min2_LLR;
int16_t min;
int16_t abs_msg, msg;
int16_t new_msg;
int16_t abs_mask;
int16_t s0, s1, s2, s3;
cn_offset = cn_idx*cn_deg_max;
/* Collect all messages coming from VNs adjacent to current CN */
for(size_t vn_idx = 0; vn_idx < cn_deg; vn_idx++) {
v_to_c_msgs[vn_idx] =
*p_vn_addr[cn_offset+vn_idx] -
cv_msgs[cn_offset+vn_idx];
}
for(size_t i = cn_deg; i < SSE_16BIT_ELEM; i++) {
v_to_c_msgs[i] = INT16_MAX;
}
/* Compute new estimation directed to VNs,
* store it for the next iteration, and update estimation on VNs. */
min1_LLR = INT16_MAX;
min2_LLR = INT16_MAX;
s0 = v_to_c_msgs[0] ^ v_to_c_msgs[1];
s1 = v_to_c_msgs[2] ^ v_to_c_msgs[3];
s2 = v_to_c_msgs[4] ^ v_to_c_msgs[5];
s3 = v_to_c_msgs[6];
s0 = s0 ^ s1;
s2 = s2 ^ s3;
global_sign = s0 ^ s2;
/*for(size_t vn_idx = 0; vn_idx < cn_deg; vn_idx++) {
msg = v_to_c_msgs[vn_idx];
abs_mask = msg >> INT16_WIDTH - 1;
abs_msg = (msg + abs_mask) ^ abs_mask;
min2_LLR = min2_LLR > abs_msg ? (min1_LLR > abs_msg ? min1_LLR : abs_msg) : min2_LLR;
min1_LLR = min1_LLR > abs_msg ? abs_msg:min1_LLR;
abs_msgs[vn_idx] = abs_msg;
}*/
// For now just test abs and min1
__m128i vc_msgs = _mm_loadu_si128((__m128i*)v_to_c_msgs);
__m128i abs_vec = _mm_abs_epi16(vc_msgs);
_mm_storeu_si128((__m128i*)abs_msgs, abs_vec);
__m128i min1_res = _mm_minpos_epu16(abs_vec);
min1_LLR = _mm_extract_epi16(min1_res, 0);
int8_t min1_pos = _mm_extract_epi8(min1_res, 2);
/*
abs_msgs[min1_pos] = 0x7FFF;
/* Compute min2 sequentially */
/*int16_t min0, min1, min2, min3;
min0 = MIN(abs_msgs[0], abs_msgs[1]);
min1 = MIN(abs_msgs[2], abs_msgs[3]);
min2 = MIN(abs_msgs[4], abs_msgs[5]);
min3 = abs_msgs[6];
min0 = MIN(min0, min1);
min2 = MIN(min2, min3);
min2_LLR = MIN(min0, min2);*/
__m128i pos_mask = _mm_set_epi16(7,6,5,4,3,2,1,0);
__m128i pos_brdcst = _mm_set1_epi16(min1_pos);
pos_mask = _mm_cmpeq_epi16(pos_mask, pos_brdcst);
abs_vec = _mm_or_si128(abs_vec, pos_mask);
min2_LLR = _mm_extract_epi16(_mm_minpos_epu16(abs_vec), 0);
//abs_msgs[min1_pos] = min1_LLR;
/* Invert sign because CN degree is odd */
global_sign = ~global_sign;
/* Broadcast the first minimum and second minimum*/
__m128i min1_brdcst = _mm_set1_epi16(min1_LLR);
__m128i min2_brdcst = _mm_set1_epi16(min2_LLR);
/* Fusion min1 vector and min2 such that min2 is placed at the position where
* we found min1 */
min2_brdcst = _mm_and_si128(pos_mask, min2_brdcst);
min1_brdcst = _mm_andnot_si128(pos_mask, min1_brdcst);
__m128i final_min = _mm_or_si128(min1_brdcst, min2_brdcst);
__m128i sign_vec = _mm_xor_si128(vc_msgs, _mm_set1_epi16(global_sign));
sign_vec = _mm_srai_epi16(sign_vec, 15);
sign_vec = _mm_or_si128(sign_vec, _mm_set1_epi16(1));
__m128i new_msgs_vec = _mm_mullo_epi16(sign_vec, final_min);
_mm_storeu_si128((__m128i*)new_msgs, new_msgs_vec);
for(size_t vn_idx = 0; vn_idx < cn_deg; vn_idx++) {
//min = abs_msgs[vn_idx] != min1_LLR ? min1_LLR:min2_LLR;
//min = mins[vn_idx];
//sign = (global_sign ^ v_to_c_msgs[vn_idx]);
//sign = 1 | (sign >> (INT16_WIDTH-1)); // either -1 or 1
//new_msg = sign * min;
new_msg = new_msgs[vn_idx];
cv_msgs[cn_offset+vn_idx] = new_msg;
*p_vn_addr[cn_offset+vn_idx] = new_msg + v_to_c_msgs[vn_idx];
}
}
void inline
CDecoder_MS_fixed_layered::cn_kernel6(
size_t cn_idx,
int16_t **p_vn_addr,
int16_t *cv_msgs)
{
const size_t cn_deg = 6;
int16_t v_to_c_msgs[cn_deg_max];
int16_t abs_msgs[cn_deg_max];
size_t cn_offset;
int16_t global_sign, sign;
int16_t min1_LLR;
int16_t min2_LLR;
int16_t min;
int16_t abs_msg, msg;
int16_t new_msg;
int16_t abs_mask;
cn_offset = cn_idx*cn_deg_max;
/* Collect all messages coming from VNs adjacent to current CN */
for(size_t vn_idx = 0; vn_idx < cn_deg; vn_idx++) {
v_to_c_msgs[vn_idx] =
*p_vn_addr[cn_offset+vn_idx] -
cv_msgs[cn_offset+vn_idx];
}
/* Compute new estimation directed to VNs,
* store it for the next iteration, and update estimation on VNs. */
global_sign = 0;
min1_LLR = INT16_MAX;
min2_LLR = INT16_MAX;
for(size_t vn_idx = 0; vn_idx < cn_deg; vn_idx++) {
msg = v_to_c_msgs[vn_idx];
global_sign ^= msg;
abs_mask = msg >> INT16_WIDTH - 1;
abs_msg = (msg + abs_mask) ^ abs_mask;
min2_LLR = min2_LLR > abs_msg ? (min1_LLR > abs_msg ? min1_LLR : abs_msg) : min2_LLR;
min1_LLR = min1_LLR > abs_msg ? abs_msg:min1_LLR;
abs_msgs[vn_idx] = abs_msg;
}
if(cn_deg & 0x1) {
global_sign = ~global_sign;
}
for(size_t vn_idx = 0; vn_idx < cn_deg; vn_idx++) {
min = abs_msgs[vn_idx] != min1_LLR ? min1_LLR:min2_LLR;
sign = (global_sign ^ v_to_c_msgs[vn_idx]);
sign = 1 | (sign >> (INT16_WIDTH-1)); // either -1 or 1
new_msg = sign * min;
cv_msgs[cn_offset+vn_idx] = new_msg;
*p_vn_addr[cn_offset+vn_idx] = new_msg + v_to_c_msgs[vn_idx];
}
}
/*
 * Runs the layered min-sum decoder on one frame.
 *
 * var_nodes          input values, one char per variable node (NOEUD entries).
 * Rprime_fix         output hard decisions, one char per variable node:
 *                    1 when the final value is >= 0, otherwise 0.
 * nombre_iterations  number of full passes over all check nodes; values
 *                    <= 0 perform no pass and go straight to the decision.
 */
void CDecoder_MS_fixed_layered::decode(
    char var_nodes[],
    char Rprime_fix[],
    int nombre_iterations)
{
    /* Widen the inputs into the 16-bit working buffer of the base class. */
    for(size_t i = 0; i < NOEUD; i++) {
        CDecoder_fixed_layered::var_nodes[i] = static_cast<int16_t>(var_nodes[i]);
    }
    /* No check-node message has been sent yet. */
    for(size_t i = 0; i < MESSAGE; i++) {
        c_to_v_msgs[i] = 0;
    }

    /* A counted loop instead of `while(n--)`: a negative count would
     * otherwise spin until the counter overflows. */
    for(int iteration = 0; iteration < nombre_iterations; iteration++) {
        size_t cn_idx = 0;

        // Degree 7
        for(; cn_idx < DEG_1_COMPUTATIONS; cn_idx++) {
            cn_kernel7(cn_idx, p_vn_addr, c_to_v_msgs);
        }

        // Degree 6
        for(; cn_idx < DEG_1_COMPUTATIONS + DEG_2_COMPUTATIONS; cn_idx++) {
            cn_kernel6(cn_idx, p_vn_addr, c_to_v_msgs);
        }
    }

    /* Hard decision */
    for(size_t i = 0; i < NOEUD; i++) {
        Rprime_fix[i] = CDecoder_fixed_layered::var_nodes[i] >= 0 ? 1:0;
    }
}
\ No newline at end of file
#ifndef CDECODER_MS_FIXED_LAYERED_H
#define CDECODER_MS_FIXED_LAYERED_H
#include "../template/CDecoder_fixed_layered.h"
/*
 * Layered min-sum decoder working on 16-bit fixed-point values.
 * Check nodes of degree 7 and degree 6 are handled by dedicated kernels.
 */
class CDecoder_MS_fixed_layered : public CDecoder_fixed_layered {
protected:
    int16_t **p_vn_addr; // From a CN perspective, the addresses of all neighbouring VNs

public:
    CDecoder_MS_fixed_layered();
    ~CDecoder_MS_fixed_layered();

    // The object owns p_vn_addr and frees it in the destructor; a shallow
    // copy would free the same table twice, so copying is disabled.
    CDecoder_MS_fixed_layered(const CDecoder_MS_fixed_layered&) = delete;
    CDecoder_MS_fixed_layered& operator=(const CDecoder_MS_fixed_layered&) = delete;

    // Decodes one frame: reads var_nodes, runs nombre_iterations passes and
    // writes one hard decision per variable node into Rprime_fix.
    void decode(char var_nodes[], char Rprime_fix[], int nombre_iterations);

private:
    // Update of a single check node of degree 7 (SSE) / degree 6 (scalar).
    void cn_kernel7(size_t cn_idx, int16_t **p_vn_addr, int16_t *cv_msgs);
    void cn_kernel6(size_t cn_idx, int16_t **p_vn_addr, int16_t *cv_msgs);
};
/* Number of 16-bit elements that fit in one 128-bit SSE register. */
#define SSE_16BIT_ELEM 8
#endif
\ No newline at end of file
#include "./CDecoder_fixed_layered.h"
#include "./Constantes/constantes_sse.h"
/*
 * Allocates the two working buffers of the layered decoder:
 * NOEUD 16-bit variable-node values and MESSAGE 16-bit check-to-variable
 * messages. The contents are left uninitialised here.
 */
CDecoder_fixed_layered::CDecoder_fixed_layered()
{
    this->var_nodes = new int16_t[NOEUD];
    this->c_to_v_msgs = new int16_t[MESSAGE];
}
/*
 * Releases the working buffers allocated in the constructor.
 * Both come from new[], so they must be released with delete[];
 * a scalar delete on an array allocation is undefined behaviour.
 */
CDecoder_fixed_layered::~CDecoder_fixed_layered()
{
    delete[] var_nodes;
    delete[] c_to_v_msgs;
}
/*
 * Floating-point entry point. This class performs no decoding here:
 * the arguments are ignored and Rprime_fix is left untouched.
 */
void CDecoder_fixed_layered::decode(float var_nodes[], char Rprime_fix[], int nombre_iterations)
{
    (void)var_nodes;
    (void)Rprime_fix;
    (void)nombre_iterations;
}
\ No newline at end of file
#ifndef __CDecoder_layered_
#define __CDecoder_layered_
#include "../../Constantes/constantes_sse.h"
#include <stdio.h>
#include <stdlib.h>
#include <iostream>
#include <stdint.h>
#include "./CDecoder_fixed.h"
/*
 * Common base of the layered fixed-point decoders. It owns the 16-bit
 * working buffers shared by the concrete decoders.
 */
class CDecoder_fixed_layered : public CDecoder_fixed {
protected:
    // Stride, in edges, between two consecutive check nodes in the
    // per-edge arrays (set to DEG_1).
    const size_t cn_deg_max = DEG_1;

    int16_t *c_to_v_msgs; // check-to-variable messages, MESSAGE entries
    int16_t *var_nodes;   // variable-node values, NOEUD entries

public:
    CDecoder_fixed_layered();
    virtual ~CDecoder_fixed_layered();

    // The object owns both buffers and frees them in the destructor; a
    // shallow copy would free them twice, so copying is disabled.
    CDecoder_fixed_layered(const CDecoder_fixed_layered&) = delete;
    CDecoder_fixed_layered& operator=(const CDecoder_fixed_layered&) = delete;

    // Fixed-point decoding, to be provided by the concrete decoder.
    virtual void decode(char var_nodes[], char Rprime_fix[], int nombre_iterations) = 0;

    // Floating-point entry point; the implementation in this class does nothing.
    virtual void decode(float var_nodes[], char Rprime_fix[], int nombre_iterations);
};
#endif
......@@ -289,6 +289,10 @@ int main(int argc, char* argv[]) {
arch = "reds";
nb_frames = 1;
} else if(strcmp(argv[p], "-layered") == 0) {
arch = "layered";
nb_frames = 1;
} else if (strcmp(argv[p], "-NMS") == 0) {
type = "NMS";
p_decoder.nms_factor_float = atof(argv[p + 1]);
......