Commit 16d3fbac authored by Christian R. Helmrich

low-resolution SBR

parent 1259070c
......@@ -846,7 +846,11 @@ int main (const int argc, char* argv[])
if (!readStdin && (mod3Percent > 0) && !(mp4Writer.getFrameCount () % mod3Percent))
{
#if ENABLE_SIMPLE_SBR
if ((i++) < (coreSbrFrameLengthIndex >= 3 ? 17 : 34)) // with short files
#else
if ((i++) < 34) // for short files
#endif
{
fprintf_s (stdout, "-"); fflush (stdout);
}
......
......@@ -116,15 +116,23 @@ unsigned BitStreamWriter::writeChannelWiseSbrData (const int32_t* const sbrDataC
const bool indepFlag /*= false*/)
{
const unsigned nb = (sbrDataCh0 != nullptr ? 2 * ((sbrDataCh0[0] >> 23) & 1) + 2 : 0); // noise bits/ch = 2 or 4
#if ENABLE_INTERTES
const bool issTes = (nb > 0 ? ((sbrDataCh0[0] >> 30) & 1) : false);
const int8_t res = (nb > 0 ? (sbrDataCh0[0] >> 29) & 1 : 0); // bs_amp_res
#else
const int16_t res = (nb > 0 ? sbrDataCh0[0] >> 29 : 0); // short bs_amp_res
#endif
const bool stereo = (sbrDataCh1 != nullptr);
const bool couple = (stereo ? ((sbrDataCh1[0] >> 23) & 1) : false);
unsigned bitCount = (stereo ? (couple ? 2 : 7 + nb) : 0) + 6 + nb, i, tmpCh0, tmpCh1;
unsigned bitCount = (stereo ? (couple ? 2 : 7 + nb) : 0) + 6 + nb;
unsigned i, envCh0, envCh1, resCh0, resCh1; // bs_num_env[], bs_freq_res[]
if (nb == 0) return 0;
tmpCh0 = (sbrDataCh0[0] >> 21) & 3;
tmpCh1 = ((stereo && !couple ? sbrDataCh1[0] : sbrDataCh0[0]) >> 21) & 3;
envCh0 = 1 << ((sbrDataCh0[0] >> 21) & 3);
resCh0 = (sbrDataCh0[0] >> 20) & 1;
envCh1 = 1 << (((stereo && !couple ? sbrDataCh1[0] : sbrDataCh0[0]) >> 21) & 3);
resCh1 = ((stereo && !couple ? sbrDataCh1[0] : sbrDataCh0[0]) >> 20) & 1;
if (stereo) m_auBitStream.write (couple ? 1 : 0, 1); // _coupling
......@@ -132,20 +140,20 @@ unsigned BitStreamWriter::writeChannelWiseSbrData (const int32_t* const sbrDataC
m_auBitStream.write ((sbrDataCh0[0] >> 20) & 7, 5); // class data
if (stereo && !couple) m_auBitStream.write ((sbrDataCh1[0] >> 20) & 7, 5);
// sbr_dtdf()
i = (1u << tmpCh0) - (indepFlag ? 1 : 0); // actual bs_num_env[0]
// sbr_dtdf(), assumes bs_pvc == 0, i.e. no PVC like rest of code
i = envCh0 - (indepFlag ? 1 : 0);
if (i > 0) m_auBitStream.write ((sbrDataCh0[0] >> 12) & 255, i); // _df_env
bitCount += i;
i = (tmpCh0 > 0 ? 2 : 1) - (indepFlag ? 1 : 0);// bs_num_noise[0]
i = __min (2, envCh0) - (indepFlag ? 1 : 0);
if (i > 0) m_auBitStream.write ((sbrDataCh0[0] >> 4) & 255, i); // df_noise
bitCount += i;
if (stereo)
{
i = (1u << tmpCh1) - (indepFlag ? 1 : 0);
i = envCh1 - (indepFlag ? 1 : 0);
if (i > 0) m_auBitStream.write ((sbrDataCh1[0] >> 12) & 255, i);
bitCount += i;
i = (tmpCh1 > 0 ? 2 : 1) - (indepFlag ? 1 : 0);
i = __min (2, envCh1) - (indepFlag ? 1 : 0);
if (i > 0) m_auBitStream.write ((sbrDataCh1[0] >> 4) & 255, i);
bitCount += i;
}
......@@ -155,37 +163,64 @@ unsigned BitStreamWriter::writeChannelWiseSbrData (const int32_t* const sbrDataC
m_auBitStream.write (sbrDataCh0[0] & i, nb); // bs_invf_mode[0][]
if (stereo && !couple) m_auBitStream.write (sbrDataCh1[0] & i, nb);
// sbr_envelope() for mono/left channel, assumes bs_pvc_mode == 0
for (i = 1; i <= (1u << tmpCh0); i++) // dt loop
// sbr_envelope() for mono/left channel, assumes bs_df_env[] == 0
for (i = 1; i <= envCh0; i++) // dt loop
{
const uint8_t bits = (res > 0 && tmpCh0 > 0 ? 6 : 7);
const uint8_t bits = (res > 0 && envCh0 > 1 ? 6 : 7); // start
const uint8_t bitd = (2 + 3 * resCh0) * 2; // differential, <25 TODO: VLC words
m_auBitStream.write (15/*sbrDataCh0[i] & 127*/, bits); // bs_data_env
m_auBitStream.write (sbrDataCh0[i] & 127, bits); // bs_data_env
bitCount += bits;
m_auBitStream.write (sbrDataCh0[i] >> 7, 5<<1); // TODO: VLC words
bitCount += 5<<1;
m_auBitStream.write (sbrDataCh0[i] >> 7, bitd);
bitCount += bitd;
#if ENABLE_INTERTES
if (issTes)
{
m_auBitStream.write ((sbrDataCh0[9] >> (i - 1)) & 1, 1); // bs_temp_shape[ch][env=i]
bitCount++;
if ((sbrDataCh0[9] >> (i - 1)) & 1)
{
m_auBitStream.write (GAMMA, 2); // bs_inter_temp_shape_mode
bitCount += 2;
}
}
#endif
}
if (stereo && !couple)
{
for (i = 1; i <= (1u << tmpCh1); i++) // sbr_envelope() dt loop
for (i = 1; i <= envCh1; i++) // decoup. sbr_envelope() dt loop
{
const uint8_t bits = (res > 0 && tmpCh1 > 0 ? 6 : 7);
const uint8_t bits = (res > 0 && envCh1 > 1 ? 6 : 7);
const uint8_t bitd = (2 + 3 * resCh1) * 2; // TODO: VLC words
m_auBitStream.write (sbrDataCh1[i] & 127, bits);
bitCount += bits;
m_auBitStream.write (sbrDataCh1[i] >> 7, 5<<1); // TODO: VLC words
bitCount += 5<<1;
m_auBitStream.write (sbrDataCh1[i] >> 7, bitd);
bitCount += bitd;
#if ENABLE_INTERTES
if (issTes)
{
m_auBitStream.write ((sbrDataCh1[9] >> (i - 1)) & 1, 1); // bs_temp_shape[ch][env]
bitCount++;
if ((sbrDataCh1[9] >> (i - 1)) & 1)
{
m_auBitStream.write (GAMMA, 2);
bitCount += 2;
}
}
#endif
}
}
for (i = (tmpCh0 > 0 ? 2 : 1); i > 0; i--) // sbr_noise() dt loop
// sbr_noise() for mono/left channel, assumes bs_df_noise[i] == 0
for (i = __min (2, envCh0); i > 0; i--) // dt loop
{
m_auBitStream.write (31/*(sbrDataCh0[9] >> (12 * i)) & 31*/, 5); // _data_noise
m_auBitStream.write ((sbrDataCh0[9] >> (13 * i)) & 31, 5); // _data_noise
bitCount += 5;
if (nb == 4)
{
m_auBitStream.write ((sbrDataCh0[9] >> (12 * i - 6)) & 31, 1); // TODO: VLC word
m_auBitStream.write ((sbrDataCh0[9] >> (13 * i - 5)) & 31, 1); // TODO: VLC word
bitCount++;
}
}
......@@ -194,24 +229,37 @@ unsigned BitStreamWriter::writeChannelWiseSbrData (const int32_t* const sbrDataC
{
if (couple)
{
for (i = 1; i <= (1u << tmpCh1); i++) // sbr_envelope dt loop
for (i = 1; i <= envCh1; i++) // coup. sbr_envelope() dt loop
{
const uint8_t bits = (res > 0 && tmpCh1 > 0 ? 5 : 6);
const uint8_t bits = (res > 0 && envCh1 > 1 ? 5 : 6);
const uint8_t bitd = (2 + 3 * resCh1) * 2; // TODO: VLC words
m_auBitStream.write (sbrDataCh1[i] & 63, bits);
bitCount += bits;
m_auBitStream.write (sbrDataCh1[i] >> 7, 5<<1); // TODO: VLC words
bitCount += 5<<1;
m_auBitStream.write (sbrDataCh1[i] >> 7, bitd);
bitCount += bitd;
#if ENABLE_INTERTES
if (issTes)
{
m_auBitStream.write ((sbrDataCh1[9] >> (i - 1)) & 1, 1); // bs_temp_shape[ch][i]
bitCount++;
if ((sbrDataCh1[9] >> (i - 1)) & 1)
{
m_auBitStream.write (GAMMA, 2);
bitCount += 2;
}
}
#endif
}
}
for (i = (tmpCh1 > 0 ? 2 : 1); i > 0; i--) // sbr_noise dt loop
for (i = __min (2, envCh1); i > 0; i--) // sbr_noise() dt loop
{
m_auBitStream.write ((sbrDataCh1[9] >> (12 * i)) & 31, 5);
m_auBitStream.write ((sbrDataCh1[9] >> (13 * i)) & 31, 5);
bitCount += 5;
if (nb == 4)
{
m_auBitStream.write ((sbrDataCh1[9] >> (12 * i - 6)) & 31, 1); // TODO: VLC word
m_auBitStream.write ((sbrDataCh1[9] >> (13 * i - 5)) & 31, 1); // TODO: VLC word
bitCount++;
}
}
......@@ -677,8 +725,11 @@ unsigned BitStreamWriter::createAudioConfig (const char samplingFrequencyIndex,
if (sbrRatioShiftValue > 0) // sbrRatioIndex > 0: SbrConfig
{
const uint32_t sf = (samplingFrequencyIndex == 6 || samplingFrequencyIndex < 5 ? 10 : (samplingFrequencyIndex < 8 ? 9 : 8)); // bs_stop_freq
#if ENABLE_INTERTES
m_auBitStream.write (2, 3); // bs_interTes = 1, harmonicSBR, bs_pvc = 0
#else
m_auBitStream.write (0, 3); // fix harmonicSBR, bs_interTes, bs_pvc = 0
#endif
bitCount += 13; // incl. SbrDfltHeader following hereafter
m_auBitStream.write (15, 4); // 11025 @ 44.1, 11625 @ 48, 15000 @ 64 kHz
m_auBitStream.write (sf, 4); // 16193 @ 44.1, 18375 @ 48, 22500 @ 64 kHz
......@@ -778,7 +829,7 @@ unsigned BitStreamWriter::createAudioFrame (CoreCoderData** const elementData,
{
if (usacIndependencyFlag)
{
m_auBitStream.write ((sbrInfoAndData[ci][0] >> 24), 6); // SbrInfo()
m_auBitStream.write ((sbrInfoAndData[ci][0] >> 24) & 63, 6); // SbrInfo(), bs_pvc = 0
m_auBitStream.write (1, 1);// fix sbrUseDfltHeader = 1
bitCount += 7;
}
......@@ -820,7 +871,7 @@ unsigned BitStreamWriter::createAudioFrame (CoreCoderData** const elementData,
{
if (usacIndependencyFlag)
{
m_auBitStream.write ((sbrInfoAndData[ci][0] >> 24), 6); // SbrInfo()
m_auBitStream.write ((sbrInfoAndData[ci][0] >> 24) & 63, 6); // SbrInfo(), bs_pvc = 0
m_auBitStream.write (1, 1);// fix sbrUseDfltHeader = 1
bitCount += 7;
}
......
......@@ -371,25 +371,6 @@ static inline unsigned toNumChannels (const USAC_CCI chConfigurationIndex)
return numberOfChannels[__max (0, (signed char) chConfigurationIndex)];
}
// ISO/IEC 23003-3, Table 68
// Two parallel lookup tables describing, per channel configuration, how many
// syntactic elements are used and which element type occupies each slot.
// NOTE(review): presumably both are indexed by the USAC_CCI channel configuration
// index, in the order given by the trailing row comments below - confirm with callers.

// Number of elements per configuration. Each entry equals the count of leading
// non-ID_EL_UNDEF entries in the corresponding row of elementTypeConfig.
static const uint8_t elementCountConfig[USAC_MAX_NUM_ELCONFIGS] = {0, 1, 1, 2, 3, 3, 4, 5, 2, 2, 2, 5, 5};
// Element type of every slot per configuration; slots beyond the element count of
// a row are filled with ID_EL_UNDEF. The trailing comment names the configuration.
static const ELEM_TYPE elementTypeConfig[USAC_MAX_NUM_ELCONFIGS][USAC_MAX_NUM_ELEMENTS] = {
{ID_EL_UNDEF, ID_EL_UNDEF, ID_EL_UNDEF, ID_EL_UNDEF, ID_EL_UNDEF}, // CCI_UNDEF
{ID_USAC_SCE, ID_EL_UNDEF, ID_EL_UNDEF, ID_EL_UNDEF, ID_EL_UNDEF}, // CCI_1_CH
{ID_USAC_CPE, ID_EL_UNDEF, ID_EL_UNDEF, ID_EL_UNDEF, ID_EL_UNDEF}, // CCI_2_CH
{ID_USAC_SCE, ID_USAC_CPE, ID_EL_UNDEF, ID_EL_UNDEF, ID_EL_UNDEF}, // CCI_3_CH
{ID_USAC_SCE, ID_USAC_CPE, ID_USAC_SCE, ID_EL_UNDEF, ID_EL_UNDEF}, // CCI_4_CH
{ID_USAC_SCE, ID_USAC_CPE, ID_USAC_CPE, ID_EL_UNDEF, ID_EL_UNDEF}, // CCI_5_CH
{ID_USAC_SCE, ID_USAC_CPE, ID_USAC_CPE, ID_USAC_LFE, ID_EL_UNDEF}, // CCI_6_CH
{ID_USAC_SCE, ID_USAC_CPE, ID_USAC_CPE, ID_USAC_CPE, ID_USAC_LFE}, // CCI_8_CH
{ID_USAC_SCE, ID_USAC_SCE, ID_EL_UNDEF, ID_EL_UNDEF, ID_EL_UNDEF}, // CCI_2_CHM
{ID_USAC_CPE, ID_USAC_SCE, ID_EL_UNDEF, ID_EL_UNDEF, ID_EL_UNDEF}, // CCI_3_CHR
{ID_USAC_CPE, ID_USAC_CPE, ID_EL_UNDEF, ID_EL_UNDEF, ID_EL_UNDEF}, // CCI_4_CHR
{ID_USAC_SCE, ID_USAC_CPE, ID_USAC_CPE, ID_USAC_SCE, ID_USAC_LFE}, // CCI_7_CH
{ID_USAC_SCE, ID_USAC_CPE, ID_USAC_CPE, ID_USAC_CPE, ID_USAC_LFE} // CCI_8_CHM
};
// ISO/IEC 14496-3, Table 4.140
static const uint16_t sfbOffsetL0[42] = { // 88.2 and 96 kHz
0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 64, 72, 80, 88, 96, 108,
......@@ -785,8 +766,8 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
const unsigned nSamplesInFrame = toFrameLength (m_frameLength);
const unsigned samplingRate = toSamplingRate (m_frequencyIdx);
const unsigned lfeChannelIndex = (m_channelConf >= CCI_6_CH ? __max (5, nChannels - 1) : USAC_MAX_NUM_CHANNELS);
const uint32_t maxSfbLong = (samplingRate < 37566 || m_shiftValSBR > 0 ? m_numSwbLong // was MAX_NUM_SWB_LONG
: brModeAndFsToMaxSfbLong (m_bitRateMode, samplingRate));
const bool useMaxBandwidth = (samplingRate < 37566 || m_shiftValSBR > 0);
const uint32_t maxSfbLong = (useMaxBandwidth ? m_numSwbLong : brModeAndFsToMaxSfbLong (m_bitRateMode, samplingRate));
const uint32_t scaleSBR = (m_shiftValSBR > 0 ? 8 : 0); // reduces core rate by 25 %
const uint64_t scaleSr = (samplingRate < 27713 ? (samplingRate < 23004 ? 32 : 34) - __min (3 << m_shiftValSBR, m_bitRateMode)
: (samplingRate < 37566 && m_bitRateMode != 3u ? 36 : 37)) - (nChannels >> 1);
......@@ -847,7 +828,7 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
if (m_perCorrHCurr[el] > 128) // execute stereo pre-processing to increase correlation
{
const int16_t chanCorrSign = (coreConfig.stereoConfig & 2 ? -1 : 1);
const uint16_t nSamplesMax = (samplingRate < 37566 ? nSamplesInFrame : swbOffsetsL[m_swbTableIdx][__min (m_numSwbLong, maxSfbLong + 1)]);
const uint16_t nSamplesMax = (useMaxBandwidth ? nSamplesInFrame : swbOffsetsL[m_swbTableIdx][__min (m_numSwbLong, maxSfbLong + 1)]);
const uint8_t steppFadeLen = (eightShorts0 ? 4 : (coreConfig.tnsActive ? 32 : 64));
const uint8_t steppFadeOff = ((m_bitRateMode + 77000 / samplingRate) & 6) << (eightShorts0 ? 2 : 5);
const int64_t steppWeightI = __min (64, m_perCorrHCurr[el] - 128) >> (eightShorts0 || coreConfig.tnsActive ? 1 : 0);
......@@ -987,9 +968,10 @@ unsigned ExhaleEncoder::psychBitAllocation () // perceptual bit-allocation via s
#if !RESTRICT_TO_AAC
if ((maxSfbCh > 0) && m_noiseFilling[el] && (m_bitRateMode <= 3 || !eightShorts))
{
const uint32_t maxSfbCurr = (eightShorts ? (samplingRate < 37566 ? 14 : brModeAndFsToMaxSfbShort (m_bitRateMode, samplingRate)) : maxSfbLong);
const uint32_t maxSfbCurr = (eightShorts ? (useMaxBandwidth ? 17 - (samplingRate >> 13) // was 14, good for 22.05 - 32 kHz
: brModeAndFsToMaxSfbShort (m_bitRateMode, samplingRate)) : maxSfbLong);
const bool keepMaxSfbCurr = ((samplingRate < 37566) || (samplingRate >= 46009 && samplingRate < 55426 && eightShorts));
const uint8_t numSwbFrame = __min ((numSwbCh * ((maxSfbCh == maxSfbCurr) || (m_bitRateMode <= 2) ? 4u : 3u)) >> 2,
const uint8_t numSwbFrame = __min ((numSwbCh * ((maxSfbCh == maxSfbCurr) || (m_bitRateMode <= 2) || (m_shiftValSBR > 0) ? 4u : 3u)) >> 2,
(eightShorts ? maxSfbCh : maxSfbLong) + (m_bitRateMode < 2 || m_bitRateMode > 3 || keepMaxSfbCurr ? 0 : 1));
#ifndef NO_DTX_MODE
if ((m_bitRateMode < 1) && (m_numElements == 1) && (samplingRate < 27713) && eightShorts)
......@@ -1081,6 +1063,8 @@ unsigned ExhaleEncoder::quantizationCoding () // apply MDCT quantization and en
const unsigned nChannels = toNumChannels (m_channelConf);
const unsigned nSamplesInFrame = toFrameLength (m_frameLength);
const unsigned samplingRate = toSamplingRate (m_frequencyIdx);
const unsigned nSamplesTempAna = (nSamplesInFrame * 25) >> 4; // pre-delay for look-ahead
const bool useMaxBandwidth = (samplingRate < 37566 || m_shiftValSBR > 0);
const unsigned* const coeffMagn = m_sfbQuantizer.getCoeffMagnPtr ();
uint8_t meanSpecFlat[USAC_MAX_NUM_CHANNELS];
uint8_t meanTempFlat[USAC_MAX_NUM_CHANNELS] = {208, 208, 208, 208, 208, 208, 208, 208};
......@@ -1180,8 +1164,8 @@ unsigned ExhaleEncoder::quantizationCoding () // apply MDCT quantization and en
if (grpData.sfbsPerGroup > 0) // rate control part 2 to reach constrained VBR (CVBR)
{
const uint8_t maxSfbLong = (samplingRate < 37566 ? 63 - (samplingRate >> 11) : brModeAndFsToMaxSfbLong (m_bitRateMode, samplingRate));
const uint8_t maxSfbShort = (samplingRate < 37566 ? 21 - (samplingRate >> 12) : brModeAndFsToMaxSfbShort(m_bitRateMode, samplingRate));
const uint8_t maxSfbLong = (useMaxBandwidth ? 54 - (samplingRate >> 13) : brModeAndFsToMaxSfbLong (m_bitRateMode, samplingRate));
const uint8_t maxSfbShort = (useMaxBandwidth ? 19 - (samplingRate >> 13) : brModeAndFsToMaxSfbShort(m_bitRateMode, samplingRate));
const uint16_t peakIndex = (shortWinCurr ? 0 : (m_specAnaCurr[ci] >> 5) & 2047);
const unsigned sfmBasedSfbStart = (shortWinCurr ? maxSfbShort - 2 + (meanSpecFlat[ci] >> 6) : maxSfbLong - 6 + (meanSpecFlat[ci] >> 5)) +
(shortWinCurr ? -3 + (((1 << 5) + meanTempFlat[ci]) >> 6) : -7 + (((1 << 4) + meanTempFlat[ci]) >> 5));
......@@ -1316,8 +1300,28 @@ unsigned ExhaleEncoder::quantizationCoding () // apply MDCT quantization and en
#endif
if ((coreConfig.elementType < ID_USAC_LFE) && (m_shiftValSBR > 0)) // collect SBR data
{
int32_t* const sbrLevel = &m_coreSignals[ci][nSamplesTempAna - 64 + nSamplesInFrame];
memset (m_coreSignals[ci], 0, 10 * sizeof (int32_t)); // TODO
m_coreSignals[ci][0] = 1 << 20; // fix bs_freq_res = high
#if ENABLE_INTERTES
m_coreSignals[ci][0] = (shortWinPrev ? 0x40000000 : 0x40100000); // freq_res, interTes
#else
m_coreSignals[ci][0] = (shortWinPrev ? 0 : 1) << 20; // bs_freq_res = low resp. high
#endif
const int32_t msfVal = (shortWinPrev ? 31 : __max (2, __max (m_meanFlatPrev[ci], meanSpecFlat[ci]) >> 3));
m_meanFlatPrev[ci] = meanSpecFlat[ci];
m_coreSignals[ci][9] = (msfVal << 13) | (msfVal << 26); // noise level(s), 31 = none
m_coreSignals[ci][0] |= 4 - int32_t (sqrt (0.75 * msfVal)); // filter mode, 0 = none
const uint64_t enAdd = (uint64_t) sbrLevel[11] * (uint64_t) sbrLevel[11]; // envelope
const uint64_t enSub = (uint64_t) sbrLevel[21] * (uint64_t) sbrLevel[21]; // 1.9 frms
const uint64_t enSum = (uint64_t) sbrLevel[20] * (uint64_t) sbrLevel[20]; // of delay
const uint64_t enAdj = (enSum + enAdd - enSub + (nSamplesInFrame >> 1)) / nSamplesInFrame;
m_coreSignals[ci][1] = (enAdj > 8192 ? int32_t (1.375 - 0.03125 * msfVal + 6.64385619 * log10 ((double) enAdj)) - 26 : 0);
memcpy (&sbrLevel[20], &sbrLevel[10] /*last*/, 10 * sizeof (int32_t));
memcpy (&sbrLevel[10], sbrLevel /*& current*/, 10 * sizeof (int32_t)); // delay line
}
ci++;
}
......@@ -1338,6 +1342,7 @@ unsigned ExhaleEncoder::spectralProcessing () // complete ics_info(), calc TNS
const unsigned nSamplesInShort = nSamplesInFrame >> 3;
const unsigned samplingRate = toSamplingRate (m_frequencyIdx);
const unsigned lfeChannelIndex = (m_channelConf >= CCI_6_CH ? __max (5, nChannels - 1) : USAC_MAX_NUM_CHANNELS);
const bool useMaxBandwidth = (samplingRate < 37566 || m_shiftValSBR > 0);
unsigned ci = 0, s; // running index
unsigned errorValue = 0; // no error
......@@ -1384,7 +1389,7 @@ unsigned ExhaleEncoder::spectralProcessing () // complete ics_info(), calc TNS
const bool eightShorts = (coreConfig.icsInfoCurr[0].windowSequence == EIGHT_SHORT);
const uint8_t meanSpecFlat = (((m_specAnaCurr[ci] >> 16) & UCHAR_MAX) + ((m_specAnaCurr[ci + 1] >> 16) & UCHAR_MAX) + 1) >> 1;
const uint16_t* const swbo = swbOffsetsL[m_swbTableIdx];
const uint16_t nSamplesMax = (samplingRate < 37566 ? nSamplesInFrame : swbo[brModeAndFsToMaxSfbLong (m_bitRateMode, samplingRate)]);
const uint16_t nSamplesMax = (useMaxBandwidth ? nSamplesInFrame : swbo[brModeAndFsToMaxSfbLong (m_bitRateMode, samplingRate)]);
const int16_t steAnaStats = m_specAnalyzer.stereoSigAnalysis (m_mdctSignals[ci], m_mdctSignals[ci + 1],
m_mdstSignals[ci], m_mdstSignals[ci + 1], nSamplesMax,
nSamplesInFrame, eightShorts, coreConfig.stereoDataCurr);
......@@ -1862,6 +1867,7 @@ ExhaleEncoder::ExhaleEncoder (int32_t* const inputPcmData, unsigned ch
m_mdctQuantMag[ch] = nullptr;
m_mdctSignals[ch] = nullptr;
m_mdstSignals[ch] = nullptr;
m_meanFlatPrev[ch] = 0;
m_scaleFacData[ch] = nullptr;
m_specAnaCurr[ch] = 0;
m_specFlatPrev[ch] = 0;
......@@ -1940,7 +1946,7 @@ unsigned ExhaleEncoder::encodeLookahead ()
*(predSig + 2) * (int64_t) filterC[2] + *(predSig + 3) * (int64_t) filterC[3];
*(--predSig) = int32_t ((predSample > 0 ? -predSample + (1 << 9) - 1 : -predSample) >> 9);
}
if (m_shiftValSBR > 0) memset (m_coreSignals[ch], 0, (nSamplesInFrame >> 2) * sizeof (int32_t));
if (m_shiftValSBR > 0) memset (m_coreSignals[ch], 0, ((nSamplesInFrame * 41) >> (4 + m_shiftValSBR)) * sizeof (int32_t));
}
// set initial temporal channel statistic to something meaningful before first coded frame
......
......@@ -79,6 +79,7 @@ private:
uint8_t* m_mdctQuantMag[USAC_MAX_NUM_CHANNELS];
int32_t* m_mdctSignals[USAC_MAX_NUM_CHANNELS];
int32_t* m_mdstSignals[USAC_MAX_NUM_CHANNELS];
uint8_t m_meanFlatPrev[USAC_MAX_NUM_CHANNELS];
#if !RESTRICT_TO_AAC
bool m_noiseFilling[USAC_MAX_NUM_ELEMENTS];
bool m_nonMpegExt;
......
......@@ -32,6 +32,12 @@
#define USAC_NUM_FREQ_TABLES 6
#define USAC_NUM_SAMPLE_RATES (2 * AAC_NUM_SAMPLE_RATES)
// Compile-time switch for the inter-sample TES code paths of the SBR writer;
// 0 leaves them compiled out, any nonzero value compiles them in.
#define ENABLE_INTERTES 0 // inter-sample TES in SBR
#if ENABLE_INTERTES
// GAMMA only exists when the switch above is on. The bit-stream writer emits it
// as the 2-bit bs_inter_temp_shape_mode field whenever a bs_temp_shape flag is set.
// NOTE(review): the "2?" remark suggests the chosen value is still tentative.
# define GAMMA 1 // 2?
#endif
#define RESTRICT_TO_AAC 0 // allow only AAC tool-set
#if RESTRICT_TO_AAC
......@@ -168,6 +174,25 @@ const uint8_t eightTimesSqrt256Minus[256] = {
38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 25, 24, 23, 21, 20, 18, 16, 14, 11, 8
};
// ISO/IEC 23003-3:2012, Table 68
// Two parallel lookup tables describing, per channel configuration, how many
// syntactic elements are used and which element type occupies each slot.
// NOTE(review): presumably both are indexed by the USAC_CCI channel configuration
// index, in the order given by the trailing row comments below - confirm with callers.

// Number of elements per configuration. Each entry equals the count of leading
// non-ID_EL_UNDEF entries in the corresponding row of elementTypeConfig.
static const uint8_t elementCountConfig[USAC_MAX_NUM_ELCONFIGS] = {0, 1, 1, 2, 3, 3, 4, 5, 2, 2, 2, 5, 5};
// Element type of every slot per configuration; slots beyond the element count of
// a row are filled with ID_EL_UNDEF. The trailing comment names the configuration.
static const ELEM_TYPE elementTypeConfig[USAC_MAX_NUM_ELCONFIGS][USAC_MAX_NUM_ELEMENTS] = {
{ID_EL_UNDEF, ID_EL_UNDEF, ID_EL_UNDEF, ID_EL_UNDEF, ID_EL_UNDEF}, // CCI_UNDEF
{ID_USAC_SCE, ID_EL_UNDEF, ID_EL_UNDEF, ID_EL_UNDEF, ID_EL_UNDEF}, // CCI_1_CH
{ID_USAC_CPE, ID_EL_UNDEF, ID_EL_UNDEF, ID_EL_UNDEF, ID_EL_UNDEF}, // CCI_2_CH
{ID_USAC_SCE, ID_USAC_CPE, ID_EL_UNDEF, ID_EL_UNDEF, ID_EL_UNDEF}, // CCI_3_CH
{ID_USAC_SCE, ID_USAC_CPE, ID_USAC_SCE, ID_EL_UNDEF, ID_EL_UNDEF}, // CCI_4_CH
{ID_USAC_SCE, ID_USAC_CPE, ID_USAC_CPE, ID_EL_UNDEF, ID_EL_UNDEF}, // CCI_5_CH
{ID_USAC_SCE, ID_USAC_CPE, ID_USAC_CPE, ID_USAC_LFE, ID_EL_UNDEF}, // CCI_6_CH
{ID_USAC_SCE, ID_USAC_CPE, ID_USAC_CPE, ID_USAC_CPE, ID_USAC_LFE}, // CCI_8_CH
{ID_USAC_SCE, ID_USAC_SCE, ID_EL_UNDEF, ID_EL_UNDEF, ID_EL_UNDEF}, // CCI_2_CHM
{ID_USAC_CPE, ID_USAC_SCE, ID_EL_UNDEF, ID_EL_UNDEF, ID_EL_UNDEF}, // CCI_3_CHR
{ID_USAC_CPE, ID_USAC_CPE, ID_EL_UNDEF, ID_EL_UNDEF, ID_EL_UNDEF}, // CCI_4_CHR
{ID_USAC_SCE, ID_USAC_CPE, ID_USAC_CPE, ID_USAC_SCE, ID_USAC_LFE}, // CCI_7_CH
{ID_USAC_SCE, ID_USAC_CPE, ID_USAC_CPE, ID_USAC_CPE, ID_USAC_LFE} // CCI_8_CHM
};
// fast calculation of x / den: (x * oneTwentyEightOver[den]) >> 7, accurate for 0 <= x <= 162
// Entry den (1..13) holds 128 / den rounded up to the next integer, so the multiply-and-
// shift above replaces an integer division. Entry 0 is a 0 placeholder (no division by 0).
const uint8_t oneTwentyEightOver[14] = {0, 128, 64, 43, 32, 26, 22, 19, 16, 15, 13, 12, 11, 10};
......
......@@ -11,7 +11,7 @@
#include "exhaleLibPch.h"
#include "tempAnalysis.h"
static const int16_t lffc2x[65] = { // low-frequency filter coefficients
static const int16_t lpfc12[65] = { // 50% low-pass filter coefficients
// 269-pt. sinc windowed by 0.409 * cos(0*pi.*t) - 0.5 * cos(2*pi.*t) + 0.091 * cos(4*pi.*t)
17887, -27755, 16590, -11782, 9095, -7371, 6166, -5273, 4582, -4029, 3576, -3196, 2873,
-2594, 2350, -2135, 1944, -1773, 1618, -1478, 1351, -1235, 1129, -1032, 942, -860, 784,
......@@ -19,6 +19,17 @@ static const int16_t lffc2x[65] = { // low-frequency filter coefficients
-124, 108, -95, 82, -71, 61, -52, 44, -37, 31, -26, 21, -17, 14, -11, 8, -6, 5, -3, 2, -1, 1
};
// Coefficient table (128 entries) for the 25% low-pass filter.
// - Entry 0 is stored as the plain factor 3; the inline "<<16" note marks that it is
//   meant to be scaled by 2^16 when applied (3 << 16 would not fit into int16_t).
// - From index 4 onward, every fourth entry is 0.
static const int16_t lpfc34[128] = { // 25% low-pass filter coefficients
// see also A. H. Nuttall, "Some Windows with Very Good Sidelobe Behavior," IEEE, Feb. 1981.
3 /*<<16*/, 26221, -8914, 19626, 0, -11731, 13789, -8331, 0, 6431, -8148, 5212, 0, -4360,
5688, -3728, 0, 3240, -4291, 2849, 0, -2529, 3378, -2260, 0, 2032, -2729, 1834, 0, -1662,
2240, -1510, 0, 1375, -1856, 1253, 0, -1144, 1546, -1045, 0, 955, -1292, 873, 0, -798,
1079, -729, 0, 666, -900, 608, 0, -555, 748, -505, 0, 459, -620, 418, 0, -379, 510, -343,
0, 310, -417, 280, 0, -252, 338, -227, 0, 203, -272, 182, 0, -162, 216, -144, 0, 128, -170,
113, 0, -100, 132, -88, 0, 77, -101, 67, 0, -58, 76, -50, 0, 43, -56, 37, 0, -31, 41, -26,
0, 22, -28, 18, 0, -15, 19, -12, 0, 10, -12, 8, 0, -6, 7, -4, 0, 3, -4, 2, 0, -1, 2, -1
};
// static helper functions
static unsigned updateAbsStats (const int32_t* const chSig, const int nSamples, unsigned* const maxAbsVal, int16_t* const maxAbsIdx)
{
......@@ -86,6 +97,7 @@ TempAnalyzer::TempAnalyzer ()
{
m_avgAbsHpPrev[ch] = 0;
m_maxAbsHpPrev[ch] = 0;
m_maxHfLevPrev[ch] = 0;
m_maxIdxHpPrev[ch] = 1;
m_pitchLagPrev[ch] = 0;
m_tempAnaStats[ch] = 0;
......@@ -122,7 +134,7 @@ unsigned TempAnalyzer::temporalAnalysis (const int32_t* const timeSignals[USAC_M
const int resamplerOffset = (int) lookaheadOffset - 128;
if ((timeSignals == nullptr) || (nChannels > USAC_MAX_NUM_CHANNELS) || (lfeChannelIndex > USAC_MAX_NUM_CHANNELS) || (sbrShift > 1) ||
(nSamplesInFrame > 2048) || (nSamplesInFrame < 2) || (lookaheadOffset > 4096) || (lookaheadOffset <= 256u * sbrShift))
(nSamplesInFrame > 2048) || (nSamplesInFrame <= 128 * sbrShift) || (lookaheadOffset > 4096) || (lookaheadOffset <= 256u * sbrShift))
{
return 1;
}
......@@ -135,6 +147,7 @@ unsigned TempAnalyzer::temporalAnalysis (const int32_t* const timeSignals[USAC_M
// --- get L1 norm and pitch lag of both sides
unsigned sumAbsValL = 0, sumAbsValR = 0;
unsigned maxAbsValL = 0, maxAbsValR = 0;
int32_t maxHfrLevL = 8, maxHfrLevR = 8;
int16_t maxAbsIdxL = 0, maxAbsIdxR = 0;
int splitPtL = 0;
int splitPtC = halfFrameOffset;
......@@ -147,21 +160,52 @@ unsigned TempAnalyzer::temporalAnalysis (const int32_t* const timeSignals[USAC_M
if (applyResampler && lrCoreTimeSignals[ch] != nullptr) // downsampler
{
/*LF*/int32_t* lrSig = &lrCoreTimeSignals[ch][resamplerOffset >> sbrShift]; // low-rate,
const int32_t* hrSig = &timeSignals[ch][resamplerOffset]; // high-rate input time signal
/*LF*/int32_t* lrSig = &lrCoreTimeSignals[ch][resamplerOffset >> sbrShift];
const int32_t* hrSig = &timeSignals[ch][resamplerOffset];
/*MF*/uint64_t ue[9] = {0, 0, 0, 0, 0, 0, 0, 0, 0}; // unit energies
for (int i = nSamplesInFrame >> sbrShift; i > 0; i--, lrSig++, hrSig += 2)
{
int64_t r = ((int64_t) hrSig[0] << 17) + (hrSig[-1] + (int64_t) hrSig[1]) * -2*SHRT_MIN;
int16_t s;
for (u = 65, s = 129; u > 0; s -= 2) r += (hrSig[-s] + (int64_t) hrSig[s]) * lffc2x[--u];
for (u = 65, s = 129; u > 0; s -= 2) r += (hrSig[-s] + (int64_t) hrSig[s]) * lpfc12[--u];
*lrSig = int32_t ((r + (1 << 17)) >> 18); // low-pass and low-rate
// TODO: bandpass
*lrSig = int32_t ((r + (1 << 17)) >> 18); // low-pass at half rate
if (*lrSig < -8388608) *lrSig = -8388608;
else
if (*lrSig > 8388607) *lrSig = 8388607;
if ((i & 1) != 0) // compute quarter-rate mid-frequency SBR signal
{
r = ((3 * (int64_t) hrSig[0]) << 16) - (hrSig[-1] + (int64_t) hrSig[1]) * SHRT_MIN - r;
r += (hrSig[-2] + (int64_t) hrSig[2]) * SHRT_MIN;
for (s = 127; s > 0; s--/*u = s*/) r += (hrSig[-s] + (int64_t) hrSig[s]) * lpfc34[s];
r = (r + (1 << 17)) >> 18; // SBR env. band-pass at quarter rate
ue[i >> 7] += uint64_t (r * r);
}
}
if (ch != lfeChannelIndex) // calculate overall and unit-wise levels
{
const unsigned numUnits = nSamplesInFrame >> (sbrShift + 7);
int32_t* const hfrLevel = &lrCoreTimeSignals[ch][(resamplerOffset + nSamplesInFrame) >> sbrShift];
for (u = numUnits; u > 0; /*u*/)
{
ue[8] += ue[--u];
hfrLevel[numUnits - u] = int32_t (0.5 + sqrt ((double) ue[u]));
}
hfrLevel[0] = int32_t (0.5 + sqrt ((double) ue[8]));
// stabilize transient detection below
for (u = numUnits >> 1; u > 0; u--)
{
if (maxHfrLevL < hfrLevel[u]) /* update max. */ maxHfrLevL = hfrLevel[u];
if (maxHfrLevR < hfrLevel[u + (numUnits >> 1)]) maxHfrLevR = hfrLevel[u + (numUnits >> 1)];
}
}
}
......@@ -225,9 +269,9 @@ unsigned TempAnalyzer::temporalAnalysis (const int32_t* const timeSignals[USAC_M
m_transientLoc[ch] = -1;
// re-init stats history for this channel
m_avgAbsHpPrev[ch] = 0;
m_maxAbsHpPrev[ch] = 0; // maxAbsValR
m_maxIdxHpPrev[ch] = 1; // maxAbsIdxR
m_pitchLagPrev[ch] = 0; // pLagBestR
m_maxAbsHpPrev[ch] = 0;
m_maxIdxHpPrev[ch] = 1;
m_pitchLagPrev[ch] = 0;
}
else // nonzero signal in the current frame
{
......@@ -299,14 +343,23 @@ unsigned TempAnalyzer::temporalAnalysis (const int32_t* const timeSignals[USAC_M
// --- temporal analysis statistics for frame
m_tempAnaStats[ch] = packAvgTempAnalysisStats (sumAbsHpL, sumAbsHpR, m_avgAbsHpPrev[ch],
sumAbsPpL + sumAbsPpR, maxAbsValL + maxAbsValR);
u = maxAbsValR;
if ((m_maxHfLevPrev[ch] < (maxHfrLevL >> 3)) || (maxHfrLevL < (maxHfrLevR >> 3))) // transient
{
maxAbsValL = maxHfrLevL;
maxAbsValR = maxHfrLevR;
m_maxAbsHpPrev[ch] = m_maxHfLevPrev[ch];
}
m_transientLoc[ch] = packTransLocWithPitchLag (maxAbsValL, maxAbsValR, m_maxAbsHpPrev[ch],
maxAbsIdxL, maxAbsIdxR, __max (1, pLagBestR));
// update stats history for this channel
m_avgAbsHpPrev[ch] = sumAbsHpR;
m_maxAbsHpPrev[ch] = maxAbsValR;
m_maxAbsHpPrev[ch] = u;
m_maxIdxHpPrev[ch] = (unsigned) maxAbsIdxR;
m_pitchLagPrev[ch] = (unsigned) pLagBestR;
} // if sumAbsValL == 0 && sumAbsValR == 0
if (applyResampler) m_maxHfLevPrev[ch] = maxHfrLevR;
} // for ch
return 0; // no error
......
......@@ -24,6 +24,7 @@ private:
// member variables
unsigned m_avgAbsHpPrev[USAC_MAX_NUM_CHANNELS];
unsigned m_maxAbsHpPrev[USAC_MAX_NUM_CHANNELS];
int32_t m_maxHfLevPrev[USAC_MAX_NUM_CHANNELS];
unsigned m_maxIdxHpPrev[USAC_MAX_NUM_CHANNELS];
unsigned m_pitchLagPrev[USAC_MAX_NUM_CHANNELS];
uint32_t m_tempAnaStats[USAC_MAX_NUM_CHANNELS];
......