FEC前向糾錯,卷積編碼之維特比譯碼


因為要學習做WCDMA的流程解析,需要先提取卷積數據,首先就要做FEC卷積譯碼。
於是網上翻了好大一圈,特地學習了下viterbi譯碼算法,費很大力氣才湊齊能夠正確跑起來的代碼,特記錄一下。

說點題外話:viterbi是個人,全名Andrew J. Viterbi,一枚數學家,美國高通公司的創始人之一(沒錯,就是現在手機上那個高通芯片的高通),現代無線數字通訊工程的奠基人。
此外,viterbi算法不但可用來做卷積譯碼,另外一個廣泛的應用是做股票證券量化預測計算(隱馬模型預測)。沒錯,就是高頻量化交易炒股的算法基礎。美國量化對沖基金的傳奇,大獎章基金背后的老板James Simons,首創將隱馬模型預測用於炒股,也是一枚數學家。
兩枚玩數學算法的巨富,誰說學數學算法沒用的?

FEC前向糾錯譯碼用的viterbi算法代碼摘抄自libfec,里面的實現很全面,還有mmx,sse加速版。
測試是做WCDMA,所以解交織和譯碼用的3GPP WCDMA的卷積編碼參數(一幀編碼長度262bits,卷積編碼多項式是 r=1/2 k=9),我就只摘取了需要的viterbi29部分,有需要全部實現的同學可以自行去下載libfec。
下面是寫來用於測試驗證算法正確性的線程代碼。從IQ文件中一次讀取2個float數,讀取262+8(一幀viterbi譯碼長度)個數據后進行譯碼,若譯碼成功程序停在對應的測試斷點上。

UINT CALLBACK ProcessIQDataThread(void *p_param)
{
	Convolution2Viterbi_t *p_sys = (Convolution2Viterbi_t *)p_param;
	vector<double> *p_data = NULL;

	int framebits = 262;
	struct v29 *vp;
	vector<float> uninter1;
	vector<float> uninter2;
	vector<float> radioFrame;
	vector<float> frameL;
	vector<float> frameR;
	vector<uint8_t> viterbi_in;
	uint8_t viterbi_out[33];

	uninter2.resize((framebits + 8));
	uninter1.resize((framebits + 8) * 2);
	viterbi_in.resize((framebits + 8) * 2);

	if ((vp = (struct v29 *)create_viterbi29_port(framebits)) == NULL) {
		return -1;
	}	

	while (!p_sys->exit_thread)
	{
		if (!p_data)
		{
			WaitForSingleObject(p_sys->iq_data_event, 200);

			p_sys->iq_data_list.Lock();
			p_data = p_sys->iq_data_list.GetUse();
			p_sys->iq_data_list.UnLock();
		}
		if (!p_data)
			continue;

		// 讀取2個浮點數
		for (int i = 0; i < p_data->size() / 2; i++)
		{
			frameL.push_back(p_data->at(0));
			frameR.push_back(p_data->at(1));
		}

		// 一幀譯碼長度
		if (frameL.size() >= (framebits + 8))
		{
			// 解交織inter2
			deInterleavingNP(30, inter2Perm, frameL, uninter2);
			radioFrame.insert(radioFrame.end(), uninter2.begin(), uninter2.end());

			deInterleavingNP(30, inter2Perm, frameR, uninter2);
			radioFrame.insert(radioFrame.end(), uninter2.begin(), uninter2.end());

			if (radioFrame.size() >= (framebits + 8) * 2)
			{
				// 解交織inter1
				deInterleavingNP(inter1Columns[1], inter1Perm[1], radioFrame, uninter1);
				radioFrame.clear();
			
				// 使用Viterbi算法做FEC譯碼
				for (int i = 0; i < (framebits + 8) * 2; i++)
				{
					viterbi_in.push_back( ((int)uninter1[i] >> 24) );
				}

				init_viterbi29_port(vp, 0);
				update_viterbi29_blk_port(vp, viterbi_in.data(), framebits + 8);
				chainback_viterbi29_port(vp, viterbi_out, framebits, 0);

#ifdef _DEBUG
				// 數據驗證
				for (int i = 0; i < 33 - 4; i++) {
					if (viterbi_out[i] == 0x98 && viterbi_out[i + 1] == 0x54 && viterbi_out[i + 2] == 0xA0 && viterbi_out[i + 3] == 0x00)
						int bbb = 0;
					if (viterbi_out[i] == 0x05 && viterbi_out[i + 1] == 0x33 && viterbi_out[i + 2] == 0x00 && viterbi_out[i + 3] == 0x0c)
						int bbb = 0;
					if (viterbi_out[i] == 0x00 && viterbi_out[i + 1] == 0x03 && viterbi_out[i + 2] == 0xf4 && viterbi_out[i + 3] == 0x40)
						int bbb = 0;
					if (viterbi_out[i] == 0xc5 && viterbi_out[i + 1] == 0x80 && viterbi_out[i + 2] == 0x05 && viterbi_out[i + 3] == 0x5a)
						int bbb = 0;
					if (viterbi_out[i] == 0xe4 && viterbi_out[i + 1] == 0x00 && viterbi_out[i + 2] == 0x33 && viterbi_out[i + 3] == 0xe6)
						int bbb = 0;
					if (viterbi_out[i] == 0x72 && viterbi_out[i + 1] == 0x08 && viterbi_out[i + 2] == 0x38)
						int bbb = 0;
					if (viterbi_out[i] == 0xe2 && viterbi_out[i + 1] == 0x00 && viterbi_out[i + 2] == 0x38)
						int bbb = 0;
				}
#endif
			}

			frameL.clear();
			frameR.clear();
		}

		p_sys->iq_data_list.Lock();
		p_sys->iq_data_list.PushEmpty(p_data);
		p_data = p_sys->iq_data_list.GetUse();
		p_sys->iq_data_list.UnLock();
	}

	return 0;
}

解交織 Interleaving.h

#pragma once

#include <vector>
#include <assert.h>

const int inter1Columns[4] = {
		1, // TTI = 10ms
		2, // TTI = 20ms
		4, // TTI = 40ms
		8 // TTI = 80ms
};

// 25.212 4.2.5.2 table 4: Inter-Column permutation pattern for 1st interleaving:
const char inter1Perm[4][8] = {
		{0}, // TTI = 10ms
		{0, 1}, // TTI = 20ms
		{0, 2, 1, 3}, // TTI = 40ms
		{0, 4, 2, 6, 1, 5, 3, 7} // TTI = 80ms
};

// 25.212 4.2.11 table 7: Inter-Column permutation pattern for 2nd interleaving:
const char inter2Perm[30] = { 0,20,10,5,15,25,3,13,23,8,18,28,1,11,21,
		6,16,26,4,14,24,19,9,29,12,2,7,22,27,17 };

/* FIRST steps two pointers through a mapping, one pointer into the interleaved
 * data and the other through the uninterleaved data.  The fifth argument, COPY,
 * determines whether the copy is from interleaved to uninterleaved, or back.
 * FIRST assumes no padding is necessary.
 * The reason for the define is to minimize the cost of parameterization and
 * function calls, as this is meant for L1 code, while also minimizing the
 * duplication of code.
 */

#define FIRST(UNINTERLEAVED,UNINTERLEAVEDP,INTERLEAVED,INTERLEAVEDP,COPY) \
		assert(UNINTERLEAVED.size() == INTERLEAVED.size()); \
		unsigned int rows = UNINTERLEAVED.size() / columns; \
		assert(rows * columns == UNINTERLEAVED.size()); \
		const char *colp = permutation; \
		float *INTERLEAVEDP = &INTERLEAVED[0]; \
		for (unsigned i = 0; i < columns; i++) { \
			float *UNINTERLEAVEDP = &UNINTERLEAVED[*colp++]; \
			for (unsigned int j = 0; j < rows; j++) { \
				COPY; \
				UNINTERLEAVEDP += columns; \
			} \
		}

 /** interleaving with No Padding */
void interleavingNP(const unsigned int columns, const char *permutation, vector<float> &in, vector<float> &out)
{
	FIRST(in, inp, out, outp, *outp++ = *inp)
}

/** de-interleaving with No Padding */
void deInterleavingNP(const unsigned int columns, const char *permutation, vector<float> &in, vector<float> &out)
{
	FIRST(out, outp, in, inp, *outp = *inp++)
}

/* SECOND steps two pointers through a mapping, one pointer into the interleaved
 * data and the other through the uninterleaved data.  The fifth argument, COPY,
 * determines whether the copy is from interleaved to uninterleaved, or back.
 * SECOND pads if necessary.
 * The reason for the define is to minimize the cost of parameterization and
 * function calls, as this is meant for L1 code, while also minimizing the
 * duplication of code.
 */

#define SECOND(UNINTERLEAVED,UNINTERLEAVEDP,INTERLEAVED,INTERLEAVEDP,COPY) \
		assert(UNINTERLEAVED.size() == INTERLEAVED.size()); \
		int R2 = (UNINTERLEAVED.size() + columns - 1) / columns; \
		int padding = columns * R2 - UNINTERLEAVED.size(); \
		int rows = R2; \
		int firstPaddedColumn = columns - padding; \
		const char *colp = permutation; \
		float *UNINTERLEAVEDP = &UNINTERLEAVED[0]; \
		for (int i = 0; i < columns; i++) { \
			int trows = rows - (*colp >= firstPaddedColumn); \
			float *INTERLEAVEDP = &INTERLEAVED[*colp++]; \
			for (int j = 0; j < trows; j++) { \
				COPY; \
				INTERLEAVEDP += columns; \
			} \
		}

 /** interleaving With Padding */
void interleavingWP(const int columns, const char *permutation, vector<float> &in, vector<float> &out)
{
	SECOND(in, inp, out, outp, *outp = *inp++)
}

/** de-interleaving With Padding */
void deInterleavingWP(const int columns, const char *permutation, vector<float> &in, vector<float> &out)
{
	SECOND(out, outp, in, inp, *outp++ = *inp)
}

/*
Determining the constants.

From the standard we know:
 * All frame sizes for the BCH.
  * transport block is 246 bits
  * there are two radio frames, 270 bits each
  * TTI is 20 ms
  * SF is 256
  * parity word Li is 16 bits
 * For all downlink TrCH except BCH, the radio frame size is  2*38400/SF = 76800/SF.
  * For SF=256 that's 300.
  * For SF=4 that's 19200.
 * The maximum code block size for convulutional coding is 540 bits (25.212 4.2.2.2).
  * That corresponds to a radio frame size of 1080 bits, or a spreading factor of 71,
	meaning that the smallest spreading factor that can be used is 128.
  * 76800/128 = 600 coded bits -> roughly 300 data bits.
  * That corresponds to an input rate of roughly 30 kb/s at.
 * The maximum code block size for turbo coding is 5114 bits (25.212 4.2.2.2).
  * That corresponds to a radio frame size of 15342 bits, or a spreading factor of 5,
	meaning that the smallest spreading factor that can be used is 8.
  * 76800/8 = 9600 coded bits -> roughly 3200 data bits.
  * That corresponds to an input rate of roughly 320 kb/s.

OK - SO HOW DO YOU GET HIGHER RATES?? HOW CAN YOU USE SF=4??
A: Use the full 5114-but code block and then expand it with rate-matching.
You still can't get the full ~640 kb/s implied by SF=4, but you get to ~500 kb/s.

(pat) A: They considered this problem.  See 25.212 4.2.2.2 Code block segmentation.
In Layer1, after transport block concatenation, you then simply chop the result up
into the largest pieces that can go through the encoder, then put them back together after.

From "higher layers" we are given:
 * SF: 4, 8, 16, 32, 64, 128, 256.
 * P: 24, 16, 8, 0 bits
 * TTI: 10, 20, 40 ms.

To simplify things, we set:
 * TTI 10 ms always on DCH and FACH, 20 ms on PCH and BCH
 * BCH and PCH are always rate-1/2 convolutional code
 * DCH and FACH are always rate-1/3 turbo code
 * no rate-matching, no puncturing
 * parity word is always 16 bits
 * So the only parameter than changes is spreading factor.

 * We will only support 15-slot (non-compressed) slot formats.


From our simplifications we also know:
  * For non-BCH/PCH TrCH there is one radio frame,
	76800/SF channel (coded) bits, per transport block.
  * DCH and FACH always use rate-1/3 turbo code,
	which has 12 terminating bits in the output.
  * For DCH and FACH, the transport block size is
	((76800/SF - 12)/3) - P = (25600/SF) - 4 - P data bits,
	where P is the parity word size.
   * Fix P=16 for simplicity and transport block size is (25600/SF) - 20.
   * for SF=256, that's 80 bits.
   * for SF=16, that's 1580 bits.
   * for SF=8, that's 3180 bits.

 * For PCH there is one radio frame,
	76800/SF channel (coded) bits, per transport block.
 * SF=64, for that's 1200 channel bits.
 * It's a rate-1/2 conv code, so that's 1200/2 - 8 - P data bits.
 * P=16 so that's 1200/2 - 24 = 576 transport bits.  Really?

*/
#if 0

const int inter1Columns[] = { 1, 2, 4, 8 };

const char inter1Perm[4][8] = {
	{0},
	{0, 1},
	{0, 2, 1, 3},
	{0, 4, 2, 6, 1, 5, 3, 7}
};

const char inter2Perm[] = {
	0, 20, 10, 5, 15, 25, 3, 13, 23, 8, 18, 28, 1, 11, 21,
	6, 16, 26, 4, 14, 24, 19, 9, 29, 12, 2, 7, 22, 27, 17
};

vector<char> randomBitVector(int n)
{
	vector<char> t(n);
	for (int i = 0; i < n; i++) t[i] = random() % 2;
	return t;
}

void testInterleavings()
{
	int lth1 = 48;
	int C2 = 30;
	for (int i = 0; i < 4; i++) {
		vector<char> v1 = randomBitVector(lth1);
		vector<char> v2(lth1);
		vector<char> v3(lth1);
		v1.interleavingNP(inter1Columns[i], inter1Perm[i], v2);
		v2.deInterleavingNP(inter1Columns[i], inter1Perm[i], v3);
		cout << "first " << i << " " << (veq(v1, v3) ? "ok" : "fail") << endl;
	}
	for (int lth2 = 90; lth2 < 120; lth2++) {
		vector<char> v1 = randomBitVector(lth2);
		vector<char> v2(lth2);
		vector<char> v3(lth2);
		v1.interleavingWP(C2, inter2Perm, v2);
		v2.deInterleavingWP(C2, inter2Perm, v3);
		cout << "second " << lth2 << " " << (veq(v1, v3) ? "ok" : "fail") << endl;
	}
	for (int lth = 48; lth <= 4800; lth *= 10) {
		TurboInterleaver er(lth);
		cout << "Turbo Interleaver permutation(" << lth << ") " << (permutationCheck(er.permutation()) ? "ok" : "fail") << endl;
		vector<char> er1 = randomBitVector(lth);
		vector<char> er2(lth);
		er.interleave(er1, er2);
		vector<char> er3(lth);
		er.unInterleave(er2, er3);
		cout << "Turbo Interleaver(" << lth << ") " << (veq(er1.sliced(), er3) ? "ok" : "fail") << endl;
	}
}

#endif

viterbi譯碼

partab實現 partab.c

/* Utility routines for FEC support
 * Copyright 2004, Phil Karn, KA9Q
 */

#include <stdio.h>  
#include "fec.h"  

unsigned char Partab[256] = {0};
int P_init = 0;

/* Create 256-entry odd-parity lookup table
 * Needed only on non-ia32 machines
 */
void partab_init(void) {
	int i, cnt, ti;

	/* Initialize parity lookup table */
	for (i = 0; i < 256; i++) {
		cnt = 0;
		ti = i;
		while (ti) {
			if (ti & 1)
				cnt++;
			ti >>= 1;
		}
		Partab[i] = cnt & 1;
	}
	P_init = 1;
}

/* Lookup table giving count of 1 bits for integers 0-255 */
int Bitcnt[] = {
 0, 1, 1, 2, 1, 2, 2, 3,
 1, 2, 2, 3, 2, 3, 3, 4,
 1, 2, 2, 3, 2, 3, 3, 4,
 2, 3, 3, 4, 3, 4, 4, 5,
 1, 2, 2, 3, 2, 3, 3, 4,
 2, 3, 3, 4, 3, 4, 4, 5,
 2, 3, 3, 4, 3, 4, 4, 5,
 3, 4, 4, 5, 4, 5, 5, 6,
 1, 2, 2, 3, 2, 3, 3, 4,
 2, 3, 3, 4, 3, 4, 4, 5,
 2, 3, 3, 4, 3, 4, 4, 5,
 3, 4, 4, 5, 4, 5, 5, 6,
 2, 3, 3, 4, 3, 4, 4, 5,
 3, 4, 4, 5, 4, 5, 5, 6,
 3, 4, 4, 5, 4, 5, 5, 6,
 4, 5, 5, 6, 5, 6, 6, 7,
 1, 2, 2, 3, 2, 3, 3, 4,
 2, 3, 3, 4, 3, 4, 4, 5,
 2, 3, 3, 4, 3, 4, 4, 5,
 3, 4, 4, 5, 4, 5, 5, 6,
 2, 3, 3, 4, 3, 4, 4, 5,
 3, 4, 4, 5, 4, 5, 5, 6,
 3, 4, 4, 5, 4, 5, 5, 6,
 4, 5, 5, 6, 5, 6, 6, 7,
 2, 3, 3, 4, 3, 4, 4, 5,
 3, 4, 4, 5, 4, 5, 5, 6,
 3, 4, 4, 5, 4, 5, 5, 6,
 4, 5, 5, 6, 5, 6, 6, 7,
 3, 4, 4, 5, 4, 5, 5, 6,
 4, 5, 5, 6, 5, 6, 6, 7,
 4, 5, 5, 6, 5, 6, 6, 7,
 5, 6, 6, 7, 6, 7, 7, 8,
};

fec.h

/* User include file for libfec
 * Copyright 2004, Phil Karn, KA9Q
 * May be used under the terms of the GNU Lesser General Public License (LGPL)
 */

#ifndef _FEC_H_
#define _FEC_H_

#ifdef __cplusplus
extern "C" {
#endif

/* r=1/2 k=9 convolutional encoder polynomials */
#define	V29POLYA	0x1af
#define	V29POLYB	0x11d

void *create_viterbi29_port(int len);
void set_viterbi29_polynomial_port(int polys[2]);
int init_viterbi29_port(void *p, int starting_state);
int chainback_viterbi29_port(void *p, unsigned char *data, unsigned int nbits, unsigned int endstate);
void delete_viterbi29_port(void *p);
int update_viterbi29_blk_port(void *p, unsigned char *syms, int nbits);

void partab_init();

static inline int parityb(unsigned char x) {
	extern unsigned char Partab[256];
	extern int P_init;
	if (!P_init) {
		partab_init();
	}
	return Partab[x];
}

static inline int parity(int x) {
	/* Fold down to one byte */
	x ^= (x >> 16);
	x ^= (x >> 8);
	return parityb(x);
}

#ifdef __cplusplus
}
#endif

#endif /* _FEC_H_ */

viterbi29_port.c

/* K=9 r=1/2 Viterbi decoder in portable C
 * Copyright Feb 2004, Phil Karn, KA9Q
 * May be used under the terms of the GNU Lesser General Public License (LGPL)
 */
#include <stdio.h>
#include <stdlib.h>
#include <memory.h>
#include "fec.h"

typedef union { unsigned int w[256]; } metric_t;
typedef union { unsigned long w[8]; } decision_t;

static union { unsigned char c[128]; } Branchtab29[2];
static int Init = 0;

/* State info for instance of Viterbi decoder */
struct v29 {
	metric_t metrics1; /* path metric buffer 1 */
	metric_t metrics2; /* path metric buffer 2 */
	decision_t* dp;          /* Pointer to current decision */
	metric_t* old_metrics, * new_metrics; /* Pointers to path metrics, swapped on every bit */
	decision_t* decisions;   /* Beginning of decisions for block */
};

/* Initialize Viterbi decoder for start of new frame */
int init_viterbi29_port(void* p, int starting_state) {
	struct v29* vp = p;
	int i;

	if (p == NULL)
		return -1;
	for (i = 0; i < 256; i++)
		vp->metrics1.w[i] = 63;

	vp->old_metrics = &vp->metrics1;
	vp->new_metrics = &vp->metrics2;
	vp->dp = vp->decisions;
	vp->old_metrics->w[starting_state & 255] = 0; /* Bias known start state */
	return 0;
}

void set_viterbi29_polynomial_port(int polys[2]) {
	int state;

	for (state = 0; state < 128; state++) {
		Branchtab29[0].c[state] = (polys[0] < 0) ^ parity((2 * state) & abs(polys[0])) ? 255 : 0;
		Branchtab29[1].c[state] = (polys[1] < 0) ^ parity((2 * state) & abs(polys[1])) ? 255 : 0;
	}
	Init++;
}

/* Create a new instance of a Viterbi decoder */
void* create_viterbi29_port(int len) {
	struct v29* vp;

	if (!Init) {
		int polys[2] = { V29POLYA,V29POLYB };
		set_viterbi29_polynomial_port(polys);
	}
	if ((vp = (struct v29*)malloc(sizeof(struct v29))) == NULL)
		return NULL;

	if ((vp->decisions = (decision_t*)malloc((len + 8) * sizeof(decision_t))) == NULL) {
		free(vp);
		return NULL;
	}
	init_viterbi29_port(vp, 0);

	return vp;
}

/* Viterbi chainback */
int chainback_viterbi29_port(
	void* p,
	unsigned char* data,	/* Decoded output data */
	unsigned int nbits,		/* Number of data bits */
	unsigned int endstate)  /* Terminal encoder state */
{
	struct v29* vp = p;
	decision_t* d;

	if (p == NULL)
		return -1;

	d = vp->decisions;
	/* Make room beyond the end of the encoder register so we can
	 * accumulate a full byte of decoded data
	 */
	endstate %= 256;

	/* The store into data[] only needs to be done every 8 bits.
	 * But this avoids a conditional branch, and the writes will
	 * combine in the cache anyway
	 */
	d += 8; /* Look past tail */
	while (nbits-- != 0) {
		int k;

		k = (d[nbits].w[(endstate) / 32] >> (endstate % 32)) & 1;
		data[nbits >> 3] = endstate = (endstate >> 1) | (k << 7);
	}
	return 0;
}

/* Delete instance of a Viterbi decoder */
void delete_viterbi29_port(void* p) {
	struct v29* vp = p;

	if (vp != NULL) {
		free(vp->decisions);
		free(vp);
	}
}

/* C-language butterfly */
#define BFLY(i) {\
unsigned int metric,m0,m1,decision;\
    metric = (Branchtab29[0].c[i] ^ sym0) + (Branchtab29[1].c[i] ^ sym1);\
    m0 = vp->old_metrics->w[i] + metric;\
    m1 = vp->old_metrics->w[i+128] + (510 - metric);\
    decision = (signed int)(m0-m1) > 0;\
    vp->new_metrics->w[2*i] = decision ? m1 : m0;\
    d->w[i/16] |= decision << ((2*i)&31);\
    m0 -= (metric+metric-510);\
    m1 += (metric+metric-510);\
    decision = (signed int)(m0-m1) > 0;\
    vp->new_metrics->w[2*i+1] = decision ? m1 : m0;\
    d->w[i/16] |= decision << ((2*i+1)&31);\
}

/* Update decoder with a block of demodulated symbols
 * Note that nbits is the number of decoded data bits, not the number
 * of symbols!
 */
int update_viterbi29_blk_port(void* p, unsigned char* syms, int nbits) {
	struct v29* vp = p;
	decision_t* d;

	if (p == NULL)
		return -1;

	d = (decision_t*)vp->dp;
	while (nbits--) {
		void* tmp;
		unsigned char sym0, sym1;
		int i;

		for (i = 0; i < 8; i++)
			d->w[i] = 0;
		sym0 = *syms++;
		sym1 = *syms++;

		for (i = 0; i < 128; i++)
			BFLY(i);

		d++;
		tmp = vp->old_metrics;
		vp->old_metrics = vp->new_metrics;
		vp->new_metrics = tmp;
	}
	vp->dp = d;
	return 0;
}


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM