[C++知识库] LZW编码解码 C++

开发: C++知识库 Java知识库 JavaScript Python PHP知识库人工智能区块链大数据移动开发嵌入式开发工具数据结构与算法开发测试游戏开发网络协议系统运维
教程: HTML教程 CSS教程 JavaScript教程 Go语言教程 JQuery教程 VUE教程 VUE3教程 Bootstrap教程 SQL数据库教程 C语言教程 C++教程 Java教程 Python教程 Python3教程 C#教程
数码: 电脑笔记本显卡显示器固态硬盘硬盘耳机手机 iphone vivo oppo 小米华为单反装机图拉丁

-> C++知识库 -> LZW编码解码 C++ -> 正文阅读

[C++知识库]LZW编码解码 C++

为了给单片机的一些常量数据压缩下，找到了简单的lzw压缩算法

网上找了一个源码，发现没达到最优的压缩，又改了下，

主要是bit数据的存放统一用16位，太浪费，变为动态调整

测试代码如下

    uint8_t org[]={
        //'q','q','q','q','q','q','q','w','c'
        //'O','n','e','d','a','y'
		//'X','y','m','y','m','o','t'
		'A','B','R','A','C','A','D','A','B','R','A','B','R','A','B','R','A'
        ,0x00
        };
    //为方便测试，这里只测试了字母，用0x00结尾。
    //其实可以是任意16进制数，数据结尾自定义
    uint8_t* c=org;
	int orgSize=0;
	while(*c){
		orgSize++;
		c++;
	}
	printf("orgSize:%d\r\n",orgSize);

    //把org[]编码压缩到endatas[]中
    uint8_t endatas[300];
    Biter bitPuter(endatas);

    LzwCode lzwCode(200);
    lzwCode.Encode(org,orgSize,&bitPuter);
    
    Serial.printf("encode data:\r\n");
    for(int i=0;i<bitPuter.Size();i++){
		Serial.printf("%02x ",endatas[i]);
	}
    Serial.printf("\r\n");
    
    Biter bitGeter(endatas,bitPuter.Size());
    //把endatas[]解码到dedatas[]中
    uint8_t dedatas[300];
    int size=lzwCode.Decode(&bitGeter,dedatas);
    Serial.printf("code data:%d\r\n",size);
    for(int i=0;i<size;i++){
		Serial.printf("%c",dedatas[i]);
	}
    Serial.printf("\r\n");

源文件如下：

编解码代码 lzw.h?lzw.cpp

#ifndef _MY_LZW_H
#define _MY_LZW_H

#include <Arduino.h>
#include "Biter.h"

class DictItem {
  public:
    //0x00-0xff为【基本元素】，不超过8位
    uint8_t suffix;

    int parent;
    int firstchild;
    int nextsibling;
  private:
};

class LzwCode {
  public:
    LzwCode(uint16_t dsize0);
    ~LzwCode();

    void Init(uint16_t dsize0);
    void Dispose();

    void Encode(uint8_t* fp,int size, Biter *bf);
    int Decode(Biter *bf,uint8_t *fp);

    void PrintDictionary();

  private:
    DictItem* dictionary=0;
    uint16_t dictSize;
    //下一个字典的编码
    uint16_t next_code;

    uint8_t bitw;//读取、写入的bit位数
    uint32_t bitm;//掩码

    void InitDictionary();
    int DecodeString(int start, int code,uint8_t* list);
    int InDictionary(int character, int string_code);
    void AddToDictionary(int character, int string_code,uint8_t decode=0);
};

#endif

#include "lzw.h"

LzwCode::LzwCode(uint16_t dsize0){
    Init(dsize0);
}

LzwCode::~LzwCode(){
    Dispose();
}

/**
 * @brief 初始化LZW
 * 
 * @param {dsize0} 基本表外的字典大小
 **/
void LzwCode::Init(uint16_t dsize0){
    Dispose();

	dictSize=dsize0+256;
    dictionary=new DictItem[dictSize];
}

void LzwCode::Dispose(){
    if(dictionary){
        delete[] dictionary;
        dictionary=0;
    }
}

/**
 * @brief 初始化 基础压缩表
 **/
void LzwCode::InitDictionary(){
    //以0x00-0xff为【基本元素】，所以有256个元素
	for(int i=0; i<256; i++){
		dictionary[i].suffix = i;
		dictionary[i].parent = -1;
		dictionary[i].firstchild = -1;
		dictionary[i].nextsibling = i+1;
	}
	dictionary[255].nextsibling = -1;

	next_code = 256;
}

/**
 * @brief 打印基本表之外的扩展压缩表
 **/
void LzwCode::PrintDictionary(){
	int n;
	int count;
	//存放扩展元素（每个是基本元素），
	//单个扩展元素包含的基本元素不超过50个
	uint8_t list[50];

	Serial.printf("dict:\r\n");
	for(n=256; n<next_code; n++){
		Serial.printf("%4d->", n);
		count = DecodeString(0, n, list);
		while(count-- > 0)
			Serial.printf("%c", (char)(list[count]));
		Serial.printf( "\r\n");
	}
}

/**
 * @brief 获取扩展元素的所有 基本元素
 * 
 * @param {start} list起始存放位置
 * @param {code} 基节点
 **/
int LzwCode::DecodeString(int start, int code,uint8_t* list){
	int index = start;

	while(code>=0){
		list[index] = dictionary[code].suffix;
		code = dictionary[code].parent;
		index++;
	}
	return index;
}

/**
 * @brief 判断{string_code}+{character}是否在字典
 * 		判断{编码对应的元素}+{当前元素}是否在字典
 * 
 * @param {character} 当前字符
 * @param {string_code} 上一次的字典编码
 * 
 * @return 未找到：-1  找到：字典编码
 **/
int LzwCode::InDictionary(int character, int string_code){
	int sibling;//下一个
	if(string_code<0)
        return character;

	sibling = dictionary[string_code].firstchild;
	while(sibling>-1){
		if(character == dictionary[sibling].suffix)
            return sibling;
		sibling = dictionary[sibling].nextsibling;
	}
	return -1;
}

/**
 * @brief {string_code}+{character}加入字典
 * 		{编码对应的元素}+{当前元素}加入字典
 * 
 * @param {character} 当前字符
 * @param {string_code} 上一次的字典编码
 * @param {decode} 是否处于解码操作
 **/
void LzwCode::AddToDictionary(int character, int string_code,uint8_t decode){
	int firstsibling, nextsibling;

	if(decode){
		if((next_code&bitm)!=0){
			bitm<<=1;
			bitw++;
			Serial.printf("bitw %d\r\n", bitw);
		}
	}

	if(0>string_code)
        return;

	if(next_code>=dictSize){
		Serial.printf("Dictionary overflow!!!!\r\n");
		return;
	}

	dictionary[next_code].suffix = character;
	dictionary[next_code].parent = string_code;
	dictionary[next_code].nextsibling = -1;
	dictionary[next_code].firstchild = -1;
	firstsibling = dictionary[string_code].firstchild;
	if(-1<firstsibling){	// the parent has child
		nextsibling = firstsibling;
		while( -1<dictionary[nextsibling].nextsibling ) 
			nextsibling = dictionary[nextsibling].nextsibling;
		dictionary[nextsibling].nextsibling = next_code;
	}else{// no child before, modify it to be the first
		dictionary[string_code].firstchild = next_code;
	}

	if(!decode){
		if((next_code&bitm)!=0){
			bitm<<=1;
			bitw++;
			Serial.printf("bitw %d\r\n", bitw);
		}
	}

	next_code++;
}

/**
 * @brief 编码、压缩
 * 
 * @param {datas} 要压缩的数据
 * @param {dsize} 要压缩数据的大小
 * @param {bp} 压缩后的数据
 **/
void LzwCode::Encode(uint8_t* datas,int dsize, Biter *bp){
	int character;
	int string_code;
	int index;
	int n=0;

	bitw=8;
	bitm=1<<8;

    bp->Put(dsize,3*8);
	
	InitDictionary();

	string_code = -1;
	while(n<dsize){
		character=datas[n++];
		
		//Serial.printf("%d\r\n", character);
		index = InDictionary(character, string_code);
		if(0<=index){
            // 在字典中
			string_code = index;
		}else{
            // 不在字典中
            bp->Put(string_code,bitw);

            AddToDictionary(character, string_code);
			
			string_code = character;
		}
	}
    bp->Put(string_code,bitw);
	bp->Flush();
}

/**
 * @brief 解码、解压缩
 * 
 * @param {bf} 压缩的数据
 * @param {fp} 解压的数据
 **/
int LzwCode::Decode(Biter *bf,uint8_t *fp){
	int character;
	int new_code, last_code;
	int phrase_length;

	//存放扩展元素（每个是基本元素），
	//单个扩展元素包含的基本元素不超过50个
	uint8_t list[50];
	
	int n=0;
	bitw=8;
	bitm=1<<8;

	int size=bf->Get(3*8);
	int orgsize=size;
	Serial.printf("size:%d\r\n", size);
	
	InitDictionary();
	last_code = -1;
	while (size>0) {
		new_code = (int)bf->Get(bitw);
		if (new_code >= next_code) { // this is the case CSCSC( not in dict)
			list[0] = character;
			phrase_length = DecodeString(1, last_code,list);
		} else {
			phrase_length = DecodeString(0, new_code,list);
		}
		character = list[phrase_length - 1];
		while (0 < phrase_length) {
			phrase_length--;
			fp[n++]=list[phrase_length];
			size--;
		}

        AddToDictionary(character, last_code,1);
            
		last_code = new_code;
	}

	return orgsize;
}

比特操作代码??Biter.h?Biter.cpp

#ifndef _MY_BITER_H
#define _MY_BITER_H

#include <Arduino.h>

class Biter {
  public:
    Biter(uint8_t* datas0,uint32_t datasize0=0);
    void Init(uint8_t* datas0,uint32_t datasize0=0);

    uint32_t Get(int bitWidth);
    void Put(uint32_t data, int bitWidth);
    void Flush();
    int Size();

  private:
    uint8_t* datas;
	  uint32_t datasize;

    uint32_t index;//datas的存放位置
    uint8_t mask;//当前bit存放的位置

    uint8_t cache;//写入的缓存，满8位存入datas、或一次读取8位
    
    uint8_t GetBit();
    void PutBit(uint8_t bit);
};

#endif

#include "Biter.h"

Biter::Biter(uint8_t *datas0, uint32_t datasize0)
{
	Init(datas0, datasize0);
}

void Biter::Init(uint8_t *datas0, uint32_t datasize0)
{
	this->datas = datas0;
	this->datasize = datasize0;
	this->index = 0;
	this->mask = 0x80;
	this->cache = 0;
}

/*******************Get*************************/

/**
 * @brief 获取1bit数据
 *
 * @return 0或1
 **/
uint8_t Biter::GetBit()
{
	if (this->mask == 0x80)
	{
		if (this->index == this->datasize)
		{
			return 0;
		}
		this->cache = this->datas[this->index++];
	}

	uint8_t value = this->mask & this->cache;

	if (this->mask == 0x01)
		this->mask = 0x80;
	else
		this->mask >>= 1;

	return value ? 1 : 0;
}

/**
 * @brief 获取n比特数据
 *
 * @param {bitWidth} 低bitWidth位
 **/
uint32_t Biter::Get(int bitWidth)
{
	uint32_t maskv;
	uint32_t value;

	maskv = 1L << (bitWidth - 1);
	value = 0L;
	while (maskv != 0)
	{
		if (GetBit())
			value |= maskv;
		maskv >>= 1;
	}

	return value;
}

/*******************Set*************************/

/**
 * @brief 放入1bit数据
 *
 * @param {bit} 0或1
 **/
void Biter::PutBit(uint8_t bit)
{
	// 1则写入，0不变
	if (bit != 0)
		this->cache |= this->mask;

	this->mask >>= 1;
	if (this->mask == 0)
	{
		// 满8位存入
		this->datas[this->index++] = this->cache;
		this->cache = 0;
		this->mask = 0x80;
	}
}

/**
 * @brief 放入n比特数据
 *
 * @param {data} 数据
 * @param {bitWidth} 低bitWidth位
 **/
void Biter::Put(uint32_t data, int bitWidth)
{
	uint32_t maskv;

	maskv = 1L << (bitWidth - 1);
	while (maskv != 0)
	{
		PutBit((data & maskv) ? 1 : 0);

		maskv >>= 1;
	}
}

void Biter::Flush()
{
	// 如果填充没完成，把剩余的放入数据
	if (this->mask == 0x80)
	{
		this->datas[this->index++] = this->cache;
	}
}

/**
 * @brief 获取存入数据的字节大小
 **/
int Biter::Size()
{
	return this->index;
}