winamp/Src/external_dependencies/openmpt-trunk/include/ancient/src/BZIP2Decompressor.cpp

/* Copyright (C) Teemu Suutari */

#include <cstdint>
#include <cstring>

#include "BZIP2Decompressor.hpp"
#include "HuffmanDecoder.hpp"
#include "InputStream.hpp"
#include "OutputStream.hpp"
#include "common/MemoryBuffer.hpp"
#include "common/CRC32.hpp"
#include "common/Common.hpp"


namespace ancient::internal
{

// Returns true when hdr looks like a bzip2 stream header: the upper three
// bytes match "BZh" and the lowest byte is an ascii digit from '1' to '9'.
bool BZIP2Decompressor::detectHeader(uint32_t hdr) noexcept
{
	const uint32_t magic=hdr&0xffff'ff00U;
	if (magic!=FourCC("BZh\0")) return false;

	// lowest byte is the level digit the constructors turn into a block size
	const uint32_t level=hdr&0xffU;
	return level>='1' && level<='9';
}

// Returns true when hdr is exactly the four character code "BZP2".
bool BZIP2Decompressor::detectHeaderXPK(uint32_t hdr) noexcept
{
	const uint32_t expected=FourCC("BZP2");
	return hdr==expected;
}

// Factory for the stand-alone variant: builds a BZIP2Decompressor over packedData
// and hands it back through the Decompressor interface.
// The constructor throws if the header is not recognized.
std::shared_ptr<Decompressor> BZIP2Decompressor::create(const Buffer &packedData,bool exactSizeKnown,bool verify)
{
	std::shared_ptr<Decompressor> instance=std::make_shared<BZIP2Decompressor>(packedData,exactSizeKnown,verify);
	return instance;
}

// Factory for the XPK sub-decompressor variant: builds a BZIP2Decompressor and
// hands it back through the XPKDecompressor interface.
// The constructor throws if the packed data does not start with a bzip2 header.
std::shared_ptr<XPKDecompressor> BZIP2Decompressor::create(uint32_t hdr,uint32_t recursionLevel,const Buffer &packedData,std::shared_ptr<XPKDecompressor::State> &state,bool verify)
{
	std::shared_ptr<XPKDecompressor> instance=std::make_shared<BZIP2Decompressor>(hdr,recursionLevel,packedData,state,verify);
	return instance;
}

// Stand-alone stream constructor.
// Reads the big-endian 32-bit header at offset 0, throws InvalidFormatError if it
// is not a bzip2 header, and derives the block size from the level digit.
// _packedSize starts at zero; decompressImpl fills it in once the stream end is found.
// exactSizeKnown and verify are not used here.
BZIP2Decompressor::BZIP2Decompressor(const Buffer &packedData,bool exactSizeKnown,bool verify) :
	_packedData(packedData),
	_packedSize(0)
{
	const uint32_t signature=packedData.readBE32(0);
	if (!detectHeader(signature))
		throw Decompressor::InvalidFormatError();

	// level digit '1'..'9' maps to 100000..900000 bytes
	const uint32_t level=(signature&0xffU)-'0';
	_blockSize=level*100'000;
}

// XPK chunk constructor.
// The whole of packedData is taken as the packed size. The data itself must start
// with a regular bzip2 header (checked with detectHeader, not detectHeaderXPK);
// otherwise InvalidFormatError is thrown.
// hdr, state and verify are not used here.
BZIP2Decompressor::BZIP2Decompressor(uint32_t hdr,uint32_t recursionLevel,const Buffer &packedData,std::shared_ptr<XPKDecompressor::State> &state,bool verify) :
	XPKDecompressor(recursionLevel),
	_packedData(packedData),
	_packedSize(_packedData.size())
{
	const uint32_t signature=packedData.readBE32(0);
	if (!detectHeader(signature))
		throw Decompressor::InvalidFormatError();

	// level digit '1'..'9' maps to 100000..900000 bytes
	const uint32_t level=(signature&0xffU)-'0';
	_blockSize=level*100'000;
}

// no resources of our own to release, the compiler generated teardown is enough
BZIP2Decompressor::~BZIP2Decompressor()=default;

// Human readable name of the stand-alone format.
// The string lives in a function-local static so the returned reference stays valid.
const std::string &BZIP2Decompressor::getName() const noexcept
{
	static std::string formatName{"bz2: bzip2"};
	return formatName;
}

// Human readable name of the XPK sub-format.
// The string lives in a function-local static so the returned reference stays valid.
const std::string &BZIP2Decompressor::getSubName() const noexcept
{
	static std::string formatName{"XPK-BZP2: bzip2"};
	return formatName;
}

// Size of the packed stream in bytes.
// For a stand-alone stream this stays zero until decompressImpl has run and
// recorded how much input was consumed; the XPK constructor sets it up front.
size_t BZIP2Decompressor::getPackedSize() const noexcept
{
	const size_t packedBytes=_packedSize;
	return packedBytes;
}


// Size of the unpacked data in bytes.
// Only decompressImpl assigns it in this file, so the value is meaningful after a
// decompression pass (initial value is declared outside this file - presumably zero).
size_t BZIP2Decompressor::getRawSize() const noexcept
{
	const size_t rawBytes=_rawSize;
	return rawBytes;
}

// Decodes the bzip2 stream held in _packedData into rawData.
//
// Bits are read MSB first, starting at byte offset 4 (right after the header the
// constructors have already validated). The stream is a sequence of blocks, each
// introduced by the 48-bit marker 0x314159265359, and it ends with the 48-bit
// marker 0x177245385090 followed by a 32-bit combined CRC.
//
// rawData: destination buffer. Its size is the output limit when _rawSize is still zero.
// verify:  when true, the CRC of every produced block is folded into a stream CRC
//          which is compared with the stored one at the end marker.
//
// Throws DecompressionError on malformed data (unknown marker, invalid table
// parameters, block overflow, final size mismatch) and VerificationError when
// the CRC check fails. Anything thrown by the stream or Huffman helpers is not
// caught here.
//
// On success _rawSize and _packedSize are filled in if they were still zero.
void BZIP2Decompressor::decompressImpl(Buffer &rawData,bool verify)
{
	size_t packedSize=_packedSize?_packedSize:_packedData.size();
	size_t rawSize=_rawSize?_rawSize:rawData.size();

	ForwardInputStream inputStream(_packedData,4,packedSize);
	MSBBitReader<ForwardInputStream> bitReader(inputStream);
	auto readBits=[&](uint32_t count)->uint32_t
	{
		return bitReader.readBits8(count);
	};
	auto readBit=[&]()->uint32_t
	{
		return bitReader.readBits8(1);
	};

	ForwardOutputStream outputStream(rawData,0,rawSize);

	// stream verification
	//
	// there is so much wrong in bzip2 CRC-calculation :(
	// 1. The bit ordering is opposite what everyone else does with CRC32
	// 2. The block CRCs are calculated separately, no way of calculating a complete
	//    CRC without knowing the block layout
	// 3. The CRC is the end of the stream and the stream is bit aligned. You
	//    can't read CRC without decompressing the stream.
	uint32_t crc=0;
	auto calculateBlockCRC=[&](size_t blockPos,size_t blockSize)
	{
		// stream CRC = rotate-left-by-one of the previous value, xor the block CRC
		crc=(crc<<1)|(crc>>31);
		crc^=CRC32Rev(rawData,blockPos,blockSize,0);
	};

	// unary-style code for the MTF-coded selector indexes (0..5)
	HuffmanDecoder<uint8_t> selectorDecoder
	{
		// incomplete Huffman table. errors possible
		HuffmanCode<uint8_t>{1,0b000000,0},
		HuffmanCode<uint8_t>{2,0b000010,1},
		HuffmanCode<uint8_t>{3,0b000110,2},
		HuffmanCode<uint8_t>{4,0b001110,3},
		HuffmanCode<uint8_t>{5,0b011110,4},
		HuffmanCode<uint8_t>{6,0b111110,5}
	};

	// code for the bit length deltas: 0 = keep, 10 = +1, 11 = -1
	HuffmanDecoder<int32_t> deltaDecoder
	{
		HuffmanCode<int32_t>{1,0b00,0},
		HuffmanCode<int32_t>{2,0b10,1},
		HuffmanCode<int32_t>{2,0b11,-1}
	};

	// holds one block after Huffman/RLE/MTF decoding, i.e. the BWT output
	MemoryBuffer tmpBuffer(_blockSize);
	uint8_t *tmpBufferPtr=tmpBuffer.data();

	// This is the dark, ancient secret of bzip2.
	// versions before 0.9.5 had a data randomization for "too regular"
	// data problematic for the bwt-implementation at that time.
	// although it is never utilized anymore, the support is still there
	// And this is exactly the kind of ancient stuff we want to support :)
	//
	// On this specific part (since it is a table of magic numbers)
	// we have no way other than copying it from the original reference

// Table has a separate copyright, lets have it as a separate file as well
#include "BZIP2Table.hpp"

	for (;;)
	{
		uint32_t blockHdrHigh=readBits(32);
		uint32_t blockHdrLow=readBits(16);
		if (blockHdrHigh==0x31415926U && blockHdrLow==0x5359U)
		{
			// a block

			// this is rather spaghetti...
			readBits(32);	// block crc, not interested
			bool randomized=readBit();

			// basically the random inserted is one LSB after n-th bytes
			// per defined in the table.
			// the state is local to the block, so every block starts from the table beginning
			uint32_t randomPos=1;
			uint32_t randomCounter=randomTable[0]-1;
			auto randomBit=[&]()->bool
			{
				// Beauty is in the eye of the beholder: this is smallest form to hide the ugliness
				// when the counter has run out, reload it from the next table entry and report a hit
				return (!randomCounter--)?randomCounter=randomTable[randomPos++&511]:false;
			};

			// start index for the inverse BWT
			uint32_t currentPtr=readBits(24);

			uint32_t currentBlockSize=0;
			{
				// two extra symbols on top of the used byte values (run symbol + stop marker)
				uint32_t numHuffmanItems=2;
				uint32_t huffmanValues[256];

				{
					// this is just a little bit inefficient but still we are reading bit by bit since
					// reference does it. (bitstream format details do not spill over)
					// first 16 bits tell which groups of 16 byte values have a detailed map
					std::vector<bool> usedMap(16);
					for (uint32_t i=0;i<16;i++) usedMap[i]=readBit();

					std::vector<bool> huffmanMap(256);
					for (uint32_t i=0;i<16;i++)
					{
						for (uint32_t j=0;j<16;j++)
							huffmanMap[i*16+j]=(usedMap[i])?readBit():false;
					}

					for (uint32_t i=0;i<256;i++) if (huffmanMap[i]) numHuffmanItems++;
					// no byte values in use at all
					if (numHuffmanItems==2) throw DecompressionError();

					// compact list of the byte values in use, in ascending order
					for (uint32_t currentValue=0,i=0;i<256;i++)
						if (huffmanMap[i]) huffmanValues[currentValue++]=i;
				}

				uint32_t huffmanGroups=readBits(3);
				if (huffmanGroups<2 || huffmanGroups>6) throw DecompressionError();

				uint32_t selectorsUsed=readBits(15);
				if (!selectorsUsed) throw DecompressionError();

				MemoryBuffer huffmanSelectorList(selectorsUsed);

				// move-to-front decode: returns map[value] and moves that entry to the front
				auto unMTF=[](uint8_t value,uint8_t map[])->uint8_t
				{
					uint8_t ret=map[value];
					if (value)
					{
						uint8_t tmp=map[value];
						for (uint32_t i=value;i;i--)
							map[i]=map[i-1];
						map[0]=tmp;
					}
					return ret;
				};

				// create Huffman selectors
				uint8_t selectorMTFMap[6]={0,1,2,3,4,5};

				for (uint32_t i=0;i<selectorsUsed;i++)
				{
					uint8_t item=unMTF(selectorDecoder.decode(readBit),selectorMTFMap);
					if (item>=huffmanGroups) throw DecompressionError();
					huffmanSelectorList[i]=item;
				}

				typedef HuffmanDecoder<uint32_t> BZIP2Decoder;
				std::vector<BZIP2Decoder> dataDecoders(huffmanGroups);

				// Create all tables
				for (uint32_t i=0;i<huffmanGroups;i++)
				{
					uint8_t bitLengths[258];

					// 5 bit start length, then per symbol a run of +1/-1 deltas closed by a zero
					uint32_t currentBits=readBits(5);
					for (uint32_t j=0;j<numHuffmanItems;j++)
					{
						int32_t delta;
						do
						{
							delta=deltaDecoder.decode(readBit);
							currentBits+=delta;
						} while (delta);
						if (currentBits<1 || currentBits>20) throw DecompressionError();
						bitLengths[j]=currentBits;
					}

					dataDecoders[i].createOrderlyHuffmanTable(bitLengths,numHuffmanItems);
				}

				// Huffman decode + unRLE + unMTF
				BZIP2Decoder *currentHuffmanDecoder=nullptr;
				uint32_t currentHuffmanIndex=0;
				uint8_t dataMTFMap[256];
				for (uint32_t i=0;i<numHuffmanItems-2;i++) dataMTFMap[i]=i;

				uint32_t currentRunLength=0;
				uint32_t currentRLEWeight=1;

				// writes the pending run (of the value currently at the MTF front) and resets the run state
				auto decodeRLE=[&]()
				{
					if (currentRunLength)
					{
						// compare against the remaining space instead of adding the two 32-bit
						// values: the sum could wrap around and let an oversized run through.
						// currentBlockSize never exceeds _blockSize so the subtraction is safe
						if (currentRunLength>_blockSize-currentBlockSize) throw DecompressionError();
						for (uint32_t i=0;i<currentRunLength;i++) tmpBufferPtr[currentBlockSize++]=huffmanValues[dataMTFMap[0]];
					}
					currentRunLength=0;
					currentRLEWeight=1;
				};

				for (uint32_t streamIndex=0;;streamIndex++)
				{
					// the decoder is switched after every 50 symbols according to the selector list
					if (!(streamIndex%50))
					{
						if (currentHuffmanIndex>=selectorsUsed) throw DecompressionError();
						currentHuffmanDecoder=&dataDecoders[huffmanSelectorList[currentHuffmanIndex++]];
					}
					uint32_t symbolMTF=currentHuffmanDecoder->decode(readBit);
					// stop marker is referenced only once, and it is the last one
					// This means we do not have to un-MTF it for detection
					if (symbolMTF==numHuffmanItems-1) break;
					if (currentBlockSize>=_blockSize) throw DecompressionError();
					if (symbolMTF<2)
					{
						// run symbols: 0 adds the current weight, 1 adds twice the weight,
						// and the weight doubles after each run symbol
						currentRunLength+=currentRLEWeight<<symbolMTF;
						currentRLEWeight<<=1;
						// a run that cannot fit is an error already now. Bailing out here
						// also keeps the run length and the weight from ever wrapping around
						if (currentRunLength>_blockSize-currentBlockSize) throw DecompressionError();
					} else {
						decodeRLE();
						uint8_t symbol=unMTF(symbolMTF-1,dataMTFMap);
						if (currentBlockSize>=_blockSize) throw DecompressionError();
						tmpBufferPtr[currentBlockSize++]=huffmanValues[symbol];
					}
				}
				decodeRLE();
				// also rejects an empty block
				if (currentPtr>=currentBlockSize) throw DecompressionError();
			}

			// inverse BWT + final RLE decoding.
			// there are a few dark corners here as well
			// 1. Can the stream end at 4 literals without count? I assume it is a valid optimization (and that this does not spillover to next block)
			// 2. Can the RLE-step include counts 252 to 255 even if reference does not do them? I assume yes here as well
			// 3. Can the stream be empty? We do not take issue here about that (that should be culled out earlier already)

			// histogram of the byte values in the block
			uint32_t sums[256];
			for (uint32_t i=0;i<256;i++) sums[i]=0;

			for (uint32_t i=0;i<currentBlockSize;i++)
			{
				sums[tmpBufferPtr[i]]++;
			}

			// rank[c] = number of bytes in the block smaller than c (exclusive prefix sum)
			uint32_t rank[256];
			for (uint32_t tot=0,i=0;i<256;i++)
			{
				rank[i]=tot;
				tot+=sums[i];
			}

			// not at all happy about the memory consumption, but it simplifies the implementation a lot
			// and by sacrificing 4*size (size as in actual block size) we do not have to have slow search nor another temporary buffer
			// since by calculating forward table we can do forward decoding of the data on the same pass as iBWT
			//
			// also, because I'm lazy
			MemoryBuffer forwardIndex(currentBlockSize*sizeof(uint32_t));
			auto forwardIndexPtr=forwardIndex.cast<uint32_t>();
			for (uint32_t i=0;i<currentBlockSize;i++)
				forwardIndexPtr[rank[tmpBufferPtr[i]]++]=i;

			// output + final RLE decoding
			// up to 4 equal bytes are collected; after exactly 4 the next byte is a repeat count
			uint8_t currentCh=0;
			uint32_t currentChCount=0;
			auto outputByte=[&](uint8_t ch)
			{
				if (randomized && randomBit()) ch^=1;
				if (!currentChCount)
				{
					currentCh=ch;
					currentChCount=1;
				} else {
					if (ch==currentCh && currentChCount!=4)
					{
						currentChCount++;
					} else {
						auto outputBlock=[&](uint32_t count)
						{
							for (uint32_t i=0;i<count;i++) outputStream.writeByte(currentCh);
						};

						if (currentChCount==4)
						{
							// ch is the count of additional repeats after the 4 literals
							outputBlock(uint32_t(ch)+4);
							currentChCount=0;
						} else {
							outputBlock(currentChCount);
							currentCh=ch;
							currentChCount=1;
						}
					}
				}
			};

			size_t destOffsetStart=outputStream.getOffset();

			// and now the final iBWT + unRLE is easy...
			for (uint32_t i=0;i<currentBlockSize;i++)
			{
				currentPtr=forwardIndexPtr[currentPtr];
				outputByte(tmpBufferPtr[currentPtr]);
			}
			// cleanup the state, a bit hackish way to do it:
			// feed a byte that forces the pending run out (count 0 after 4 literals,
			// otherwise a byte that differs from the current one).
			// This byte is synthetic and must not go through the randomization, otherwise
			// a random hit would turn the zero count into one and emit an extra byte
			randomized=false;
			if (currentChCount) outputByte(currentChCount==4?0:~currentCh);

			if (verify)
				calculateBlockCRC(destOffsetStart,outputStream.getOffset()-destOffsetStart);

		} else if (blockHdrHigh==0x17724538U && blockHdrLow==0x5090U) {
			// end of blocks
			uint32_t rawCRC=readBits(32);
			if (verify && crc!=rawCRC) throw VerificationError();
			break;
		} else throw DecompressionError();
	}

	if (!_rawSize) _rawSize=outputStream.getOffset();
	if (!_packedSize) _packedSize=inputStream.getOffset();
	if (_rawSize!=outputStream.getOffset()) throw DecompressionError();
}

// XPK entry point. previousData is not used by this format,
// the work is delegated to the two argument overload.
void BZIP2Decompressor::decompressImpl(Buffer &rawData,const Buffer &previousData,bool verify)
{
	decompressImpl(rawData,verify);
}

}
Initial community commit 2024-09-24 12:54:57 +00:00			`/* Copyright (C) Teemu Suutari */`

			`#include <cstdint>`
			`#include <cstring>`

			`#include "BZIP2Decompressor.hpp"`
			`#include "HuffmanDecoder.hpp"`
			`#include "InputStream.hpp"`
			`#include "OutputStream.hpp"`
			`#include "common/MemoryBuffer.hpp"`
			`#include "common/CRC32.hpp"`
			`#include "common/Common.hpp"`


			`namespace ancient::internal`
			`{`

			`bool BZIP2Decompressor::detectHeader(uint32_t hdr) noexcept`
			`{`
			`return ((hdr&0xffff'ff00U)==FourCC("BZh\0") && (hdr&0xffU)>='1' && (hdr&0xffU)<='9');`
			`}`

			`bool BZIP2Decompressor::detectHeaderXPK(uint32_t hdr) noexcept`
			`{`
			`return (hdr==FourCC("BZP2"));`
			`}`

			`std::shared_ptr<Decompressor> BZIP2Decompressor::create(const Buffer &packedData,bool exactSizeKnown,bool verify)`
			`{`
			`return std::make_shared<BZIP2Decompressor>(packedData,exactSizeKnown,verify);`
			`}`

			`std::shared_ptr<XPKDecompressor> BZIP2Decompressor::create(uint32_t hdr,uint32_t recursionLevel,const Buffer &packedData,std::shared_ptr<XPKDecompressor::State> &state,bool verify)`
			`{`
			`return std::make_shared<BZIP2Decompressor>(hdr,recursionLevel,packedData,state,verify);`
			`}`

			`BZIP2Decompressor::BZIP2Decompressor(const Buffer &packedData,bool exactSizeKnown,bool verify) :`
			`_packedData(packedData),`
			`_packedSize(0)`
			`{`
			`uint32_t hdr=packedData.readBE32(0);`
			`if (!detectHeader(hdr)) throw Decompressor::InvalidFormatError();;`
			`_blockSize=((hdr&0xffU)-'0')*100'000;`
			`}`

			`BZIP2Decompressor::BZIP2Decompressor(uint32_t hdr,uint32_t recursionLevel,const Buffer &packedData,std::shared_ptr<XPKDecompressor::State> &state,bool verify) :`
			`XPKDecompressor(recursionLevel),`
			`_packedData(packedData),`
			`_packedSize(_packedData.size())`
			`{`
			`uint32_t blockHdr=packedData.readBE32(0);`
			`if (!detectHeader(blockHdr)) throw Decompressor::InvalidFormatError();;`
			`_blockSize=((blockHdr&0xffU)-'0')*100'000;`
			`}`

			`BZIP2Decompressor::~BZIP2Decompressor()`
			`{`
			`// nothing needed`
			`}`

			`const std::string &BZIP2Decompressor::getName() const noexcept`
			`{`
			`static std::string name="bz2: bzip2";`
			`return name;`
			`}`

			`const std::string &BZIP2Decompressor::getSubName() const noexcept`
			`{`
			`static std::string name="XPK-BZP2: bzip2";`
			`return name;`
			`}`

			`size_t BZIP2Decompressor::getPackedSize() const noexcept`
			`{`
			`// no way to know before decompressing`
			`return _packedSize;`
			`}`


			`size_t BZIP2Decompressor::getRawSize() const noexcept`
			`{`
			`// same thing, decompression needed first`
			`return _rawSize;`
			`}`

			`void BZIP2Decompressor::decompressImpl(Buffer &rawData,bool verify)`
			`{`
			`size_t packedSize=_packedSize?_packedSize:_packedData.size();`
			`size_t rawSize=_rawSize?_rawSize:rawData.size();`

			`ForwardInputStream inputStream(_packedData,4,packedSize);`
			`MSBBitReader<ForwardInputStream> bitReader(inputStream);`
			`auto readBits=[&](uint32_t count)->uint32_t`
			`{`
			`return bitReader.readBits8(count);`
			`};`
			`auto readBit=[&]()->uint32_t`
			`{`
			`return bitReader.readBits8(1);`
			`};`

			`ForwardOutputStream outputStream(rawData,0,rawSize);`

			`// stream verification`
			`//`
			`// there is so much wrong in bzip2 CRC-calculation :(`
			`// 1. The bit ordering is opposite what everyone else does with CRC32`
			`// 2. The block CRCs are calculated separately, no way of calculating a complete`
			`// CRC without knowing the block layout`
			`// 3. The CRC is the end of the stream and the stream is bit aligned. You`
			`// can't read CRC without decompressing the stream.`
			`uint32_t crc=0;`
			`auto calculateBlockCRC=[&](size_t blockPos,size_t blockSize)`
			`{`
			`crc=(crc<<1)\|(crc>>31);`
			`crc^=CRC32Rev(rawData,blockPos,blockSize,0);`
			`};`

			`HuffmanDecoder<uint8_t> selectorDecoder`
			`{`
			`// incomplete Huffman table. errors possible`
			`HuffmanCode<uint8_t>{1,0b000000,0},`
			`HuffmanCode<uint8_t>{2,0b000010,1},`
			`HuffmanCode<uint8_t>{3,0b000110,2},`
			`HuffmanCode<uint8_t>{4,0b001110,3},`
			`HuffmanCode<uint8_t>{5,0b011110,4},`
			`HuffmanCode<uint8_t>{6,0b111110,5}`
			`};`

			`HuffmanDecoder<int32_t> deltaDecoder`
			`{`
			`HuffmanCode<int32_t>{1,0b00,0},`
			`HuffmanCode<int32_t>{2,0b10,1},`
			`HuffmanCode<int32_t>{2,0b11,-1}`
			`};`

			`MemoryBuffer tmpBuffer(_blockSize);`
			`uint8_t *tmpBufferPtr=tmpBuffer.data();`

			`// This is the dark, ancient secret of bzip2.`
			`// versions before 0.9.5 had a data randomization for "too regular"`
			`// data problematic for the bwt-implementation at that time.`
			`// although it is never utilized anymore, the support is still there`
			`// And this is exactly the kind of ancient stuff we want to support :)`
			`//`
			`// On this specific part (since it is a table of magic numbers)`
			`// we have no way other than copying it from the original reference`

			`// Table has a separate copyright, lets have it as a separate file as well`
			`#include "BZIP2Table.hpp"`

			`for (;;)`
			`{`
			`uint32_t blockHdrHigh=readBits(32);`
			`uint32_t blockHdrLow=readBits(16);`
			`if (blockHdrHigh==0x31415926U && blockHdrLow==0x5359U)`
			`{`
			`// a block`

			`// this is rather spaghetti...`
			`readBits(32); // block crc, not interested`
			`bool randomized=readBit();`

			`// basically the random inserted is one LSB after n-th bytes`
			`// per defined in the table.`
			`uint32_t randomPos=1;`
			`uint32_t randomCounter=randomTable[0]-1;`
			`auto randomBit=[&]()->bool`
			`{`
			`// Beauty is in the eye of the beholder: this is smallest form to hide the ugliness`
			`return (!randomCounter--)?randomCounter=randomTable[randomPos++&511]:false;`
			`};`

			`uint32_t currentPtr=readBits(24);`

			`uint32_t currentBlockSize=0;`
			`{`
			`uint32_t numHuffmanItems=2;`
			`uint32_t huffmanValues[256];`

			`{`
			`// this is just a little bit inefficient but still we reading bit by bit since`
			`// reference does it. (bitsream format details do not spill over)`
			`std::vector<bool> usedMap(16);`
			`for (uint32_t i=0;i<16;i++) usedMap[i]=readBit();`

			`std::vector<bool> huffmanMap(256);`
			`for (uint32_t i=0;i<16;i++)`
			`{`
			`for (uint32_t j=0;j<16;j++)`
			`huffmanMap[i*16+j]=(usedMap[i])?readBit():false;`
			`}`

			`for (uint32_t i=0;i<256;i++) if (huffmanMap[i]) numHuffmanItems++;`
			`if (numHuffmanItems==2) throw DecompressionError();`

			`for (uint32_t currentValue=0,i=0;i<256;i++)`
			`if (huffmanMap[i]) huffmanValues[currentValue++]=i;`
			`}`

			`uint32_t huffmanGroups=readBits(3);`
			`if (huffmanGroups<2 \|\| huffmanGroups>6) throw DecompressionError();`

			`uint32_t selectorsUsed=readBits(15);`
			`if (!selectorsUsed) throw DecompressionError();`

			`MemoryBuffer huffmanSelectorList(selectorsUsed);`

			`auto unMTF=[](uint8_t value,uint8_t map[])->uint8_t`
			`{`
			`uint8_t ret=map[value];`
			`if (value)`
			`{`
			`uint8_t tmp=map[value];`
			`for (uint32_t i=value;i;i--)`
			`map[i]=map[i-1];`
			`map[0]=tmp;`
			`}`
			`return ret;`
			`};`

			`// create Huffman selectors`
			`uint8_t selectorMTFMap[6]={0,1,2,3,4,5};`

			`for (uint32_t i=0;i<selectorsUsed;i++)`
			`{`
			`uint8_t item=unMTF(selectorDecoder.decode(readBit),selectorMTFMap);`
			`if (item>=huffmanGroups) throw DecompressionError();`
			`huffmanSelectorList[i]=item;`
			`}`

			`typedef HuffmanDecoder<uint32_t> BZIP2Decoder;`
			`std::vector<BZIP2Decoder> dataDecoders(huffmanGroups);`

			`// Create all tables`
			`for (uint32_t i=0;i<huffmanGroups;i++)`
			`{`
			`uint8_t bitLengths[258];`

			`uint32_t currentBits=readBits(5);`
			`for (uint32_t j=0;j<numHuffmanItems;j++)`
			`{`
			`int32_t delta;`
			`do`
			`{`
			`delta=deltaDecoder.decode(readBit);`
			`currentBits+=delta;`
			`} while (delta);`
			`if (currentBits<1 \|\| currentBits>20) throw DecompressionError();`
			`bitLengths[j]=currentBits;`
			`}`

			`dataDecoders[i].createOrderlyHuffmanTable(bitLengths,numHuffmanItems);`
			`}`

			`// Huffman decode + unRLE + unMTF`
			`BZIP2Decoder *currentHuffmanDecoder=nullptr;`
			`uint32_t currentHuffmanIndex=0;`
			`uint8_t dataMTFMap[256];`
			`for (uint32_t i=0;i<numHuffmanItems-2;i++) dataMTFMap[i]=i;`

			`uint32_t currentRunLength=0;`
			`uint32_t currentRLEWeight=1;`

			`auto decodeRLE=[&]()`
			`{`
			`if (currentRunLength)`
			`{`
			`if (currentBlockSize+currentRunLength>_blockSize) throw DecompressionError();`
			`for (uint32_t i=0;i<currentRunLength;i++) tmpBufferPtr[currentBlockSize++]=huffmanValues[dataMTFMap[0]];`
			`}`
			`currentRunLength=0;`
			`currentRLEWeight=1;`
			`};`

			`for (uint32_t streamIndex=0;;streamIndex++)`
			`{`
			`if (!(streamIndex%50))`
			`{`
			`if (currentHuffmanIndex>=selectorsUsed) throw DecompressionError();`
			`currentHuffmanDecoder=&dataDecoders[huffmanSelectorList[currentHuffmanIndex++]];`
			`}`
			`uint32_t symbolMTF=currentHuffmanDecoder->decode(readBit);`
			`// stop marker is referenced only once, and it is the last one`
			`// This means we do no have to un-MTF it for detection`
			`if (symbolMTF==numHuffmanItems-1) break;`
			`if (currentBlockSize>=_blockSize) throw DecompressionError();`
			`if (symbolMTF<2)`
			`{`
			`currentRunLength+=currentRLEWeight<<symbolMTF;`
			`currentRLEWeight<<=1;`
			`} else {`
			`decodeRLE();`
			`uint8_t symbol=unMTF(symbolMTF-1,dataMTFMap);`
			`if (currentBlockSize>=_blockSize) throw DecompressionError();`
			`tmpBufferPtr[currentBlockSize++]=huffmanValues[symbol];`
			`}`
			`}`
			`decodeRLE();`
			`if (currentPtr>=currentBlockSize) throw DecompressionError();`
			`}`

			`// inverse BWT + final RLE decoding.`
			`// there are a few dark corners here as well`
			`// 1. Can the stream end at 4 literals without count? I assume it is a valid optimization (and that this does not spillover to next block)`
			`// 2. Can the RLE-step include counts 252 to 255 even if reference does not do them? I assume yes here as here as well`
			`// 3. Can the stream be empty? We do not take issue here about that (that should be culled out earlier already)`
			`uint32_t sums[256];`
			`for (uint32_t i=0;i<256;i++) sums[i]=0;`

			`for (uint32_t i=0;i<currentBlockSize;i++)`
			`{`
			`sums[tmpBufferPtr[i]]++;`
			`}`

			`uint32_t rank[256];`
			`for (uint32_t tot=0,i=0;i<256;i++)`
			`{`
			`rank[i]=tot;`
			`tot+=sums[i];`
			`}`

			`// not at all happy about the memory consumption, but it simplifies the implementation a lot`
			`// and by sacrificing 4*size (size as in actual block size) we do not have to have slow search nor another temporary buffer`
			`// since by calculating forward table we can do forward decoding of the data on the same pass as iBWT`
			`//`
			`// also, because I'm lazy`
			`MemoryBuffer forwardIndex(currentBlockSize*sizeof(uint32_t));`
			`auto forwardIndexPtr=forwardIndex.cast<uint32_t>();`
			`for (uint32_t i=0;i<currentBlockSize;i++)`
			`forwardIndexPtr[rank[tmpBufferPtr[i]]++]=i;`

			`// output + final RLE decoding`
			`uint8_t currentCh=0;`
			`uint32_t currentChCount=0;`
			`auto outputByte=[&](uint8_t ch)`
			`{`
			`if (randomized && randomBit()) ch^=1;`
			`if (!currentChCount)`
			`{`
			`currentCh=ch;`
			`currentChCount=1;`
			`} else {`
			`if (ch==currentCh && currentChCount!=4)`
			`{`
			`currentChCount++;`
			`} else {`
			`auto outputBlock=[&](uint32_t count)`
			`{`
			`for (uint32_t i=0;i<count;i++) outputStream.writeByte(currentCh);`
			`};`

			`if (currentChCount==4)`
			`{`
			`outputBlock(uint32_t(ch)+4);`
			`currentChCount=0;`
			`} else {`
			`outputBlock(currentChCount);`
			`currentCh=ch;`
			`currentChCount=1;`
			`}`
			`}`
			`}`
			`};`

			`size_t destOffsetStart=outputStream.getOffset();`

			`// and now the final iBWT + unRLE is easy...`
			`for (uint32_t i=0;i<currentBlockSize;i++)`
			`{`
			`currentPtr=forwardIndexPtr[currentPtr];`
			`outputByte(tmpBufferPtr[currentPtr]);`
			`}`
			`// cleanup the state, a bit hackish way to do it`
			`if (currentChCount) outputByte(currentChCount==4?0:~currentCh);`

			`if (verify)`
			`calculateBlockCRC(destOffsetStart,outputStream.getOffset()-destOffsetStart);`

			`} else if (blockHdrHigh==0x17724538U && blockHdrLow==0x5090U) {`
			`// end of blocks`
			`uint32_t rawCRC=readBits(32);`
			`if (verify && crc!=rawCRC) throw VerificationError();`
			`break;`
			`} else throw DecompressionError();`
			`}`

			`if (!_rawSize) _rawSize=outputStream.getOffset();`
			`if (!_packedSize) _packedSize=inputStream.getOffset();`
			`if (_rawSize!=outputStream.getOffset()) throw DecompressionError();`
			`}`

			`void BZIP2Decompressor::decompressImpl(Buffer &rawData,const Buffer &previousData,bool verify)`
			`{`
			`return decompressImpl(rawData,verify);`
			`}`

			`}`