Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Level 2 is now compatible with dictionary attach mode #1456

Merged
merged 2 commits into from
Jul 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
242 changes: 125 additions & 117 deletions lib/lz4hc.c
Original file line number Diff line number Diff line change
Expand Up @@ -324,45 +324,54 @@ typedef struct {
int back; /* negative value */
} LZ4HC_match_t;

LZ4HC_match_t LZ4HC_searchExtDict(const BYTE* ip, U32 ipIndex,
const BYTE* const iLowLimit, const BYTE* const iHighLimit,
const LZ4HC_CCtx_internal* dictCtx, U32 gDictEndIndex,
int currentBestML, int nbAttempts)
static LZ4HC_match_t LZ4MID_searchExtDict(const BYTE* ip, U32 ipIndex,
const BYTE* const iHighLimit,
const LZ4HC_CCtx_internal* dictCtx, U32 gDictEndIndex)
{
size_t const lDictEndIndex = (size_t)(dictCtx->end - dictCtx->prefixStart) + dictCtx->dictLimit;
U32 lDictMatchIndex = dictCtx->hashTable[LZ4HC_hashPtr(ip)];
U32 matchIndex = lDictMatchIndex + gDictEndIndex - (U32)lDictEndIndex;
int offset = 0, sBack = 0;
assert(lDictEndIndex <= 1 GB);
if (lDictMatchIndex>0)
DEBUGLOG(7, "lDictEndIndex = %zu, lDictMatchIndex = %u", lDictEndIndex, lDictMatchIndex);
while (ipIndex - matchIndex <= LZ4_DISTANCE_MAX && nbAttempts--) {
const BYTE* const matchPtr = dictCtx->prefixStart - dictCtx->dictLimit + lDictMatchIndex;

if (LZ4_read32(matchPtr) == LZ4_read32(ip)) {
int mlt;
int back = 0;
const BYTE* vLimit = ip + (lDictEndIndex - lDictMatchIndex);
if (vLimit > iHighLimit) vLimit = iHighLimit;
mlt = (int)LZ4_count(ip + MINMATCH, matchPtr + MINMATCH, vLimit) + MINMATCH;
back = (ip > iLowLimit) ? LZ4HC_countBack(ip, matchPtr, iLowLimit, dictCtx->prefixStart) : 0;
mlt -= back;
if (mlt > currentBestML) {
currentBestML = mlt;
offset = (int)(ipIndex - matchIndex);
sBack = back;
DEBUGLOG(7, "found match of length %i within extDictCtx", currentBestML);
} }
const U32* const hash4Table = dictCtx->hashTable;
const U32* const hash8Table = hash4Table + LZ4MID_HASHTABLESIZE;
DEBUGLOG(7, "LZ4MID_searchExtDict (ipIdx=%u)", ipIndex);

/* search long match first */
{ U32 l8DictMatchIndex = hash8Table[LZ4MID_hash8Ptr(ip)];
U32 m8Index = l8DictMatchIndex + gDictEndIndex - (U32)lDictEndIndex;
assert(lDictEndIndex <= 1 GB);
if (ipIndex - m8Index <= LZ4_DISTANCE_MAX) {
const BYTE* const matchPtr = dictCtx->prefixStart - dictCtx->dictLimit + l8DictMatchIndex;
const size_t safeLen = MIN(lDictEndIndex - l8DictMatchIndex, (size_t)(iHighLimit - ip));
int mlt = (int)LZ4_count(ip, matchPtr, ip + safeLen);
if (mlt >= MINMATCH) {
LZ4HC_match_t md;
DEBUGLOG(7, "Found long ExtDict match of len=%u", mlt);
md.len = mlt;
md.off = (int)(ipIndex - m8Index);
md.back = 0;
return md;
}
}
}

{ U32 const nextOffset = DELTANEXTU16(dictCtx->chainTable, lDictMatchIndex);
lDictMatchIndex -= nextOffset;
matchIndex -= nextOffset;
} }
/* search for short match second */
{ U32 l4DictMatchIndex = hash4Table[LZ4MID_hash4Ptr(ip)];
U32 m4Index = l4DictMatchIndex + gDictEndIndex - (U32)lDictEndIndex;
if (ipIndex - m4Index <= LZ4_DISTANCE_MAX) {
const BYTE* const matchPtr = dictCtx->prefixStart - dictCtx->dictLimit + l4DictMatchIndex;
const size_t safeLen = MIN(lDictEndIndex - l4DictMatchIndex, (size_t)(iHighLimit - ip));
int mlt = (int)LZ4_count(ip, matchPtr, ip + safeLen);
if (mlt >= MINMATCH) {
LZ4HC_match_t md;
DEBUGLOG(7, "Found short ExtDict match of len=%u", mlt);
md.len = mlt;
md.off = (int)(ipIndex - m4Index);
md.back = 0;
return md;
}
}
}

{ LZ4HC_match_t md;
md.len = currentBestML;
md.off = offset;
md.back = sBack;
/* nothing found */
{ LZ4HC_match_t const md = {0, 0, 0 };
return md;
}
}
Expand Down Expand Up @@ -440,7 +449,8 @@ static int LZ4MID_compress (
unsigned matchDistance;

/* input sanitization */
DEBUGLOG(5, "LZ4HC_compress_2hashes (%i bytes)", *srcSizePtr);
DEBUGLOG(5, "LZ4MID_compress (%i bytes)", *srcSizePtr);
if (dict == usingDictCtxHc) DEBUGLOG(5, "usingDictCtxHc");
assert(*srcSizePtr >= 0);
if (*srcSizePtr) assert(src != NULL);
if (maxOutputSize) assert(dst != NULL);
Expand Down Expand Up @@ -540,10 +550,9 @@ static int LZ4MID_compress (
if ( (dict == usingDictCtxHc)
&& (ipIndex - gDictEndIndex < LZ4_DISTANCE_MAX - 8) ) {
/* search a match in dictionary */
LZ4HC_match_t dMatch = LZ4HC_searchExtDict(ip, ipIndex,
anchor, matchlimit,
ctx->dictCtx, gDictEndIndex,
0, 2);
LZ4HC_match_t dMatch = LZ4MID_searchExtDict(ip, ipIndex,
matchlimit,
ctx->dictCtx, gDictEndIndex);
if (dMatch.len >= MINMATCH) {
DEBUGLOG(7, "found Dictionary match (offset=%i)", dMatch.off);
ip += dMatch.back;
Expand Down Expand Up @@ -1545,8 +1554,8 @@ int LZ4_loadDictHC (LZ4_streamHC_t* LZ4_streamHCPtr,
LZ4HC_CCtx_internal* const ctxPtr = &LZ4_streamHCPtr->internal_donotuse;
cParams_t cp;
DEBUGLOG(4, "LZ4_loadDictHC(ctx:%p, dict:%p, dictSize:%d, clevel=%d)", LZ4_streamHCPtr, dictionary, dictSize, ctxPtr->compressionLevel);
assert(LZ4_streamHCPtr != NULL);
assert(dictSize >= 0);
assert(LZ4_streamHCPtr != NULL);
if (dictSize > 64 KB) {
dictionary += (size_t)dictSize - 64 KB;
dictSize = 64 KB;
Expand Down Expand Up @@ -1576,8 +1585,10 @@ void LZ4_attach_HC_dictionary(LZ4_streamHC_t *working_stream, const LZ4_streamHC
static void LZ4HC_setExternalDict(LZ4HC_CCtx_internal* ctxPtr, const BYTE* newBlock)
{
DEBUGLOG(4, "LZ4HC_setExternalDict(%p, %p)", ctxPtr, newBlock);
if (ctxPtr->end >= ctxPtr->prefixStart + 4)
LZ4HC_Insert (ctxPtr, ctxPtr->end-3); /* Referencing remaining dictionary content */
if ( (ctxPtr->end >= ctxPtr->prefixStart + 4)
&& (LZ4HC_getCLevelParams(ctxPtr->compressionLevel).strat != lz4mid) ) {
LZ4HC_Insert (ctxPtr, ctxPtr->end-3); /* Referencing remaining dictionary content */
}

/* Only one memory segment for extDict, so any previous extDict is lost at this stage */
ctxPtr->lowLimit = ctxPtr->dictLimit;
Expand All @@ -1602,7 +1613,8 @@ LZ4_compressHC_continue_generic (LZ4_streamHC_t* LZ4_streamHCPtr,
LZ4_streamHCPtr, src, *srcSizePtr, limit);
assert(ctxPtr != NULL);
/* auto-init if forgotten */
if (ctxPtr->prefixStart == NULL) LZ4HC_init_internal (ctxPtr, (const BYTE*) src);
if (ctxPtr->prefixStart == NULL)
LZ4HC_init_internal (ctxPtr, (const BYTE*) src);

/* Check overflow */
if ((size_t)(ctxPtr->end - ctxPtr->prefixStart) + ctxPtr->dictLimit > 2 GB) {
Expand Down Expand Up @@ -1647,7 +1659,6 @@ int LZ4_compress_HC_continue_destSize (LZ4_streamHC_t* LZ4_streamHCPtr, const ch
}



/* LZ4_saveDictHC :
* save history content
* into a user-provided buffer
Expand Down Expand Up @@ -1678,75 +1689,6 @@ int LZ4_saveDictHC (LZ4_streamHC_t* LZ4_streamHCPtr, char* safeBuffer, int dictS
}


/***************************************************
* Deprecated Functions
***************************************************/

/* These functions currently generate deprecation warnings */

/* Wrappers for deprecated compression functions */
/* LZ4_compressHC() :
 * forwards to LZ4_compress_HC(), with dst capacity = LZ4_compressBound(srcSize) and level 0 */
int LZ4_compressHC(const char* src, char* dst, int srcSize)
{
    int const dstBound = LZ4_compressBound(srcSize);
    return LZ4_compress_HC(src, dst, srcSize, dstBound, 0);
}

/* LZ4_compressHC_limitedOutput() :
 * forwards to LZ4_compress_HC(), with caller-provided maxDstSize and level 0 */
int LZ4_compressHC_limitedOutput(const char* src, char* dst, int srcSize, int maxDstSize)
{
    return LZ4_compress_HC(src, dst, srcSize, maxDstSize, 0);
}

/* LZ4_compressHC2() :
 * forwards to LZ4_compress_HC(), with dst capacity = LZ4_compressBound(srcSize) and level cLevel */
int LZ4_compressHC2(const char* src, char* dst, int srcSize, int cLevel)
{
    int const dstBound = LZ4_compressBound(srcSize);
    return LZ4_compress_HC(src, dst, srcSize, dstBound, cLevel);
}

/* LZ4_compressHC2_limitedOutput() :
 * forwards to LZ4_compress_HC(), with caller-provided maxDstSize and level cLevel */
int LZ4_compressHC2_limitedOutput(const char* src, char* dst, int srcSize, int maxDstSize, int cLevel)
{
    return LZ4_compress_HC(src, dst, srcSize, maxDstSize, cLevel);
}

/* LZ4_compressHC_withStateHC() :
 * forwards to LZ4_compress_HC_extStateHC(), with dst capacity = LZ4_compressBound(srcSize) and level 0 */
int LZ4_compressHC_withStateHC (void* state, const char* src, char* dst, int srcSize)
{
    int const dstBound = LZ4_compressBound(srcSize);
    return LZ4_compress_HC_extStateHC(state, src, dst, srcSize, dstBound, 0);
}

/* LZ4_compressHC_limitedOutput_withStateHC() :
 * forwards to LZ4_compress_HC_extStateHC(), with caller-provided maxDstSize and level 0 */
int LZ4_compressHC_limitedOutput_withStateHC (void* state, const char* src, char* dst, int srcSize, int maxDstSize)
{
    return LZ4_compress_HC_extStateHC(state, src, dst, srcSize, maxDstSize, 0);
}

/* LZ4_compressHC2_withStateHC() :
 * forwards to LZ4_compress_HC_extStateHC(), with dst capacity = LZ4_compressBound(srcSize) and level cLevel */
int LZ4_compressHC2_withStateHC (void* state, const char* src, char* dst, int srcSize, int cLevel)
{
    int const dstBound = LZ4_compressBound(srcSize);
    return LZ4_compress_HC_extStateHC(state, src, dst, srcSize, dstBound, cLevel);
}

/* LZ4_compressHC2_limitedOutput_withStateHC() :
 * forwards to LZ4_compress_HC_extStateHC(), with caller-provided maxDstSize and level cLevel */
int LZ4_compressHC2_limitedOutput_withStateHC (void* state, const char* src, char* dst, int srcSize, int maxDstSize, int cLevel)
{
    return LZ4_compress_HC_extStateHC(state, src, dst, srcSize, maxDstSize, cLevel);
}

/* LZ4_compressHC_continue() :
 * forwards to LZ4_compress_HC_continue(), with dst capacity = LZ4_compressBound(srcSize) */
int LZ4_compressHC_continue (LZ4_streamHC_t* ctx, const char* src, char* dst, int srcSize)
{
    int const dstBound = LZ4_compressBound(srcSize);
    return LZ4_compress_HC_continue(ctx, src, dst, srcSize, dstBound);
}

/* LZ4_compressHC_limitedOutput_continue() :
 * forwards to LZ4_compress_HC_continue(), with caller-provided maxDstSize */
int LZ4_compressHC_limitedOutput_continue (LZ4_streamHC_t* ctx, const char* src, char* dst, int srcSize, int maxDstSize)
{
    return LZ4_compress_HC_continue(ctx, src, dst, srcSize, maxDstSize);
}


/* Deprecated streaming functions */
/* LZ4_sizeofStreamStateHC() :
 * @return : sizeof(LZ4_streamHC_t), converted to int */
int LZ4_sizeofStreamStateHC(void)
{
    return (int)sizeof(LZ4_streamHC_t);
}

/* state is presumed correctly sized, aka >= sizeof(LZ4_streamHC_t)
* @return : 0 on success, !=0 if error */
int LZ4_resetStreamStateHC(void* state, char* inputBuffer)
{
LZ4_streamHC_t* const hc4 = LZ4_initStreamHC(state, sizeof(*hc4));
if (hc4 == NULL) return 1; /* init failed */
LZ4HC_init_internal (&hc4->internal_donotuse, (const BYTE*)inputBuffer);
return 0;
}

#if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION)
/* LZ4_createHC() :
 * obtains a stream from LZ4_createStreamHC() and passes inputBuffer to LZ4HC_init_internal().
 * @return : the stream as an opaque pointer, or NULL when LZ4_createStreamHC() returned NULL */
void* LZ4_createHC (const char* inputBuffer)
{
    LZ4_streamHC_t* const stream = LZ4_createStreamHC();
    if (stream != NULL) {
        LZ4HC_init_internal(&stream->internal_donotuse, (const BYTE*)inputBuffer);
    }
    return stream;   /* NULL here means : not enough memory */
}

int LZ4_freeHC (void* LZ4HC_Data)
{
if (!LZ4HC_Data) return 0; /* support free on NULL */
FREEMEM(LZ4HC_Data);
return 0;
}
#endif

int LZ4_compressHC2_continue (void* LZ4HC_Data, const char* src, char* dst, int srcSize, int cLevel)
{
return LZ4HC_compress_generic (&((LZ4_streamHC_t*)LZ4HC_Data)->internal_donotuse, src, dst, &srcSize, 0, cLevel, notLimited);
}

int LZ4_compressHC2_limitedOutput_continue (void* LZ4HC_Data, const char* src, char* dst, int srcSize, int dstCapacity, int cLevel)
{
return LZ4HC_compress_generic (&((LZ4_streamHC_t*)LZ4HC_Data)->internal_donotuse, src, dst, &srcSize, dstCapacity, cLevel, limitedOutput);
}

char* LZ4_slideInputBufferHC(void* LZ4HC_Data)
{
LZ4HC_CCtx_internal* const s = &((LZ4_streamHC_t*)LZ4HC_Data)->internal_donotuse;
const BYTE* const bufferStart = s->prefixStart - s->dictLimit s->lowLimit;
LZ4_resetStreamHC_fast((LZ4_streamHC_t*)LZ4HC_Data, s->compressionLevel);
/* ugly conversion trick, required to evade (const char*) -> (char*) cast-qual warning :( */
return (char*)(uptrval)bufferStart;
}


/* ================================================
* LZ4 Optimal parser (levels [LZ4HC_CLEVEL_OPT_MIN - LZ4HC_CLEVEL_MAX])
* ===============================================*/
Expand All @@ -1767,7 +1709,6 @@ LZ4_FORCE_INLINE int LZ4HC_literalsPrice(int const litlen)
return price;
}


/* requires mlen >= MINMATCH */
LZ4_FORCE_INLINE int LZ4HC_sequencePrice(int litlen, int mlen)
{
Expand All @@ -1783,8 +1724,6 @@ LZ4_FORCE_INLINE int LZ4HC_sequencePrice(int litlen, int mlen)
return price;
}



LZ4_FORCE_INLINE LZ4HC_match_t
LZ4HC_FindLongerMatch(LZ4HC_CCtx_internal* const ctx,
const BYTE* ip, const BYTE* const iHighLimit,
Expand Down Expand Up @@ -2107,3 +2046,72 @@ if (limit == fillOutput) {
#endif
return retval;
}


/***************************************************
* Deprecated Functions
***************************************************/

/* These functions currently generate deprecation warnings */

/* Wrappers for deprecated compression functions */
/* LZ4_compressHC() :
 * forwards to LZ4_compress_HC(), with dst capacity = LZ4_compressBound(srcSize) and level 0 */
int LZ4_compressHC(const char* src, char* dst, int srcSize)
{
    int const dstBound = LZ4_compressBound(srcSize);
    return LZ4_compress_HC(src, dst, srcSize, dstBound, 0);
}

/* LZ4_compressHC_limitedOutput() :
 * forwards to LZ4_compress_HC(), with caller-provided maxDstSize and level 0 */
int LZ4_compressHC_limitedOutput(const char* src, char* dst, int srcSize, int maxDstSize)
{
    return LZ4_compress_HC(src, dst, srcSize, maxDstSize, 0);
}

/* LZ4_compressHC2() :
 * forwards to LZ4_compress_HC(), with dst capacity = LZ4_compressBound(srcSize) and level cLevel */
int LZ4_compressHC2(const char* src, char* dst, int srcSize, int cLevel)
{
    int const dstBound = LZ4_compressBound(srcSize);
    return LZ4_compress_HC(src, dst, srcSize, dstBound, cLevel);
}

/* LZ4_compressHC2_limitedOutput() :
 * forwards to LZ4_compress_HC(), with caller-provided maxDstSize and level cLevel */
int LZ4_compressHC2_limitedOutput(const char* src, char* dst, int srcSize, int maxDstSize, int cLevel)
{
    return LZ4_compress_HC(src, dst, srcSize, maxDstSize, cLevel);
}

/* LZ4_compressHC_withStateHC() :
 * forwards to LZ4_compress_HC_extStateHC(), with dst capacity = LZ4_compressBound(srcSize) and level 0 */
int LZ4_compressHC_withStateHC (void* state, const char* src, char* dst, int srcSize)
{
    int const dstBound = LZ4_compressBound(srcSize);
    return LZ4_compress_HC_extStateHC(state, src, dst, srcSize, dstBound, 0);
}

/* LZ4_compressHC_limitedOutput_withStateHC() :
 * forwards to LZ4_compress_HC_extStateHC(), with caller-provided maxDstSize and level 0 */
int LZ4_compressHC_limitedOutput_withStateHC (void* state, const char* src, char* dst, int srcSize, int maxDstSize)
{
    return LZ4_compress_HC_extStateHC(state, src, dst, srcSize, maxDstSize, 0);
}

/* LZ4_compressHC2_withStateHC() :
 * forwards to LZ4_compress_HC_extStateHC(), with dst capacity = LZ4_compressBound(srcSize) and level cLevel */
int LZ4_compressHC2_withStateHC (void* state, const char* src, char* dst, int srcSize, int cLevel)
{
    int const dstBound = LZ4_compressBound(srcSize);
    return LZ4_compress_HC_extStateHC(state, src, dst, srcSize, dstBound, cLevel);
}

/* LZ4_compressHC2_limitedOutput_withStateHC() :
 * forwards to LZ4_compress_HC_extStateHC(), with caller-provided maxDstSize and level cLevel */
int LZ4_compressHC2_limitedOutput_withStateHC (void* state, const char* src, char* dst, int srcSize, int maxDstSize, int cLevel)
{
    return LZ4_compress_HC_extStateHC(state, src, dst, srcSize, maxDstSize, cLevel);
}

/* LZ4_compressHC_continue() :
 * forwards to LZ4_compress_HC_continue(), with dst capacity = LZ4_compressBound(srcSize) */
int LZ4_compressHC_continue (LZ4_streamHC_t* ctx, const char* src, char* dst, int srcSize)
{
    int const dstBound = LZ4_compressBound(srcSize);
    return LZ4_compress_HC_continue(ctx, src, dst, srcSize, dstBound);
}

/* LZ4_compressHC_limitedOutput_continue() :
 * forwards to LZ4_compress_HC_continue(), with caller-provided maxDstSize */
int LZ4_compressHC_limitedOutput_continue (LZ4_streamHC_t* ctx, const char* src, char* dst, int srcSize, int maxDstSize)
{
    return LZ4_compress_HC_continue(ctx, src, dst, srcSize, maxDstSize);
}


/* Deprecated streaming functions */
/* LZ4_sizeofStreamStateHC() :
 * @return : sizeof(LZ4_streamHC_t), converted to int */
int LZ4_sizeofStreamStateHC(void)
{
    return (int)sizeof(LZ4_streamHC_t);
}

/* state is presumed correctly sized, aka >= sizeof(LZ4_streamHC_t)
* @return : 0 on success, !=0 if error */
int LZ4_resetStreamStateHC(void* state, char* inputBuffer)
{
LZ4_streamHC_t* const hc4 = LZ4_initStreamHC(state, sizeof(*hc4));
if (hc4 == NULL) return 1; /* init failed */
LZ4HC_init_internal (&hc4->internal_donotuse, (const BYTE*)inputBuffer);
return 0;
}

#if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION)
/* LZ4_createHC() :
 * obtains a stream from LZ4_createStreamHC() and passes inputBuffer to LZ4HC_init_internal().
 * @return : the stream as an opaque pointer, or NULL when LZ4_createStreamHC() returned NULL */
void* LZ4_createHC (const char* inputBuffer)
{
    LZ4_streamHC_t* const stream = LZ4_createStreamHC();
    if (stream != NULL) {
        LZ4HC_init_internal(&stream->internal_donotuse, (const BYTE*)inputBuffer);
    }
    return stream;   /* NULL here means : not enough memory */
}

int LZ4_freeHC (void* LZ4HC_Data)
{
if (!LZ4HC_Data) return 0; /* support free on NULL */
FREEMEM(LZ4HC_Data);
return 0;
}
#endif

int LZ4_compressHC2_continue (void* LZ4HC_Data, const char* src, char* dst, int srcSize, int cLevel)
{
return LZ4HC_compress_generic (&((LZ4_streamHC_t*)LZ4HC_Data)->internal_donotuse, src, dst, &srcSize, 0, cLevel, notLimited);
}

int LZ4_compressHC2_limitedOutput_continue (void* LZ4HC_Data, const char* src, char* dst, int srcSize, int dstCapacity, int cLevel)
{
return LZ4HC_compress_generic (&((LZ4_streamHC_t*)LZ4HC_Data)->internal_donotuse, src, dst, &srcSize, dstCapacity, cLevel, limitedOutput);
}

char* LZ4_slideInputBufferHC(void* LZ4HC_Data)
{
LZ4HC_CCtx_internal* const s = &((LZ4_streamHC_t*)LZ4HC_Data)->internal_donotuse;
const BYTE* const bufferStart = s->prefixStart - s->dictLimit s->lowLimit;
LZ4_resetStreamHC_fast((LZ4_streamHC_t*)LZ4HC_Data, s->compressionLevel);
/* ugly conversion trick, required to evade (const char*) -> (char*) cast-qual warning :( */
return (char*)(uptrval)bufferStart;
}
Loading