Line data Source code
1 : /*
2 : LZ4 - Fast LZ compression algorithm
3 : Copyright (C) 2011-2015, Yann Collet.
4 :
5 : BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6 :
7 : Redistribution and use in source and binary forms, with or without
8 : modification, are permitted provided that the following conditions are
9 : met:
10 :
11 : * Redistributions of source code must retain the above copyright
12 : notice, this list of conditions and the following disclaimer.
13 : * Redistributions in binary form must reproduce the above
14 : copyright notice, this list of conditions and the following disclaimer
15 : in the documentation and/or other materials provided with the
16 : distribution.
17 :
18 : THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 : "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 : LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 : A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 : OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 : SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 : LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 : DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 : THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 : (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 : OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 :
30 : You can contact the author at :
31 : - LZ4 source repository : https://github.com/Cyan4973/lz4
32 : - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
33 : */
34 :
35 :
36 : /**************************************
37 : * Tuning parameters
38 : **************************************/
/*
 * HEAPMODE :
 * Chooses where the default compression functions place their hash table :
 * on the stack (0 : default, fastest) or on the heap (1 : requires malloc()).
 */
#define HEAPMODE 0

/*
 * ACCELERATION_DEFAULT :
 * "acceleration" value substituted by MLZ4_compress_fast() when the caller passes a value <= 0
 */
#define ACCELERATION_DEFAULT 1
51 :
52 :
53 : /**************************************
54 : * CPU Feature Detection
55 : **************************************/
/*
 * MLZ4_FORCE_SW_BITCOUNT
 * Define this macro when the target system or compiler offers no hardware bit count;
 * the bit-scan helpers then use their software fallbacks.
 */
#if defined(_MSC_VER) && defined(_WIN32_WCE)   /* Visual Studio for Windows CE does not support Hardware bit count */
#  define MLZ4_FORCE_SW_BITCOUNT
#endif
63 :
64 :
65 : /**************************************
66 : * Includes
67 : **************************************/
68 : #include "mlz4.h"
69 :
70 :
71 : /**************************************
72 : * Compiler Options
73 : **************************************/
/* FORCE_INLINE : strongest "always inline" spelling the current compiler understands */
#ifdef _MSC_VER    /* Visual Studio */
#  define FORCE_INLINE static __forceinline
#  include <intrin.h>
#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
#  pragma warning(disable : 4293)        /* disable: C4293: too large shift (32-bits) */
#else
#  if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)   /* C99 */
#    if defined(__GNUC__) || defined(__clang__)
#      define FORCE_INLINE static inline __attribute__((always_inline))
#    else
#      define FORCE_INLINE static inline
#    endif
#  else
#    define FORCE_INLINE static
#  endif   /* __STDC_VERSION__ */
#endif  /* _MSC_VER */

/* MLZ4_GCC_VERSION is expected to be provided by mlz4.h, included above */
#if (MLZ4_GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__)
#  define expect(expr,value)    (__builtin_expect ((expr),(value)) )
#else
#  define expect(expr,value)    (expr)
#endif

/* Branch hints : both evaluate to the truth value (0 or 1) of expr */
#define likely(expr)     expect((expr) != 0, 1)
#define unlikely(expr)   expect((expr) != 0, 0)
100 :
101 :
102 : /**************************************
103 : * Memory routines
104 : **************************************/
#include <stdlib.h>   /* malloc, calloc, free */
#include <string.h>   /* memset, memcpy */

/* Thin indirection layer so the allocator and memset can be swapped in one place */
#define ALLOCATOR(n,s) calloc(n,s)
#define FREEMEM        free
#define MEM_INIT       memset
110 :
111 :
112 : /**************************************
113 : * Basic Types
114 : **************************************/
/* Fixed-width aliases : exact <stdint.h> types under C99, closest native types otherwise */
#if defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)   /* C99 */
#  include <stdint.h>
   typedef  uint8_t  BYTE;
   typedef uint16_t  U16;
   typedef uint32_t  U32;
   typedef  int32_t  S32;
   typedef uint64_t  U64;
#else
   typedef unsigned char       BYTE;
   typedef unsigned short      U16;
   typedef unsigned int        U32;
   typedef   signed int        S32;
   typedef unsigned long long  U64;
#endif
129 :
130 :
131 : /**************************************
132 : * Reading and writing into memory
133 : **************************************/
/* Width, in bytes, of one machine-word step (used by the word-at-a-time comparison loop) */
#define STEPSIZE sizeof(size_t)
135 :
/* Returns 1 when pointers are 8 bytes wide, 0 otherwise */
static unsigned MLZ4_64bits(void)
{
    return (sizeof(void*) == 8) ? 1U : 0U;
}
137 :
138 0 : static unsigned MLZ4_isLittleEndian(void)
139 : {
140 0 : const union { U32 i; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */
141 0 : return one.c[0];
142 : }
143 :
144 :
145 0 : static U16 MLZ4_read16(const void* memPtr)
146 : {
147 : U16 val16;
148 0 : memcpy(&val16, memPtr, 2);
149 0 : return val16;
150 : }
151 :
152 0 : static U16 MLZ4_readLE16(const void* memPtr)
153 : {
154 0 : if (MLZ4_isLittleEndian())
155 : {
156 0 : return MLZ4_read16(memPtr);
157 : }
158 : else
159 : {
160 0 : const BYTE* p = (const BYTE*)memPtr;
161 0 : return (U16)((U16)p[0] + (p[1]<<8));
162 : }
163 : }
164 :
165 0 : static void MLZ4_writeLE16(void* memPtr, U16 value)
166 : {
167 0 : if (MLZ4_isLittleEndian())
168 : {
169 0 : memcpy(memPtr, &value, 2);
170 : }
171 : else
172 : {
173 0 : BYTE* p = (BYTE*)memPtr;
174 0 : p[0] = (BYTE) value;
175 0 : p[1] = (BYTE)(value>>8);
176 : }
177 0 : }
178 :
179 0 : static U32 MLZ4_read32(const void* memPtr)
180 : {
181 : U32 val32;
182 0 : memcpy(&val32, memPtr, 4);
183 0 : return val32;
184 : }
185 :
186 0 : static U64 MLZ4_read64(const void* memPtr)
187 : {
188 : U64 val64;
189 0 : memcpy(&val64, memPtr, 8);
190 0 : return val64;
191 : }
192 :
/* Reads one pointer-sized word at p : 8 bytes when MLZ4_64bits() is true, 4 bytes otherwise */
static size_t MLZ4_read_ARCH(const void* p)
{
    return MLZ4_64bits() ? (size_t)MLZ4_read64(p)
                         : (size_t)MLZ4_read32(p);
}
200 :
201 :
/* Copies exactly 4 bytes from srcPtr to dstPtr (memcpy semantics) */
static void MLZ4_copy4(void* dstPtr, const void* srcPtr)
{
    memcpy(dstPtr, srcPtr, 4);
}
203 :
/* Copies exactly 8 bytes from srcPtr to dstPtr (memcpy semantics) */
static void MLZ4_copy8(void* dstPtr, const void* srcPtr)
{
    memcpy(dstPtr, srcPtr, 8);
}
205 :
206 : /* customized version of memcpy, which may overwrite up to 7 bytes beyond dstEnd */
207 0 : static void MLZ4_wildCopy(void* dstPtr, const void* srcPtr, void* dstEnd)
208 : {
209 0 : BYTE* d = (BYTE*)dstPtr;
210 0 : const BYTE* s = (const BYTE*)srcPtr;
211 0 : BYTE* e = (BYTE*)dstEnd;
212 0 : do { MLZ4_copy8(d,s); d+=8; s+=8; } while (d<e);
213 0 : }
214 :
215 :
216 : /**************************************
217 : * Common Constants
218 : **************************************/
#define MINMATCH 4                       /* shortest match that gets encoded */

#define COPYLENGTH 8
#define LASTLITERALS 5                   /* bytes at the end of input always left as literals */
#define MFLIMIT (COPYLENGTH+MINMATCH)    /* no match search starts closer than this to the end of input */
static const int MLZ4_minLength = (MFLIMIT+1);   /* inputs shorter than this are emitted as literals only */

/* Size suffixes, written after a number : `64 KB` expands to `64 *(1 <<10)` */
#define KB *(1 <<10)
#define MB *(1 <<20)
#define GB *(1U<<30)

#define MAXD_LOG 16
#define MAX_DISTANCE ((1 << MAXD_LOG) - 1)   /* largest offset that fits the 16-bit offset field */

/* The token byte is split in two nibbles : literal run length (high) and match length (low) */
#define ML_BITS  4
#define ML_MASK  ((1U<<ML_BITS)-1)
#define RUN_BITS (8-ML_BITS)
#define RUN_MASK ((1U<<RUN_BITS)-1)
238 :
239 : /**************************************
240 : * Common Utils
241 : **************************************/
/*
 * Compile-time assertion : a false condition yields a division by zero inside an enum
 * initializer, which the compiler rejects. Expands to a block statement,
 * so use it only *after* variable declarations.
 */
#define MLZ4_STATIC_ASSERT(c)    { enum { MLZ4_static_assert = 1/(int)(!!(c)) }; }
243 :
244 :
245 : /**************************************
246 : * Common functions
247 : **************************************/
248 0 : static unsigned MLZ4_NbCommonBytes (size_t val)
249 : {
250 0 : if (MLZ4_isLittleEndian())
251 : {
252 0 : if (MLZ4_64bits())
253 : {
254 : # if defined(_MSC_VER) && defined(_WIN64) && !defined(MLZ4_FORCE_SW_BITCOUNT)
255 : unsigned long r = 0;
256 : _BitScanForward64( &r, (U64)val );
257 : return (int)(r>>3);
258 : # elif (defined(__clang__) || (MLZ4_GCC_VERSION >= 304)) && !defined(MLZ4_FORCE_SW_BITCOUNT)
259 0 : return (__builtin_ctzll((U64)val) >> 3);
260 : # else
261 : static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 };
262 : return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
263 : # endif
264 : }
265 : else /* 32 bits */
266 : {
267 : # if defined(_MSC_VER) && !defined(MLZ4_FORCE_SW_BITCOUNT)
268 : unsigned long r;
269 : _BitScanForward( &r, (U32)val );
270 : return (int)(r>>3);
271 : # elif (defined(__clang__) || (MLZ4_GCC_VERSION >= 304)) && !defined(MLZ4_FORCE_SW_BITCOUNT)
272 0 : return (__builtin_ctz((U32)val) >> 3);
273 : # else
274 : static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 };
275 : return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
276 : # endif
277 : }
278 : }
279 : else /* Big Endian CPU */
280 : {
281 0 : if (MLZ4_64bits())
282 : {
283 : # if defined(_MSC_VER) && defined(_WIN64) && !defined(MLZ4_FORCE_SW_BITCOUNT)
284 : unsigned long r = 0;
285 : _BitScanReverse64( &r, val );
286 : return (unsigned)(r>>3);
287 : # elif (defined(__clang__) || (MLZ4_GCC_VERSION >= 304)) && !defined(MLZ4_FORCE_SW_BITCOUNT)
288 0 : return (__builtin_clzll((U64)val) >> 3);
289 : # else
290 : unsigned r;
291 : if (!(val>>32)) { r=4; } else { r=0; val>>=32; }
292 : if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
293 : r += (!val);
294 : return r;
295 : # endif
296 : }
297 : else /* 32 bits */
298 : {
299 : # if defined(_MSC_VER) && !defined(MLZ4_FORCE_SW_BITCOUNT)
300 : unsigned long r = 0;
301 : _BitScanReverse( &r, (unsigned long)val );
302 : return (unsigned)(r>>3);
303 : # elif (defined(__clang__) || (MLZ4_GCC_VERSION >= 304)) && !defined(MLZ4_FORCE_SW_BITCOUNT)
304 0 : return (__builtin_clz((U32)val) >> 3);
305 : # else
306 : unsigned r;
307 : if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
308 : r += (!val);
309 : return r;
310 : # endif
311 : }
312 : }
313 : }
314 :
315 0 : static unsigned MLZ4_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit)
316 : {
317 0 : const BYTE* const pStart = pIn;
318 :
319 0 : while (likely(pIn<pInLimit-(STEPSIZE-1)))
320 : {
321 0 : size_t diff = MLZ4_read_ARCH(pMatch) ^ MLZ4_read_ARCH(pIn);
322 0 : if (!diff) { pIn+=STEPSIZE; pMatch+=STEPSIZE; continue; }
323 0 : pIn += MLZ4_NbCommonBytes(diff);
324 0 : return (unsigned)(pIn - pStart);
325 : }
326 :
327 0 : if (MLZ4_64bits()) if ((pIn<(pInLimit-3)) && (MLZ4_read32(pMatch) == MLZ4_read32(pIn))) { pIn+=4; pMatch+=4; }
328 0 : if ((pIn<(pInLimit-1)) && (MLZ4_read16(pMatch) == MLZ4_read16(pIn))) { pIn+=2; pMatch+=2; }
329 0 : if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++;
330 0 : return (unsigned)(pIn - pStart);
331 : }
332 :
333 :
334 : #ifndef MLZ4_COMMONDEFS_ONLY
335 : /**************************************
336 : * Local Constants
337 : **************************************/
338 : #define MLZ4_HASHLOG (MLZ4_MEMORY_USAGE-2)
339 : #define HASHTABLESIZE (1 << MLZ4_MEMORY_USAGE)
340 : #define HASH_SIZE_U32 (1 << MLZ4_HASHLOG) /* required as macro for static allocation */
341 :
342 : static const int MLZ4_64Klimit = ((64 KB) + (MFLIMIT-1));
343 : static const U32 MLZ4_skipTrigger = 6; /* Increase this value ==> compression run slower on incompressible data */
344 :
345 :
346 : /**************************************
347 : * Local Structures and types
348 : **************************************/
349 : typedef struct {
350 : U32 hashTable[HASH_SIZE_U32];
351 : U32 currentOffset;
352 : U32 initCheck;
353 : const BYTE* dictionary;
354 : BYTE* bufferStart; /* obsolete, used for slideInputBuffer */
355 : U32 dictSize;
356 : } MLZ4_stream_t_internal;
357 :
/* Whether the compressor must check the output capacity */
typedef enum {
    notLimited    = 0,
    limitedOutput = 1
} limitedOutput_directive;

/* Representation of hash-table entries : raw pointers, 32-bit offsets or 16-bit offsets */
typedef enum {
    byPtr,
    byU32,
    byU16
} tableType_t;

/* Kind of earlier data the compressor may reference */
typedef enum {
    noDict = 0,
    withPrefix64k,
    usingExtDict
} dict_directive;

/* dictSmall enables the extra lower-bound check on match candidates */
typedef enum {
    noDictIssue = 0,
    dictSmall
} dictIssue_directive;

typedef enum {
    endOnOutputSize = 0,
    endOnInputSize  = 1
} endCondition_directive;

typedef enum {
    full    = 0,
    partial = 1
} earlyEnd_directive;
366 :
367 :
368 : /**************************************
369 : * Local Utils
370 : **************************************/
371 0 : int MLZ4_versionNumber (void) { return MLZ4_VERSION_NUMBER; }
/* Function form of the MLZ4_COMPRESSBOUND() macro, evaluated for an input of isize bytes */
int MLZ4_compressBound(int isize)
{
    return MLZ4_COMPRESSBOUND(isize);
}
373 0 : int MLZ4_sizeofState() { return MLZ4_STREAMSIZE; }
374 :
375 :
376 :
377 : /********************************
378 : * Compression functions
379 : ********************************/
380 :
381 0 : static U32 MLZ4_hashSequence(U32 sequence, tableType_t const tableType)
382 : {
383 0 : if (tableType == byU16)
384 0 : return (((sequence) * 2654435761U) >> ((MINMATCH*8)-(MLZ4_HASHLOG+1)));
385 : else
386 0 : return (((sequence) * 2654435761U) >> ((MINMATCH*8)-MLZ4_HASHLOG));
387 : }
388 :
389 : static const U64 prime5bytes = 889523592379ULL;
390 0 : static U32 MLZ4_hashSequence64(size_t sequence, tableType_t const tableType)
391 : {
392 0 : const U32 hashLog = (tableType == byU16) ? MLZ4_HASHLOG+1 : MLZ4_HASHLOG;
393 0 : const U32 hashMask = (1<<hashLog) - 1;
394 0 : return ((sequence * prime5bytes) >> (40 - hashLog)) & hashMask;
395 : }
396 :
397 0 : static U32 MLZ4_hashSequenceT(size_t sequence, tableType_t const tableType)
398 : {
399 0 : if (MLZ4_64bits())
400 0 : return MLZ4_hashSequence64(sequence, tableType);
401 0 : return MLZ4_hashSequence((U32)sequence, tableType);
402 : }
403 :
404 0 : static U32 MLZ4_hashPosition(const void* p, tableType_t tableType) { return MLZ4_hashSequenceT(MLZ4_read_ARCH(p), tableType); }
405 :
406 0 : static void MLZ4_putPositionOnHash(const BYTE* p, U32 h, void* tableBase, tableType_t const tableType, const BYTE* srcBase)
407 : {
408 0 : switch (tableType)
409 : {
410 0 : case byPtr: { const BYTE** hashTable = (const BYTE**)tableBase; hashTable[h] = p; return; }
411 0 : case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = (U32)(p-srcBase); return; }
412 0 : case byU16: { U16* hashTable = (U16*) tableBase; hashTable[h] = (U16)(p-srcBase); return; }
413 : }
414 : }
415 :
416 0 : static void MLZ4_putPosition(const BYTE* p, void* tableBase, tableType_t tableType, const BYTE* srcBase)
417 : {
418 0 : U32 h = MLZ4_hashPosition(p, tableType);
419 0 : MLZ4_putPositionOnHash(p, h, tableBase, tableType, srcBase);
420 0 : }
421 :
422 0 : static const BYTE* MLZ4_getPositionOnHash(U32 h, void* tableBase, tableType_t tableType, const BYTE* srcBase)
423 : {
424 0 : if (tableType == byPtr) { const BYTE** hashTable = (const BYTE**) tableBase; return hashTable[h]; }
425 0 : if (tableType == byU32) { U32* hashTable = (U32*) tableBase; return hashTable[h] + srcBase; }
426 0 : { U16* hashTable = (U16*) tableBase; return hashTable[h] + srcBase; } /* default, to ensure a return */
427 : }
428 :
429 0 : static const BYTE* MLZ4_getPosition(const BYTE* p, void* tableBase, tableType_t tableType, const BYTE* srcBase)
430 : {
431 0 : U32 h = MLZ4_hashPosition(p, tableType);
432 0 : return MLZ4_getPositionOnHash(h, tableBase, tableType, srcBase);
433 : }
434 :
/*
 * MLZ4_compress_generic() :
 * Core block compressor. It is FORCE_INLINE and every public entry point calls it with
 * constant directive arguments.
 * ctx : hash table storage; also read as an MLZ4_stream_t_internal for the dictionary fields
 * source, inputSize : data to compress
 * dest, maxOutputSize : output buffer; its capacity is only checked when outputLimited is set
 * tableType : representation of hash-table entries (byPtr, byU32 or byU16)
 * dict, dictIssue : whether earlier data (prefix or external dictionary) may be referenced
 * acceleration : the search counter starts at acceleration << MLZ4_skipTrigger
 * @return : number of bytes written into dest, or 0 when the input size is unsupported
 * or (outputLimited only) the compressed data does not fit into maxOutputSize
 */
435 0 : FORCE_INLINE int MLZ4_compress_generic(
436 : void* const ctx,
437 : const char* const source,
438 : char* const dest,
439 : const int inputSize,
440 : const int maxOutputSize,
441 : const limitedOutput_directive outputLimited,
442 : const tableType_t tableType,
443 : const dict_directive dict,
444 : const dictIssue_directive dictIssue,
445 : const U32 acceleration)
446 : {
447 0 : MLZ4_stream_t_internal* const dictPtr = (MLZ4_stream_t_internal*)ctx;
448 :
449 0 : const BYTE* ip = (const BYTE*) source;
450 : const BYTE* base;
451 : const BYTE* lowLimit;
452 0 : const BYTE* const lowRefLimit = ip - dictPtr->dictSize;
453 0 : const BYTE* const dictionary = dictPtr->dictionary;
454 0 : const BYTE* const dictEnd = dictionary + dictPtr->dictSize;
455 0 : const size_t dictDelta = dictEnd - (const BYTE*)source;
456 0 : const BYTE* anchor = (const BYTE*) source;
457 0 : const BYTE* const iend = ip + inputSize;
458 0 : const BYTE* const mflimit = iend - MFLIMIT;
459 0 : const BYTE* const matchlimit = iend - LASTLITERALS;
460 :
461 0 : BYTE* op = (BYTE*) dest;
462 0 : BYTE* const olimit = op + maxOutputSize;
463 :
464 : U32 forwardH;
465 0 : size_t refDelta=0;
466 :
467 : /* Init conditions */
468 0 : if ((U32)inputSize > (U32)MLZ4_MAX_INPUT_SIZE) return 0; /* Unsupported input size, too large (or negative) */
469 0 : switch(dict)
470 : {
471 0 : case noDict:
472 : default:
473 0 : base = (const BYTE*)source;
474 0 : lowLimit = (const BYTE*)source;
475 0 : break;
476 0 : case withPrefix64k:
477 0 : base = (const BYTE*)source - dictPtr->currentOffset;
478 0 : lowLimit = (const BYTE*)source - dictPtr->dictSize;
479 0 : break;
480 0 : case usingExtDict:
481 0 : base = (const BYTE*)source - dictPtr->currentOffset;
482 0 : lowLimit = (const BYTE*)source;
483 0 : break;
484 : }
485 0 : if ((tableType == byU16) && (inputSize>=MLZ4_64Klimit)) return 0; /* Size too large (not within 64K limit) */
486 0 : if (inputSize<MLZ4_minLength) goto _last_literals; /* Input too small, no compression (all literals) */
487 :
488 : /* First Byte : register position 0 in the hash table, then pre-compute the hash of position 1 */
489 0 : MLZ4_putPosition(ip, ctx, tableType, base);
490 0 : ip++; forwardH = MLZ4_hashPosition(ip, tableType);
491 :
492 : /* Main Loop : each iteration emits one sequence (literal run + match) */
493 : for ( ; ; )
494 : {
495 : const BYTE* match;
496 : BYTE* token;
497 : {
498 0 : const BYTE* forwardIp = ip;
499 0 : unsigned step = 1;
500 0 : unsigned searchMatchNb = acceleration << MLZ4_skipTrigger;
501 :
502 : /* Find a match : the step between probes grows with searchMatchNb, so regions without matches are crossed faster */
503 : do {
504 0 : U32 h = forwardH;
505 0 : ip = forwardIp;
506 0 : forwardIp += step;
507 0 : step = (searchMatchNb++ >> MLZ4_skipTrigger);
508 :
509 0 : if (unlikely(forwardIp > mflimit)) goto _last_literals;
510 :
511 0 : match = MLZ4_getPositionOnHash(h, ctx, tableType, base);
512 0 : if (dict==usingExtDict)
513 : {
514 0 : if (match<(const BYTE*)source)
515 : {
516 0 : refDelta = dictDelta;
517 0 : lowLimit = dictionary;
518 : }
519 : else
520 : {
521 0 : refDelta = 0;
522 0 : lowLimit = (const BYTE*)source;
523 : }
524 : }
525 0 : forwardH = MLZ4_hashPosition(forwardIp, tableType);
526 0 : MLZ4_putPositionOnHash(ip, h, ctx, tableType, base);
527 :
528 0 : } while ( ((dictIssue==dictSmall) ? (match < lowRefLimit) : 0)
529 0 : || ((tableType==byU16) ? 0 : (match + MAX_DISTANCE < ip))
530 0 : || (MLZ4_read32(match+refDelta) != MLZ4_read32(ip)) );
531 : }
532 :
533 : /* Catch up : extend the match backwards while the preceding bytes are equal too */
534 0 : while ((ip>anchor) && (match+refDelta > lowLimit) && (unlikely(ip[-1]==match[refDelta-1]))) { ip--; match--; }
535 :
536 : {
537 : /* Encode Literal length : high nibble of the token, then 255-valued extension bytes when it reaches RUN_MASK */
538 0 : unsigned litLength = (unsigned)(ip - anchor);
539 0 : token = op++;
540 0 : if ((outputLimited) && (unlikely(op + litLength + (2 + 1 + LASTLITERALS) + (litLength/255) > olimit)))
541 0 : return 0; /* Check output limit */
542 0 : if (litLength>=RUN_MASK)
543 : {
544 0 : int len = (int)litLength-RUN_MASK;
545 0 : *token=(RUN_MASK<<ML_BITS);
546 0 : for(; len >= 255 ; len-=255) *op++ = 255;
547 0 : *op++ = (BYTE)len;
548 : }
549 0 : else *token = (BYTE)(litLength<<ML_BITS);
550 :
551 : /* Copy Literals : MLZ4_wildCopy works in 8-byte chunks and may write a few bytes past op+litLength */
552 0 : MLZ4_wildCopy(op, anchor, op+litLength);
553 0 : op+=litLength;
554 : }
555 :
556 0 : _next_match:
557 : /* Encode Offset : 16-bit little-endian distance from ip back to the match */
558 0 : MLZ4_writeLE16(op, (U16)(ip-match)); op+=2;
559 :
560 : /* Encode MatchLength : low nibble of the token, then extension bytes when it reaches ML_MASK */
561 : {
562 : unsigned matchLength;
563 :
564 0 : if ((dict==usingExtDict) && (lowLimit==dictionary)) /* the match was found below source, i.e. in the external dictionary */
565 : {
566 : const BYTE* limit;
567 0 : match += refDelta;
568 0 : limit = ip + (dictEnd-match);
569 0 : if (limit > matchlimit) limit = matchlimit;
570 0 : matchLength = MLZ4_count(ip+MINMATCH, match+MINMATCH, limit);
571 0 : ip += MINMATCH + matchLength;
572 0 : if (ip==limit) /* ran up to limit : keep counting against the beginning of source */
573 : {
574 0 : unsigned more = MLZ4_count(ip, (const BYTE*)source, matchlimit);
575 0 : matchLength += more;
576 0 : ip += more;
577 : }
578 0 : }
579 : else
580 : {
581 0 : matchLength = MLZ4_count(ip+MINMATCH, match+MINMATCH, matchlimit);
582 0 : ip += MINMATCH + matchLength;
583 : }
584 :
585 0 : if ((outputLimited) && (unlikely(op + (1 + LASTLITERALS) + (matchLength>>8) > olimit)))
586 0 : return 0; /* Check output limit */
587 0 : if (matchLength>=ML_MASK)
588 : {
589 0 : *token += ML_MASK;
590 0 : matchLength -= ML_MASK;
591 0 : for (; matchLength >= 510 ; matchLength-=510) { *op++ = 255; *op++ = 255; }
592 0 : if (matchLength >= 255) { matchLength-=255; *op++ = 255; }
593 0 : *op++ = (BYTE)matchLength;
594 : }
595 0 : else *token += (BYTE)(matchLength);
596 : }
597 :
598 0 : anchor = ip;
599 :
600 : /* Test end of chunk */
601 0 : if (ip > mflimit) break;
602 :
603 : /* Fill table */
604 0 : MLZ4_putPosition(ip-2, ctx, tableType, base);
605 :
606 : /* Test next position : when it matches immediately, a sequence with zero literals is started */
607 0 : match = MLZ4_getPosition(ip, ctx, tableType, base);
608 0 : if (dict==usingExtDict)
609 : {
610 0 : if (match<(const BYTE*)source)
611 : {
612 0 : refDelta = dictDelta;
613 0 : lowLimit = dictionary;
614 : }
615 : else
616 : {
617 0 : refDelta = 0;
618 0 : lowLimit = (const BYTE*)source;
619 : }
620 : }
621 0 : MLZ4_putPosition(ip, ctx, tableType, base);
622 0 : if ( ((dictIssue==dictSmall) ? (match>=lowRefLimit) : 1)
623 0 : && (match+MAX_DISTANCE>=ip)
624 0 : && (MLZ4_read32(match+refDelta)==MLZ4_read32(ip)) )
625 0 : { token=op++; *token=0; goto _next_match; }
626 :
627 : /* Prepare next loop */
628 0 : forwardH = MLZ4_hashPosition(++ip, tableType);
629 0 : }
630 :
631 0 : _last_literals:
632 : /* Encode Last Literals : everything from anchor to iend is emitted as a final literal-only sequence */
633 : {
634 0 : const size_t lastRun = (size_t)(iend - anchor);
635 0 : if ((outputLimited) && ((op - (BYTE*)dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize))
636 0 : return 0; /* Check output limit */
637 0 : if (lastRun >= RUN_MASK)
638 : {
639 0 : size_t accumulator = lastRun - RUN_MASK;
640 0 : *op++ = RUN_MASK << ML_BITS;
641 0 : for(; accumulator >= 255 ; accumulator-=255) *op++ = 255;
642 0 : *op++ = (BYTE) accumulator;
643 : }
644 : else
645 : {
646 0 : *op++ = (BYTE)(lastRun<<ML_BITS);
647 : }
648 0 : memcpy(op, anchor, lastRun);
649 0 : op += lastRun;
650 : }
651 :
652 : /* End : the result is the number of bytes written into dest */
653 0 : return (int) (((char*)op)-dest);
654 : }
655 :
656 :
657 0 : int MLZ4_compress_fast_extState(void* state, const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration)
658 : {
659 0 : MLZ4_resetStream((MLZ4_stream_t*)state);
660 0 : if (acceleration < 1) acceleration = ACCELERATION_DEFAULT;
661 :
662 0 : if (maxOutputSize >= MLZ4_compressBound(inputSize))
663 : {
664 0 : if (inputSize < MLZ4_64Klimit)
665 0 : return MLZ4_compress_generic(state, source, dest, inputSize, 0, notLimited, byU16, noDict, noDictIssue, acceleration);
666 : else
667 0 : return MLZ4_compress_generic(state, source, dest, inputSize, 0, notLimited, MLZ4_64bits() ? byU32 : byPtr, noDict, noDictIssue, acceleration);
668 : }
669 : else
670 : {
671 0 : if (inputSize < MLZ4_64Klimit)
672 0 : return MLZ4_compress_generic(state, source, dest, inputSize, maxOutputSize, limitedOutput, byU16, noDict, noDictIssue, acceleration);
673 : else
674 0 : return MLZ4_compress_generic(state, source, dest, inputSize, maxOutputSize, limitedOutput, MLZ4_64bits() ? byU32 : byPtr, noDict, noDictIssue, acceleration);
675 : }
676 : }
677 :
678 :
679 0 : int MLZ4_compress_fast(const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration)
680 : {
681 : #if (HEAPMODE)
682 : void* ctxPtr = ALLOCATOR(1, sizeof(MLZ4_stream_t)); /* malloc-calloc always properly aligned */
683 : #else
684 : MLZ4_stream_t ctx;
685 0 : void* ctxPtr = &ctx;
686 : #endif
687 :
688 0 : int result = MLZ4_compress_fast_extState(ctxPtr, source, dest, inputSize, maxOutputSize, acceleration);
689 :
690 : #if (HEAPMODE)
691 : FREEMEM(ctxPtr);
692 : #endif
693 0 : return result;
694 : }
695 :
696 :
/* Convenience entry point : MLZ4_compress_fast() with acceleration fixed to 1 */
int MLZ4_compress_default(const char* source, char* dest, int inputSize, int maxOutputSize)
{
    int const compressedSize = MLZ4_compress_fast(source, dest, inputSize, maxOutputSize, 1);
    return compressedSize;
}
701 :
702 :
703 : /* hidden debug function */
704 : /* strangely enough, gcc generates faster code when this function is uncommented, even if unused */
705 0 : int MLZ4_compress_fast_force(const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration)
706 : {
707 : MLZ4_stream_t ctx;
708 :
709 0 : MLZ4_resetStream(&ctx);
710 :
711 0 : if (inputSize < MLZ4_64Klimit)
712 0 : return MLZ4_compress_generic(&ctx, source, dest, inputSize, maxOutputSize, limitedOutput, byU16, noDict, noDictIssue, acceleration);
713 : else
714 0 : return MLZ4_compress_generic(&ctx, source, dest, inputSize, maxOutputSize, limitedOutput, MLZ4_64bits() ? byU32 : byPtr, noDict, noDictIssue, acceleration);
715 : }
716 :
717 :
718 : /********************************
719 : * destSize variant
720 : ********************************/
721 :
/*
 * MLZ4_compress_destSize_generic() :
 * Compresses as much of src as fits into targetDstSize bytes of dst.
 * ctx : hash table storage
 * srcSizePtr : on entry, number of bytes available in src; on exit, number of bytes of src consumed
 * tableType : representation of hash-table entries (byPtr, byU32 or byU16)
 * @return : number of bytes written into dst, or 0 when targetDstSize < 1
 * or the input size is unsupported
 */
722 0 : static int MLZ4_compress_destSize_generic(
723 : void* const ctx,
724 : const char* const src,
725 : char* const dst,
726 : int* const srcSizePtr,
727 : const int targetDstSize,
728 : const tableType_t tableType)
729 : {
730 0 : const BYTE* ip = (const BYTE*) src;
731 0 : const BYTE* base = (const BYTE*) src;
732 0 : const BYTE* lowLimit = (const BYTE*) src;
733 0 : const BYTE* anchor = ip;
734 0 : const BYTE* const iend = ip + *srcSizePtr;
735 0 : const BYTE* const mflimit = iend - MFLIMIT;
736 0 : const BYTE* const matchlimit = iend - LASTLITERALS;
737 :
738 0 : BYTE* op = (BYTE*) dst;
739 0 : BYTE* const oend = op + targetDstSize;
740 0 : BYTE* const oMaxLit = op + targetDstSize - 2 /* offset */ - 8 /* because 8+MINMATCH==MFLIMIT */ - 1 /* token */;
741 0 : BYTE* const oMaxMatch = op + targetDstSize - (LASTLITERALS + 1 /* token */);
742 0 : BYTE* const oMaxSeq = oMaxLit - 1 /* token */;
743 :
744 : U32 forwardH;
745 :
746 :
747 : /* Init conditions */
748 0 : if (targetDstSize < 1) return 0; /* Impossible to store anything */
749 0 : if ((U32)*srcSizePtr > (U32)MLZ4_MAX_INPUT_SIZE) return 0; /* Unsupported input size, too large (or negative) */
750 0 : if ((tableType == byU16) && (*srcSizePtr>=MLZ4_64Klimit)) return 0; /* Size too large (not within 64K limit) */
751 0 : if (*srcSizePtr<MLZ4_minLength) goto _last_literals; /* Input too small, no compression (all literals) */
752 :
753 : /* First Byte : register position 0 in the hash table, then pre-compute the hash of position 1 */
754 0 : *srcSizePtr = 0;
755 0 : MLZ4_putPosition(ip, ctx, tableType, base);
756 0 : ip++; forwardH = MLZ4_hashPosition(ip, tableType);
757 :
758 : /* Main Loop : each iteration emits one sequence (literal run + match) */
759 : for ( ; ; )
760 : {
761 : const BYTE* match;
762 : BYTE* token;
763 : {
764 0 : const BYTE* forwardIp = ip;
765 0 : unsigned step = 1;
766 0 : unsigned searchMatchNb = 1 << MLZ4_skipTrigger;
767 :
768 : /* Find a match : the step between probes grows with searchMatchNb */
769 : do {
770 0 : U32 h = forwardH;
771 0 : ip = forwardIp;
772 0 : forwardIp += step;
773 0 : step = (searchMatchNb++ >> MLZ4_skipTrigger);
774 :
775 0 : if (unlikely(forwardIp > mflimit))
776 0 : goto _last_literals;
777 :
778 0 : match = MLZ4_getPositionOnHash(h, ctx, tableType, base);
779 0 : forwardH = MLZ4_hashPosition(forwardIp, tableType);
780 0 : MLZ4_putPositionOnHash(ip, h, ctx, tableType, base);
781 :
782 0 : } while ( ((tableType==byU16) ? 0 : (match + MAX_DISTANCE < ip))
783 0 : || (MLZ4_read32(match) != MLZ4_read32(ip)) );
784 : }
785 :
786 : /* Catch up : extend the match backwards while the preceding bytes are equal too */
787 0 : while ((ip>anchor) && (match > lowLimit) && (unlikely(ip[-1]==match[-1]))) { ip--; match--; }
788 :
789 : {
790 : /* Encode Literal length */
791 0 : unsigned litLength = (unsigned)(ip - anchor);
792 0 : token = op++;
793 0 : if (op + ((litLength+240)/255) + litLength > oMaxLit)
794 : {
795 : /* Not enough space for a last match : give the token byte back and finish with literals only */
796 0 : op--;
797 0 : goto _last_literals;
798 : }
799 0 : if (litLength>=RUN_MASK)
800 : {
801 0 : unsigned len = litLength - RUN_MASK;
802 0 : *token=(RUN_MASK<<ML_BITS);
803 0 : for(; len >= 255 ; len-=255) *op++ = 255;
804 0 : *op++ = (BYTE)len;
805 : }
806 0 : else *token = (BYTE)(litLength<<ML_BITS);
807 :
808 : /* Copy Literals : MLZ4_wildCopy works in 8-byte chunks and may write a few bytes past op+litLength */
809 0 : MLZ4_wildCopy(op, anchor, op+litLength);
810 0 : op += litLength;
811 : }
812 :
813 0 : _next_match:
814 : /* Encode Offset : 16-bit little-endian distance from ip back to the match */
815 0 : MLZ4_writeLE16(op, (U16)(ip-match)); op+=2;
816 :
817 : /* Encode MatchLength */
818 : {
819 : size_t matchLength;
820 :
821 0 : matchLength = MLZ4_count(ip+MINMATCH, match+MINMATCH, matchlimit);
822 :
823 0 : if (op + ((matchLength+240)/255) > oMaxMatch)
824 : {
825 : /* Match description too long : reduce it to a length derived from the bytes left before oMaxMatch */
826 0 : matchLength = (15-1) + (oMaxMatch-op) * 255;
827 : }
828 : //printf("offset %5i, matchLength%5i \n", (int)(ip-match), matchLength + MINMATCH);
829 0 : ip += MINMATCH + matchLength;
830 :
831 0 : if (matchLength>=ML_MASK)
832 : {
833 0 : *token += ML_MASK;
834 0 : matchLength -= ML_MASK;
835 0 : while (matchLength >= 255) { matchLength-=255; *op++ = 255; }
836 0 : *op++ = (BYTE)matchLength;
837 : }
838 0 : else *token += (BYTE)(matchLength);
839 : }
840 :
841 0 : anchor = ip;
842 :
843 : /* Test end of block : input exhausted, or no room left in dst for another sequence */
844 0 : if (ip > mflimit) break;
845 0 : if (op > oMaxSeq) break;
846 :
847 : /* Fill table */
848 0 : MLZ4_putPosition(ip-2, ctx, tableType, base);
849 :
850 : /* Test next position : when it matches immediately, a sequence with zero literals is started */
851 0 : match = MLZ4_getPosition(ip, ctx, tableType, base);
852 0 : MLZ4_putPosition(ip, ctx, tableType, base);
853 0 : if ( (match+MAX_DISTANCE>=ip)
854 0 : && (MLZ4_read32(match)==MLZ4_read32(ip)) )
855 0 : { token=op++; *token=0; goto _next_match; }
856 :
857 : /* Prepare next loop */
858 0 : forwardH = MLZ4_hashPosition(++ip, tableType);
859 0 : }
860 :
861 0 : _last_literals:
862 : /* Encode Last Literals : the run is shortened when it does not fit into the remaining output space */
863 : {
864 0 : size_t lastRunSize = (size_t)(iend - anchor);
865 0 : if (op + 1 /* token */ + ((lastRunSize+240)/255) /* litLength */ + lastRunSize /* literals */ > oend)
866 : {
867 : /* adapt lastRunSize to fill 'dst' */
868 0 : lastRunSize = (oend-op) - 1;
869 0 : lastRunSize -= (lastRunSize+240)/255;
870 : }
871 0 : ip = anchor + lastRunSize;
872 :
873 0 : if (lastRunSize >= RUN_MASK)
874 : {
875 0 : size_t accumulator = lastRunSize - RUN_MASK;
876 0 : *op++ = RUN_MASK << ML_BITS;
877 0 : for(; accumulator >= 255 ; accumulator-=255) *op++ = 255;
878 0 : *op++ = (BYTE) accumulator;
879 : }
880 : else
881 : {
882 0 : *op++ = (BYTE)(lastRunSize<<ML_BITS);
883 : }
884 0 : memcpy(op, anchor, lastRunSize);
885 0 : op += lastRunSize;
886 : }
887 :
888 : /* End : report how much of src was consumed, return how much of dst was written */
889 0 : *srcSizePtr = (int) (((const char*)ip)-src);
890 0 : return (int) (((char*)op)-dst);
891 : }
892 :
893 :
894 0 : static int MLZ4_compress_destSize_extState (void* state, const char* src, char* dst, int* srcSizePtr, int targetDstSize)
895 : {
896 0 : MLZ4_resetStream((MLZ4_stream_t*)state);
897 :
898 0 : if (targetDstSize >= MLZ4_compressBound(*srcSizePtr)) /* compression success is guaranteed */
899 : {
900 0 : return MLZ4_compress_fast_extState(state, src, dst, *srcSizePtr, targetDstSize, 1);
901 : }
902 : else
903 : {
904 0 : if (*srcSizePtr < MLZ4_64Klimit)
905 0 : return MLZ4_compress_destSize_generic(state, src, dst, srcSizePtr, targetDstSize, byU16);
906 : else
907 0 : return MLZ4_compress_destSize_generic(state, src, dst, srcSizePtr, targetDstSize, MLZ4_64bits() ? byU32 : byPtr);
908 : }
909 : }
910 :
911 :
912 0 : int MLZ4_compress_destSize(const char* src, char* dst, int* srcSizePtr, int targetDstSize)
913 : {
914 : #if (HEAPMODE)
915 : void* ctx = ALLOCATOR(1, sizeof(MLZ4_stream_t)); /* malloc-calloc always properly aligned */
916 : #else
917 : MLZ4_stream_t ctxBody;
918 0 : void* ctx = &ctxBody;
919 : #endif
920 :
921 0 : int result = MLZ4_compress_destSize_extState(ctx, src, dst, srcSizePtr, targetDstSize);
922 :
923 : #if (HEAPMODE)
924 : FREEMEM(ctx);
925 : #endif
926 0 : return result;
927 : }
928 :
929 :
930 :
931 : /********************************
932 : * Streaming functions
933 : ********************************/
934 :
935 0 : MLZ4_stream_t* MLZ4_createStream(void)
936 : {
937 0 : MLZ4_stream_t* lz4s = (MLZ4_stream_t*)ALLOCATOR(8, MLZ4_STREAMSIZE_U64);
938 : MLZ4_STATIC_ASSERT(MLZ4_STREAMSIZE >= sizeof(MLZ4_stream_t_internal)); /* A compilation error here means MLZ4_STREAMSIZE is not large enough */
939 0 : MLZ4_resetStream(lz4s);
940 0 : return lz4s;
941 : }
942 :
943 0 : void MLZ4_resetStream (MLZ4_stream_t* MLZ4_stream)
944 : {
945 0 : MEM_INIT(MLZ4_stream, 0, sizeof(MLZ4_stream_t));
946 0 : }
947 :
948 0 : int MLZ4_freeStream (MLZ4_stream_t* MLZ4_stream)
949 : {
950 0 : FREEMEM(MLZ4_stream);
951 0 : return (0);
952 : }
953 :
954 :
955 : #define HASH_UNIT sizeof(size_t)
956 0 : int MLZ4_loadDict (MLZ4_stream_t* MLZ4_dict, const char* dictionary, int dictSize)
957 : {
958 0 : MLZ4_stream_t_internal* dict = (MLZ4_stream_t_internal*) MLZ4_dict;
959 0 : const BYTE* p = (const BYTE*)dictionary;
960 0 : const BYTE* const dictEnd = p + dictSize;
961 : const BYTE* base;
962 :
963 0 : if ((dict->initCheck) || (dict->currentOffset > 1 GB)) /* Uninitialized structure, or reuse overflow */
964 0 : MLZ4_resetStream(MLZ4_dict);
965 :
966 0 : if (dictSize < (int)HASH_UNIT)
967 : {
968 0 : dict->dictionary = NULL;
969 0 : dict->dictSize = 0;
970 0 : return 0;
971 : }
972 :
973 0 : if ((dictEnd - p) > 64 KB) p = dictEnd - 64 KB;
974 0 : dict->currentOffset += 64 KB;
975 0 : base = p - dict->currentOffset;
976 0 : dict->dictionary = p;
977 0 : dict->dictSize = (U32)(dictEnd - p);
978 0 : dict->currentOffset += dict->dictSize;
979 :
980 0 : while (p <= dictEnd-HASH_UNIT)
981 : {
982 0 : MLZ4_putPosition(p, dict->hashTable, byU32, base);
983 0 : p+=3;
984 : }
985 :
986 0 : return dict->dictSize;
987 : }
988 :
989 :
990 0 : static void MLZ4_renormDictT(MLZ4_stream_t_internal* MLZ4_dict, const BYTE* src)
991 : {
992 0 : if ((MLZ4_dict->currentOffset > 0x80000000) ||
993 0 : ((size_t)MLZ4_dict->currentOffset > (size_t)src)) /* address space overflow */
994 : {
995 : /* rescale hash table */
996 0 : U32 delta = MLZ4_dict->currentOffset - 64 KB;
997 0 : const BYTE* dictEnd = MLZ4_dict->dictionary + MLZ4_dict->dictSize;
998 : int i;
999 0 : for (i=0; i<HASH_SIZE_U32; i++)
1000 : {
1001 0 : if (MLZ4_dict->hashTable[i] < delta) MLZ4_dict->hashTable[i]=0;
1002 0 : else MLZ4_dict->hashTable[i] -= delta;
1003 : }
1004 0 : MLZ4_dict->currentOffset = 64 KB;
1005 0 : if (MLZ4_dict->dictSize > 64 KB) MLZ4_dict->dictSize = 64 KB;
1006 0 : MLZ4_dict->dictionary = dictEnd - MLZ4_dict->dictSize;
1007 : }
1008 0 : }
1009 :
1010 :
1011 0 : int MLZ4_compress_fast_continue (MLZ4_stream_t* MLZ4_stream, const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration)
1012 : {
1013 0 : MLZ4_stream_t_internal* streamPtr = (MLZ4_stream_t_internal*)MLZ4_stream;
1014 0 : const BYTE* const dictEnd = streamPtr->dictionary + streamPtr->dictSize;
1015 :
1016 0 : const BYTE* smallest = (const BYTE*) source;
1017 0 : if (streamPtr->initCheck) return 0; /* Uninitialized structure detected */
1018 0 : if ((streamPtr->dictSize>0) && (smallest>dictEnd)) smallest = dictEnd;
1019 0 : MLZ4_renormDictT(streamPtr, smallest);
1020 0 : if (acceleration < 1) acceleration = ACCELERATION_DEFAULT;
1021 :
1022 : /* Check overlapping input/dictionary space */
1023 : {
1024 0 : const BYTE* sourceEnd = (const BYTE*) source + inputSize;
1025 0 : if ((sourceEnd > streamPtr->dictionary) && (sourceEnd < dictEnd))
1026 : {
1027 0 : streamPtr->dictSize = (U32)(dictEnd - sourceEnd);
1028 0 : if (streamPtr->dictSize > 64 KB) streamPtr->dictSize = 64 KB;
1029 0 : if (streamPtr->dictSize < 4) streamPtr->dictSize = 0;
1030 0 : streamPtr->dictionary = dictEnd - streamPtr->dictSize;
1031 : }
1032 : }
1033 :
1034 : /* prefix mode : source data follows dictionary */
1035 0 : if (dictEnd == (const BYTE*)source)
1036 : {
1037 : int result;
1038 0 : if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset))
1039 0 : result = MLZ4_compress_generic(MLZ4_stream, source, dest, inputSize, maxOutputSize, limitedOutput, byU32, withPrefix64k, dictSmall, acceleration);
1040 : else
1041 0 : result = MLZ4_compress_generic(MLZ4_stream, source, dest, inputSize, maxOutputSize, limitedOutput, byU32, withPrefix64k, noDictIssue, acceleration);
1042 0 : streamPtr->dictSize += (U32)inputSize;
1043 0 : streamPtr->currentOffset += (U32)inputSize;
1044 0 : return result;
1045 : }
1046 :
1047 : /* external dictionary mode */
1048 : {
1049 : int result;
1050 0 : if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset))
1051 0 : result = MLZ4_compress_generic(MLZ4_stream, source, dest, inputSize, maxOutputSize, limitedOutput, byU32, usingExtDict, dictSmall, acceleration);
1052 : else
1053 0 : result = MLZ4_compress_generic(MLZ4_stream, source, dest, inputSize, maxOutputSize, limitedOutput, byU32, usingExtDict, noDictIssue, acceleration);
1054 0 : streamPtr->dictionary = (const BYTE*)source;
1055 0 : streamPtr->dictSize = (U32)inputSize;
1056 0 : streamPtr->currentOffset += (U32)inputSize;
1057 0 : return result;
1058 : }
1059 : }
1060 :
1061 :
1062 : /* Hidden debug function, to force external dictionary mode */
1063 0 : int MLZ4_compress_forceExtDict (MLZ4_stream_t* MLZ4_dict, const char* source, char* dest, int inputSize)
1064 : {
1065 0 : MLZ4_stream_t_internal* streamPtr = (MLZ4_stream_t_internal*)MLZ4_dict;
1066 : int result;
1067 0 : const BYTE* const dictEnd = streamPtr->dictionary + streamPtr->dictSize;
1068 :
1069 0 : const BYTE* smallest = dictEnd;
1070 0 : if (smallest > (const BYTE*) source) smallest = (const BYTE*) source;
1071 0 : MLZ4_renormDictT((MLZ4_stream_t_internal*)MLZ4_dict, smallest);
1072 :
1073 0 : result = MLZ4_compress_generic(MLZ4_dict, source, dest, inputSize, 0, notLimited, byU32, usingExtDict, noDictIssue, 1);
1074 :
1075 0 : streamPtr->dictionary = (const BYTE*)source;
1076 0 : streamPtr->dictSize = (U32)inputSize;
1077 0 : streamPtr->currentOffset += (U32)inputSize;
1078 :
1079 0 : return result;
1080 : }
1081 :
1082 :
1083 0 : int MLZ4_saveDict (MLZ4_stream_t* MLZ4_dict, char* safeBuffer, int dictSize)
1084 : {
1085 0 : MLZ4_stream_t_internal* dict = (MLZ4_stream_t_internal*) MLZ4_dict;
1086 0 : const BYTE* previousDictEnd = dict->dictionary + dict->dictSize;
1087 :
1088 0 : if ((U32)dictSize > 64 KB) dictSize = 64 KB; /* useless to define a dictionary > 64 KB */
1089 0 : if ((U32)dictSize > dict->dictSize) dictSize = dict->dictSize;
1090 :
1091 0 : memmove(safeBuffer, previousDictEnd - dictSize, dictSize);
1092 :
1093 0 : dict->dictionary = (const BYTE*)safeBuffer;
1094 0 : dict->dictSize = (U32)dictSize;
1095 :
1096 0 : return dictSize;
1097 : }
1098 :
1099 :
1100 :
1101 : /*******************************
1102 : * Decompression functions
1103 : *******************************/
1104 : /*
1105 : * This generic decompression function covers all use cases.
1106 : * It shall be instantiated several times, using different sets of directives.
1107 : * Note that it is essential this generic function is really inlined,
1108 : * in order to remove useless branches during compilation optimization.
 *
 * Return value :
 * - when endOnInput is set : number of bytes written into `dest`
 * - otherwise : number of bytes read from `source`
 * - on error : a negative value, -(number of input bytes consumed so far) - 1
 *
 * Each iteration of the main loop decodes one sequence : a token, a run of
 * literals, a 2-byte little-endian match offset, and a match length.
 * The loop is left through the "copy literals" step, once the last literals
 * have been reached.
1109 : */
1110 0 : FORCE_INLINE int MLZ4_decompress_generic(
1111 : const char* const source, /* compressed data to decode */
1112 : char* const dest, /* where decoded bytes are written */
1113 : int inputSize, /* size of `source`; only consulted (through `iend`) when endOnInput is set */
1114 : int outputSize, /* If endOnInput==endOnInputSize, this value is the max size of Output Buffer. */
1115 :
1116 : int endOnInput, /* endOnOutputSize, endOnInputSize */
1117 : int partialDecoding, /* full, partial */
1118 : int targetOutputSize, /* only used if partialDecoding==partial */
1119 : int dict, /* noDict, withPrefix64k, usingExtDict */
1120 : const BYTE* const lowPrefix, /* == dest if dict == noDict */
1121 : const BYTE* const dictStart, /* only if dict==usingExtDict */
1122 : const size_t dictSize /* note : = 0 if noDict */
1123 : )
1124 : {
1125 : /* Local Variables */
1126 0 : const BYTE* ip = (const BYTE*) source; /* read cursor */
1127 0 : const BYTE* const iend = ip + inputSize;
1128 :
1129 0 : BYTE* op = (BYTE*) dest; /* write cursor */
1130 0 : BYTE* const oend = op + outputSize;
1131 : BYTE* cpy; /* end of the copy currently being performed */
1132 0 : BYTE* oexit = op + targetOutputSize; /* partial decoding : position from which decoding may stop */
1133 0 : const BYTE* const lowLimit = lowPrefix - dictSize; /* lowest address a match is allowed to start from */
1134 :
1135 0 : const BYTE* const dictEnd = (const BYTE*)dictStart + dictSize;
1136 0 : const size_t dec32table[] = {4, 1, 2, 1, 4, 4, 4, 4}; /* added to `match` when the match distance is < 8 */
1137 0 : const size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3}; /* subtracted from `match` afterwards, same case */
1138 :
1139 0 : const int safeDecode = (endOnInput==endOnInputSize);
1140 0 : const int checkOffset = ((safeDecode) && (dictSize < (int)(64 KB))); /* offsets are read as 16-bit values : validation is skipped once 64 KB of history is declared */
1141 :
1142 :
1143 : /* Special cases */
1144 0 : if ((partialDecoding) && (oexit> oend-MFLIMIT)) oexit = oend-MFLIMIT; /* targetOutputSize too high => decode everything */
1145 0 : if ((endOnInput) && (unlikely(outputSize==0))) return ((inputSize==1) && (*ip==0)) ? 0 : -1; /* Empty output buffer */
1146 0 : if ((!endOnInput) && (unlikely(outputSize==0))) return (*ip==0?1:-1); /* Empty output : input must be a single zero token */
1147 :
1148 :
1149 : /* Main Loop */
1150 : while (1)
1151 : {
1152 : unsigned token;
1153 : size_t length;
1154 : const BYTE* match;
1155 :
1156 : /* get literal length */
1157 0 : token = *ip++;
1158 0 : if ((length=(token>>ML_BITS)) == RUN_MASK)
1159 : {
1160 : unsigned s;
1161 : do /* length continues in following bytes, as long as they are equal to 255 */
1162 : {
1163 0 : s = *ip++;
1164 0 : length += s;
1165 : }
1166 0 : while (likely((endOnInput)?ip<iend-RUN_MASK:1) && (s==255));
1167 0 : if ((safeDecode) && unlikely((size_t)(op+length)<(size_t)(op))) goto _output_error; /* overflow detection */
1168 0 : if ((safeDecode) && unlikely((size_t)(ip+length)<(size_t)(ip))) goto _output_error; /* overflow detection */
1169 : }
1170 :
1171 : /* copy literals */
1172 0 : cpy = op+length;
1173 0 : if (((endOnInput) && ((cpy>(partialDecoding?oexit:oend-MFLIMIT)) || (ip+length>iend-(2+1+LASTLITERALS))) )
1174 0 : || ((!endOnInput) && (cpy>oend-COPYLENGTH)))
1175 : {
 /* too close to the end of a buffer for a wild copy : validate, copy exactly `length` bytes, then stop */
1176 0 : if (partialDecoding)
1177 : {
1178 0 : if (cpy > oend) goto _output_error; /* Error : write attempt beyond end of output buffer */
1179 0 : if ((endOnInput) && (ip+length > iend)) goto _output_error; /* Error : read attempt beyond end of input buffer */
1180 : }
1181 : else
1182 : {
1183 0 : if ((!endOnInput) && (cpy != oend)) goto _output_error; /* Error : block decoding must stop exactly there */
1184 0 : if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) goto _output_error; /* Error : input must be consumed */
1185 : }
1186 0 : memcpy(op, ip, length);
1187 0 : ip += length;
1188 0 : op += length;
1189 0 : break; /* Necessarily EOF, due to parsing restrictions */
1190 : }
1191 0 : MLZ4_wildCopy(op, ip, cpy);
1192 0 : ip += length; op = cpy;
1193 :
1194 : /* get offset */
1195 0 : match = cpy - MLZ4_readLE16(ip); ip+=2;
1196 0 : if ((checkOffset) && (unlikely(match < lowLimit))) goto _output_error; /* Error : offset outside destination buffer */
1197 :
1198 : /* get matchlength */
1199 0 : length = token & ML_MASK;
1200 0 : if (length == ML_MASK)
1201 : {
1202 : unsigned s;
1203 : do
1204 : {
1205 0 : if ((endOnInput) && (ip > iend-LASTLITERALS)) goto _output_error; /* Error : length bytes would run into the last literals */
1206 0 : s = *ip++;
1207 0 : length += s;
1208 0 : } while (s==255);
1209 0 : if ((safeDecode) && unlikely((size_t)(op+length)<(size_t)op)) goto _output_error; /* overflow detection */
1210 : }
1211 0 : length += MINMATCH;
1212 :
1213 : /* check external dictionary */
1214 0 : if ((dict==usingExtDict) && (match < lowPrefix))
1215 : {
1216 0 : if (unlikely(op+length > oend-LASTLITERALS)) goto _output_error; /* doesn't respect parsing restriction */
1217 :
1218 0 : if (length <= (size_t)(lowPrefix-match))
1219 : {
1220 : /* match can be copied as a single segment from external dictionary */
1221 0 : match = dictEnd - (lowPrefix-match);
1222 0 : memmove(op, match, length); op += length;
1223 : }
1224 : else
1225 : {
1226 : /* match encompasses external dictionary and current segment */
1227 0 : size_t copySize = (size_t)(lowPrefix-match); /* part located in the external dictionary */
1228 0 : memcpy(op, dictEnd - copySize, copySize);
1229 0 : op += copySize;
1230 0 : copySize = length - copySize; /* remaining part, taken from the start of the prefix */
1231 0 : if (copySize > (size_t)(op-lowPrefix)) /* overlap within current segment */
1232 : {
1233 0 : BYTE* const endOfMatch = op + copySize;
1234 0 : const BYTE* copyFrom = lowPrefix;
1235 0 : while (op < endOfMatch) *op++ = *copyFrom++; /* byte by byte : source and destination overlap */
1236 : }
1237 : else
1238 : {
1239 0 : memcpy(op, lowPrefix, copySize);
1240 0 : op += copySize;
1241 : }
1242 : }
1243 0 : continue;
1244 0 : }
1245 :
1246 : /* copy repeated sequence */
1247 0 : cpy = op + length;
1248 0 : if (unlikely((op-match)<8))
1249 : {
 /* match distance < 8 : copy the first 4 bytes one by one, then adjust `match` through the dec tables */
1250 0 : const size_t dec64 = dec64table[op-match];
1251 0 : op[0] = match[0];
1252 0 : op[1] = match[1];
1253 0 : op[2] = match[2];
1254 0 : op[3] = match[3];
1255 0 : match += dec32table[op-match];
1256 0 : MLZ4_copy4(op+4, match);
1257 0 : op += 8; match -= dec64;
1258 0 : } else { MLZ4_copy8(op, match); op+=8; match+=8; }
1259 :
1260 0 : if (unlikely(cpy>oend-12))
1261 : {
 /* match ends near the end of the output buffer : limit the wild copy, finish byte by byte */
1262 0 : if (cpy > oend-LASTLITERALS) goto _output_error; /* Error : last LASTLITERALS bytes must be literals */
1263 0 : if (op < oend-8)
1264 : {
1265 0 : MLZ4_wildCopy(op, match, oend-8);
1266 0 : match += (oend-8) - op;
1267 0 : op = oend-8;
1268 : }
1269 0 : while (op<cpy) *op++ = *match++;
1270 : }
1271 : else
1272 0 : MLZ4_wildCopy(op, match, cpy);
1273 0 : op=cpy; /* correction */
1274 0 : }
1275 :
1276 : /* end of decoding */
1277 0 : if (endOnInput)
1278 0 : return (int) (((char*)op)-dest); /* Nb of output bytes decoded */
1279 : else
1280 0 : return (int) (((const char*)ip)-source); /* Nb of input bytes read */
1281 :
1282 : /* Overflow error detected */
1283 0 : _output_error:
1284 0 : return (int) (-(((const char*)ip)-source))-1; /* negative : -(input bytes consumed so far) - 1 */
1285 : }
1286 :
1287 :
1288 0 : int MLZ4_decompress_safe(const char* source, char* dest, int compressedSize, int maxDecompressedSize)
1289 : {
1290 0 : return MLZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize, endOnInputSize, full, 0, noDict, (BYTE*)dest, NULL, 0);
1291 : }
1292 :
1293 0 : int MLZ4_decompress_safe_partial(const char* source, char* dest, int compressedSize, int targetOutputSize, int maxDecompressedSize)
1294 : {
1295 0 : return MLZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize, endOnInputSize, partial, targetOutputSize, noDict, (BYTE*)dest, NULL, 0);
1296 : }
1297 :
1298 0 : int MLZ4_decompress_fast(const char* source, char* dest, int originalSize)
1299 : {
1300 0 : return MLZ4_decompress_generic(source, dest, 0, originalSize, endOnOutputSize, full, 0, withPrefix64k, (BYTE*)(dest - 64 KB), NULL, 64 KB);
1301 : }
1302 :
1303 :
1304 : /* streaming decompression functions */
1305 :
/* Internal view of MLZ4_streamDecode_t : tracks where previously decoded data can be found. */
1306 : typedef struct
1307 : {
1308 : const BYTE* externalDict; /* start of the external dictionary handed to the generic decoder */
1309 : size_t extDictSize; /* size of the external dictionary, in bytes */
1310 : const BYTE* prefixEnd; /* end of the last decoded data; compared with `dest` to detect contiguous output */
1311 : size_t prefixSize; /* number of bytes available just before prefixEnd */
1312 : } MLZ4_streamDecode_t_internal;
1313 :
1314 : /*
1315 : * If you prefer dynamic allocation methods,
1316 : * MLZ4_createStreamDecode()
1317 : * provides a pointer (void*) towards an initialized MLZ4_streamDecode_t structure.
1318 : */
1319 0 : MLZ4_streamDecode_t* MLZ4_createStreamDecode(void)
1320 : {
1321 0 : MLZ4_streamDecode_t* lz4s = (MLZ4_streamDecode_t*) ALLOCATOR(1, sizeof(MLZ4_streamDecode_t));
1322 0 : return lz4s;
1323 : }
1324 :
1325 0 : int MLZ4_freeStreamDecode (MLZ4_streamDecode_t* MLZ4_stream)
1326 : {
1327 0 : FREEMEM(MLZ4_stream);
1328 0 : return 0;
1329 : }
1330 :
1331 : /*
1332 : * MLZ4_setStreamDecode
1333 : * Use this function to instruct where to find the dictionary
1334 : * This function is not necessary if previous data is still available where it was decoded.
1335 : * Loading a size of 0 is allowed (same effect as no dictionary).
1336 : * Return : 1 if OK, 0 if error
1337 : */
1338 0 : int MLZ4_setStreamDecode (MLZ4_streamDecode_t* MLZ4_streamDecode, const char* dictionary, int dictSize)
1339 : {
1340 0 : MLZ4_streamDecode_t_internal* lz4sd = (MLZ4_streamDecode_t_internal*) MLZ4_streamDecode;
1341 0 : lz4sd->prefixSize = (size_t) dictSize;
1342 0 : lz4sd->prefixEnd = (const BYTE*) dictionary + dictSize;
1343 0 : lz4sd->externalDict = NULL;
1344 0 : lz4sd->extDictSize = 0;
1345 0 : return 1;
1346 : }
1347 :
1348 : /*
1349 : *_continue() :
1350 : These decoding functions allow decompression of multiple blocks in "streaming" mode.
1351 : Previously decoded blocks must still be available at the memory position where they were decoded.
1352 : If it's not possible, save the relevant part of decoded data into a safe buffer,
1353 : and indicate where it stands using MLZ4_setStreamDecode()
1354 : */
1355 0 : int MLZ4_decompress_safe_continue (MLZ4_streamDecode_t* MLZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxOutputSize)
1356 : {
1357 0 : MLZ4_streamDecode_t_internal* lz4sd = (MLZ4_streamDecode_t_internal*) MLZ4_streamDecode;
1358 : int result;
1359 :
1360 0 : if (lz4sd->prefixEnd == (BYTE*)dest)
1361 : {
1362 0 : result = MLZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
1363 : endOnInputSize, full, 0,
1364 0 : usingExtDict, lz4sd->prefixEnd - lz4sd->prefixSize, lz4sd->externalDict, lz4sd->extDictSize);
1365 0 : if (result <= 0) return result;
1366 0 : lz4sd->prefixSize += result;
1367 0 : lz4sd->prefixEnd += result;
1368 : }
1369 : else
1370 : {
1371 0 : lz4sd->extDictSize = lz4sd->prefixSize;
1372 0 : lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize;
1373 0 : result = MLZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
1374 : endOnInputSize, full, 0,
1375 : usingExtDict, (BYTE*)dest, lz4sd->externalDict, lz4sd->extDictSize);
1376 0 : if (result <= 0) return result;
1377 0 : lz4sd->prefixSize = result;
1378 0 : lz4sd->prefixEnd = (BYTE*)dest + result;
1379 : }
1380 :
1381 0 : return result;
1382 : }
1383 :
1384 0 : int MLZ4_decompress_fast_continue (MLZ4_streamDecode_t* MLZ4_streamDecode, const char* source, char* dest, int originalSize)
1385 : {
1386 0 : MLZ4_streamDecode_t_internal* lz4sd = (MLZ4_streamDecode_t_internal*) MLZ4_streamDecode;
1387 : int result;
1388 :
1389 0 : if (lz4sd->prefixEnd == (BYTE*)dest)
1390 : {
1391 0 : result = MLZ4_decompress_generic(source, dest, 0, originalSize,
1392 : endOnOutputSize, full, 0,
1393 0 : usingExtDict, lz4sd->prefixEnd - lz4sd->prefixSize, lz4sd->externalDict, lz4sd->extDictSize);
1394 0 : if (result <= 0) return result;
1395 0 : lz4sd->prefixSize += originalSize;
1396 0 : lz4sd->prefixEnd += originalSize;
1397 : }
1398 : else
1399 : {
1400 0 : lz4sd->extDictSize = lz4sd->prefixSize;
1401 0 : lz4sd->externalDict = (BYTE*)dest - lz4sd->extDictSize;
1402 0 : result = MLZ4_decompress_generic(source, dest, 0, originalSize,
1403 : endOnOutputSize, full, 0,
1404 : usingExtDict, (BYTE*)dest, lz4sd->externalDict, lz4sd->extDictSize);
1405 0 : if (result <= 0) return result;
1406 0 : lz4sd->prefixSize = originalSize;
1407 0 : lz4sd->prefixEnd = (BYTE*)dest + originalSize;
1408 : }
1409 :
1410 0 : return result;
1411 : }
1412 :
1413 :
1414 : /*
1415 : Advanced decoding functions :
1416 : *_usingDict() :
1417 : These decoding functions work the same as "_continue" ones,
1418 : the dictionary must be explicitly provided within parameters
1419 : */
1420 :
1421 0 : FORCE_INLINE int MLZ4_decompress_usingDict_generic(const char* source, char* dest, int compressedSize, int maxOutputSize, int safe, const char* dictStart, int dictSize)
1422 : {
1423 0 : if (dictSize==0)
1424 0 : return MLZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, safe, full, 0, noDict, (BYTE*)dest, NULL, 0);
1425 0 : if (dictStart+dictSize == dest)
1426 : {
1427 0 : if (dictSize >= (int)(64 KB - 1))
1428 0 : return MLZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, safe, full, 0, withPrefix64k, (BYTE*)dest-64 KB, NULL, 0);
1429 0 : return MLZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, safe, full, 0, noDict, (BYTE*)dest-dictSize, NULL, 0);
1430 : }
1431 0 : return MLZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, safe, full, 0, usingExtDict, (BYTE*)dest, (const BYTE*)dictStart, dictSize);
1432 : }
1433 :
/*
 * MLZ4_decompress_safe_usingDict() :
 * Bound-checked decoding with an explicitly provided dictionary
 * (`safe` directive set to 1).
 */
int MLZ4_decompress_safe_usingDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize)
{
    const int safe = 1;
    return MLZ4_decompress_usingDict_generic(source, dest, compressedSize, maxOutputSize, safe, dictStart, dictSize);
}
1438 :
/*
 * MLZ4_decompress_fast_usingDict() :
 * Decoding driven by the output size (`originalSize`), with an explicitly
 * provided dictionary (`safe` directive set to 0, input size passed as 0).
 */
int MLZ4_decompress_fast_usingDict(const char* source, char* dest, int originalSize, const char* dictStart, int dictSize)
{
    const int safe = 0;
    return MLZ4_decompress_usingDict_generic(source, dest, 0, originalSize, safe, dictStart, dictSize);
}
1443 :
1444 : /* debug function */
1445 0 : int MLZ4_decompress_safe_forceExtDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize)
1446 : {
1447 0 : return MLZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, endOnInputSize, full, 0, usingExtDict, (BYTE*)dest, (const BYTE*)dictStart, dictSize);
1448 : }
1449 :
1450 :
1451 : /***************************************************
1452 : * Obsolete Functions
1453 : ***************************************************/
1454 : /* obsolete compression functions */
1455 0 : int MLZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize) { return MLZ4_compress_default(source, dest, inputSize, maxOutputSize); }
1456 0 : int MLZ4_compress(const char* source, char* dest, int inputSize) { return MLZ4_compress_default(source, dest, inputSize, MLZ4_compressBound(inputSize)); }
1457 0 : int MLZ4_compress_limitedOutput_withState (void* state, const char* src, char* dst, int srcSize, int dstSize) { return MLZ4_compress_fast_extState(state, src, dst, srcSize, dstSize, 1); }
1458 0 : int MLZ4_compress_withState (void* state, const char* src, char* dst, int srcSize) { return MLZ4_compress_fast_extState(state, src, dst, srcSize, MLZ4_compressBound(srcSize), 1); }
1459 0 : int MLZ4_compress_limitedOutput_continue (MLZ4_stream_t* MLZ4_stream, const char* src, char* dst, int srcSize, int maxDstSize) { return MLZ4_compress_fast_continue(MLZ4_stream, src, dst, srcSize, maxDstSize, 1); }
1460 0 : int MLZ4_compress_continue (MLZ4_stream_t* MLZ4_stream, const char* source, char* dest, int inputSize) { return MLZ4_compress_fast_continue(MLZ4_stream, source, dest, inputSize, MLZ4_compressBound(inputSize), 1); }
1461 :
1462 : /*
1463 : These function names are deprecated and should no longer be used.
1464 : They are only provided here for compatibility with older user programs.
1465 : - MLZ4_uncompress is totally equivalent to MLZ4_decompress_fast
1466 : - MLZ4_uncompress_unknownOutputSize is totally equivalent to MLZ4_decompress_safe
1467 : */
/* Deprecated name of MLZ4_decompress_fast(). */
int MLZ4_uncompress (const char* source, char* dest, int outputSize)
{
    return MLZ4_decompress_fast(source, dest, outputSize);
}

/* Deprecated name of MLZ4_decompress_safe(). */
int MLZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize)
{
    return MLZ4_decompress_safe(source, dest, isize, maxOutputSize);
}
1470 :
1471 :
1472 : /* Obsolete Streaming functions */
1473 :
1474 0 : int MLZ4_sizeofStreamState() { return MLZ4_STREAMSIZE; }
1475 :
1476 0 : static void MLZ4_init(MLZ4_stream_t_internal* lz4ds, BYTE* base)
1477 : {
1478 0 : MEM_INIT(lz4ds, 0, MLZ4_STREAMSIZE);
1479 0 : lz4ds->bufferStart = base;
1480 0 : }
1481 :
1482 0 : int MLZ4_resetStreamState(void* state, char* inputBuffer)
1483 : {
1484 0 : if ((((size_t)state) & 3) != 0) return 1; /* Error : pointer is not aligned on 4-bytes boundary */
1485 0 : MLZ4_init((MLZ4_stream_t_internal*)state, (BYTE*)inputBuffer);
1486 0 : return 0;
1487 : }
1488 :
1489 0 : void* MLZ4_create (char* inputBuffer)
1490 : {
1491 0 : void* lz4ds = ALLOCATOR(8, MLZ4_STREAMSIZE_U64);
1492 0 : MLZ4_init ((MLZ4_stream_t_internal*)lz4ds, (BYTE*)inputBuffer);
1493 0 : return lz4ds;
1494 : }
1495 :
1496 0 : char* MLZ4_slideInputBuffer (void* MLZ4_Data)
1497 : {
1498 0 : MLZ4_stream_t_internal* ctx = (MLZ4_stream_t_internal*)MLZ4_Data;
1499 0 : int dictSize = MLZ4_saveDict((MLZ4_stream_t*)MLZ4_Data, (char*)ctx->bufferStart, 64 KB);
1500 0 : return (char*)(ctx->bufferStart + dictSize);
1501 : }
1502 :
1503 : /* Obsolete streaming decompression functions */
1504 :
1505 0 : int MLZ4_decompress_safe_withPrefix64k(const char* source, char* dest, int compressedSize, int maxOutputSize)
1506 : {
1507 0 : return MLZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, endOnInputSize, full, 0, withPrefix64k, (BYTE*)dest - 64 KB, NULL, 64 KB);
1508 : }
1509 :
1510 0 : int MLZ4_decompress_fast_withPrefix64k(const char* source, char* dest, int originalSize)
1511 : {
1512 0 : return MLZ4_decompress_generic(source, dest, 0, originalSize, endOnOutputSize, full, 0, withPrefix64k, (BYTE*)dest - 64 KB, NULL, 64 KB);
1513 : }
1514 :
1515 : #endif /* MLZ4_COMMONDEFS_ONLY */
1516 :
|