Skip to content

Commit 6bea947

Browse files
committed
HADOOP-9319. Update bundled LZ4 source to r99. (Binglin Chang via llu)
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1510734 13f79535-47bb-0310-9956-ffa450edef68
1 parent 1ffe056 commit 6bea947

File tree

18 files changed

+2154
-553
lines changed

18 files changed

+2154
-553
lines changed

hadoop-common-project/hadoop-common/CHANGES.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -283,6 +283,8 @@ Release 2.3.0 - UNRELEASED
283283

284284
IMPROVEMENTS
285285

286+
HADOOP-9319. Update bundled LZ4 source to r99. (Binglin Chang via llu)
287+
286288
HADOOP-9241. DU refresh interval is not configurable (harsh)
287289

288290
HADOOP-9417. Support for symlink resolution in LocalFileSystem /

hadoop-common-project/hadoop-common/LICENSE.txt

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -252,24 +252,26 @@ in src/main/native/src/org/apache/hadoop/util:
252252
* BSD-style license that can be found in the LICENSE file.
253253
*/
254254

255-
For src/main/native/src/org/apache/hadoop/io/compress/lz4/lz4.c:
255+
For src/main/native/src/org/apache/hadoop/io/compress/lz4/{lz4.h,lz4.c,
256+
lz4_encoder.h,lz4hc.h,lz4hc.c,lz4hc_encoder.h},
256257

257258
/*
258259
LZ4 - Fast LZ compression algorithm
259-
Copyright (C) 2011, Yann Collet.
260-
BSD License
260+
Header File
261+
Copyright (C) 2011-2013, Yann Collet.
262+
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
261263

262264
Redistribution and use in source and binary forms, with or without
263265
modification, are permitted provided that the following conditions are
264266
met:
265-
267+
266268
* Redistributions of source code must retain the above copyright
267269
notice, this list of conditions and the following disclaimer.
268270
* Redistributions in binary form must reproduce the above
269271
copyright notice, this list of conditions and the following disclaimer
270272
in the documentation and/or other materials provided with the
271273
distribution.
272-
274+
273275
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
274276
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
275277
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -281,4 +283,8 @@ in src/main/native/src/org/apache/hadoop/util:
281283
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
282284
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
283285
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
286+
287+
You can contact the author at :
288+
- LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
289+
- LZ4 source repository : http://code.google.com/p/lz4/
284290
*/

hadoop-common-project/hadoop-common/pom.xml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -456,7 +456,12 @@
456456
<exclude>src/test/empty-file</exclude>
457457
<exclude>src/test/all-tests</exclude>
458458
<exclude>src/test/resources/kdc/ldif/users.ldif</exclude>
459+
<exclude>src/main/native/src/org/apache/hadoop/io/compress/lz4/lz4.h</exclude>
459460
<exclude>src/main/native/src/org/apache/hadoop/io/compress/lz4/lz4.c</exclude>
461+
<exclude>src/main/native/src/org/apache/hadoop/io/compress/lz4/lz4_encoder.h</exclude>
462+
<exclude>src/main/native/src/org/apache/hadoop/io/compress/lz4/lz4hc.h</exclude>
463+
<exclude>src/main/native/src/org/apache/hadoop/io/compress/lz4/lz4hc.c</exclude>
464+
<exclude>src/main/native/src/org/apache/hadoop/io/compress/lz4/lz4hc_encoder.h</exclude>
460465
<exclude>src/test/java/org/apache/hadoop/fs/test-untar.tgz</exclude>
461466
</excludes>
462467
</configuration>

hadoop-common-project/hadoop-common/src/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,7 @@ add_dual_library(hadoop
170170
${D}/io/compress/lz4/Lz4Compressor.c
171171
${D}/io/compress/lz4/Lz4Decompressor.c
172172
${D}/io/compress/lz4/lz4.c
173+
${D}/io/compress/lz4/lz4hc.c
173174
${SNAPPY_SOURCE_FILES}
174175
${D}/io/compress/zlib/ZlibCompressor.c
175176
${D}/io/compress/zlib/ZlibDecompressor.c

hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,14 +96,22 @@ public class CommonConfigurationKeys extends CommonConfigurationKeysPublic {
9696
public static final int IO_COMPRESSION_CODEC_SNAPPY_BUFFERSIZE_DEFAULT =
9797
256 * 1024;
9898

99-
/** Internal buffer size for Snappy compressor/decompressors */
99+
/** Internal buffer size for Lz4 compressor/decompressors */
100100
public static final String IO_COMPRESSION_CODEC_LZ4_BUFFERSIZE_KEY =
101101
"io.compression.codec.lz4.buffersize";
102102

103103
/** Default value for IO_COMPRESSION_CODEC_LZ4_BUFFERSIZE_KEY */
104104
public static final int IO_COMPRESSION_CODEC_LZ4_BUFFERSIZE_DEFAULT =
105105
256 * 1024;
106106

107+
/** Use lz4hc (slower, but with a higher compression ratio) for lz4 compression */
108+
public static final String IO_COMPRESSION_CODEC_LZ4_USELZ4HC_KEY =
109+
"io.compression.codec.lz4.use.lz4hc";
110+
111+
/** Default value for IO_COMPRESSION_CODEC_LZ4_USELZ4HC_KEY */
112+
public static final boolean IO_COMPRESSION_CODEC_LZ4_USELZ4HC_DEFAULT =
113+
false;
114+
107115
/**
108116
* Service Authorization
109117
*/

hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/Lz4Codec.java

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ public CompressionOutputStream createOutputStream(OutputStream out,
107107
CommonConfigurationKeys.IO_COMPRESSION_CODEC_LZ4_BUFFERSIZE_KEY,
108108
CommonConfigurationKeys.IO_COMPRESSION_CODEC_LZ4_BUFFERSIZE_DEFAULT);
109109

110-
int compressionOverhead = Math.max((int)(bufferSize * 0.01), 10);
110+
int compressionOverhead = bufferSize/255 + 16;
111111

112112
return new BlockCompressorStream(out, compressor, bufferSize,
113113
compressionOverhead);
@@ -140,7 +140,10 @@ public Compressor createCompressor() {
140140
int bufferSize = conf.getInt(
141141
CommonConfigurationKeys.IO_COMPRESSION_CODEC_LZ4_BUFFERSIZE_KEY,
142142
CommonConfigurationKeys.IO_COMPRESSION_CODEC_LZ4_BUFFERSIZE_DEFAULT);
143-
return new Lz4Compressor(bufferSize);
143+
boolean useLz4HC = conf.getBoolean(
144+
CommonConfigurationKeys.IO_COMPRESSION_CODEC_LZ4_USELZ4HC_KEY,
145+
CommonConfigurationKeys.IO_COMPRESSION_CODEC_LZ4_USELZ4HC_DEFAULT);
146+
return new Lz4Compressor(bufferSize, useLz4HC);
144147
}
145148

146149
/**

hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/lz4/Lz4Compressor.java

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ public class Lz4Compressor implements Compressor {
5252
private long bytesRead = 0L;
5353
private long bytesWritten = 0L;
5454

55+
private final boolean useLz4HC;
5556

5657
static {
5758
if (NativeCodeLoader.isNativeCodeLoaded()) {
@@ -72,15 +73,27 @@ public class Lz4Compressor implements Compressor {
7273
* Creates a new compressor.
7374
*
7475
* @param directBufferSize size of the direct buffer to be used.
76+
* @param useLz4HC use high compression ratio version of lz4,
77+
* which trades CPU for compression ratio.
7578
*/
76-
public Lz4Compressor(int directBufferSize) {
79+
public Lz4Compressor(int directBufferSize, boolean useLz4HC) {
80+
this.useLz4HC = useLz4HC;
7781
this.directBufferSize = directBufferSize;
7882

7983
uncompressedDirectBuf = ByteBuffer.allocateDirect(directBufferSize);
8084
compressedDirectBuf = ByteBuffer.allocateDirect(directBufferSize);
8185
compressedDirectBuf.position(directBufferSize);
8286
}
8387

88+
/**
89+
* Creates a new compressor.
90+
*
91+
* @param directBufferSize size of the direct buffer to be used.
92+
*/
93+
public Lz4Compressor(int directBufferSize) {
94+
this(directBufferSize, false);
95+
}
96+
8497
/**
8598
* Creates a new compressor with the default buffer size.
8699
*/
@@ -227,7 +240,7 @@ public synchronized int compress(byte[] b, int off, int len)
227240
}
228241

229242
// Compress data
230-
n = compressBytesDirect();
243+
n = useLz4HC ? compressBytesDirectHC() : compressBytesDirect();
231244
compressedDirectBuf.limit(n);
232245
uncompressedDirectBuf.clear(); // lz4 consumes all buffer input
233246

@@ -297,5 +310,7 @@ public synchronized void end() {
297310

298311
private native int compressBytesDirect();
299312

313+
private native int compressBytesDirectHC();
314+
300315
public native static String getLibraryName();
301316
}

hadoop-common-project/hadoop-common/src/main/native/native.vcxproj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@
7272
</ItemDefinitionGroup>
7373
<ItemGroup>
7474
<ClCompile Include="src\org\apache\hadoop\io\compress\lz4\lz4.c" />
75+
<ClCompile Include="src\org\apache\hadoop\io\compress\lz4\lz4hc.c" />
7576
<ClCompile Include="src\org\apache\hadoop\io\compress\lz4\Lz4Compressor.c" />
7677
<ClCompile Include="src\org\apache\hadoop\io\compress\lz4\Lz4Decompressor.c" />
7778
<ClCompile Include="src\org\apache\hadoop\io\nativeio\file_descriptor.c" />

hadoop-common-project/hadoop-common/src/main/native/native.vcxproj.filters

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,9 @@
5151
<ClCompile Include="src\org\apache\hadoop\io\compress\lz4\lz4.c">
5252
<Filter>Source Files</Filter>
5353
</ClCompile>
54+
<ClCompile Include="src\org\apache\hadoop\io\compress\lz4\lz4hc.c">
55+
<Filter>Source Files</Filter>
56+
</ClCompile>
5457
<ClCompile Include="src\org\apache\hadoop\io\compress\lz4\Lz4Compressor.c">
5558
<Filter>Source Files</Filter>
5659
</ClCompile>

hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/lz4/Lz4Compressor.c

Lines changed: 43 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -23,21 +23,9 @@
2323
#ifdef UNIX
2424
#include "config.h"
2525
#endif // UNIX
26+
#include "lz4.h"
27+
#include "lz4hc.h"
2628

27-
//****************************
28-
// Simple Functions
29-
//****************************
30-
31-
extern int LZ4_compress (const char* source, char* dest, int isize);
32-
33-
/*
34-
LZ4_compress() :
35-
return : the number of bytes in compressed buffer dest
36-
note : destination buffer must be already allocated.
37-
To avoid any problem, size it to handle worst cases situations (input data not compressible)
38-
Worst case size is : "inputsize + 0.4%", with "0.4%" being at least 8 bytes.
39-
40-
*/
4129

4230
static jfieldID Lz4Compressor_clazz;
4331
static jfieldID Lz4Compressor_uncompressedDirectBuf;
@@ -107,5 +95,45 @@ JNIEXPORT jstring JNICALL
10795
Java_org_apache_hadoop_io_compress_lz4_Lz4Compressor_getLibraryName(
10896
JNIEnv *env, jclass class
10997
) {
110-
return (*env)->NewStringUTF(env, "revision:43");
98+
return (*env)->NewStringUTF(env, "revision:99");
99+
}
100+
101+
JNIEXPORT jint JNICALL Java_org_apache_hadoop_io_compress_lz4_Lz4Compressor_compressBytesDirectHC
102+
(JNIEnv *env, jobject thisj){
103+
const char* uncompressed_bytes = NULL;
104+
char* compressed_bytes = NULL;
105+
106+
// Get members of Lz4Compressor
107+
jobject clazz = (*env)->GetStaticObjectField(env, thisj, Lz4Compressor_clazz);
108+
jobject uncompressed_direct_buf = (*env)->GetObjectField(env, thisj, Lz4Compressor_uncompressedDirectBuf);
109+
jint uncompressed_direct_buf_len = (*env)->GetIntField(env, thisj, Lz4Compressor_uncompressedDirectBufLen);
110+
jobject compressed_direct_buf = (*env)->GetObjectField(env, thisj, Lz4Compressor_compressedDirectBuf);
111+
jint compressed_direct_buf_len = (*env)->GetIntField(env, thisj, Lz4Compressor_directBufferSize);
112+
113+
// Get the input direct buffer
114+
LOCK_CLASS(env, clazz, "Lz4Compressor");
115+
uncompressed_bytes = (const char*)(*env)->GetDirectBufferAddress(env, uncompressed_direct_buf);
116+
UNLOCK_CLASS(env, clazz, "Lz4Compressor");
117+
118+
if (uncompressed_bytes == 0) {
119+
return (jint)0;
120+
}
121+
122+
// Get the output direct buffer
123+
LOCK_CLASS(env, clazz, "Lz4Compressor");
124+
compressed_bytes = (char *)(*env)->GetDirectBufferAddress(env, compressed_direct_buf);
125+
UNLOCK_CLASS(env, clazz, "Lz4Compressor");
126+
127+
if (compressed_bytes == 0) {
128+
return (jint)0;
129+
}
130+
131+
compressed_direct_buf_len = LZ4_compressHC(uncompressed_bytes, compressed_bytes, uncompressed_direct_buf_len);
132+
if (compressed_direct_buf_len < 0){
133+
THROW(env, "java/lang/InternalError", "LZ4_compressHC failed");
134+
}
135+
136+
(*env)->SetIntField(env, thisj, Lz4Compressor_uncompressedDirectBufLen, 0);
137+
138+
return (jint)compressed_direct_buf_len;
111139
}

0 commit comments

Comments
 (0)