Skip to content

Commit 0544383

Browse files
committed
HADOOP-6346. Add support for specifying unpack pattern regex to RunJar.unJar. Contributed by Todd Lipcon.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@889018 13f79535-47bb-0310-9956-ffa450edef68
1 parent 1654c0d commit 0544383

File tree

5 files changed

+243
-36
lines changed

5 files changed

+243
-36
lines changed

CHANGES.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,9 @@ Trunk (unreleased changes)
4343

4444
HADOOP-6400. Log errors getting Unix UGI. (Todd Lipcon via tomwhite)
4545

46+
HADOOP-6346. Add support for specifying unpack pattern regex to
47+
RunJar.unJar. (Todd Lipcon via tomwhite)
48+
4649
OPTIMIZATIONS
4750

4851
BUG FIXES

src/java/org/apache/hadoop/conf/Configuration.java

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
import java.util.concurrent.CopyOnWriteArrayList;
4848
import java.util.regex.Matcher;
4949
import java.util.regex.Pattern;
50+
import java.util.regex.PatternSyntaxException;
5051

5152
import javax.xml.parsers.DocumentBuilder;
5253
import javax.xml.parsers.DocumentBuilderFactory;
@@ -831,6 +832,45 @@ public <T extends Enum<T>> T getEnum(String name, T defaultValue) {
831832
: Enum.valueOf(defaultValue.getDeclaringClass(), val);
832833
}
833834

835+
/**
836+
* Get the value of the <code>name</code> property as a <code>Pattern</code>.
837+
* If no such property is specified, or if the specified value is not a valid
838+
* <code>Pattern</code>, then <code>defaultValue</code> is returned.
839+
*
840+
* @param name property name
841+
* @param defaultValue default value
842+
* @return property value as a compiled Pattern, or defaultValue
843+
*/
844+
public Pattern getPattern(String name, Pattern defaultValue) {
845+
String valString = get(name);
846+
if (null == valString || "".equals(valString)) {
847+
return defaultValue;
848+
}
849+
try {
850+
return Pattern.compile(valString);
851+
} catch (PatternSyntaxException pse) {
852+
LOG.warn("Regular expression '" + valString + "' for property '" +
853+
name + "' not valid. Using default", pse);
854+
return defaultValue;
855+
}
856+
}
857+
858+
/**
859+
* Set the given property to <code>Pattern</code>.
860+
* If the pattern is passed as null, the property value is set to null; a
861+
* null or empty value makes further calls to getPattern(...) return the default value.
862+
*
863+
* @param name property name
864+
* @param pattern new value
865+
*/
866+
public void setPattern(String name, Pattern pattern) {
867+
if (null == pattern) {
868+
set(name, null);
869+
} else {
870+
set(name, pattern.pattern());
871+
}
872+
}
873+
834874
/**
835875
* A class that represents a set of positive integer ranges. It parses
836876
* strings of the form: "2-3,5,7-" where ranges are separated by comma and

src/java/org/apache/hadoop/util/RunJar.java

Lines changed: 63 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -18,43 +18,66 @@
1818

1919
package org.apache.hadoop.util;
2020

21-
import java.util.jar.*;
22-
import java.lang.reflect.*;
21+
import java.lang.reflect.Array;
22+
import java.lang.reflect.Method;
23+
import java.lang.reflect.InvocationTargetException;
2324
import java.net.URL;
2425
import java.net.URLClassLoader;
25-
import java.io.*;
26-
import java.util.*;
27-
26+
import java.io.FileOutputStream;
27+
import java.io.IOException;
28+
import java.io.InputStream;
29+
import java.io.OutputStream;
30+
import java.io.File;
31+
import java.util.regex.Pattern;
32+
import java.util.Arrays;
33+
import java.util.ArrayList;
34+
import java.util.Enumeration;
35+
import java.util.jar.JarFile;
36+
import java.util.jar.JarEntry;
37+
import java.util.jar.Manifest;
2838
import org.apache.hadoop.conf.Configuration;
2939
import org.apache.hadoop.fs.FileUtil;
40+
import org.apache.hadoop.io.IOUtils;
3041

3142
/** Run a Hadoop job jar. */
3243
public class RunJar {
3344

34-
/** Unpack a jar file into a directory. */
45+
/** Pattern that matches any string */
46+
public static final Pattern MATCH_ANY = Pattern.compile(".*");
47+
48+
/**
49+
* Unpack a jar file into a directory.
50+
*
51+
* This version unpacks all files inside the jar regardless of filename.
52+
*/
3553
public static void unJar(File jarFile, File toDir) throws IOException {
54+
unJar(jarFile, toDir, MATCH_ANY);
55+
}
56+
57+
/**
58+
* Unpack matching files from a jar. Entries inside the jar that do
59+
* not match the given pattern will be skipped.
60+
*
61+
* @param jarFile the .jar file to unpack
62+
* @param toDir the destination directory into which to unpack the jar
63+
* @param unpackRegex the pattern to match jar entries against
64+
*/
65+
public static void unJar(File jarFile, File toDir, Pattern unpackRegex)
66+
throws IOException {
3667
JarFile jar = new JarFile(jarFile);
3768
try {
38-
Enumeration entries = jar.entries();
69+
Enumeration<JarEntry> entries = jar.entries();
3970
while (entries.hasMoreElements()) {
4071
JarEntry entry = (JarEntry)entries.nextElement();
41-
if (!entry.isDirectory()) {
72+
if (!entry.isDirectory() &&
73+
unpackRegex.matcher(entry.getName()).matches()) {
4274
InputStream in = jar.getInputStream(entry);
4375
try {
4476
File file = new File(toDir, entry.getName());
45-
if (!file.getParentFile().mkdirs()) {
46-
if (!file.getParentFile().isDirectory()) {
47-
throw new IOException("Mkdirs failed to create " +
48-
file.getParentFile().toString());
49-
}
50-
}
77+
ensureDirectory(file.getParentFile());
5178
OutputStream out = new FileOutputStream(file);
5279
try {
53-
byte[] buffer = new byte[8192];
54-
int i;
55-
while ((i = in.read(buffer)) != -1) {
56-
out.write(buffer, 0, i);
57-
}
80+
IOUtils.copyBytes(in, out, 8192);
5881
} finally {
5982
out.close();
6083
}
@@ -68,6 +91,18 @@ public static void unJar(File jarFile, File toDir) throws IOException {
6891
}
6992
}
7093

94+
/**
95+
* Ensure the existence of a given directory.
96+
*
97+
* @throws IOException if it cannot be created and does not already exist
98+
*/
99+
private static void ensureDirectory(File dir) throws IOException {
100+
if (!dir.mkdirs() && !dir.isDirectory()) {
101+
throw new IOException("Mkdirs failed to create " +
102+
dir.toString());
103+
}
104+
}
105+
71106
/** Run a Hadoop job jar. If the main class is not in the jar's manifest,
72107
* then it must be provided on the command line. */
73108
public static void main(String[] args) throws Throwable {
@@ -107,22 +142,14 @@ public static void main(String[] args) throws Throwable {
107142
mainClassName = mainClassName.replaceAll("/", ".");
108143

109144
File tmpDir = new File(new Configuration().get("hadoop.tmp.dir"));
110-
boolean b = tmpDir.mkdirs();
111-
if (!b && !tmpDir.isDirectory()) {
112-
System.err.println("Mkdirs failed to create " + tmpDir);
113-
System.exit(-1);
114-
}
145+
ensureDirectory(tmpDir);
146+
115147
final File workDir = File.createTempFile("hadoop-unjar", "", tmpDir);
116-
b = workDir.delete();
117-
if (!b) {
148+
if (!workDir.delete()) {
118149
System.err.println("Delete failed for " + workDir);
119150
System.exit(-1);
120151
}
121-
b = workDir.mkdirs();
122-
if (!b && !workDir.isDirectory()) {
123-
System.err.println("Mkdirs failed to create " + workDir);
124-
System.exit(-1);
125-
}
152+
ensureDirectory(workDir);
126153

127154
Runtime.getRuntime().addShutdownHook(new Thread() {
128155
public void run() {
@@ -134,15 +161,15 @@ public void run() {
134161
});
135162

136163
unJar(file, workDir);
137-
164+
138165
ArrayList<URL> classPath = new ArrayList<URL>();
139-
classPath.add(new File(workDir+"/").toURL());
140-
classPath.add(file.toURL());
141-
classPath.add(new File(workDir, "classes/").toURL());
166+
classPath.add(new File(workDir+"/").toURI().toURL());
167+
classPath.add(file.toURI().toURL());
168+
classPath.add(new File(workDir, "classes/").toURI().toURL());
142169
File[] libs = new File(workDir, "lib").listFiles();
143170
if (libs != null) {
144171
for (int i = 0; i < libs.length; i++) {
145-
classPath.add(libs[i].toURL());
172+
classPath.add(libs[i].toURI().toURL());
146173
}
147174
}
148175

src/test/core/org/apache/hadoop/conf/TestConfiguration.java

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import java.util.ArrayList;
2626
import java.util.HashMap;
2727
import java.util.Random;
28+
import java.util.regex.Pattern;
2829

2930
import junit.framework.TestCase;
3031

@@ -364,6 +365,33 @@ public void testEnum() throws IOException {
364365
assertTrue(fail);
365366
}
366367

368+
public void testPattern() throws IOException {
369+
out = new BufferedWriter(new FileWriter(CONFIG));
370+
startConfig();
371+
appendProperty("test.pattern1", "");
372+
appendProperty("test.pattern2", "(");
373+
appendProperty("test.pattern3", "a+b");
374+
endConfig();
375+
Path fileResource = new Path(CONFIG);
376+
conf.addResource(fileResource);
377+
378+
Pattern defaultPattern = Pattern.compile("x+");
379+
// Return default if missing
380+
assertEquals(defaultPattern.pattern(),
381+
conf.getPattern("xxxxx", defaultPattern).pattern());
382+
// Return null if empty and default is null
383+
assertNull(conf.getPattern("test.pattern1", null));
384+
// Return default for empty
385+
assertEquals(defaultPattern.pattern(),
386+
conf.getPattern("test.pattern1", defaultPattern).pattern());
387+
// Return default for malformed
388+
assertEquals(defaultPattern.pattern(),
389+
conf.getPattern("test.pattern2", defaultPattern).pattern());
390+
// Works for correct patterns
391+
assertEquals("a+b",
392+
conf.getPattern("test.pattern3", defaultPattern).pattern());
393+
}
394+
367395
public void testReload() throws IOException {
368396
out=new BufferedWriter(new FileWriter(CONFIG));
369397
startConfig();
Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
/**
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
package org.apache.hadoop.util;
19+
20+
import junit.framework.TestCase;
21+
import java.io.File;
22+
import java.io.FileOutputStream;
23+
import java.io.IOException;
24+
import java.util.jar.JarOutputStream;
25+
import java.util.regex.Pattern;
26+
import java.util.zip.ZipEntry;
27+
28+
import org.junit.After;
29+
import org.junit.Before;
30+
import org.junit.Test;
31+
import org.apache.hadoop.fs.FileUtil;
32+
33+
public class TestRunJar extends TestCase {
34+
private File TEST_ROOT_DIR;
35+
36+
private static final String TEST_JAR_NAME="test-runjar.jar";
37+
38+
@Before
39+
protected void setUp()
40+
throws Exception {
41+
TEST_ROOT_DIR =
42+
new File(System.getProperty("test.build.data", "/tmp"), getClass()
43+
.getSimpleName());
44+
if (!TEST_ROOT_DIR.exists()) {
45+
TEST_ROOT_DIR.mkdirs();
46+
}
47+
48+
makeTestJar();
49+
}
50+
51+
@After
52+
protected void tearDown()
53+
throws Exception {
54+
FileUtil.fullyDelete(TEST_ROOT_DIR);
55+
}
56+
57+
/**
58+
* Construct a jar with two files in it in our
59+
* test dir.
60+
*/
61+
private void makeTestJar() throws IOException {
62+
File jarFile = new File(TEST_ROOT_DIR, TEST_JAR_NAME);
63+
JarOutputStream jstream =
64+
new JarOutputStream(new FileOutputStream(jarFile));
65+
jstream.putNextEntry(new ZipEntry("foobar.txt"));
66+
jstream.closeEntry();
67+
jstream.putNextEntry(new ZipEntry("foobaz.txt"));
68+
jstream.closeEntry();
69+
jstream.close();
70+
}
71+
72+
/**
73+
* Test default unjarring behavior - unpack everything
74+
*/
75+
@Test
76+
public void testUnJar() throws Exception {
77+
File unjarDir = new File(TEST_ROOT_DIR, "unjar-all");
78+
assertFalse("unjar dir shouldn't exist at test start",
79+
new File(unjarDir, "foobar.txt").exists());
80+
81+
// Unjar everything
82+
RunJar.unJar(new File(TEST_ROOT_DIR, TEST_JAR_NAME),
83+
unjarDir);
84+
assertTrue("foobar unpacked",
85+
new File(unjarDir, "foobar.txt").exists());
86+
assertTrue("foobaz unpacked",
87+
new File(unjarDir, "foobaz.txt").exists());
88+
89+
}
90+
91+
/**
92+
* Test unjarring only the entries whose names match a specific regex
93+
*/
94+
public void testUnJarWithPattern() throws Exception {
95+
File unjarDir = new File(TEST_ROOT_DIR, "unjar-pattern");
96+
assertFalse("unjar dir shouldn't exist at test start",
97+
new File(unjarDir, "foobar.txt").exists());
98+
99+
// Unjar only a regex
100+
RunJar.unJar(new File(TEST_ROOT_DIR, TEST_JAR_NAME),
101+
unjarDir,
102+
Pattern.compile(".*baz.*"));
103+
assertFalse("foobar not unpacked",
104+
new File(unjarDir, "foobar.txt").exists());
105+
assertTrue("foobaz unpacked",
106+
new File(unjarDir, "foobaz.txt").exists());
107+
108+
}
109+
}

0 commit comments

Comments
 (0)