16
16
import org .apache .tika .parser .AutoDetectParser ;
17
17
import org .apache .tika .parser .Parser ;
18
18
import org .apache .tika .parser .ParserDecorator ;
19
- import org .elasticsearch .SpecialPermission ;
20
- import org .elasticsearch .bootstrap .FilePermissionUtils ;
21
- import org .elasticsearch .core .PathUtils ;
22
- import org .elasticsearch .core .SuppressForbidden ;
23
- import org .elasticsearch .jdk .JarHell ;
24
19
25
20
import java .io .ByteArrayInputStream ;
26
21
import java .io .IOException ;
27
- import java .io .UncheckedIOException ;
28
- import java .lang .reflect .ReflectPermission ;
29
- import java .net .URISyntaxException ;
30
- import java .net .URL ;
31
- import java .net .URLClassLoader ;
32
- import java .nio .file .Files ;
33
- import java .nio .file .Path ;
34
- import java .security .AccessControlContext ;
35
- import java .security .AccessController ;
36
- import java .security .PermissionCollection ;
37
- import java .security .Permissions ;
38
- import java .security .PrivilegedActionException ;
39
- import java .security .PrivilegedExceptionAction ;
40
- import java .security .ProtectionDomain ;
41
- import java .security .SecurityPermission ;
42
22
import java .util .Arrays ;
43
23
import java .util .HashSet ;
44
- import java .util .LinkedHashSet ;
45
- import java .util .PropertyPermission ;
46
24
import java .util .Set ;
47
25
48
26
/**
@@ -90,24 +68,8 @@ final class TikaImpl {
90
68
* parses with tika, throwing any exception hit while parsing the document
91
69
*/
92
70
static String parse (final byte content [], final Metadata metadata , final int limit ) throws TikaException , IOException {
93
- // check that it's not unprivileged code like a script
94
- SpecialPermission .check ();
95
-
96
71
try {
97
- return AccessController .doPrivileged (
98
- (PrivilegedExceptionAction <String >) () -> TIKA_INSTANCE .parseToString (new ByteArrayInputStream (content ), metadata , limit ),
99
- RESTRICTED_CONTEXT
100
- );
101
- } catch (PrivilegedActionException e ) {
102
- // checked exception from tika: unbox it
103
- Throwable cause = e .getCause ();
104
- if (cause instanceof TikaException tikaException ) {
105
- throw tikaException ;
106
- } else if (cause instanceof IOException ioException ) {
107
- throw ioException ;
108
- } else {
109
- throw new AssertionError (cause );
110
- }
72
+ return TIKA_INSTANCE .parseToString (new ByteArrayInputStream (content ), metadata , limit );
111
73
} catch (LinkageError e ) {
112
74
if (e .getMessage ().contains ("bouncycastle" )) {
113
75
/*
@@ -119,76 +81,4 @@ static String parse(final byte content[], final Metadata metadata, final int lim
119
81
throw new RuntimeException (e );
120
82
}
121
83
}
122
-
123
- // apply additional containment for parsers, this is intersected with the current permissions
124
- // it's hairy, but worth it so we don't have some XML flaw reading random crap from the FS
125
- private static final AccessControlContext RESTRICTED_CONTEXT = isUsingSecurityManager ()
126
- ? new AccessControlContext (new ProtectionDomain [] { new ProtectionDomain (null , getRestrictedPermissions ()) })
127
- : null ;
128
-
129
- private static boolean isUsingSecurityManager () {
130
- return false ;
131
- }
132
-
133
- // compute some minimal permissions for parsers. they only get r/w access to the java temp directory,
134
- // the ability to load some resources from JARs, and read sysprops
135
- @ SuppressForbidden (reason = "adds access to tmp directory" )
136
- static PermissionCollection getRestrictedPermissions () {
137
- Permissions perms = new Permissions ();
138
-
139
- // property/env access needed for parsing
140
- perms .add (new PropertyPermission ("*" , "read" ));
141
- perms .add (new RuntimePermission ("getenv.TIKA_CONFIG" ));
142
-
143
- try {
144
- // add permissions for resource access:
145
- // classpath
146
- addReadPermissions (perms , JarHell .parseClassPath ());
147
- // plugin jars
148
- if (TikaImpl .class .getClassLoader () instanceof URLClassLoader urlClassLoader ) {
149
- URL [] urls = urlClassLoader .getURLs ();
150
- Set <URL > set = new LinkedHashSet <>(Arrays .asList (urls ));
151
- if (set .size () != urls .length ) {
152
- throw new AssertionError ("duplicate jars: " + Arrays .toString (urls ));
153
- }
154
- addReadPermissions (perms , set );
155
- }
156
- // jvm's java.io.tmpdir (needs read/write)
157
- FilePermissionUtils .addDirectoryPath (
158
- perms ,
159
- "java.io.tmpdir" ,
160
- PathUtils .get (System .getProperty ("java.io.tmpdir" )),
161
- "read,readlink,write,delete" ,
162
- false
163
- );
164
- } catch (IOException e ) {
165
- throw new UncheckedIOException (e );
166
- }
167
- // current hacks needed for POI/PDFbox issues:
168
- perms .add (new SecurityPermission ("putProviderProperty.BC" ));
169
- perms .add (new SecurityPermission ("insertProvider" ));
170
- perms .add (new ReflectPermission ("suppressAccessChecks" ));
171
- perms .add (new RuntimePermission ("accessClassInPackage.sun.java2d.cmm.kcms" ));
172
- // xmlbeans, used by POI, needs to get the context classloader
173
- perms .add (new RuntimePermission ("getClassLoader" ));
174
- perms .setReadOnly ();
175
- return perms ;
176
- }
177
-
178
- // add resources to (what is typically) a jar, but might not be (e.g. in tests/IDE)
179
- @ SuppressForbidden (reason = "adds access to jar resources" )
180
- static void addReadPermissions (Permissions perms , Set <URL > resources ) throws IOException {
181
- try {
182
- for (URL url : resources ) {
183
- Path path = PathUtils .get (url .toURI ());
184
- if (Files .isDirectory (path )) {
185
- FilePermissionUtils .addDirectoryPath (perms , "class.path" , path , "read,readlink" , false );
186
- } else {
187
- FilePermissionUtils .addSingleFilePath (perms , path , "read,readlink" );
188
- }
189
- }
190
- } catch (URISyntaxException bogus ) {
191
- throw new RuntimeException (bogus );
192
- }
193
- }
194
84
}
0 commit comments