Skip to content

Commit ad209c2

Browse files
authored
Merge pull request #46 from tokenmill/handle-auth-for-elasticsearch
Handle auth for elasticsearch
2 parents af2461d + 6548949 commit ad209c2

File tree

2 files changed

+38
-1
lines changed

2 files changed

+38
-1
lines changed

README.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,10 @@ Framework writes its configuration and stores crawled data to ElasticSearch. Bef
2828

2929
Crawling Framework is a Java library that must be extended in order to run a Storm Crawler topology, so Java infrastructure (JDK 8, Maven) is required.
3030

31+
### Using password protected ElasticSearch
32+
33+
Some providers put ElasticSearch behind an authentication step (which makes sense). Just set the environment variables `ES_USERNAME` and `ES_PASSWORD` accordingly; everything else can remain the same. Authentication is performed implicitly when both variables are set.
34+
3135
## Configuring and Running a crawl
3236

3337
See [Crawling Framework Example](https://github.com/tokenmill/crawling-framework-example) project's documentation.

elasticsearch/src/main/java/lt/tokenmill/crawling/es/ElasticConnection.java

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
package lt.tokenmill.crawling.es;
22

33
import org.apache.http.HttpHost;
4+
import org.apache.http.auth.AuthScope;
5+
import org.apache.http.auth.UsernamePasswordCredentials;
6+
import org.apache.http.client.CredentialsProvider;
7+
import org.apache.http.impl.client.BasicCredentialsProvider;
48
import org.elasticsearch.action.DocWriteRequest;
59
import org.elasticsearch.action.bulk.BulkItemResponse;
610
import org.elasticsearch.action.bulk.BulkProcessor;
@@ -38,6 +42,30 @@ private ElasticConnection(BulkProcessor processor, RestHighLevelClient restHighL
3842
this.restClientBuilder = restClient;
3943
}
4044

45+
private static class ESCredentials{
46+
private final String username;
47+
private final String password;
48+
private final CredentialsProvider credentialsProvider;
49+
public ESCredentials(){
50+
this.username = System.getenv("ES_USERNAME");
51+
this.password = System.getenv("ES_PASSWORD");
52+
this.credentialsProvider = new BasicCredentialsProvider();
53+
}
54+
55+
public boolean hasCredentials(){
56+
return this.username != null && this.password != null;
57+
}
58+
59+
public CredentialsProvider getCredentials(){
60+
this.credentialsProvider.setCredentials(
61+
AuthScope.ANY,
62+
new UsernamePasswordCredentials(this.username, this.password)
63+
);
64+
65+
return this.credentialsProvider;
66+
}
67+
}
68+
4169
public static Builder builder() {
4270
return new Builder();
4371
}
@@ -109,8 +137,13 @@ private static ElasticConnection getConnection(String hostname, int restPort, St
109137
System.setProperty("es.set.netty.runtime.available.processors", "false");
110138

111139
TimeValue flushInterval = TimeValue.parseTimeValue(flushIntervalString, TimeValue.timeValueSeconds(5), "flush");
112-
140+
ESCredentials credentials = new ESCredentials();
113141
RestClientBuilder restClient = RestClient.builder(new HttpHost(hostname, restPort, restScheme));
142+
if(credentials.hasCredentials()) {
143+
LOG.info("Found credentials. Applying");
144+
restClient.setHttpClientConfigCallback(b -> b.setDefaultCredentialsProvider(credentials.getCredentials()));
145+
}
146+
114147
RestHighLevelClient restHighLevelClient = new RestHighLevelClient(restClient);
115148

116149
BulkProcessor bulkProcessor = BulkProcessor.builder(restHighLevelClient::bulkAsync, listener)

0 commit comments

Comments
 (0)