Skip to content

Commit ebfce87

Browse files
committed
HSearch+ES/Wikipedia demo: introduce Hibernate Search (embedded Lucene)
1 parent c8116f0 commit ebfce87

File tree

7 files changed

+157
-12
lines changed

7 files changed

+157
-12
lines changed

hibernate-search/hsearch-elasticsearch-wikipedia/pom.xml

+6
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
<apt.version>1.1.3</apt.version>
2727
<!-- Override the version of Hibernate ORM in Spring Boot (5.1 or something as I'm writing this) -->
2828
<hibernate.version>5.2.10.Final</hibernate.version>
29+
<hibernate.search.version>5.8.0.Beta1</hibernate.search.version>
2930
</properties>
3031

3132
<dependencies>
@@ -59,6 +60,11 @@
5960
<groupId>com.zaxxer</groupId>
6061
<artifactId>HikariCP</artifactId>
6162
</dependency>
63+
<dependency>
64+
<groupId>org.hibernate</groupId>
65+
<artifactId>hibernate-search-orm</artifactId>
66+
<version>${hibernate.search.version}</version>
67+
</dependency>
6268
</dependencies>
6369

6470
<build>

hibernate-search/hsearch-elasticsearch-wikipedia/src/main/java/org/hibernate/search/demos/wikipedia/data/Page.java

+47
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,53 @@
99
import javax.persistence.ManyToOne;
1010
import javax.persistence.SequenceGenerator;
1111

12+
import org.apache.lucene.analysis.charfilter.HTMLStripCharFilterFactory;
13+
import org.apache.lucene.analysis.core.LowerCaseFilterFactory;
14+
import org.apache.lucene.analysis.core.StopFilterFactory;
15+
import org.apache.lucene.analysis.core.WhitespaceTokenizerFactory;
16+
import org.apache.lucene.analysis.en.PorterStemFilterFactory;
17+
import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilterFactory;
1218
import org.hibernate.annotations.Type;
19+
import org.hibernate.search.annotations.Analyzer;
20+
import org.hibernate.search.annotations.AnalyzerDef;
21+
import org.hibernate.search.annotations.CharFilterDef;
22+
import org.hibernate.search.annotations.Field;
23+
import org.hibernate.search.annotations.Indexed;
24+
import org.hibernate.search.annotations.IndexedEmbedded;
25+
import org.hibernate.search.annotations.TokenFilterDef;
26+
import org.hibernate.search.annotations.TokenizerDef;
1327

1428
@Entity
29+
@Indexed(index = "page")
30+
/*
31+
* Note: analyzer definitions are globally available,
32+
* you don't need to repeat them for each single entity.
33+
*/
34+
@AnalyzerDef(
35+
name = "cleaned_text",
36+
charFilters = {
37+
@CharFilterDef(
38+
factory = HTMLStripCharFilterFactory.class
39+
)
40+
},
41+
tokenizer = @TokenizerDef(
42+
factory = WhitespaceTokenizerFactory.class
43+
),
44+
filters = {
45+
@TokenFilterDef(
46+
factory = ASCIIFoldingFilterFactory.class
47+
),
48+
@TokenFilterDef(
49+
factory = LowerCaseFilterFactory.class
50+
),
51+
@TokenFilterDef(
52+
factory = StopFilterFactory.class
53+
),
54+
@TokenFilterDef(
55+
factory = PorterStemFilterFactory.class
56+
)
57+
}
58+
)
1559
public class Page {
1660

1761
@Id
@@ -20,13 +64,16 @@ public class Page {
2064
private Long id;
2165

2266
@Basic(optional = false)
67+
@Field(analyzer = @Analyzer(definition = "cleaned_text"))
2368
private String title;
2469

2570
@Basic(optional = false)
2671
@Type(type = "text")
72+
@Field(analyzer = @Analyzer(definition = "cleaned_text"))
2773
private String content;
2874

2975
@ManyToOne
76+
@IndexedEmbedded
3077
private User lastContributor;
3178

3279
public Long getId() {

hibernate-search/hsearch-elasticsearch-wikipedia/src/main/java/org/hibernate/search/demos/wikipedia/data/User.java

+26
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,33 @@
88
import javax.persistence.SequenceGenerator;
99
import javax.persistence.Table;
1010

11+
import org.apache.lucene.analysis.core.KeywordTokenizerFactory;
12+
import org.apache.lucene.analysis.core.LowerCaseFilterFactory;
13+
import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilterFactory;
14+
import org.hibernate.search.annotations.Analyzer;
15+
import org.hibernate.search.annotations.AnalyzerDef;
16+
import org.hibernate.search.annotations.Field;
17+
import org.hibernate.search.annotations.Indexed;
18+
import org.hibernate.search.annotations.TokenFilterDef;
19+
import org.hibernate.search.annotations.TokenizerDef;
20+
1121
@Entity
1222
@Table(name = "user_") // "user" is an SQL keyword
23+
@Indexed(index = "user")
24+
@AnalyzerDef(
25+
name = "cleaned_keyword",
26+
tokenizer = @TokenizerDef(
27+
factory = KeywordTokenizerFactory.class
28+
),
29+
filters = {
30+
@TokenFilterDef(
31+
factory = ASCIIFoldingFilterFactory.class
32+
),
33+
@TokenFilterDef(
34+
factory = LowerCaseFilterFactory.class
35+
)
36+
}
37+
)
1338
public class User {
1439

1540
@Id
@@ -18,6 +43,7 @@ public class User {
1843
private Long id;
1944

2045
@Basic(optional = false)
46+
@Field(analyzer = @Analyzer(definition = "cleaned_keyword"))
2147
private String username;
2248

2349
public Long getId() {

hibernate-search/hsearch-elasticsearch-wikipedia/src/main/java/org/hibernate/search/demos/wikipedia/data/dao/hibernate/HibernatePageDaoImpl.java

+25-11
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,16 @@
11
package org.hibernate.search.demos.wikipedia.data.dao.hibernate;
22

3+
import org.apache.lucene.search.Query;
4+
import org.apache.lucene.search.Sort;
35
import org.hibernate.search.demos.wikipedia.data.Page;
4-
import org.hibernate.search.demos.wikipedia.data.QPage;
56
import org.hibernate.search.demos.wikipedia.data.dao.PageDao;
67
import org.hibernate.search.demos.wikipedia.util.SearchResult;
8+
import org.hibernate.search.jpa.FullTextEntityManager;
9+
import org.hibernate.search.jpa.FullTextQuery;
10+
import org.hibernate.search.jpa.Search;
11+
import org.hibernate.search.query.dsl.QueryBuilder;
712
import org.springframework.stereotype.Repository;
813

9-
import com.querydsl.jpa.impl.JPAQuery;
10-
1114

1215
@Repository
1316
public class HibernatePageDaoImpl extends AbstractHibernateDao implements PageDao {
@@ -33,15 +36,26 @@ public Page getById(Long id) {
3336
}
3437

3538
/**
 * Searches pages matching the given term and returns one window of results
 * together with the total hit count.
 *
 * This diff replaces the previous Querydsl implementation (lines marked "-")
 * with a Hibernate Search full-text query (lines marked "+").
 *
 * @param term the text to look for in the "title" and "content" fields
 * @param offset index of the first result to return
 * @param limit maximum number of results to return
 * @return the total result size and the requested window of pages
 */
@Override
39+
// NOTE(review): presumably needed because getResultList() returns an untyped List — confirm.
@SuppressWarnings("unchecked")
3640
public SearchResult<Page> search(String term, int offset, int limit) {
37-
// Removed implementation: case-insensitive LIKE '%term%' on title or content.
JPAQuery<Page> query = query()
38-
.select( QPage.page )
39-
.from( QPage.page )
40-
.where( QPage.page.title.likeIgnoreCase( "%" + term + "%" )
41-
.or( QPage.page.content.likeIgnoreCase( "%" + term + "%" ) ) )
42-
.offset( offset )
43-
.limit( limit );
44-
return new SearchResult<>( query.fetchCount(), query.fetch() );
41+
// Added implementation: wrap the entity manager to get access to full-text features.
FullTextEntityManager fullTextEm = Search.getFullTextEntityManager( getEm() );
42+
QueryBuilder queryBuilder = fullTextEm.getSearchFactory().buildQueryBuilder()
43+
.forEntity( Page.class ).get();
44+
45+
// Keyword query over both fields; matches on "title" are boosted by a factor of 2.
Query luceneQuery = queryBuilder.keyword()
46+
.onField( "title" ).boostedTo( 2.0f )
47+
.andField( "content" )
48+
.matching( term )
49+
.createQuery();
50+
51+
// Order results by score.
Sort scoreSort = queryBuilder.sort().byScore().createSort();
52+
53+
// Apply pagination and sorting to the full-text query.
FullTextQuery query = fullTextEm.createFullTextQuery( luceneQuery, Page.class )
54+
.setFirstResult( offset )
55+
.setMaxResults( limit )
56+
.setSort( scoreSort );
57+
58+
return new SearchResult<>( query.getResultSize(), query.getResultList() );
4559
}
4660

4761
}
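
hibernate-search/hsearch-elasticsearch-wikipedia/src/main/java/org/hibernate/search/demos/wikipedia/endpoint/AdminEndpoint.java

+40
Original file line numberDiff line numberDiff line change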
@@ -0,0 +1,40 @@
1+
package org.hibernate.search.demos.wikipedia.endpoint;
2+
3+
import javax.persistence.EntityManager;
4+
import javax.persistence.PersistenceContext;
5+
import javax.ws.rs.POST;
6+
import javax.ws.rs.Path;
7+
import javax.ws.rs.core.Response;
8+
9+
import org.hibernate.CacheMode;
10+
import org.hibernate.search.demos.wikipedia.data.Page;
11+
import org.hibernate.search.demos.wikipedia.data.User;
12+
import org.hibernate.search.jpa.FullTextEntityManager;
13+
import org.hibernate.search.jpa.Search;
14+
import org.springframework.stereotype.Service;
15+
import org.springframework.transaction.annotation.Transactional;
16+
17+
/**
 * JAX-RS resource mounted at "/admin", declared as a Spring service.
 * Methods run in a read-only transaction by default.
 */
@Service
18+
@Path("/admin")
19+
@Transactional(readOnly = true)
20+
public class AdminEndpoint {
21+
22+
@PersistenceContext
23+
private EntityManager em;
24+
25+
/**
 * Handles POST "/admin/reindex": configures a mass indexer for the
 * Page and User entities, starts it, and responds with 202 Accepted.
 *
 * NOTE(review): start() presumably returns without waiting for indexing
 * to complete, which would explain the 202 status — confirm against the
 * MassIndexer documentation.
 *
 * @return a response with status 202 (Accepted) and no body
 */
@POST
26+
@Path("/reindex")
27+
public Response reindex() {
28+
FullTextEntityManager fullTextEm = Search.getFullTextEntityManager( em );
29+
fullTextEm.createIndexer( Page.class, User.class )
30+
.purgeAllOnStart( true )
31+
.typesToIndexInParallel( 2 )
32+
.batchSizeToLoadObjects( 25 )
33+
.idFetchSize( 150 )
34+
.threadsToLoadObjects( 10 )
35+
.cacheMode( CacheMode.IGNORE ) // Cache is likely to do more harm than good in our case (very few relations)
36+
.start();
37+
return Response.accepted().build();
38+
}
39+
40+
}

hibernate-search/hsearch-elasticsearch-wikipedia/src/main/java/org/hibernate/search/demos/wikipedia/endpoint/config/JerseyConfig.java

+2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package org.hibernate.search.demos.wikipedia.endpoint.config;
22

33
import org.glassfish.jersey.server.ResourceConfig;
4+
import org.hibernate.search.demos.wikipedia.endpoint.AdminEndpoint;
45
import org.hibernate.search.demos.wikipedia.endpoint.PageEndpoint;
56
import org.springframework.stereotype.Component;
67

@@ -13,5 +14,6 @@ public JerseyConfig() {
1314

1415
// Registers the endpoint classes with this Jersey configuration;
// this commit adds AdminEndpoint next to the existing PageEndpoint.
private void registerEndpoints() {
1516
register( PageEndpoint.class );
17+
register( AdminEndpoint.class );
1618
}
1719
}

hibernate-search/hsearch-elasticsearch-wikipedia/src/main/resources/application.yaml

+11-1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@
22
# See https://docs.spring.io/spring-boot/docs/current/reference/html/common-application-properties.html
33
# for a list of available properties
44

5+
app:
6+
data.path: /home/${user}/tmp/hsearch-elasticsearch-wikipedia
7+
58
spring.datasource:
69
type: com.zaxxer.hikari.HikariDataSource
710
url: jdbc:postgresql://localhost:5432/hsearch_es_wikipedia
@@ -13,11 +16,18 @@ spring.datasource:
1316
spring.jpa.properties:
1417
hibernate:
1518
dialect: org.hibernate.dialect.PostgreSQL95Dialect
19+
hibernate.search:
20+
lucene_version: LUCENE_5_5_4
21+
default:
22+
directory_provider: filesystem
23+
indexBase: ${app.data.path}/lucene/
1624

1725
spring.jackson:
1826
serialization:
1927
INDENT_OUTPUT: true
2028
default-property-inclusion: NON_NULL
2129

2230
logging.level:
23-
org.hibernate.SQL: DEBUG
31+
org.hibernate.SQL: DEBUG
32+
org.hibernate.search.batchindexing: INFO
33+
org.hibernate.search.fulltext_query: DEBUG

0 commit comments

Comments
 (0)