Skip to content

Commit f87b52b

Browse files
authored
Add vision ocr for pdf/tiff (GoogleCloudPlatform#1078)
* Add vision ocr for pdf/tiff * Update samples with latest library * Move samples into beta directory * Update project pom * Update batch size to 2
1 parent c4262fc commit f87b52b

File tree

4 files changed

+217
-33
lines changed

4 files changed

+217
-33
lines changed

vision/beta/cloud-client/README.md

+6
Original file line numberDiff line numberDiff line change
@@ -88,3 +88,9 @@ mvn exec:java -DDetect -Dexec.args="web-entities-include-geo ./resources/landmar
8888
```
8989
mvn exec:java -DDetect -Dexec.args="crop ./resources/landmark.jpg"
9090
```
91+
92+
#### OCR
93+
```
94+
mvn exec:java -DDetect -Dexec.args="ocr gs://java-docs-samples-testing/vision/HodgeConj.pdf \
95+
gs://<BUCKET_ID>/"
96+
```

vision/beta/cloud-client/pom.xml

+6-1
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,12 @@
4040
<dependency>
4141
<groupId>com.google.cloud</groupId>
4242
<artifactId>google-cloud-vision</artifactId>
43-
<version>1.22.0</version>
43+
<version>1.24.1</version>
44+
</dependency>
45+
<dependency>
46+
<groupId>com.google.cloud</groupId>
47+
<artifactId>google-cloud-storage</artifactId>
48+
<version>1.24.1</version>
4449
</dependency>
4550
<!-- [END dependencies] -->
4651

vision/beta/cloud-client/src/main/java/com/example/vision/Detect.java

+178-32
Original file line numberDiff line numberDiff line change
@@ -16,44 +16,63 @@
1616

1717
package com.example.vision;
1818

19-
import com.google.cloud.vision.v1p1beta1.AnnotateImageRequest;
20-
import com.google.cloud.vision.v1p1beta1.AnnotateImageResponse;
21-
import com.google.cloud.vision.v1p1beta1.BatchAnnotateImagesResponse;
22-
import com.google.cloud.vision.v1p1beta1.Block;
23-
import com.google.cloud.vision.v1p1beta1.ColorInfo;
24-
import com.google.cloud.vision.v1p1beta1.CropHint;
25-
import com.google.cloud.vision.v1p1beta1.CropHintsAnnotation;
26-
import com.google.cloud.vision.v1p1beta1.DominantColorsAnnotation;
27-
import com.google.cloud.vision.v1p1beta1.EntityAnnotation;
28-
import com.google.cloud.vision.v1p1beta1.FaceAnnotation;
29-
import com.google.cloud.vision.v1p1beta1.Feature;
30-
import com.google.cloud.vision.v1p1beta1.Feature.Type;
31-
import com.google.cloud.vision.v1p1beta1.Image;
32-
import com.google.cloud.vision.v1p1beta1.ImageAnnotatorClient;
33-
import com.google.cloud.vision.v1p1beta1.ImageContext;
34-
import com.google.cloud.vision.v1p1beta1.ImageSource;
35-
import com.google.cloud.vision.v1p1beta1.LocationInfo;
36-
import com.google.cloud.vision.v1p1beta1.Page;
37-
import com.google.cloud.vision.v1p1beta1.Paragraph;
38-
import com.google.cloud.vision.v1p1beta1.SafeSearchAnnotation;
39-
import com.google.cloud.vision.v1p1beta1.Symbol;
40-
import com.google.cloud.vision.v1p1beta1.TextAnnotation;
41-
import com.google.cloud.vision.v1p1beta1.WebDetection;
42-
import com.google.cloud.vision.v1p1beta1.WebDetection.WebEntity;
43-
import com.google.cloud.vision.v1p1beta1.WebDetection.WebImage;
44-
import com.google.cloud.vision.v1p1beta1.WebDetection.WebLabel;
45-
import com.google.cloud.vision.v1p1beta1.WebDetection.WebPage;
46-
import com.google.cloud.vision.v1p1beta1.WebDetectionParams;
47-
import com.google.cloud.vision.v1p1beta1.Word;
48-
19+
import com.google.api.gax.longrunning.OperationFuture;
20+
import com.google.cloud.storage.Blob;
21+
import com.google.cloud.storage.Bucket;
22+
import com.google.cloud.storage.Storage;
23+
import com.google.cloud.storage.Storage.BlobListOption;
24+
import com.google.cloud.storage.StorageOptions;
25+
import com.google.cloud.vision.v1p2beta1.AnnotateFileResponse;
26+
import com.google.cloud.vision.v1p2beta1.AnnotateFileResponse.Builder;
27+
import com.google.cloud.vision.v1p2beta1.AnnotateImageRequest;
28+
import com.google.cloud.vision.v1p2beta1.AnnotateImageResponse;
29+
import com.google.cloud.vision.v1p2beta1.AsyncAnnotateFileRequest;
30+
import com.google.cloud.vision.v1p2beta1.AsyncAnnotateFileResponse;
31+
import com.google.cloud.vision.v1p2beta1.AsyncBatchAnnotateFilesResponse;
32+
import com.google.cloud.vision.v1p2beta1.BatchAnnotateImagesResponse;
33+
import com.google.cloud.vision.v1p2beta1.Block;
34+
import com.google.cloud.vision.v1p2beta1.ColorInfo;
35+
import com.google.cloud.vision.v1p2beta1.CropHint;
36+
import com.google.cloud.vision.v1p2beta1.CropHintsAnnotation;
37+
import com.google.cloud.vision.v1p2beta1.DominantColorsAnnotation;
38+
import com.google.cloud.vision.v1p2beta1.EntityAnnotation;
39+
import com.google.cloud.vision.v1p2beta1.FaceAnnotation;
40+
import com.google.cloud.vision.v1p2beta1.Feature;
41+
import com.google.cloud.vision.v1p2beta1.Feature.Type;
42+
import com.google.cloud.vision.v1p2beta1.GcsDestination;
43+
import com.google.cloud.vision.v1p2beta1.GcsSource;
44+
import com.google.cloud.vision.v1p2beta1.Image;
45+
import com.google.cloud.vision.v1p2beta1.ImageAnnotatorClient;
46+
import com.google.cloud.vision.v1p2beta1.ImageContext;
47+
import com.google.cloud.vision.v1p2beta1.ImageSource;
48+
import com.google.cloud.vision.v1p2beta1.InputConfig;
49+
import com.google.cloud.vision.v1p2beta1.LocationInfo;
50+
import com.google.cloud.vision.v1p2beta1.OperationMetadata;
51+
import com.google.cloud.vision.v1p2beta1.OutputConfig;
52+
import com.google.cloud.vision.v1p2beta1.Page;
53+
import com.google.cloud.vision.v1p2beta1.Paragraph;
54+
import com.google.cloud.vision.v1p2beta1.SafeSearchAnnotation;
55+
import com.google.cloud.vision.v1p2beta1.Symbol;
56+
import com.google.cloud.vision.v1p2beta1.TextAnnotation;
57+
import com.google.cloud.vision.v1p2beta1.WebDetection;
58+
import com.google.cloud.vision.v1p2beta1.WebDetection.WebEntity;
59+
import com.google.cloud.vision.v1p2beta1.WebDetection.WebImage;
60+
import com.google.cloud.vision.v1p2beta1.WebDetection.WebLabel;
61+
import com.google.cloud.vision.v1p2beta1.WebDetection.WebPage;
62+
import com.google.cloud.vision.v1p2beta1.WebDetectionParams;
63+
import com.google.cloud.vision.v1p2beta1.Word;
4964
import com.google.protobuf.ByteString;
65+
import com.google.protobuf.util.JsonFormat;
5066

5167
import java.io.FileInputStream;
5268
import java.io.IOException;
5369
import java.io.PrintStream;
5470
import java.util.ArrayList;
5571
import java.util.Arrays;
5672
import java.util.List;
73+
import java.util.concurrent.TimeUnit;
74+
import java.util.regex.Matcher;
75+
import java.util.regex.Pattern;
5776

5877
public class Detect {
5978

@@ -78,11 +97,16 @@ public static void argsHelper(String[] args, PrintStream out) throws Exception,
7897
out.println("Usage:");
7998
out.printf(
8099
"\tmvn exec:java -DDetect -Dexec.args=\"<command> <path-to-image>\"\n"
100+
+ "\tmvn exec:java -DDetect -Dexec.args=\"ocr <path-to-file> <path-to-destination>\""
101+
+ "\n"
81102
+ "Commands:\n"
82103
+ "\tfaces | labels | landmarks | logos | text | safe-search | properties"
83-
+ "| web | web-entities | web-entities-include-geo | crop \n"
104+
+ "| web | web-entities | web-entities-include-geo | crop | ocr \n"
84105
+ "Path:\n\tA file path (ex: ./resources/wakeupcat.jpg) or a URI for a Cloud Storage "
85-
+ "resource (gs://...)\n");
106+
+ "resource (gs://...)\n"
107+
+ "Path to File:\n\tA path to the remote file on Cloud Storage (gs://...)\n"
108+
+ "Path to Destination\n\tA path to the remote destination on Cloud Storage for the"
109+
+ " file to be saved. (gs://BUCKET_NAME/PREFIX/)\n");
86110
return;
87111
}
88112
String command = args[0];
@@ -162,6 +186,9 @@ public static void argsHelper(String[] args, PrintStream out) throws Exception,
162186
} else {
163187
detectDocumentText(path, out);
164188
}
189+
} else if (command.equals("ocr")) {
190+
String destPath = args.length > 2 ? args[2] : "";
191+
detectDocumentsGcs(path, destPath);
165192
}
166193
}
167194

@@ -1277,4 +1304,123 @@ public static void detectDocumentTextGcs(String gcsPath, PrintStream out) throws
12771304
}
12781305
}
12791306
// [END vision_detect_document_uri]
1307+
1308+
// [START vision_async_detect_document_ocr]
1309+
/**
1310+
* Performs document text OCR with PDF/TIFF as source files on Google Cloud Storage.
1311+
*
1312+
* @param gcsSourcePath The path to the remote file on Google Cloud Storage to detect document
1313+
* text on.
1314+
* @param gcsDestinationPath The path to the remote file on Google Cloud Storage to store the
1315+
* results on.
1316+
* @throws Exception on errors while closing the client.
1317+
*/
1318+
public static void detectDocumentsGcs(String gcsSourcePath, String gcsDestinationPath) throws
1319+
Exception {
1320+
try (ImageAnnotatorClient client = ImageAnnotatorClient.create()) {
1321+
List<AsyncAnnotateFileRequest> requests = new ArrayList<>();
1322+
1323+
// Set the GCS source path for the remote file.
1324+
GcsSource gcsSource = GcsSource.newBuilder()
1325+
.setUri(gcsSourcePath)
1326+
.build();
1327+
1328+
// Create the configuration with the specified MIME (Multipurpose Internet Mail Extensions)
1329+
// types
1330+
InputConfig inputConfig = InputConfig.newBuilder()
1331+
.setMimeType("application/pdf") // Supported MimeTypes: "application/pdf", "image/tiff"
1332+
.setGcsSource(gcsSource)
1333+
.build();
1334+
1335+
// Set the GCS destination path for where to save the results.
1336+
GcsDestination gcsDestination = GcsDestination.newBuilder()
1337+
.setUri(gcsDestinationPath)
1338+
.build();
1339+
1340+
// Create the configuration for the output with the batch size.
1341+
// The batch size sets how many pages should be grouped into each json output file.
1342+
OutputConfig outputConfig = OutputConfig.newBuilder()
1343+
.setBatchSize(2)
1344+
.setGcsDestination(gcsDestination)
1345+
.build();
1346+
1347+
// Select the Feature required by the vision API
1348+
Feature feature = Feature.newBuilder().setType(Feature.Type.DOCUMENT_TEXT_DETECTION).build();
1349+
1350+
// Build the OCR request
1351+
AsyncAnnotateFileRequest request = AsyncAnnotateFileRequest.newBuilder()
1352+
.addFeatures(feature)
1353+
.setInputConfig(inputConfig)
1354+
.setOutputConfig(outputConfig)
1355+
.build();
1356+
1357+
requests.add(request);
1358+
1359+
// Perform the OCR request
1360+
OperationFuture<AsyncBatchAnnotateFilesResponse, OperationMetadata> response =
1361+
client.asyncBatchAnnotateFilesAsync(requests);
1362+
1363+
System.out.println("Waiting for the operation to finish.");
1364+
1365+
// Wait for the request to finish. (The result is not used, since the API saves the result to
1366+
// the specified location on GCS.)
1367+
List<AsyncAnnotateFileResponse> result = response.get(180, TimeUnit.SECONDS)
1368+
.getResponsesList();
1369+
1370+
// Once the request has completed and the output has been
1371+
// written to GCS, we can list all the output files.
1372+
Storage storage = StorageOptions.getDefaultInstance().getService();
1373+
1374+
// Get the destination location from the gcsDestinationPath
1375+
Pattern pattern = Pattern.compile("gs://([^/]+)/(.+)");
1376+
Matcher matcher = pattern.matcher(gcsDestinationPath);
1377+
1378+
if (matcher.find()) {
1379+
String bucketName = matcher.group(1);
1380+
String prefix = matcher.group(2);
1381+
1382+
// Get the list of objects with the given prefix from the GCS bucket
1383+
Bucket bucket = storage.get(bucketName);
1384+
com.google.api.gax.paging.Page<Blob> pageList = bucket.list(BlobListOption.prefix(prefix));
1385+
1386+
Blob firstOutputFile = null;
1387+
1388+
// List objects with the given prefix.
1389+
System.out.println("Output files:");
1390+
for (Blob blob : pageList.iterateAll()) {
1391+
System.out.println(blob.getName());
1392+
1393+
// Process the first output file from GCS.
1394+
// Since we specified batch size = 2, the first response contains
1395+
// the first two pages of the input file.
1396+
if (firstOutputFile == null) {
1397+
firstOutputFile = blob;
1398+
}
1399+
}
1400+
1401+
// Get the contents of the file and convert the JSON contents to an AnnotateFileResponse
1402+
// object. If the Blob is small read all its content in one request
1403+
// (Note: the file is a .json file)
1404+
// Storage guide: https://cloud.google.com/storage/docs/downloading-objects
1405+
String jsonContents = new String(firstOutputFile.getContent());
1406+
Builder builder = AnnotateFileResponse.newBuilder();
1407+
JsonFormat.parser().merge(jsonContents, builder);
1408+
1409+
// Build the AnnotateFileResponse object
1410+
AnnotateFileResponse annotateFileResponse = builder.build();
1411+
1412+
// Parse through the object to get the actual response for the first page of the input file.
1413+
AnnotateImageResponse annotateImageResponse = annotateFileResponse.getResponses(0);
1414+
1415+
// Here we print the full text from the first page.
1416+
// The response contains more information:
1417+
// annotation/pages/blocks/paragraphs/words/symbols
1418+
// including confidence score and bounding boxes
1419+
System.out.format("\nText: %s\n", annotateImageResponse.getFullTextAnnotation().getText());
1420+
} else {
1421+
System.out.println("No MATCH");
1422+
}
1423+
}
1424+
}
1425+
// [END vision_async_detect_document_ocr]
12801426
}

vision/beta/cloud-client/src/test/java/com/example/vision/DetectIT.java

+27
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,11 @@
1818

1919
import static com.google.common.truth.Truth.assertThat;
2020

21+
import com.google.api.gax.paging.Page;
22+
import com.google.cloud.storage.Blob;
23+
import com.google.cloud.storage.Storage;
24+
import com.google.cloud.storage.Storage.BlobListOption;
25+
import com.google.cloud.storage.StorageOptions;
2126
import java.io.ByteArrayOutputStream;
2227
import java.io.IOException;
2328
import java.io.PrintStream;
@@ -37,6 +42,7 @@ public class DetectIT {
3742
private Detect app;
3843
private static final String PROJECT_ID = System.getenv("GOOGLE_CLOUD_PROJECT");
3944
private static final String BUCKET = PROJECT_ID;
45+
private static final String OUTPUT_PREFIX = "OCR_PDF_TEST_OUTPUT";
4046

4147
@Before
4248
public void setUp() throws IOException {
@@ -348,4 +354,25 @@ public void testDocumentTextGcs() throws Exception {
348354
assertThat(got).contains("37%");
349355
assertThat(got).contains("Word text: class (confidence:");
350356
}
357+
358+
@Test
359+
public void testDetectDocumentsGcs() throws Exception {
360+
// Act
361+
String[] args = {"ocr", "gs://" + BUCKET + "/vision/HodgeConj.pdf",
362+
"gs://" + BUCKET + "/" + OUTPUT_PREFIX + "/"};
363+
Detect.argsHelper(args, out);
364+
365+
// Assert
366+
String got = bout.toString();
367+
assertThat(got).contains("HODGE'S GENERAL CONJECTURE");
368+
369+
Storage storage = StorageOptions.getDefaultInstance().getService();
370+
371+
Page<Blob> blobs = storage.list(BUCKET, BlobListOption.currentDirectory(),
372+
BlobListOption.prefix(OUTPUT_PREFIX + "/"));
373+
374+
for (Blob blob : blobs.iterateAll()) {
375+
blob.delete();
376+
}
377+
}
351378
}

0 commit comments

Comments
 (0)