Add vision ocr for pdf/tiff (GoogleCloudPlatform#1078)

nnegrey · web-flow · commit f87b52b85aca · 2018-04-03T15:00:19.000-07:00
* Add vision ocr for pdf/tiff

* Update samples with latest library

* Move samples into beta directory

* Update project pom

* Update batch size to 2
diff --git a/vision/beta/cloud-client/README.md b/vision/beta/cloud-client/README.md
@@ -88,3 +88,9 @@ mvn exec:java -DDetect -Dexec.args="web-entities-include-geo ./resources/landmar
 ```
 mvn exec:java -DDetect -Dexec.args="crop ./resources/landmark.jpg"
 ```
+
+#### OCR
+```
+mvn exec:java -DDetect -Dexec.args="ocr gs://java-docs-samples-testing/vision/HodgeConj.pdf \
+   gs://<BUCKET_ID>/<OUTPUT_PREFIX>/"
+```
diff --git a/vision/beta/cloud-client/pom.xml b/vision/beta/cloud-client/pom.xml
@@ -40,7 +40,12 @@
     <dependency>
       <groupId>com.google.cloud</groupId>
       <artifactId>google-cloud-vision</artifactId>
-      <version>1.22.0</version>
+      <version>1.24.1</version>
+    </dependency>
+    <dependency>
+      <groupId>com.google.cloud</groupId>
+      <artifactId>google-cloud-storage</artifactId>
+      <version>1.24.1</version>
     </dependency>
     <!-- [END dependencies] -->
 
diff --git a/vision/beta/cloud-client/src/main/java/com/example/vision/Detect.java b/vision/beta/cloud-client/src/main/java/com/example/vision/Detect.java
@@ -16,44 +16,63 @@
 
 package com.example.vision;
 
-import com.google.cloud.vision.v1p1beta1.AnnotateImageRequest;
-import com.google.cloud.vision.v1p1beta1.AnnotateImageResponse;
-import com.google.cloud.vision.v1p1beta1.BatchAnnotateImagesResponse;
-import com.google.cloud.vision.v1p1beta1.Block;
-import com.google.cloud.vision.v1p1beta1.ColorInfo;
-import com.google.cloud.vision.v1p1beta1.CropHint;
-import com.google.cloud.vision.v1p1beta1.CropHintsAnnotation;
-import com.google.cloud.vision.v1p1beta1.DominantColorsAnnotation;
-import com.google.cloud.vision.v1p1beta1.EntityAnnotation;
-import com.google.cloud.vision.v1p1beta1.FaceAnnotation;
-import com.google.cloud.vision.v1p1beta1.Feature;
-import com.google.cloud.vision.v1p1beta1.Feature.Type;
-import com.google.cloud.vision.v1p1beta1.Image;
-import com.google.cloud.vision.v1p1beta1.ImageAnnotatorClient;
-import com.google.cloud.vision.v1p1beta1.ImageContext;
-import com.google.cloud.vision.v1p1beta1.ImageSource;
-import com.google.cloud.vision.v1p1beta1.LocationInfo;
-import com.google.cloud.vision.v1p1beta1.Page;
-import com.google.cloud.vision.v1p1beta1.Paragraph;
-import com.google.cloud.vision.v1p1beta1.SafeSearchAnnotation;
-import com.google.cloud.vision.v1p1beta1.Symbol;
-import com.google.cloud.vision.v1p1beta1.TextAnnotation;
-import com.google.cloud.vision.v1p1beta1.WebDetection;
-import com.google.cloud.vision.v1p1beta1.WebDetection.WebEntity;
-import com.google.cloud.vision.v1p1beta1.WebDetection.WebImage;
-import com.google.cloud.vision.v1p1beta1.WebDetection.WebLabel;
-import com.google.cloud.vision.v1p1beta1.WebDetection.WebPage;
-import com.google.cloud.vision.v1p1beta1.WebDetectionParams;
-import com.google.cloud.vision.v1p1beta1.Word;
-
+import com.google.api.gax.longrunning.OperationFuture;
+import com.google.cloud.storage.Blob;
+import com.google.cloud.storage.Bucket;
+import com.google.cloud.storage.Storage;
+import com.google.cloud.storage.Storage.BlobListOption;
+import com.google.cloud.storage.StorageOptions;
+import com.google.cloud.vision.v1p2beta1.AnnotateFileResponse;
+import com.google.cloud.vision.v1p2beta1.AnnotateFileResponse.Builder;
+import com.google.cloud.vision.v1p2beta1.AnnotateImageRequest;
+import com.google.cloud.vision.v1p2beta1.AnnotateImageResponse;
+import com.google.cloud.vision.v1p2beta1.AsyncAnnotateFileRequest;
+import com.google.cloud.vision.v1p2beta1.AsyncAnnotateFileResponse;
+import com.google.cloud.vision.v1p2beta1.AsyncBatchAnnotateFilesResponse;
+import com.google.cloud.vision.v1p2beta1.BatchAnnotateImagesResponse;
+import com.google.cloud.vision.v1p2beta1.Block;
+import com.google.cloud.vision.v1p2beta1.ColorInfo;
+import com.google.cloud.vision.v1p2beta1.CropHint;
+import com.google.cloud.vision.v1p2beta1.CropHintsAnnotation;
+import com.google.cloud.vision.v1p2beta1.DominantColorsAnnotation;
+import com.google.cloud.vision.v1p2beta1.EntityAnnotation;
+import com.google.cloud.vision.v1p2beta1.FaceAnnotation;
+import com.google.cloud.vision.v1p2beta1.Feature;
+import com.google.cloud.vision.v1p2beta1.Feature.Type;
+import com.google.cloud.vision.v1p2beta1.GcsDestination;
+import com.google.cloud.vision.v1p2beta1.GcsSource;
+import com.google.cloud.vision.v1p2beta1.Image;
+import com.google.cloud.vision.v1p2beta1.ImageAnnotatorClient;
+import com.google.cloud.vision.v1p2beta1.ImageContext;
+import com.google.cloud.vision.v1p2beta1.ImageSource;
+import com.google.cloud.vision.v1p2beta1.InputConfig;
+import com.google.cloud.vision.v1p2beta1.LocationInfo;
+import com.google.cloud.vision.v1p2beta1.OperationMetadata;
+import com.google.cloud.vision.v1p2beta1.OutputConfig;
+import com.google.cloud.vision.v1p2beta1.Page;
+import com.google.cloud.vision.v1p2beta1.Paragraph;
+import com.google.cloud.vision.v1p2beta1.SafeSearchAnnotation;
+import com.google.cloud.vision.v1p2beta1.Symbol;
+import com.google.cloud.vision.v1p2beta1.TextAnnotation;
+import com.google.cloud.vision.v1p2beta1.WebDetection;
+import com.google.cloud.vision.v1p2beta1.WebDetection.WebEntity;
+import com.google.cloud.vision.v1p2beta1.WebDetection.WebImage;
+import com.google.cloud.vision.v1p2beta1.WebDetection.WebLabel;
+import com.google.cloud.vision.v1p2beta1.WebDetection.WebPage;
+import com.google.cloud.vision.v1p2beta1.WebDetectionParams;
+import com.google.cloud.vision.v1p2beta1.Word;
 import com.google.protobuf.ByteString;
+import com.google.protobuf.util.JsonFormat;
 
 import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.PrintStream;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
+import java.util.concurrent.TimeUnit;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 public class Detect {
 
@@ -78,11 +97,16 @@ public static void argsHelper(String[] args, PrintStream out) throws Exception,
       out.println("Usage:");
       out.printf(
           "\tmvn exec:java -DDetect -Dexec.args=\"<command> <path-to-image>\"\n"
+              + "\tmvn exec:java -DDetect -Dexec.args=\"ocr <path-to-file> <path-to-destination>\""
+              + "\n"
               + "Commands:\n"
               + "\tfaces | labels | landmarks | logos | text | safe-search | properties"
-              + "| web | web-entities | web-entities-include-geo | crop \n"
+              + "| web | web-entities | web-entities-include-geo | crop | ocr \n"
               + "Path:\n\tA file path (ex: ./resources/wakeupcat.jpg) or a URI for a Cloud Storage "
-              + "resource (gs://...)\n");
+              + "resource (gs://...)\n"
+              + "Path to File:\n\tA path to the remote file on Cloud Storage (gs://...)\n"
+              + "Path to Destination\n\tA path to the remote destination on Cloud Storage for the"
+              + " file to be saved. (gs://BUCKET_NAME/PREFIX/)\n");
       return;
     }
     String command = args[0];
@@ -162,6 +186,9 @@ public static void argsHelper(String[] args, PrintStream out) throws Exception,
       } else {
         detectDocumentText(path, out);
       }
+    } else if (command.equals("ocr")) {
+      String destPath = args.length > 2 ? args[2] : "";
+      detectDocumentsGcs(path, destPath);
     }
   }
 
@@ -1277,4 +1304,123 @@ public static void detectDocumentTextGcs(String gcsPath, PrintStream out) throws
     }
   }
   // [END vision_detect_document_uri]
+
+  // [START vision_async_detect_document_ocr]
+  /**
+   * Performs document text OCR with PDF/TIFF as source files on Google Cloud Storage.
+   *
+   * <p>Waits up to 180 seconds for the operation, lists the output objects under the destination
+   * prefix and prints the full text of the first page found in the first output file.
+   *
+   * @param gcsSourcePath The path to the remote file on Google Cloud Storage to detect document
+   *                      text on.
+   * @param gcsDestinationPath The path to the remote destination on Google Cloud Storage to store
+   *                           the results on, in the form gs://BUCKET_NAME/PREFIX/.
+   * @throws Exception on errors while closing the client.
+   */
+  public static void detectDocumentsGcs(String gcsSourcePath, String gcsDestinationPath) throws
+      Exception {
+    try (ImageAnnotatorClient client = ImageAnnotatorClient.create()) {
+      List<AsyncAnnotateFileRequest> requests = new ArrayList<>();
+
+      // Set the GCS source path for the remote file.
+      GcsSource gcsSource = GcsSource.newBuilder()
+          .setUri(gcsSourcePath)
+          .build();
+
+      // Create the input configuration with the specified MIME type.
+      InputConfig inputConfig = InputConfig.newBuilder()
+          .setMimeType("application/pdf") // Supported MimeTypes: "application/pdf", "image/tiff"
+          .setGcsSource(gcsSource)
+          .build();
+
+      // Set the GCS destination path for where to save the results.
+      GcsDestination gcsDestination = GcsDestination.newBuilder()
+          .setUri(gcsDestinationPath)
+          .build();
+
+      // Configure the output; the batch size is how many pages go into each json output file.
+      OutputConfig outputConfig = OutputConfig.newBuilder()
+          .setBatchSize(2)
+          .setGcsDestination(gcsDestination)
+          .build();
+
+      // Select the Feature required by the vision API
+      Feature feature = Feature.newBuilder().setType(Feature.Type.DOCUMENT_TEXT_DETECTION).build();
+
+      // Build the OCR request
+      AsyncAnnotateFileRequest request = AsyncAnnotateFileRequest.newBuilder()
+          .addFeatures(feature)
+          .setInputConfig(inputConfig)
+          .setOutputConfig(outputConfig)
+          .build();
+
+      requests.add(request);
+
+      // Perform the OCR request
+      OperationFuture<AsyncBatchAnnotateFilesResponse, OperationMetadata> response =
+          client.asyncBatchAnnotateFilesAsync(requests);
+
+      System.out.println("Waiting for the operation to finish.");
+
+      // Wait for the request to finish. The returned value is deliberately ignored, since the API
+      // saves the result to the specified location on GCS.
+      response.get(180, TimeUnit.SECONDS);
+
+      // The output has now been written to GCS, so we can list all the output files.
+      Storage storage = StorageOptions.getDefaultInstance().getService();
+
+      // Get the destination location from the gcsDestinationPath
+      Pattern pattern = Pattern.compile("gs://([^/]+)/(.+)");
+      Matcher matcher = pattern.matcher(gcsDestinationPath);
+
+      if (matcher.find()) {
+        String bucketName = matcher.group(1);
+        String prefix = matcher.group(2);
+
+        // Get the list of objects with the given prefix from the GCS bucket
+        Bucket bucket = storage.get(bucketName);
+        com.google.api.gax.paging.Page<Blob> pageList = bucket.list(BlobListOption.prefix(prefix));
+
+        Blob firstOutputFile = null;
+
+        // List objects with the given prefix.
+        System.out.println("Output files:");
+        for (Blob blob : pageList.iterateAll()) {
+          System.out.println(blob.getName());
+
+          // Remember the first output file from GCS. Since we specified batch size = 2, it
+          // contains the first two pages of the input file.
+          if (firstOutputFile == null) {
+            firstOutputFile = blob;
+          }
+        }
+
+        // Nothing was listed under the prefix, so there is no output to parse.
+        if (firstOutputFile == null) {
+          System.out.println("No output files found.");
+          return;
+        }
+
+        // Read the whole (small) .json file in one request and convert its contents to an
+        // AnnotateFileResponse. Decode explicitly as UTF-8 instead of the platform charset.
+        // Storage guide: https://cloud.google.com/storage/docs/downloading-objects
+        String jsonContents =
+            new String(firstOutputFile.getContent(), java.nio.charset.StandardCharsets.UTF_8);
+        Builder builder = AnnotateFileResponse.newBuilder();
+        JsonFormat.parser().merge(jsonContents, builder);
+        AnnotateFileResponse annotateFileResponse = builder.build();
+
+        // Parse through the object to get the actual response for the first page of the input file.
+        AnnotateImageResponse annotateImageResponse = annotateFileResponse.getResponses(0);
+
+        // Here we print the full text from the first page. The response contains more information:
+        // annotation/pages/blocks/paragraphs/words/symbols, incl. confidence and bounding boxes.
+        System.out.format("\nText: %s\n", annotateImageResponse.getFullTextAnnotation().getText());
+      } else {
+        System.out.println("No MATCH");
+      }
+    }
+  }
+  // [END vision_async_detect_document_ocr]
 }
diff --git a/vision/beta/cloud-client/src/test/java/com/example/vision/DetectIT.java b/vision/beta/cloud-client/src/test/java/com/example/vision/DetectIT.java
@@ -18,6 +18,11 @@
 
 import static com.google.common.truth.Truth.assertThat;
 
+import com.google.api.gax.paging.Page;
+import com.google.cloud.storage.Blob;
+import com.google.cloud.storage.Storage;
+import com.google.cloud.storage.Storage.BlobListOption;
+import com.google.cloud.storage.StorageOptions;
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.io.PrintStream;
@@ -37,6 +42,7 @@ public class DetectIT {
   private Detect app;
   private static final String PROJECT_ID = System.getenv("GOOGLE_CLOUD_PROJECT");
   private static final String BUCKET = PROJECT_ID;
+  private static final  String OUTPUT_PREFIX = "OCR_PDF_TEST_OUTPUT";
 
   @Before
   public void setUp() throws IOException {
@@ -348,4 +354,25 @@ public void testDocumentTextGcs() throws Exception {
     assertThat(got).contains("37%");
     assertThat(got).contains("Word text: class (confidence:");
   }
+
+  // Runs the "ocr" command against a PDF on GCS and checks the captured output for expected text.
+  @Test
+  public void testDetectDocumentsGcs() throws Exception {
+    String[] args = {"ocr", "gs://" + BUCKET + "/vision/HodgeConj.pdf",
+        "gs://" + BUCKET + "/" + OUTPUT_PREFIX + "/"};
+    try {
+      // Act
+      Detect.argsHelper(args, out);
+
+      // Assert
+      assertThat(bout.toString()).contains("HODGE'S GENERAL CONJECTURE");
+    } finally {
+      // Delete the OCR output even when the run or the assertion fails.
+      Storage storage = StorageOptions.getDefaultInstance().getService();
+      Page<Blob> blobs = storage.list(BUCKET, BlobListOption.currentDirectory(),
+          BlobListOption.prefix(OUTPUT_PREFIX + "/"));
+      for (Blob blob : blobs.iterateAll()) {
+        blob.delete();
+      }
+    }
+  }
 }