Skip to content

Beta feature : Vision : PDF/TIFF/GIF document feature detection #1349

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Mar 20, 2019
Prev Previous commit
Next Next commit
Update after review comments
  • Loading branch information
nirupa-kumar committed Mar 18, 2019
commit c68ab2673413cba6377e6c4d0c444c27247310a0
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

package com.example.vision;

// [START vision_detect_async_batch_annotate_images_beta]
// [START vision_async_batch_annotate_images_beta]

import com.google.api.core.ApiFuture;
import com.google.api.gax.paging.Page;
Expand Down Expand Up @@ -47,16 +47,14 @@

public class AsyncBatchAnnotateImagesGcs {

/**
* Performs asynchronous batch annotation of images on Google Cloud Storage
* @param gcsSourcePath The path to the remote file on Google Cloud Storage
* @param gcsDestinationPath The path to the annotated image file
* @throws Exception on errors
*/
// Performs asynchronous batch annotation of images on Google Cloud Storage

public static void asyncBatchAnnotateImagesGcs(String gcsSourcePath, String gcsDestinationPath)
throws Exception {

// String gcsSourcePath = "gs://YOUR_BUCKET_ID/path_to_your_data";
// String gcsDestinationPath = "gs://YOUR_BUCKET_ID/path_to_store_annotation";

try (ImageAnnotatorClient client = ImageAnnotatorClient.create()) {
List<AnnotateImageRequest> requests = new ArrayList<>();
System.out.println("source: " + gcsSourcePath);
Expand Down Expand Up @@ -158,4 +156,4 @@ public static void asyncBatchAnnotateImagesGcs(String gcsSourcePath, String gcsD
}
}

// [END vision_detect_async_batch_annotate_images_beta]
// [END vision_async_batch_annotate_images_beta]
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

package com.example.vision;

// [START vision_detect_batch_annotate_files_beta]
// [START vision_batch_annotate_files_beta]

import com.google.api.core.ApiFuture;
import com.google.cloud.vision.v1p4beta1.AnnotateFileRequest;
Expand All @@ -40,28 +40,20 @@
import java.util.Arrays;
import java.util.List;


public class DetectBatchAnnotateFiles {

/**
* Performs document feature detection on a local PDF/TIFF/GIF file.
* While your PDF file may have several pages, this API can process up to 5 pages only.
* @param filePath The local file path e.g. "path/to/file.pdf"
* @throws Exception on errors
*/
// Performs document feature detection on a local PDF/TIFF/GIF file.

public static void detectBatchAnnotateFiles(String filePath) {
// String filePath = "path/to/your_file";

// Initialize the client that will be used to send requests. This client only needs to be
// created once, and can be reused for multiple requests. After completing all of your
// requests, call the "close" method on the client to safely clean up any remaining
// background resources.
try (ImageAnnotatorClient client = ImageAnnotatorClient.create()) {
//Annotate the first two pages and the last one (max 5 pages)
//First page starts at 1, and not 0. Last page is -1.
List<Integer> pages = Arrays.asList(1,2,-1);
// Annotate the first two pages and the last one (max 5 pages)
// First page starts at 1, and not 0. Last page is -1.
List<Integer> pages = Arrays.asList(1, 2, -1);
ByteString pdfBytes = ByteString.readFrom(new FileInputStream(filePath));
Feature feat = Feature.newBuilder().setType(Type.DOCUMENT_TEXT_DETECTION).build();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: add empty line below

//Other supported mime types : 'image/tiff' or 'image/gif'
// Other supported MIME types: 'image/tiff' or 'image/gif'
InputConfig inputConfig =
InputConfig.newBuilder().setMimeType("application/pdf").setContent(pdfBytes).build();
AnnotateFileRequest request =
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: add empty line above

Expand All @@ -77,8 +69,8 @@ public static void detectBatchAnnotateFiles(String filePath) {
ApiFuture<BatchAnnotateFilesResponse> future =
client.batchAnnotateFilesCallable().futureCall(batchAnnotateFilesRequest);
BatchAnnotateFilesResponse response = future.get();
client.close();
//Getting the first response

// Getting the first response
AnnotateFileResponse annotateFileResponse = response.getResponses(0);
// For full list of available annotations, see http://g.co/cloud/vision/docs
TextAnnotation textAnnotation = annotateFileResponse.getResponses(0).getFullTextAnnotation();
Expand Down Expand Up @@ -115,4 +107,4 @@ public static void detectBatchAnnotateFiles(String filePath) {
}
}
}
// [END vision_detect_batch_annotate_files_beta]
// [END vision_batch_annotate_files_beta]
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

package com.example.vision;

// [START vision_fulltext_detection_pdf_gcs_beta]
// [START vision_batch_annotate_files_gcs_beta]

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: empty line

import com.google.api.core.ApiFuture;
import com.google.cloud.vision.v1p4beta1.AnnotateFileRequest;
Expand All @@ -41,19 +41,11 @@

public class DetectBatchAnnotateFilesGcs {

/**
* Performs document feature detection on a remote PDF/TIFF/GIF file on Google Cloud Storage.
* While your PDF file may have several pages, this API can process up to 5 pages only.
*
* @param gcsPath The path to the remote file on Google Cloud Storage
* @throws Exception on errors
*/
public static void detectDocumentFeaturesGcs(String gcsPath) {
// Performs document feature detection on a remote PDF/TIFF/GIF file on Google Cloud Storage.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: empty line

public static void detectBatchAnnotateFilesGcs(String gcsPath) {
// String gcsPath = "gs://YOUR_BUCKET_ID/path_to_your_data";

// Initialize the client that will be used to send requests. This client only needs to be
// created once, and can be reused for multiple requests. After completing all of your
// requests, call the "close" method on the client to safely clean up any remaining
// background resources.
try (ImageAnnotatorClient client = ImageAnnotatorClient.create()) {
// Annotate the first two pages and the last one (max 5 pages)
// First page starts at 1, and not 0. Last page is -1.
Expand Down Expand Up @@ -114,4 +106,4 @@ public static void detectDocumentFeaturesGcs(String gcsPath) {
}
}
}
// [END vision_fulltext_detection_pdf_gcs_beta]
// [END vision_batch_annotate_files_gcs_beta]
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ public void testDetectDocumentFeatures() {
@Test
public void testDetectDocumentFeaturesGcs() throws Exception {
// Act
DetectBatchAnnotateFilesGcs.detectDocumentFeaturesGcs(
DetectBatchAnnotateFilesGcs.detectBatchAnnotateFilesGcs(
"gs://cloud-samples-data/video/kafka.pdf");

// Assert
Expand Down