16
16
17
17
package com .example .vision ;
18
18
19
- import com .google .cloud .vision .v1p1beta1 .AnnotateImageRequest ;
20
- import com .google .cloud .vision .v1p1beta1 .AnnotateImageResponse ;
21
- import com .google .cloud .vision .v1p1beta1 .BatchAnnotateImagesResponse ;
22
- import com .google .cloud .vision .v1p1beta1 .Block ;
23
- import com .google .cloud .vision .v1p1beta1 .ColorInfo ;
24
- import com .google .cloud .vision .v1p1beta1 .CropHint ;
25
- import com .google .cloud .vision .v1p1beta1 .CropHintsAnnotation ;
26
- import com .google .cloud .vision .v1p1beta1 .DominantColorsAnnotation ;
27
- import com .google .cloud .vision .v1p1beta1 .EntityAnnotation ;
28
- import com .google .cloud .vision .v1p1beta1 .FaceAnnotation ;
29
- import com .google .cloud .vision .v1p1beta1 .Feature ;
30
- import com .google .cloud .vision .v1p1beta1 .Feature .Type ;
31
- import com .google .cloud .vision .v1p1beta1 .Image ;
32
- import com .google .cloud .vision .v1p1beta1 .ImageAnnotatorClient ;
33
- import com .google .cloud .vision .v1p1beta1 .ImageContext ;
34
- import com .google .cloud .vision .v1p1beta1 .ImageSource ;
35
- import com .google .cloud .vision .v1p1beta1 .LocationInfo ;
36
- import com .google .cloud .vision .v1p1beta1 .Page ;
37
- import com .google .cloud .vision .v1p1beta1 .Paragraph ;
38
- import com .google .cloud .vision .v1p1beta1 .SafeSearchAnnotation ;
39
- import com .google .cloud .vision .v1p1beta1 .Symbol ;
40
- import com .google .cloud .vision .v1p1beta1 .TextAnnotation ;
41
- import com .google .cloud .vision .v1p1beta1 .WebDetection ;
42
- import com .google .cloud .vision .v1p1beta1 .WebDetection .WebEntity ;
43
- import com .google .cloud .vision .v1p1beta1 .WebDetection .WebImage ;
44
- import com .google .cloud .vision .v1p1beta1 .WebDetection .WebLabel ;
45
- import com .google .cloud .vision .v1p1beta1 .WebDetection .WebPage ;
46
- import com .google .cloud .vision .v1p1beta1 .WebDetectionParams ;
47
- import com .google .cloud .vision .v1p1beta1 .Word ;
48
-
19
+ import com .google .api .gax .longrunning .OperationFuture ;
20
+ import com .google .cloud .storage .Blob ;
21
+ import com .google .cloud .storage .Bucket ;
22
+ import com .google .cloud .storage .Storage ;
23
+ import com .google .cloud .storage .Storage .BlobListOption ;
24
+ import com .google .cloud .storage .StorageOptions ;
25
+ import com .google .cloud .vision .v1p2beta1 .AnnotateFileResponse ;
26
+ import com .google .cloud .vision .v1p2beta1 .AnnotateFileResponse .Builder ;
27
+ import com .google .cloud .vision .v1p2beta1 .AnnotateImageRequest ;
28
+ import com .google .cloud .vision .v1p2beta1 .AnnotateImageResponse ;
29
+ import com .google .cloud .vision .v1p2beta1 .AsyncAnnotateFileRequest ;
30
+ import com .google .cloud .vision .v1p2beta1 .AsyncAnnotateFileResponse ;
31
+ import com .google .cloud .vision .v1p2beta1 .AsyncBatchAnnotateFilesResponse ;
32
+ import com .google .cloud .vision .v1p2beta1 .BatchAnnotateImagesResponse ;
33
+ import com .google .cloud .vision .v1p2beta1 .Block ;
34
+ import com .google .cloud .vision .v1p2beta1 .ColorInfo ;
35
+ import com .google .cloud .vision .v1p2beta1 .CropHint ;
36
+ import com .google .cloud .vision .v1p2beta1 .CropHintsAnnotation ;
37
+ import com .google .cloud .vision .v1p2beta1 .DominantColorsAnnotation ;
38
+ import com .google .cloud .vision .v1p2beta1 .EntityAnnotation ;
39
+ import com .google .cloud .vision .v1p2beta1 .FaceAnnotation ;
40
+ import com .google .cloud .vision .v1p2beta1 .Feature ;
41
+ import com .google .cloud .vision .v1p2beta1 .Feature .Type ;
42
+ import com .google .cloud .vision .v1p2beta1 .GcsDestination ;
43
+ import com .google .cloud .vision .v1p2beta1 .GcsSource ;
44
+ import com .google .cloud .vision .v1p2beta1 .Image ;
45
+ import com .google .cloud .vision .v1p2beta1 .ImageAnnotatorClient ;
46
+ import com .google .cloud .vision .v1p2beta1 .ImageContext ;
47
+ import com .google .cloud .vision .v1p2beta1 .ImageSource ;
48
+ import com .google .cloud .vision .v1p2beta1 .InputConfig ;
49
+ import com .google .cloud .vision .v1p2beta1 .LocationInfo ;
50
+ import com .google .cloud .vision .v1p2beta1 .OperationMetadata ;
51
+ import com .google .cloud .vision .v1p2beta1 .OutputConfig ;
52
+ import com .google .cloud .vision .v1p2beta1 .Page ;
53
+ import com .google .cloud .vision .v1p2beta1 .Paragraph ;
54
+ import com .google .cloud .vision .v1p2beta1 .SafeSearchAnnotation ;
55
+ import com .google .cloud .vision .v1p2beta1 .Symbol ;
56
+ import com .google .cloud .vision .v1p2beta1 .TextAnnotation ;
57
+ import com .google .cloud .vision .v1p2beta1 .WebDetection ;
58
+ import com .google .cloud .vision .v1p2beta1 .WebDetection .WebEntity ;
59
+ import com .google .cloud .vision .v1p2beta1 .WebDetection .WebImage ;
60
+ import com .google .cloud .vision .v1p2beta1 .WebDetection .WebLabel ;
61
+ import com .google .cloud .vision .v1p2beta1 .WebDetection .WebPage ;
62
+ import com .google .cloud .vision .v1p2beta1 .WebDetectionParams ;
63
+ import com .google .cloud .vision .v1p2beta1 .Word ;
49
64
import com .google .protobuf .ByteString ;
65
+ import com .google .protobuf .util .JsonFormat ;
50
66
51
67
import java .io .FileInputStream ;
52
68
import java .io .IOException ;
53
69
import java .io .PrintStream ;
54
70
import java .util .ArrayList ;
55
71
import java .util .Arrays ;
56
72
import java .util .List ;
73
+ import java .util .concurrent .TimeUnit ;
74
+ import java .util .regex .Matcher ;
75
+ import java .util .regex .Pattern ;
57
76
58
77
public class Detect {
59
78
@@ -78,11 +97,16 @@ public static void argsHelper(String[] args, PrintStream out) throws Exception,
78
97
out .println ("Usage:" );
79
98
out .printf (
80
99
"\t mvn exec:java -DDetect -Dexec.args=\" <command> <path-to-image>\" \n "
100
+ + "\t mvn exec:java -DDetect -Dexec.args=\" ocr <path-to-file> <path-to-destination>\" "
101
+ + "\n "
81
102
+ "Commands:\n "
82
103
+ "\t faces | labels | landmarks | logos | text | safe-search | properties"
83
- + "| web | web-entities | web-entities-include-geo | crop \n "
104
+ + "| web | web-entities | web-entities-include-geo | crop | ocr \n "
84
105
+ "Path:\n \t A file path (ex: ./resources/wakeupcat.jpg) or a URI for a Cloud Storage "
85
- + "resource (gs://...)\n " );
106
+ + "resource (gs://...)\n "
107
+ + "Path to File:\n \t A path to the remote file on Cloud Storage (gs://...)\n "
108
+ + "Path to Destination\n \t A path to the remote destination on Cloud Storage for the"
109
+ + " file to be saved. (gs://BUCKET_NAME/PREFIX/)\n " );
86
110
return ;
87
111
}
88
112
String command = args [0 ];
@@ -162,6 +186,9 @@ public static void argsHelper(String[] args, PrintStream out) throws Exception,
162
186
} else {
163
187
detectDocumentText (path , out );
164
188
}
189
+ } else if (command .equals ("ocr" )) {
190
+ String destPath = args .length > 2 ? args [2 ] : "" ;
191
+ detectDocumentsGcs (path , destPath );
165
192
}
166
193
}
167
194
@@ -1277,4 +1304,123 @@ public static void detectDocumentTextGcs(String gcsPath, PrintStream out) throws
1277
1304
}
1278
1305
}
1279
1306
// [END vision_detect_document_uri]
1307
+
1308
+ // [START vision_async_detect_document_ocr]
1309
+ /**
1310
+ * Performs document text OCR with PDF/TIFF as source files on Google Cloud Storage.
1311
+ *
1312
+ * @param gcsSourcePath The path to the remote file on Google Cloud Storage to detect document
1313
+ * text on.
1314
+ * @param gcsDestinationPath The path to the remote file on Google Cloud Storage to store the
1315
+ * results on.
1316
+ * @throws Exception on errors while closing the client.
1317
+ */
1318
+ public static void detectDocumentsGcs (String gcsSourcePath , String gcsDestinationPath ) throws
1319
+ Exception {
1320
+ try (ImageAnnotatorClient client = ImageAnnotatorClient .create ()) {
1321
+ List <AsyncAnnotateFileRequest > requests = new ArrayList <>();
1322
+
1323
+ // Set the GCS source path for the remote file.
1324
+ GcsSource gcsSource = GcsSource .newBuilder ()
1325
+ .setUri (gcsSourcePath )
1326
+ .build ();
1327
+
1328
+ // Create the configuration with the specified MIME (Multipurpose Internet Mail Extensions)
1329
+ // types
1330
+ InputConfig inputConfig = InputConfig .newBuilder ()
1331
+ .setMimeType ("application/pdf" ) // Supported MimeTypes: "application/pdf", "image/tiff"
1332
+ .setGcsSource (gcsSource )
1333
+ .build ();
1334
+
1335
+ // Set the GCS destination path for where to save the results.
1336
+ GcsDestination gcsDestination = GcsDestination .newBuilder ()
1337
+ .setUri (gcsDestinationPath )
1338
+ .build ();
1339
+
1340
+ // Create the configuration for the output with the batch size.
1341
+ // The batch size sets how many pages should be grouped into each json output file.
1342
+ OutputConfig outputConfig = OutputConfig .newBuilder ()
1343
+ .setBatchSize (2 )
1344
+ .setGcsDestination (gcsDestination )
1345
+ .build ();
1346
+
1347
+ // Select the Feature required by the vision API
1348
+ Feature feature = Feature .newBuilder ().setType (Feature .Type .DOCUMENT_TEXT_DETECTION ).build ();
1349
+
1350
+ // Build the OCR request
1351
+ AsyncAnnotateFileRequest request = AsyncAnnotateFileRequest .newBuilder ()
1352
+ .addFeatures (feature )
1353
+ .setInputConfig (inputConfig )
1354
+ .setOutputConfig (outputConfig )
1355
+ .build ();
1356
+
1357
+ requests .add (request );
1358
+
1359
+ // Perform the OCR request
1360
+ OperationFuture <AsyncBatchAnnotateFilesResponse , OperationMetadata > response =
1361
+ client .asyncBatchAnnotateFilesAsync (requests );
1362
+
1363
+ System .out .println ("Waiting for the operation to finish." );
1364
+
1365
+ // Wait for the request to finish. (The result is not used, since the API saves the result to
1366
+ // the specified location on GCS.)
1367
+ List <AsyncAnnotateFileResponse > result = response .get (180 , TimeUnit .SECONDS )
1368
+ .getResponsesList ();
1369
+
1370
+ // Once the request has completed and the output has been
1371
+ // written to GCS, we can list all the output files.
1372
+ Storage storage = StorageOptions .getDefaultInstance ().getService ();
1373
+
1374
+ // Get the destination location from the gcsDestinationPath
1375
+ Pattern pattern = Pattern .compile ("gs://([^/]+)/(.+)" );
1376
+ Matcher matcher = pattern .matcher (gcsDestinationPath );
1377
+
1378
+ if (matcher .find ()) {
1379
+ String bucketName = matcher .group (1 );
1380
+ String prefix = matcher .group (2 );
1381
+
1382
+ // Get the list of objects with the given prefix from the GCS bucket
1383
+ Bucket bucket = storage .get (bucketName );
1384
+ com .google .api .gax .paging .Page <Blob > pageList = bucket .list (BlobListOption .prefix (prefix ));
1385
+
1386
+ Blob firstOutputFile = null ;
1387
+
1388
+ // List objects with the given prefix.
1389
+ System .out .println ("Output files:" );
1390
+ for (Blob blob : pageList .iterateAll ()) {
1391
+ System .out .println (blob .getName ());
1392
+
1393
+ // Process the first output file from GCS.
1394
+ // Since we specified batch size = 2, the first response contains
1395
+ // the first two pages of the input file.
1396
+ if (firstOutputFile == null ) {
1397
+ firstOutputFile = blob ;
1398
+ }
1399
+ }
1400
+
1401
+ // Get the contents of the file and convert the JSON contents to an AnnotateFileResponse
1402
+ // object. If the Blob is small read all its content in one request
1403
+ // (Note: the file is a .json file)
1404
+ // Storage guide: https://cloud.google.com/storage/docs/downloading-objects
1405
+ String jsonContents = new String (firstOutputFile .getContent ());
1406
+ Builder builder = AnnotateFileResponse .newBuilder ();
1407
+ JsonFormat .parser ().merge (jsonContents , builder );
1408
+
1409
+ // Build the AnnotateFileResponse object
1410
+ AnnotateFileResponse annotateFileResponse = builder .build ();
1411
+
1412
+ // Parse through the object to get the actual response for the first page of the input file.
1413
+ AnnotateImageResponse annotateImageResponse = annotateFileResponse .getResponses (0 );
1414
+
1415
+ // Here we print the full text from the first page.
1416
+ // The response contains more information:
1417
+ // annotation/pages/blocks/paragraphs/words/symbols
1418
+ // including confidence score and bounding boxes
1419
+ System .out .format ("\n Text: %s\n " , annotateImageResponse .getFullTextAnnotation ().getText ());
1420
+ } else {
1421
+ System .out .println ("No MATCH" );
1422
+ }
1423
+ }
1424
+ }
1425
+ // [END vision_async_detect_document_ocr]
1280
1426
}
0 commit comments