Skip to content

Commit b80620b

Browse files
Merge pull request watson-developer-cloud#277 from samir-patel/master
[document-conversion] Adds index document API
2 parents fe13ea5 + a597028 commit b80620b

File tree

2 files changed

+228
-0
lines changed

2 files changed

+228
-0
lines changed

examples/document_conversion.v1.js

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,137 @@ document_conversion.convert({
2222
}
2323
}
2424
}, function (err, response) {
25+
console.log("----------\n");
26+
console.log("convert a single document\n");
27+
console.log("----------\n");
28+
if (err) {
29+
console.error(err);
30+
} else {
31+
console.log(JSON.stringify(response, null, 2));
32+
}
33+
});
34+
35+
// Example: dry run of indexing a single document (a .docx file read from ./resources).
document_conversion.index({
  file: fs.createReadStream(__dirname + '/resources/sample-docx.docx'),
  config: {
    retrieve_and_rank: { dry_run: true }
  }
}, reportSingleDocumentDryRun);

/**
 * Callback for the single-document dry run: prints the example banner, then
 * either the error or the pretty-printed response.
 *
 * @param {Error} err
 * @param {Object} response
 */
function reportSingleDocumentDryRun(err, response) {
  [
    "----------\n",
    "dry run of indexing a single document\n",
    "----------\n"
  ].forEach(function (bannerLine) {
    console.log(bannerLine);
  });

  if (err) {
    console.error(err);
    return;
  }
  console.log(JSON.stringify(response, null, 2));
}
53+
54+
// Example: dry run of indexing only metadata (no file is sent).
document_conversion.index({
  metadata: {
    metadata: [
      { name: 'id', value: '1' },
      { name: 'SomeMetadataName', value: 'SomeMetadataValue' }
    ]
  },
  config: {
    retrieve_and_rank: { dry_run: true }
  }
}, reportMetadataOnlyDryRun);

/**
 * Callback for the metadata-only dry run: prints the example banner, then
 * either the error or the pretty-printed response.
 *
 * @param {Error} err
 * @param {Object} response
 */
function reportMetadataOnlyDryRun(err, response) {
  [
    "----------\n",
    "dry run of indexing only metadata\n",
    "----------\n"
  ].forEach(function (bannerLine) {
    console.log(bannerLine);
  });

  if (err) {
    console.error(err);
    return;
  }
  console.log(JSON.stringify(response, null, 2));
}
77+
78+
// Example: dry run of indexing a single document with metadata, plus additional
// configuration for convert_document and field mappings.
document_conversion.index({
  file: fs.createReadStream(__dirname + '/resources/example.html'),
  metadata: {
    metadata: [
      { name: 'id', value: '2' },
      { name: 'Author', value: 'IBM' },
      { name: 'Date Created', value: '2016-03-21' },
      { name: 'Category', value: 'Example' }
    ]
  },
  config: {
    convert_document: {
      // Exclude all anchor tags "<a>"
      normalized_html: { exclude_tags_completely: [ 'a' ] }
    },
    retrieve_and_rank: {
      dry_run: true,
      fields: {
        mappings: [
          { from: 'Author', to: 'Created By' },
          { from: 'Date Created', to: 'Created On' }
        ],
        include: [ 'Created By', 'Created On' ],
        exclude: [ 'Category' ]
      }
    }
  }
}, reportMappedDocumentDryRun);

/**
 * Callback for the dry run with metadata and field mappings: prints the example
 * banner, then either the error or the pretty-printed response.
 *
 * @param {Error} err
 * @param {Object} response
 */
function reportMappedDocumentDryRun(err, response) {
  [
    "----------\n",
    "dry run of indexing a single document with metadata and additional configuration for convert_document and field mappings\n",
    "----------\n"
  ].forEach(function (bannerLine) {
    console.log(bannerLine);
  });

  if (err) {
    console.error(err);
    return;
  }
  console.log(JSON.stringify(response, null, 2));
}
123+
124+
// indexing a single document with metadata and additional configuration for convert_document and field mappings
125+
document_conversion.index({
126+
file: fs.createReadStream(__dirname + '/resources/example.html'),
127+
metadata: {
128+
metadata: [
129+
{ name: 'id', value: '3' },
130+
{ name: 'SomeMetadataName', value: 'SomeMetadataValue' }
131+
]
132+
},
133+
config: {
134+
convert_document: {
135+
normalized_html: {
136+
// Exclude all anchor tags "<a>"
137+
exclude_tags_completely: [ 'a' ]
138+
}
139+
},
140+
retrieve_and_rank: {
141+
dry_run: false,
142+
service_instance_id: 'INSERT YOUR RETRIEVE AND RANK SERVICE INSTANCE ID HERE',
143+
cluster_id: 'INSERT YOUR RETRIEVE AND RANK SERVICE SOLR CLUSTER ID HERE',
144+
search_collection: 'INSERT YOUR RETRIEVE AND RANK SERVICE SOLR SEARCH COLLECTION NAME HERE',
145+
fields: {
146+
mappings: [
147+
{ from: 'SomeMetadataName', to: 'Created By' }
148+
]
149+
}
150+
}
151+
}
152+
}, function (err, response) {
153+
console.log("----------\n");
154+
console.log("indexing a single document with metadata and additional configuration for convert_document and field mappings\n");
155+
console.log("----------\n");
25156
if (err) {
26157
console.error(err);
27158
} else {

services/document_conversion/v1.js

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,103 @@ DocumentConversion.prototype.convert = function(params, callback) {
134134
return requestFactory(parameters, callback);
135135
};
136136

137+
/**
 * Wraps a value as a JSON-encoded multipart form part.
 *
 * @private
 * @param {*} value Value to serialize with JSON.stringify
 * @return {Object} Form part holding the serialized `value` and a JSON content type
 */
function jsonFormPart(value) {
  return {
    value: JSON.stringify(value),
    options: {
      contentType: 'application/json; charset=utf-8'
    }
  };
}

/**
 * One-off convert and index a document via index_document API
 *
 * NOTE: A SOLR cluster and search collection will have to be created through the Retrieve and Rank
 *       service prior to using this API if actual indexing is performed (dry_run=false).
 *
 * At least one of params.file or params.metadata must be supplied, and params.config is always
 * required. Validation failures are reported through the callback (nothing is thrown by the
 * checks themselves) and the method then returns undefined.
 *
 * @param {Object} params
 * @param {ReadableStream|Buffer|Object} [params.file] The document file to convert. May be a
 *          ReadableStream, a Buffer, or an object that carries a `value` property.
 * @param {Object} [params.metadata] Metadata array of Object's where each object contains 'name' and 'value'
 * @param {Object} params.config Configuration for the conversion and indexing. The conversion config needs
 *          to be in a 'convert_document' object. This can include configuration for 'pdf', 'word'
 *          and 'normalized_html' phases of the conversion process. The indexing config needs to be
 *          in a 'retrieve_and_rank' object. The 'retrieve_and_rank' object has the following fields:
 *          'dry_run' - boolean value, true if a dry run is to be performed, false to actually index,
 *          'service_instance_id' - The serviceGuid of your instance of the retrieve and rank
 *          service (required if dry_run=false), 'cluster_id' - The Solr cluster id for your retrieve
 *          and rank service instance (required if dry_run=false), 'search_collection' - The name of
 *          your Solr search collection from your retrieve and rank service instance (required if
 *          dry_run=false), and 'fields' - Configuration information for field 'mappings', fields
 *          to 'include', and fields to 'exclude' during indexing (exclude takes precedence over include)
 * @param {Function} callback Invoked with an Error when validation fails; otherwise handed to requestFactory
 * @return {*} Whatever requestFactory returns, or undefined when validation fails
 */
DocumentConversion.prototype.index = function(params, callback) {
  params = params || {};
  if (!params.file && !params.metadata) {
    callback(new Error('Missing required parameters: file or metadata. At least one of those is required.'));
    return;
  }
  if (params.file && !isStream(params.file) && !Buffer.isBuffer(params.file) && !params.file.value) {
    callback(new Error('Missing required parameters: file is not a standard Node.js Stream or Buffer'));
    return;
  }
  if (!params.config) {
    // Name the parameter that is actually missing (previously reported "file or metadata").
    callback(new Error('Missing required parameters: config'));
    return;
  }

  var parameters = {
    options: {
      method: 'POST',
      url: '/v1/index_document',
      json: true
    },
    defaultOptions: this._options
  };

  // Send the parameters as formData. The guards above guarantee that at least one of
  // file/metadata is present, so no fallback branch is needed. Parts are added in the
  // order file, config, metadata.
  var formData = {};
  if (params.file) {
    // fixupContentType is defined outside this block; it must run before params.file is
    // read, in case it replaces the file entry on params.
    fixupContentType(params);
    formData.file = params.file;
  }
  formData.config = jsonFormPart(params.config);
  if (params.metadata) {
    formData.metadata = jsonFormPart(params.metadata);
  }
  parameters.options.formData = formData;

  return requestFactory(parameters, callback);
};
137234

138235

139236
// give a clear error message for the deprecated methods

0 commit comments

Comments
 (0)