@@ -158,27 +158,69 @@ const rebuildData = async ({
158
158
159
159
const deleteVectorIdList = mongoData . indexes . map ( ( index ) => index . dataId ) ;
160
160
161
- const { tokens } = await mongoSessionRun ( async ( session ) => {
162
- // update vector, update dataset.data rebuilding status, delete data from training
163
- const updateResult = await Promise . all (
164
- mongoData . indexes . map ( async ( index , i ) => {
165
- const result = await insertDatasetDataVector ( {
166
- query : index . text ,
167
- model : getVectorModel ( trainingData . model ) ,
168
- teamId : mongoData . teamId ,
169
- datasetId : mongoData . datasetId ,
170
- collectionId : mongoData . collectionId
171
- } ) ;
172
- mongoData . indexes [ i ] . dataId = result . insertId ;
173
- return result ;
174
- } )
175
- ) ;
161
+ // Find next rebuilding data to insert training queue
162
+ await mongoSessionRun ( async ( session ) => {
163
+ // get new mongoData insert to training
164
+ const newRebuildingData = await MongoDatasetData . findOneAndUpdate (
165
+ {
166
+ teamId : mongoData . teamId ,
167
+ datasetId : mongoData . datasetId ,
168
+ rebuilding : true
169
+ } ,
170
+ {
171
+ $unset : {
172
+ rebuilding : null
173
+ } ,
174
+ updateTime : new Date ( )
175
+ } ,
176
+ { session }
177
+ ) . select ( {
178
+ _id : 1 ,
179
+ collectionId : 1
180
+ } ) ;
176
181
177
- // Ensure that the training data is deleted after the Mongo update is successful
182
+ if ( newRebuildingData ) {
183
+ await MongoDatasetTraining . create (
184
+ [
185
+ {
186
+ teamId : mongoData . teamId ,
187
+ tmbId : trainingData . tmbId ,
188
+ datasetId : mongoData . datasetId ,
189
+ collectionId : newRebuildingData . collectionId ,
190
+ billId : trainingData . billId ,
191
+ mode : TrainingModeEnum . chunk ,
192
+ model : trainingData . model ,
193
+ q : '1' ,
194
+ dataId : newRebuildingData . _id
195
+ }
196
+ ] ,
197
+ { session }
198
+ ) ;
199
+ }
200
+ } ) ;
201
+
202
+ // update vector, update dataset_data rebuilding status, delete data from training
203
+ // 1. Insert new vector to dataset_data
204
+ const updateResult = await Promise . all (
205
+ mongoData . indexes . map ( async ( index , i ) => {
206
+ const result = await insertDatasetDataVector ( {
207
+ query : index . text ,
208
+ model : getVectorModel ( trainingData . model ) ,
209
+ teamId : mongoData . teamId ,
210
+ datasetId : mongoData . datasetId ,
211
+ collectionId : mongoData . collectionId
212
+ } ) ;
213
+ mongoData . indexes [ i ] . dataId = result . insertId ;
214
+ return result ;
215
+ } )
216
+ ) ;
217
+ const { tokens } = await mongoSessionRun ( async ( session ) => {
218
+ // 2. Ensure that the training data is deleted after the Mongo update is successful
178
219
await mongoData . save ( { session } ) ;
220
+ // 3. Delete the training data
179
221
await trainingData . deleteOne ( { session } ) ;
180
222
181
- // delete old vector
223
+ // 4. Delete old vector
182
224
await deleteDatasetDataVector ( {
183
225
teamId : mongoData . teamId ,
184
226
idList : deleteVectorIdList
@@ -189,59 +231,6 @@ const rebuildData = async ({
189
231
} ;
190
232
} ) ;
191
233
192
- // find next data insert to training queue
193
- const arr = new Array ( 5 ) . fill ( 0 ) ;
194
-
195
- for await ( const _ of arr ) {
196
- try {
197
- const hasNextData = await mongoSessionRun ( async ( session ) => {
198
- // get new mongoData insert to training
199
- const newRebuildingData = await MongoDatasetData . findOneAndUpdate (
200
- {
201
- teamId : mongoData . teamId ,
202
- datasetId : mongoData . datasetId ,
203
- rebuilding : true
204
- } ,
205
- {
206
- $unset : {
207
- rebuilding : null
208
- } ,
209
- updateTime : new Date ( )
210
- } ,
211
- { session }
212
- ) . select ( {
213
- _id : 1 ,
214
- collectionId : 1
215
- } ) ;
216
-
217
- if ( newRebuildingData ) {
218
- await MongoDatasetTraining . create (
219
- [
220
- {
221
- teamId : mongoData . teamId ,
222
- tmbId : trainingData . tmbId ,
223
- datasetId : mongoData . datasetId ,
224
- collectionId : newRebuildingData . collectionId ,
225
- billId : trainingData . billId ,
226
- mode : TrainingModeEnum . chunk ,
227
- model : trainingData . model ,
228
- q : '1' ,
229
- dataId : newRebuildingData . _id
230
- }
231
- ] ,
232
- { session }
233
- ) ;
234
- }
235
-
236
- return ! ! newRebuildingData ;
237
- } ) ;
238
-
239
- if ( ! hasNextData ) {
240
- break ;
241
- }
242
- } catch ( error ) { }
243
- }
244
-
245
234
return { tokens } ;
246
235
} ;
247
236
0 commit comments