10
10
11
11
import java .time .Instant ;
12
12
import java .util .Arrays ;
13
+ import java .util .HashMap ;
13
14
import java .util .List ;
14
15
import java .util .Map ;
15
16
import java .util .stream .Collectors ;
16
17
17
18
import org .opensearch .ExceptionsHelper ;
18
19
import org .opensearch .OpenSearchStatusException ;
19
20
import org .opensearch .action .ActionRequest ;
20
- import org .opensearch .action .bulk .BulkRequest ;
21
+ import org .opensearch .action .bulk .BulkItemResponse ;
21
22
import org .opensearch .action .bulk .BulkResponse ;
22
23
import org .opensearch .action .search .SearchRequest ;
23
24
import org .opensearch .action .search .SearchResponse ;
24
25
import org .opensearch .action .support .ActionFilters ;
25
26
import org .opensearch .action .support .HandledTransportAction ;
26
27
import org .opensearch .action .support .WriteRequest ;
27
- import org .opensearch .action .update .UpdateRequest ;
28
28
import org .opensearch .client .Client ;
29
29
import org .opensearch .cluster .service .ClusterService ;
30
30
import org .opensearch .common .inject .Inject ;
56
56
import org .opensearch .ml .task .MLTaskManager ;
57
57
import org .opensearch .ml .utils .RestActionUtils ;
58
58
import org .opensearch .ml .utils .TenantAwareHelper ;
59
+ import org .opensearch .remote .metadata .client .BulkDataObjectRequest ;
59
60
import org .opensearch .remote .metadata .client .SdkClient ;
60
61
import org .opensearch .remote .metadata .client .SearchDataObjectRequest ;
62
+ import org .opensearch .remote .metadata .client .UpdateDataObjectRequest ;
61
63
import org .opensearch .remote .metadata .common .SdkClientUtils ;
62
64
import org .opensearch .search .SearchHit ;
63
65
import org .opensearch .search .builder .SearchSourceBuilder ;
66
68
import org .opensearch .transport .TransportService ;
67
69
68
70
import com .google .common .annotations .VisibleForTesting ;
69
- import com .google .common .collect .ImmutableMap ;
70
71
71
72
import lombok .extern .log4j .Log4j2 ;
72
73
@@ -213,7 +214,13 @@ private void undeployModels(
213
214
return modelCacheMissForModelIds ;
214
215
});
215
216
if (response .getNodes ().isEmpty () || modelNotFoundInNodesCache ) {
216
- bulkSetModelIndexToUndeploy (modelIds , listener , response );
217
+ log
218
+ .warn (
219
+ "Model undeployment fallback: No active nodes found for models {}."
220
+ + " Proceeding with manual index update to UNDEPLOY state." ,
221
+ Arrays .toString (modelIds )
222
+ );
223
+ bulkSetModelIndexToUndeploy (modelIds , tenantId , listener , response );
217
224
return ;
218
225
}
219
226
listener .onResponse (new MLUndeployModelsResponse (response ));
@@ -222,34 +229,39 @@ private void undeployModels(
222
229
223
230
private void bulkSetModelIndexToUndeploy (
224
231
String [] modelIds ,
232
+ String tenantId ,
225
233
ActionListener <MLUndeployModelsResponse > listener ,
226
- MLUndeployModelNodesResponse response
234
+ MLUndeployModelNodesResponse mlUndeployModelNodesResponse
227
235
) {
228
- BulkRequest bulkUpdateRequest = new BulkRequest ();
236
+ BulkDataObjectRequest bulkRequest = BulkDataObjectRequest .builder ().globalIndex (ML_MODEL_INDEX ).build ();
237
+
229
238
for (String modelId : modelIds ) {
230
- UpdateRequest updateRequest = new UpdateRequest ();
231
239
232
- ImmutableMap .Builder <String , Object > builder = ImmutableMap .builder ();
233
- builder .put (MLModel .MODEL_STATE_FIELD , MLModelState .UNDEPLOYED .name ());
240
+ Map <String , Object > updateDocument = new HashMap <>();
234
241
235
- builder .put (MLModel .PLANNING_WORKER_NODES_FIELD , List .of ());
236
- builder .put (MLModel .PLANNING_WORKER_NODE_COUNT_FIELD , 0 );
242
+ updateDocument .put (MLModel .MODEL_STATE_FIELD , MLModelState .UNDEPLOYED .name ());
243
+ updateDocument .put (MLModel .PLANNING_WORKER_NODES_FIELD , List .of ());
244
+ updateDocument .put (MLModel .PLANNING_WORKER_NODE_COUNT_FIELD , 0 );
245
+ updateDocument .put (MLModel .LAST_UPDATED_TIME_FIELD , Instant .now ().toEpochMilli ());
246
+ updateDocument .put (MLModel .CURRENT_WORKER_NODE_COUNT_FIELD , 0 );
237
247
238
- builder .put (MLModel .LAST_UPDATED_TIME_FIELD , Instant .now ().toEpochMilli ());
239
- builder .put (MLModel .CURRENT_WORKER_NODE_COUNT_FIELD , 0 );
240
- updateRequest .index (ML_MODEL_INDEX ).id (modelId ).doc (builder .build ());
241
- bulkUpdateRequest .add (updateRequest );
248
+ UpdateDataObjectRequest updateRequest = UpdateDataObjectRequest
249
+ .builder ()
250
+ .id (modelId )
251
+ .tenantId (tenantId )
252
+ .dataObject (updateDocument )
253
+ .build ();
254
+ bulkRequest .add (updateRequest ).setRefreshPolicy (WriteRequest .RefreshPolicy .IMMEDIATE );
242
255
}
243
256
244
- bulkUpdateRequest .setRefreshPolicy (WriteRequest .RefreshPolicy .IMMEDIATE );
245
257
log .info ("No nodes running these models: {}" , Arrays .toString (modelIds ));
246
258
247
259
try (ThreadContext .StoredContext threadContext = client .threadPool ().getThreadContext ().stashContext ()) {
248
260
ActionListener <MLUndeployModelsResponse > listenerWithContextRestoration = ActionListener
249
261
.runBefore (listener , () -> threadContext .restore ());
262
+
250
263
ActionListener <BulkResponse > bulkResponseListener = ActionListener .wrap (br -> {
251
- log .debug ("Successfully set the following modelId(s) to UNDEPLOY in index: {}" , Arrays .toString (modelIds ));
252
- listenerWithContextRestoration .onResponse (new MLUndeployModelsResponse (response ));
264
+ listenerWithContextRestoration .onResponse (new MLUndeployModelsResponse (mlUndeployModelNodesResponse ));
253
265
}, e -> {
254
266
String modelsNotFoundMessage = String
255
267
.format ("Failed to set the following modelId(s) to UNDEPLOY in index: %s" , Arrays .toString (modelIds ));
@@ -262,7 +274,40 @@ private void bulkSetModelIndexToUndeploy(
262
274
listenerWithContextRestoration .onFailure (exception );
263
275
});
264
276
265
- client .bulk (bulkUpdateRequest , bulkResponseListener );
277
+ sdkClient .bulkDataObjectAsync (bulkRequest ).whenComplete ((response , exception ) -> {
278
+ if (exception != null ) {
279
+ Exception cause = SdkClientUtils .unwrapAndConvertToException (exception , OpenSearchStatusException .class );
280
+ bulkResponseListener .onFailure (cause );
281
+ return ;
282
+ }
283
+
284
+ try {
285
+ BulkResponse bulkResponse = BulkResponse .fromXContent (response .parser ());
286
+ log
287
+ .info (
288
+ "Executed {} bulk operations with {} failures, Took: {}" ,
289
+ bulkResponse .getItems ().length ,
290
+ bulkResponse .hasFailures ()
291
+ ? Arrays .stream (bulkResponse .getItems ()).filter (BulkItemResponse ::isFailed ).count ()
292
+ : 0 ,
293
+ bulkResponse .getTook ()
294
+ );
295
+ List <String > unemployedModelIds = Arrays
296
+ .stream (bulkResponse .getItems ())
297
+ .filter (bulkItemResponse -> !bulkItemResponse .isFailed ())
298
+ .map (BulkItemResponse ::getId )
299
+ .collect (Collectors .toList ());
300
+ log
301
+ .debug (
302
+ "Successfully set the following modelId(s) to UNDEPLOY in index: {}" ,
303
+ Arrays .toString (unemployedModelIds .toArray ())
304
+ );
305
+
306
+ bulkResponseListener .onResponse (bulkResponse );
307
+ } catch (Exception e ) {
308
+ bulkResponseListener .onFailure (e );
309
+ }
310
+ });
266
311
} catch (Exception e ) {
267
312
log .error ("Unexpected error while setting the following modelId(s) to UNDEPLOY in index: {}" , Arrays .toString (modelIds ), e );
268
313
listener .onFailure (e );
0 commit comments