"use strict"; var _interopRequireDefault = require("@babel/runtime/helpers/interopRequireDefault"); Object.defineProperty(exports, "__esModule", { value: true }); exports.MemoryUsageService = void 0; var _numeral = _interopRequireDefault(require("@elastic/numeral")); var _lodash = require("lodash"); var _mlIsDefined = require("@kbn/ml-is-defined"); /* * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one * or more contributor license agreements. Licensed under the Elastic License * 2.0; you may not use this file except in compliance with the Elastic License * 2.0. */ // @ts-expect-error numeral missing value const AD_EXTRA_MEMORY = (0, _numeral.default)('10MB').value(); // @ts-expect-error numeral missing value const DFA_EXTRA_MEMORY = (0, _numeral.default)('5MB').value(); const NODE_FIELDS = ['attributes', 'name', 'roles']; class MemoryUsageService { constructor(mlClient) { this.mlClient = mlClient; } async getMemorySizes(itemType, node, showClosedJobs = false) { let memories = []; switch (itemType) { case 'anomaly-detector': memories = await this.getADJobsSizes(); break; case 'data-frame-analytics': memories = await this.getDFAJobsSizes(); break; case 'trained-model': memories = await this.getTrainedModelsSizes(); break; default: memories = [...(await this.getADJobsSizes()), ...(await this.getDFAJobsSizes()), ...(await this.getTrainedModelsSizes())]; break; } return memories.filter(m => nodeFilter(m, node, showClosedJobs)); } async getADJobsSizes() { const jobs = await this.mlClient.getJobStats(); return jobs.jobs.map(this.getADJobMemorySize); } async getTrainedModelsSizes() { const [models, stats] = await Promise.all([this.mlClient.getTrainedModels(), this.mlClient.getTrainedModelsStats()]); const statsMap = stats.trained_model_stats.reduce((acc, cur) => { acc[cur.model_id] = cur; return acc; }, {}); return models.trained_model_configs.map(m => this.getTrainedModelMemorySize(m, statsMap[m.model_id])); } async getDFAJobsSizes() { const [jobs, jobsStats] = await Promise.all([this.mlClient.getDataFrameAnalytics(), this.mlClient.getDataFrameAnalyticsStats()]); const statsMap = jobsStats.data_frame_analytics.reduce((acc, cur) => { acc[cur.id] = cur; return acc; }, {}); return jobs.data_frame_analytics.map(j => this.getDFAJobMemorySize(j, statsMap[j.id])); } getADJobMemorySize(jobStats) { var _ref, _ref2, _jobStats$node; let memory = 0; switch (jobStats.model_size_stats.assignment_memory_basis) { case 'model_memory_limit': memory = (_ref = jobStats.model_size_stats.model_bytes_memory_limit) !== null && _ref !== void 0 ? _ref : 0; break; case 'current_model_bytes': memory = jobStats.model_size_stats.model_bytes; break; case 'peak_model_bytes': memory = (_ref2 = jobStats.model_size_stats.peak_model_bytes) !== null && _ref2 !== void 0 ? _ref2 : 0; break; } const size = memory + AD_EXTRA_MEMORY; const nodeName = (_jobStats$node = jobStats.node) === null || _jobStats$node === void 0 ? void 0 : _jobStats$node.name; return { id: jobStats.job_id, type: 'anomaly-detector', size, nodeNames: nodeName ? [nodeName] : [] }; } getDFAJobMemorySize(job, jobStats) { var _job$model_memory_lim, _jobStats$node2; const mml = (_job$model_memory_lim = job.model_memory_limit) !== null && _job$model_memory_lim !== void 0 ? _job$model_memory_lim : '0mb'; // @ts-expect-error numeral missing value const memory = (0, _numeral.default)(mml.toUpperCase()).value(); const size = memory + DFA_EXTRA_MEMORY; const nodeName = (_jobStats$node2 = jobStats.node) === null || _jobStats$node2 === void 0 ? 
void 0 : _jobStats$node2.name; return { id: jobStats.id, type: 'data-frame-analytics', size, nodeNames: nodeName ? [nodeName] : [] }; } getTrainedModelMemorySize(trainedModel, trainedModelStats) { var _trainedModelStats$de, _trainedModelStats$de2; const memory = trainedModelStats.model_size_stats.required_native_memory_bytes; const size = memory + AD_EXTRA_MEMORY; const nodes = (_trainedModelStats$de = (_trainedModelStats$de2 = trainedModelStats.deployment_stats) === null || _trainedModelStats$de2 === void 0 ? void 0 : _trainedModelStats$de2.nodes) !== null && _trainedModelStats$de !== void 0 ? _trainedModelStats$de : []; return { id: trainedModelStats.model_id, type: 'trained-model', size, nodeNames: nodes.map(n => Object.values(n.node)[0].name) }; } /** * Provides the ML nodes overview with allocated models. */ async getNodesOverview() { // TODO set node_id to ml:true when elasticsearch client is updated. const response = await this.mlClient.getMemoryStats(); const { trained_model_stats: trainedModelStats } = await this.mlClient.getTrainedModelsStats({ size: 10000 }); const mlNodes = Object.entries(response.nodes).filter(([, node]) => node.roles.includes('ml')); const nodeDeploymentStatsResponses = mlNodes.map(([nodeId, node]) => { const nodeFields = (0, _lodash.pick)(node, NODE_FIELDS); nodeFields.attributes = nodeFields.attributes; const allocatedModels = trainedModelStats.filter(d => (0, _mlIsDefined.isDefined)(d.deployment_stats) && (0, _mlIsDefined.isDefined)(d.deployment_stats.nodes) && d.deployment_stats.nodes.some(n => Object.keys(n.node)[0] === nodeId)).map(d => { const modelSizeState = d.model_size_stats; const deploymentStats = d.deployment_stats; if (!deploymentStats || !modelSizeState) { throw new Error('deploymentStats or modelSizeState not defined'); } const { nodes, ...rest } = deploymentStats; const { node: tempNode, ...nodeRest } = nodes.find(v => Object.keys(v.node)[0] === nodeId); return { model_id: d.model_id, ...rest, ...modelSizeState, node: nodeRest, key: `${rest.deployment_id}_${node.name}` }; }); const modelsMemoryUsage = allocatedModels.map(v => { return { model_id: v.model_id, model_size: v.required_native_memory_bytes }; }); const memoryRes = { adTotalMemory: node.mem.ml.anomaly_detectors_in_bytes, dfaTotalMemory: node.mem.ml.data_frame_analytics_in_bytes, trainedModelsTotalMemory: node.mem.ml.native_inference_in_bytes }; for (const key of Object.keys(memoryRes)) { if (memoryRes[key] > 0) { /** * The amount of memory needed to load the ML native code shared libraries. The assumption is that the first * ML job to run on a given node will do this, and then subsequent ML jobs on the same node will reuse the * same already-loaded code. 
*/ memoryRes[key] += node.mem.ml.native_code_overhead_in_bytes; break; } } return { id: nodeId, ...nodeFields, allocated_models: allocatedModels, memory_overview: { machine_memory: { total: node.mem.adjusted_total_in_bytes, jvm: node.jvm.heap_max_in_bytes }, anomaly_detection: { total: memoryRes.adTotalMemory }, dfa_training: { total: memoryRes.dfaTotalMemory }, trained_models: { total: memoryRes.trainedModelsTotalMemory, by_model: modelsMemoryUsage }, ml_max_in_bytes: node.mem.ml.max_in_bytes } }; }); return { // TODO preserve _nodes from the response when getMemoryStats method is updated to support ml:true filter _nodes: { ...response._nodes, total: mlNodes.length, successful: mlNodes.length }, nodes: nodeDeploymentStatsResponses }; } } exports.MemoryUsageService = MemoryUsageService; function nodeFilter(m, node, showClosedJobs = false) { if (m.nodeNames.length === 0) { return showClosedJobs; } if (node === undefined) { return true; } return m.nodeNames.includes(node); }
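
/*
 * Usage sketch (illustrative only, not part of this module): `mlClient` stands
 * in for the ML client wrapper this service is constructed with; only the
 * methods actually called above are assumed to exist on it, and the node name
 * below is hypothetical.
 *
 *   const service = new MemoryUsageService(mlClient);
 *
 *   // Memory footprints of anomaly detection jobs assigned to one node.
 *   const adSizes = await service.getMemorySizes('anomaly-detector', 'ml-node-1');
 *
 *   // All item types, including closed jobs that have no node assignment.
 *   const allSizes = await service.getMemorySizes(undefined, undefined, true);
 *
 *   // Per-node memory overview with the models allocated to each ML node.
 *   const overview = await service.getNodesOverview();
 */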