"use strict";

var _interopRequireDefault = require("@babel/runtime/helpers/interopRequireDefault");
Object.defineProperty(exports, "__esModule", {
  value: true
});
exports.DataVisualizer = void 0;
var _defineProperty2 = _interopRequireDefault(require("@babel/runtime/helpers/defineProperty"));
var _lodash = require("lodash");
var _mlAggUtils = require("@kbn/ml-agg-utils");
var _mlIsPopulatedObject = require("@kbn/ml-is-populated-object");
var _mlQueryUtils = require("@kbn/ml-query-utils");
var _mlAnomalyUtils = require("@kbn/ml-anomaly-utils");
var _datafeed_utils = require("../../../common/util/datafeed_utils");
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

// Field cardinality at or above which top-terms are computed on a sample,
// even when the caller did not ask for sampling.
const SAMPLER_TOP_TERMS_THRESHOLD = 100000;
// Shard size of the dedicated top-terms sampler used for such fields.
const SAMPLER_TOP_TERMS_SHARD_SIZE = 5000;
// Maximum number of aggregatable fields checked for existence per search request.
const AGGREGATABLE_EXISTS_REQUEST_BATCH_SIZE = 200;
// Maximum number of same-typed fields whose stats are fetched per search request.
const FIELDS_REQUEST_BATCH_SIZE = 10;

/**
 * Returns true when `obj` is neither null nor undefined and has `key` as an
 * own property. Uses Object.prototype.hasOwnProperty so that objects which
 * shadow or lack `hasOwnProperty` are handled.
 */
function hasOwn(obj, key) {
  return obj !== null && obj !== undefined && Object.prototype.hasOwnProperty.call(obj, key);
}

/**
 * Appends `item` to the last batch in `batches`, starting a new batch first
 * when the last one already holds `maxBatchSize` items.
 * `batches` must contain at least one (possibly empty) batch.
 */
function pushIntoBatches(batches, item, maxBatchSize) {
  let current = (0, _lodash.last)(batches);
  if (current.length === maxBatchSize) {
    current = [];
    batches.push(current);
  }
  current.push(item);
}

/**
 * Returns the `runtime_mappings` portion of a search body: an object to spread
 * into the body, empty when no runtime mappings were supplied.
 */
function runtimeMappingsClause(runtimeMappings) {
  return (0, _mlIsPopulatedObject.isPopulatedObject)(runtimeMappings) ? {
    runtime_mappings: runtimeMappings
  } : {};
}

/**
 * True when the top-terms aggregation for `field` has to run inside its own
 * sampler: no sampling was requested (samplerShardSize < 1) but the field
 * cardinality reaches SAMPLER_TOP_TERMS_THRESHOLD.
 * Shared by the request builder and the response reader so they cannot drift apart.
 */
function usesDedicatedTopTermsSampler(field, samplerShardSize) {
  return samplerShardSize < 1 && field.cardinality >= SAMPLER_TOP_TERMS_THRESHOLD;
}

/**
 * Builds the top-10 terms aggregation for `field`, nested as `top` inside a
 * sampler aggregation when usesDedicatedTopTermsSampler() applies.
 */
function buildTopTermsAggregation(field, samplerShardSize) {
  const top = {
    terms: {
      field: field.fieldName,
      size: 10,
      order: {
        _count: 'desc'
      }
    }
  };
  if (usesDedicatedTopTermsSampler(field, samplerShardSize)) {
    return {
      sampler: {
        shard_size: SAMPLER_TOP_TERMS_SHARD_SIZE
      },
      aggs: {
        top
      }
    };
  }
  return top;
}

/**
 * Reads the result of the aggregation built by buildTopTermsAggregation() and
 * returns the top-values related stats properties for `field`.
 */
function getTopValuesStats(aggregations, aggsPath, safeFieldName, field, samplerShardSize) {
  const topAggsPath = [...aggsPath, `${safeFieldName}_top`];
  if (usesDedicatedTopTermsSampler(field, samplerShardSize)) {
    // The terms aggregation is nested under the dedicated sampler as 'top'.
    topAggsPath.push('top');
  }
  const topValues = (0, _lodash.get)(aggregations, [...topAggsPath, 'buckets'], []);
  const isHighCardinality = field.cardinality >= SAMPLER_TOP_TERMS_THRESHOLD;
  return {
    isTopValuesSampled: isHighCardinality || samplerShardSize > 0,
    topValues,
    // Documents in the returned buckets plus those in buckets that were not returned.
    topValuesSampleSize: topValues.reduce((acc, curr) => acc + curr.doc_count, (0, _lodash.get)(aggregations, [...topAggsPath, 'sum_other_doc_count'], 0)),
    topValuesSamplerShardSize: isHighCardinality ? SAMPLER_TOP_TERMS_SHARD_SIZE : samplerShardSize
  };
}

/**
 * Fetches field-level statistics from Elasticsearch as the current user.
 * All searches are issued with `maxRetries: 0`.
 */
class DataVisualizer {
  /**
   * @param client object whose `asCurrentUser` property exposes a `search` method.
   */
  constructor(client) {
    (0, _defineProperty2.default)(this, "_asCurrentUser", void 0);
    this._asCurrentUser = client.asCurrentUser;
  }

  /**
   * Obtains overall stats on the fields in the supplied index pattern.
   * Sampling will be used if supplied samplerShardSize > 0.
   *
   * Existence checks for aggregatable fields are split into batches of at most
   * AGGREGATABLE_EXISTS_REQUEST_BATCH_SIZE fields; one request is sent even when
   * no aggregatable fields are supplied, so that the total count is obtained.
   * Entries in the returned arrays follow the order of the supplied field lists.
   *
   * @param indexPatternTitle index (pattern) to search
   * @param query query passed to buildBaseFilterCriteria
   * @param aggregatableFields names of aggregatable fields to check
   * @param nonAggregatableFields names of non-aggregatable fields to check
   * @param samplerShardSize sampler shard size; values < 1 disable sampling
   * @param timeFieldName time field used for the time range filter
   * @param earliestMs start of the time range
   * @param latestMs end of the time range
   * @param runtimeMappings optional runtime mappings added to each search
   * @returns object with `totalCount` and the arrays `aggregatableExistsFields`,
   *   `aggregatableNotExistsFields`, `nonAggregatableExistsFields` and
   *   `nonAggregatableNotExistsFields`
   */
  async getOverallStats(indexPatternTitle, query, aggregatableFields, nonAggregatableFields, samplerShardSize, timeFieldName, earliestMs, latestMs, runtimeMappings) {
    const stats = {
      totalCount: 0,
      aggregatableExistsFields: [],
      aggregatableNotExistsFields: [],
      nonAggregatableExistsFields: [],
      nonAggregatableNotExistsFields: []
    };

    // To avoid checking for the existence of too many aggregatable fields in one request,
    // split the check into multiple batches (max 200 fields per request).
    const batches = [[]];
    (0, _lodash.each)(aggregatableFields, field => {
      pushIntoBatches(batches, field, AGGREGATABLE_EXISTS_REQUEST_BATCH_SIZE);
    });
    const batchResults = await Promise.all(batches.map(fields => this.checkAggregatableFieldsExist(indexPatternTitle, query, fields, samplerShardSize, timeFieldName, earliestMs, latestMs, undefined, runtimeMappings)));
    // Merge after all requests have resolved so the output order follows the
    // input order rather than the order in which responses arrive.
    for (const batchStats of batchResults) {
      // Total count will be returned with each batch of fields. Just overwrite.
      stats.totalCount = batchStats.totalCount;

      // Add to the lists of fields which do and do not exist.
      stats.aggregatableExistsFields.push(...batchStats.aggregatableExistsFields);
      stats.aggregatableNotExistsFields.push(...batchStats.aggregatableNotExistsFields);
    }
    const existsFlags = await Promise.all(nonAggregatableFields.map(field => this.checkNonAggregatableFieldExists(indexPatternTitle, query, field, timeFieldName, earliestMs, latestMs, runtimeMappings)));
    nonAggregatableFields.forEach((field, i) => {
      const existsInDocs = existsFlags[i];
      const fieldData = {
        fieldName: field,
        existsInDocs,
        stats: {}
      };
      if (existsInDocs === true) {
        stats.nonAggregatableExistsFields.push(fieldData);
      } else {
        stats.nonAggregatableNotExistsFields.push(fieldData);
      }
    });
    return stats;
  }

  /**
   * Obtains binned histograms for the supplied list of fields by delegating to
   * fetchHistogramsForFields from @kbn/ml-agg-utils.
   * Sampling will be used if supplied samplerShardSize > 0.
   */
  async getHistogramsForFields(indexPattern, query, fields, samplerShardSize, runtimeMappings) {
    return await (0, _mlAggUtils.fetchHistogramsForFields)(this._asCurrentUser, indexPattern, query, fields, samplerShardSize, runtimeMappings);
  }

  /**
   * Obtains statistics for the supplied list of fields. The statistics for each
   * field in the returned array depend on the type of the field (keyword,
   * number, date etc). Sampling will be used if supplied samplerShardSize > 0.
   *
   * Fields are grouped by type into batches of at most FIELDS_REQUEST_BATCH_SIZE.
   * A field with an undefined `fieldName` is a document count request; it is
   * only processed when both `timeFieldName` and `intervalMs` are defined.
   * Results are returned in batch order: document count requests first, then
   * field types in the order they are first encountered in `fields`.
   *
   * @param fields objects with `fieldName`, `type` and (for top values) `cardinality`
   * @param intervalMs date histogram interval for the document count request
   * @param maxExamples maximum number of examples returned per text-like field
   * @returns array of per-field stats objects
   */
  async getStatsForFields(indexPatternTitle, query, fields, samplerShardSize, timeFieldName, earliestMs, latestMs, intervalMs, maxExamples, runtimeMappings) {
    // Batch up fields by type, getting stats for multiple fields at a time.
    const batches = [];
    const batchedFields = {};
    (0, _lodash.each)(fields, field => {
      if (field.fieldName === undefined) {
        // undefined fieldName is used for a document count request.
        // getDocumentCountStats requires timeField - don't add to batched requests if not defined
        if (timeFieldName !== undefined) {
          batches.push([field]);
        }
        return;
      }
      const fieldType = field.type;
      if (batchedFields[fieldType] === undefined) {
        batchedFields[fieldType] = [[]];
      }
      pushIntoBatches(batchedFields[fieldType], field, FIELDS_REQUEST_BATCH_SIZE);
    });
    (0, _lodash.each)(batchedFields, lists => {
      batches.push(...lists);
    });
    const perBatchStats = await Promise.all(batches.map(async batch => {
      const first = batch[0];
      switch (first.type) {
        case _mlAnomalyUtils.ML_JOB_FIELD_TYPES.NUMBER:
          // undefined fieldName is used for a document count request.
          if (first.fieldName !== undefined) {
            return this.getNumericFieldsStats(indexPatternTitle, query, batch, samplerShardSize, timeFieldName, earliestMs, latestMs, runtimeMappings);
          }
          // Will only ever be one document count card,
          // so no value in batching up the single request.
          if (intervalMs !== undefined) {
            return [await this.getDocumentCountStats(indexPatternTitle, query, timeFieldName, earliestMs, latestMs, intervalMs, runtimeMappings)];
          }
          return [];
        case _mlAnomalyUtils.ML_JOB_FIELD_TYPES.KEYWORD:
        case _mlAnomalyUtils.ML_JOB_FIELD_TYPES.IP:
          return this.getStringFieldsStats(indexPatternTitle, query, batch, samplerShardSize, timeFieldName, earliestMs, latestMs, runtimeMappings);
        case _mlAnomalyUtils.ML_JOB_FIELD_TYPES.DATE:
          return this.getDateFieldsStats(indexPatternTitle, query, batch, samplerShardSize, timeFieldName, earliestMs, latestMs, runtimeMappings);
        case _mlAnomalyUtils.ML_JOB_FIELD_TYPES.BOOLEAN:
          return this.getBooleanFieldsStats(indexPatternTitle, query, batch, samplerShardSize, timeFieldName, earliestMs, latestMs, runtimeMappings);
        case _mlAnomalyUtils.ML_JOB_FIELD_TYPES.TEXT:
        default:
          // Use an exists filter on the field name to get
          // examples of the field, so cannot batch up.
          return Promise.all(batch.map(field => this.getFieldExamples(indexPatternTitle, query, field.fieldName, timeFieldName, earliestMs, latestMs, maxExamples, runtimeMappings)));
      }
    }));
    const results = [];
    for (const batchStats of perBatchStats) {
      results.push(...batchStats);
    }
    return results;
  }

  /**
   * Checks, in a single search, which of the supplied aggregatable fields exist
   * in documents matching the query and time range, and obtains the count and
   * cardinality of those which do.
   *
   * A field with no matching documents is still reported as existing when it is
   * defined in the datafeed's `script_fields` or `runtime_mappings`.
   *
   * @param aggregatableFields names of the fields to check
   * @param datafeedConfig optional datafeed config; its aggregations (as
   *   returned by getDatafeedAggregations), `runtime_mappings` and
   *   `script_fields` are taken into account
   * @returns object with `totalCount`, `aggregatableExistsFields` and
   *   `aggregatableNotExistsFields`
   */
  async checkAggregatableFieldsExist(indexPatternTitle, query, aggregatableFields, samplerShardSize, timeFieldName, earliestMs, latestMs, datafeedConfig, runtimeMappings) {
    const index = indexPatternTitle;
    const size = 0;
    const filterCriteria = (0, _mlQueryUtils.buildBaseFilterCriteria)(timeFieldName, earliestMs, latestMs, query);
    const datafeedAggregations = (0, _datafeed_utils.getDatafeedAggregations)(datafeedConfig);
    const hasDatafeedConfig = datafeedConfig !== null && datafeedConfig !== undefined;
    const scriptFields = hasDatafeedConfig ? datafeedConfig.script_fields : undefined;
    const datafeedRuntimeMappings = hasDatafeedConfig ? datafeedConfig.runtime_mappings : undefined;

    // Start from a copy of the datafeed aggregations (if any) and add, per field,
    // a filter aggregation with an exists query plus a cardinality aggregation.
    const aggs = datafeedAggregations !== undefined ? {
      ...datafeedAggregations
    } : {};

    // Combine runtime fields from the index pattern as well as the datafeed
    const combinedRuntimeMappings = {
      ...((0, _mlIsPopulatedObject.isPopulatedObject)(runtimeMappings) ? runtimeMappings : {}),
      ...((0, _mlIsPopulatedObject.isPopulatedObject)(datafeedConfig) && (0, _mlIsPopulatedObject.isPopulatedObject)(datafeedConfig.runtime_mappings) ? datafeedConfig.runtime_mappings : {})
    };
    aggregatableFields.forEach((field, i) => {
      const safeFieldName = (0, _mlQueryUtils.getSafeAggregationName)(field, i);
      aggs[`${safeFieldName}_count`] = {
        filter: {
          exists: {
            field
          }
        }
      };
      // Script fields are not indexed, so their cardinality is computed from the script.
      aggs[`${safeFieldName}_cardinality`] = hasOwn(scriptFields, field) ? {
        cardinality: {
          script: scriptFields[field].script
        }
      } : {
        cardinality: {
          field
        }
      };
    });
    const searchBody = {
      query: {
        bool: {
          filter: filterCriteria
        }
      },
      ...((0, _mlIsPopulatedObject.isPopulatedObject)(aggs) ? {
        aggs: (0, _mlAggUtils.buildSamplerAggregation)(aggs, samplerShardSize)
      } : {}),
      ...((0, _mlIsPopulatedObject.isPopulatedObject)(combinedRuntimeMappings) ? {
        runtime_mappings: combinedRuntimeMappings
      } : {})
    };
    const body = await this._asCurrentUser.search({
      index,
      track_total_hits: true,
      size,
      body: searchBody
    }, {
      maxRetries: 0
    });
    const aggregations = body.aggregations;
    const totalCount = body.hits.total.value;
    const stats = {
      totalCount,
      aggregatableExistsFields: [],
      aggregatableNotExistsFields: []
    };
    const aggsPath = (0, _mlAggUtils.getSamplerAggregationsResponsePath)(samplerShardSize);
    const sampleCount = samplerShardSize > 0 ? (0, _lodash.get)(aggregations, ['sample', 'doc_count'], 0) : totalCount;
    aggregatableFields.forEach((field, i) => {
      const safeFieldName = (0, _mlQueryUtils.getSafeAggregationName)(field, i);
      const count = (0, _lodash.get)(aggregations, [...aggsPath, `${safeFieldName}_count`, 'doc_count'], 0);
      const isDefinedByDatafeed = hasOwn(scriptFields, field) || hasOwn(datafeedRuntimeMappings, field);
      if (count > 0 || isDefinedByDatafeed) {
        const cardinality = (0, _lodash.get)(aggregations, [...aggsPath, `${safeFieldName}_cardinality`, 'value'], 0);
        stats.aggregatableExistsFields.push({
          fieldName: field,
          existsInDocs: true,
          stats: {
            sampleCount,
            count,
            cardinality
          }
        });
      } else {
        stats.aggregatableNotExistsFields.push({
          fieldName: field,
          existsInDocs: false
        });
      }
    });
    return stats;
  }

  /**
   * Checks whether at least one document matching the query and time range
   * contains the supplied field.
   *
   * @param field name of the field to check
   * @returns true if the search reports a total hit count greater than 0
   */
  async checkNonAggregatableFieldExists(indexPatternTitle, query, field, timeFieldName, earliestMs, latestMs, runtimeMappings) {
    const index = indexPatternTitle;
    const size = 0;
    const filterCriteria = (0, _mlQueryUtils.buildBaseFilterCriteria)(timeFieldName, earliestMs, latestMs, query);
    filterCriteria.push({
      exists: {
        field
      }
    });
    const searchBody = {
      query: {
        bool: {
          filter: filterCriteria
        }
      },
      ...runtimeMappingsClause(runtimeMappings)
    };
    const body = await this._asCurrentUser.search({
      index,
      size,
      body: searchBody
    }, {
      maxRetries: 0
    });
    return body.hits.total.value > 0;
  }

  /**
   * Obtains the document count over time as a date histogram on the time field.
   *
   * @param intervalMs histogram interval in milliseconds
   * @returns `{ documentCounts: { interval, buckets } }` where `buckets` maps
   *   each bucket key to its document count; empty buckets are omitted
   *   (min_doc_count is 1)
   */
  async getDocumentCountStats(indexPatternTitle, query, timeFieldName, earliestMs, latestMs, intervalMs, runtimeMappings) {
    const index = indexPatternTitle;
    const size = 0;
    const filterCriteria = (0, _mlQueryUtils.buildBaseFilterCriteria)(timeFieldName, earliestMs, latestMs, query);

    // Don't use the sampler aggregation as this can lead to some potentially
    // confusing date histogram results depending on the date range of data amongst shards.
    const aggs = {
      eventRate: {
        date_histogram: {
          field: timeFieldName,
          fixed_interval: `${intervalMs}ms`,
          min_doc_count: 1
        }
      }
    };
    const searchBody = {
      query: {
        bool: {
          filter: filterCriteria
        }
      },
      aggs,
      ...runtimeMappingsClause(runtimeMappings)
    };
    const body = await this._asCurrentUser.search({
      index,
      size,
      body: searchBody
    }, {
      maxRetries: 0
    });
    const buckets = {};
    const dataByTimeBucket = (0, _lodash.get)(body, ['aggregations', 'eventRate', 'buckets'], []);
    (0, _lodash.each)(dataByTimeBucket, dataForTime => {
      buckets[dataForTime.key] = dataForTime.doc_count;
    });
    return {
      documentCounts: {
        interval: intervalMs,
        buckets
      }
    };
  }

  /**
   * Obtains stats for a batch of numeric fields in a single search: count, min,
   * max, avg, top values and, for fields with at least one document, the median
   * and the percentile distribution.
   *
   * Top values of fields whose `cardinality` reaches SAMPLER_TOP_TERMS_THRESHOLD
   * are computed inside a dedicated sampler when no sampling was requested.
   *
   * @param fields objects with `fieldName` and `cardinality`
   * @returns array of stats objects, one per field, in the order of `fields`
   */
  async getNumericFieldsStats(indexPatternTitle, query, fields, samplerShardSize, timeFieldName, earliestMs, latestMs, runtimeMappings) {
    const index = indexPatternTitle;
    const size = 0;
    const filterCriteria = (0, _mlQueryUtils.buildBaseFilterCriteria)(timeFieldName, earliestMs, latestMs, query);

    // Build the percents parameter which defines the percentiles to query
    // for the metric distribution data.
    // Use a fixed percentile spacing of 5%, i.e. 5, 10, ..., 100.
    const MAX_PERCENT = 100;
    const PERCENTILE_SPACING = 5;
    const percents = Array.from({
      length: MAX_PERCENT / PERCENTILE_SPACING
    }, (_unused, i) => (i + 1) * PERCENTILE_SPACING);
    const aggs = {};
    fields.forEach((field, i) => {
      const safeFieldName = (0, _mlQueryUtils.getSafeAggregationName)(field.fieldName, i);
      aggs[`${safeFieldName}_field_stats`] = {
        filter: {
          exists: {
            field: field.fieldName
          }
        },
        aggs: {
          actual_stats: {
            stats: {
              field: field.fieldName
            }
          }
        }
      };
      aggs[`${safeFieldName}_percentiles`] = {
        percentiles: {
          field: field.fieldName,
          percents,
          keyed: false
        }
      };
      aggs[`${safeFieldName}_top`] = buildTopTermsAggregation(field, samplerShardSize);
    });
    const searchBody = {
      query: {
        bool: {
          filter: filterCriteria
        }
      },
      aggs: (0, _mlAggUtils.buildSamplerAggregation)(aggs, samplerShardSize),
      ...runtimeMappingsClause(runtimeMappings)
    };
    const body = await this._asCurrentUser.search({
      index,
      size,
      body: searchBody
    }, {
      maxRetries: 0
    });
    const aggregations = body.aggregations;
    const aggsPath = (0, _mlAggUtils.getSamplerAggregationsResponsePath)(samplerShardSize);
    return fields.map((field, i) => {
      const safeFieldName = (0, _mlQueryUtils.getSafeAggregationName)(field.fieldName, i);
      const fieldStatsPath = [...aggsPath, `${safeFieldName}_field_stats`];
      const docCount = (0, _lodash.get)(aggregations, [...fieldStatsPath, 'doc_count'], 0);
      const fieldStatsResp = (0, _lodash.get)(aggregations, [...fieldStatsPath, 'actual_stats'], {});
      const stats = {
        fieldName: field.fieldName,
        count: docCount,
        min: (0, _lodash.get)(fieldStatsResp, 'min', 0),
        max: (0, _lodash.get)(fieldStatsResp, 'max', 0),
        avg: (0, _lodash.get)(fieldStatsResp, 'avg', 0),
        ...getTopValuesStats(aggregations, aggsPath, safeFieldName, field, samplerShardSize)
      };
      if (stats.count > 0) {
        const percentiles = (0, _lodash.get)(aggregations, [...aggsPath, `${safeFieldName}_percentiles`, 'values'], []);
        const medianPercentile = (0, _lodash.find)(percentiles, {
          key: 50
        });
        stats.median = medianPercentile !== undefined ? medianPercentile.value : 0;
        stats.distribution = this.processDistributionData(percentiles, PERCENTILE_SPACING, stats.min);
      }
      return stats;
    });
  }

  /**
   * Obtains the top values for a batch of string-like fields in a single search.
   *
   * Top values of fields whose `cardinality` reaches SAMPLER_TOP_TERMS_THRESHOLD
   * are computed inside a dedicated sampler when no sampling was requested.
   *
   * @param fields objects with `fieldName` and `cardinality`
   * @returns array of stats objects, one per field, in the order of `fields`
   */
  async getStringFieldsStats(indexPatternTitle, query, fields, samplerShardSize, timeFieldName, earliestMs, latestMs, runtimeMappings) {
    const index = indexPatternTitle;
    const size = 0;
    const filterCriteria = (0, _mlQueryUtils.buildBaseFilterCriteria)(timeFieldName, earliestMs, latestMs, query);
    const aggs = {};
    fields.forEach((field, i) => {
      const safeFieldName = (0, _mlQueryUtils.getSafeAggregationName)(field.fieldName, i);
      aggs[`${safeFieldName}_top`] = buildTopTermsAggregation(field, samplerShardSize);
    });
    const searchBody = {
      query: {
        bool: {
          filter: filterCriteria
        }
      },
      aggs: (0, _mlAggUtils.buildSamplerAggregation)(aggs, samplerShardSize),
      ...runtimeMappingsClause(runtimeMappings)
    };
    const body = await this._asCurrentUser.search({
      index,
      size,
      body: searchBody
    }, {
      maxRetries: 0
    });
    const aggregations = body.aggregations;
    const aggsPath = (0, _mlAggUtils.getSamplerAggregationsResponsePath)(samplerShardSize);
    return fields.map((field, i) => {
      const safeFieldName = (0, _mlQueryUtils.getSafeAggregationName)(field.fieldName, i);
      return {
        fieldName: field.fieldName,
        ...getTopValuesStats(aggregations, aggsPath, safeFieldName, field, samplerShardSize)
      };
    });
  }

  /**
   * Obtains count, earliest and latest values for a batch of date fields in a
   * single search.
   *
   * @param fields objects with `fieldName`
   * @returns array of `{ fieldName, count, earliest, latest }`, in the order of `fields`
   */
  async getDateFieldsStats(indexPatternTitle, query, fields, samplerShardSize, timeFieldName, earliestMs, latestMs, runtimeMappings) {
    const index = indexPatternTitle;
    const size = 0;
    const filterCriteria = (0, _mlQueryUtils.buildBaseFilterCriteria)(timeFieldName, earliestMs, latestMs, query);
    const aggs = {};
    fields.forEach((field, i) => {
      const safeFieldName = (0, _mlQueryUtils.getSafeAggregationName)(field.fieldName, i);
      aggs[`${safeFieldName}_field_stats`] = {
        filter: {
          exists: {
            field: field.fieldName
          }
        },
        aggs: {
          actual_stats: {
            stats: {
              field: field.fieldName
            }
          }
        }
      };
    });
    const searchBody = {
      query: {
        bool: {
          filter: filterCriteria
        }
      },
      aggs: (0, _mlAggUtils.buildSamplerAggregation)(aggs, samplerShardSize),
      ...runtimeMappingsClause(runtimeMappings)
    };
    const body = await this._asCurrentUser.search({
      index,
      size,
      body: searchBody
    }, {
      maxRetries: 0
    });
    const aggregations = body.aggregations;
    const aggsPath = (0, _mlAggUtils.getSamplerAggregationsResponsePath)(samplerShardSize);
    return fields.map((field, i) => {
      const safeFieldName = (0, _mlQueryUtils.getSafeAggregationName)(field.fieldName, i);
      const fieldStatsPath = [...aggsPath, `${safeFieldName}_field_stats`];
      const docCount = (0, _lodash.get)(aggregations, [...fieldStatsPath, 'doc_count'], 0);
      const fieldStatsResp = (0, _lodash.get)(aggregations, [...fieldStatsPath, 'actual_stats'], {});
      return {
        fieldName: field.fieldName,
        count: docCount,
        earliest: (0, _lodash.get)(fieldStatsResp, 'min', 0),
        latest: (0, _lodash.get)(fieldStatsResp, 'max', 0)
      };
    });
  }

  /**
   * Obtains the document count and the counts of true/false values for a batch
   * of boolean fields in a single search.
   *
   * @param fields objects with `fieldName`
   * @returns array of `{ fieldName, count, trueCount, falseCount }`, in the
   *   order of `fields`; each terms bucket sets the `<key_as_string>Count` property
   */
  async getBooleanFieldsStats(indexPatternTitle, query, fields, samplerShardSize, timeFieldName, earliestMs, latestMs, runtimeMappings) {
    const index = indexPatternTitle;
    const size = 0;
    const filterCriteria = (0, _mlQueryUtils.buildBaseFilterCriteria)(timeFieldName, earliestMs, latestMs, query);
    const aggs = {};
    fields.forEach((field, i) => {
      const safeFieldName = (0, _mlQueryUtils.getSafeAggregationName)(field.fieldName, i);
      aggs[`${safeFieldName}_value_count`] = {
        filter: {
          exists: {
            field: field.fieldName
          }
        }
      };
      aggs[`${safeFieldName}_values`] = {
        terms: {
          field: field.fieldName,
          size: 2
        }
      };
    });
    const searchBody = {
      query: {
        bool: {
          filter: filterCriteria
        }
      },
      aggs: (0, _mlAggUtils.buildSamplerAggregation)(aggs, samplerShardSize),
      ...runtimeMappingsClause(runtimeMappings)
    };
    const body = await this._asCurrentUser.search({
      index,
      size,
      body: searchBody
    }, {
      maxRetries: 0
    });
    const aggregations = body.aggregations;
    const aggsPath = (0, _mlAggUtils.getSamplerAggregationsResponsePath)(samplerShardSize);
    return fields.map((field, i) => {
      const safeFieldName = (0, _mlQueryUtils.getSafeAggregationName)(field.fieldName, i);
      const stats = {
        fieldName: field.fieldName,
        count: (0, _lodash.get)(aggregations, [...aggsPath, `${safeFieldName}_value_count`, 'doc_count'], 0),
        trueCount: 0,
        falseCount: 0
      };
      const valueBuckets = (0, _lodash.get)(aggregations, [...aggsPath, `${safeFieldName}_values`, 'buckets'], []);
      valueBuckets.forEach(bucket => {
        stats[`${bucket.key_as_string}Count`] = bucket.doc_count;
      });
      return stats;
    });
  }

  /**
   * Obtains up to `maxExamples` distinct example values of a field, taken from
   * the first value of the field in each returned hit.
   *
   * @param field name of the field
   * @param maxExamples maximum number of examples to return
   * @returns `{ fieldName, examples }`
   */
  async getFieldExamples(indexPatternTitle, query, field, timeFieldName, earliestMs, latestMs, maxExamples, runtimeMappings) {
    const index = indexPatternTitle;

    // Request at least 100 docs so that we have a chance of obtaining
    // 'maxExamples' of the field.
    const size = Math.max(100, maxExamples);
    const filterCriteria = (0, _mlQueryUtils.buildBaseFilterCriteria)(timeFieldName, earliestMs, latestMs, query);

    // Use an exists filter to return examples of the field.
    filterCriteria.push({
      exists: {
        field
      }
    });
    const searchBody = {
      fields: [field],
      _source: false,
      query: {
        bool: {
          filter: filterCriteria
        }
      },
      ...runtimeMappingsClause(runtimeMappings)
    };
    const body = await this._asCurrentUser.search({
      index,
      size,
      body: searchBody
    }, {
      maxRetries: 0
    });
    const stats = {
      fieldName: field,
      examples: []
    };
    if (body.hits.total.value > 0) {
      const hits = body.hits.hits;
      for (let i = 0; i < hits.length; i++) {
        // Use lodash get() to support field names containing dots.
        const doc = (0, _lodash.get)(hits[i].fields, field);
        // the results from fields query is always an array
        if (Array.isArray(doc) && doc.length > 0) {
          const example = doc[0];
          if (example !== undefined && stats.examples.indexOf(example) === -1) {
            stats.examples.push(example);
            if (stats.examples.length === maxExamples) {
              break;
            }
          }
        }
      }
    }
    return stats;
  }

  /**
   * Converts the percentiles returned for a numeric field into distribution
   * data, trimming the tails when they would stretch the value range too far
   * and merging consecutive percentiles which have the same value.
   *
   * The default percentile windows (0-90 and 5-95) are hard-coded for a spacing
   * of 5, which is what getNumericFieldsStats passes.
   * NOTE(review): a non-empty `percentiles` array with fewer than three entries
   * leaves the initial window empty and would throw on `.value`; callers here
   * always request 20 percentiles.
   *
   * @param percentiles array of `{ value }` objects in ascending percent order
   * @param percentileSpacing spacing, in percent, between consecutive percentiles
   * @param minValue minimum value of the field
   * @returns `{ percentiles, minPercentile, maxPercentile }` where `percentiles`
   *   is an array of `{ percent, minValue, maxValue }`
   */
  processDistributionData(percentiles, percentileSpacing, minValue) {
    const distribution = {
      percentiles: [],
      minPercentile: 0,
      maxPercentile: 100
    };
    if (percentiles.length === 0) {
      return distribution;
    }
    let percentileBuckets = [];
    let lowerBound = minValue;
    if (lowerBound >= 0) {
      // By default return results for 0 - 90% percentiles.
      distribution.minPercentile = 0;
      distribution.maxPercentile = 90;
      percentileBuckets = percentiles.slice(0, percentiles.length - 2);

      // Look ahead to the last percentiles and process these too if
      // they don't add more than 50% to the value range.
      const lastValue = (0, _lodash.last)(percentileBuckets).value;
      const upperBound = lowerBound + 1.5 * (lastValue - lowerBound);
      const filteredLength = percentileBuckets.length;
      for (let i = filteredLength; i < percentiles.length; i++) {
        if (percentiles[i].value < upperBound) {
          percentileBuckets.push(percentiles[i]);
          distribution.maxPercentile += percentileSpacing;
        } else {
          break;
        }
      }
    } else {
      // By default return results for 5 - 95% percentiles.
      const dataMin = lowerBound;
      lowerBound = percentiles[0].value;
      distribution.minPercentile = 5;
      distribution.maxPercentile = 95;
      percentileBuckets = percentiles.slice(1, percentiles.length - 1);

      // Add in 0-5 and 95-100% if they don't add more
      // than 25% to the value range at either end.
      const lastValue = (0, _lodash.last)(percentileBuckets).value;
      const maxDiff = 0.25 * (lastValue - lowerBound);
      if (lowerBound - dataMin < maxDiff) {
        percentileBuckets.splice(0, 0, percentiles[0]);
        distribution.minPercentile = 0;
        lowerBound = dataMin;
      }
      if (percentiles[percentiles.length - 1].value - lastValue < maxDiff) {
        percentileBuckets.push(percentiles[percentiles.length - 1]);
        distribution.maxPercentile = 100;
      }
    }

    // Combine buckets with the same value.
    const totalBuckets = percentileBuckets.length;
    let lastBucketValue = lowerBound;
    let numEqualValueBuckets = 0;
    for (let i = 0; i < totalBuckets; i++) {
      const bucket = percentileBuckets[i];

      // Results from the percentiles aggregation can have precision rounding
      // artifacts e.g returning 200 and 200.000000000123, so check for equality
      // around double floating point precision i.e. 15 sig figs.
      if (bucket.value.toPrecision(15) !== lastBucketValue.toPrecision(15)) {
        // Create a bucket for any 'equal value' buckets which had a value <= last bucket
        if (numEqualValueBuckets > 0) {
          distribution.percentiles.push({
            percent: numEqualValueBuckets * percentileSpacing,
            minValue: lastBucketValue,
            maxValue: lastBucketValue
          });
        }
        distribution.percentiles.push({
          percent: percentileSpacing,
          minValue: lastBucketValue,
          maxValue: bucket.value
        });
        lastBucketValue = bucket.value;
        numEqualValueBuckets = 0;
      } else {
        numEqualValueBuckets++;
        if (i === totalBuckets - 1) {
          // If at the last bucket, create a final bucket for the equal value buckets.
          distribution.percentiles.push({
            percent: numEqualValueBuckets * percentileSpacing,
            minValue: lastBucketValue,
            maxValue: lastBucketValue
          });
        }
      }
    }
    return distribution;
  }
}
exports.DataVisualizer = DataVisualizer;