"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.fetchFrequentItemSets = fetchFrequentItemSets; exports.getFrequentItemSetsAggFields = getFrequentItemSetsAggFields; exports.getShouldClauses = getShouldClauses; exports.groupDuplicates = groupDuplicates; var _lodash = require("lodash"); var _d3Array = require("d3-array"); var _mlRandomSamplerUtils = require("@kbn/ml-random-sampler-utils"); var _constants = require("../../../common/constants"); /* * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one * or more contributor license agreements. Licensed under the Elastic License * 2.0; you may not use this file except in compliance with the Elastic License * 2.0. */ function groupDuplicates(cps, uniqueFields) { const groups = []; for (const cp of cps) { const compareAttributes = (0, _lodash.pick)(cp, uniqueFields); const groupIndex = groups.findIndex(g => (0, _lodash.isEqual)(g.keys, compareAttributes)); if (groupIndex === -1) { groups.push({ keys: compareAttributes, group: [cp] }); } else { groups[groupIndex].group.push(cp); } } return groups; } function getShouldClauses(significantTerms) { return Array.from((0, _d3Array.group)(significantTerms, ({ fieldName }) => fieldName), ([field, values]) => ({ terms: { [field]: values.map(d => d.fieldValue) } })); } function getFrequentItemSetsAggFields(significantTerms) { return Array.from((0, _d3Array.group)(significantTerms, ({ fieldName }) => fieldName), ([field, values]) => ({ field, include: values.map(d => String(d.fieldValue)) })); } async function fetchFrequentItemSets(client, index, searchQuery, significantTerms, timeFieldName, deviationMin, deviationMax, logger, // The default value of 1 means no sampling will be used sampleProbability = 1, emitError, abortSignal) { // Sort significant terms by ascending p-value, necessary to apply the field limit correctly. const sortedSignificantTerms = significantTerms.slice().sort((a, b) => { var _a$pValue, _b$pValue; return ((_a$pValue = a.pValue) !== null && _a$pValue !== void 0 ? _a$pValue : 0) - ((_b$pValue = b.pValue) !== null && _b$pValue !== void 0 ? _b$pValue : 0); }); const query = { bool: { minimum_should_match: 2, filter: [searchQuery, { range: { [timeFieldName]: { gte: deviationMin, lt: deviationMax } } }], should: getShouldClauses(sortedSignificantTerms) } }; const frequentItemSetsAgg = { fi: { frequent_item_sets: { minimum_set_size: 2, size: 200, minimum_support: 0.001, fields: getFrequentItemSetsAggFields(sortedSignificantTerms) } } }; const { wrap, unwrap } = (0, _mlRandomSamplerUtils.createRandomSamplerWrapper)({ probability: sampleProbability, seed: _constants.RANDOM_SAMPLER_SEED }); const esBody = { query, aggs: wrap(frequentItemSetsAgg), size: 0, track_total_hits: true }; const body = await client.search({ index, size: 0, body: esBody }, { signal: abortSignal, maxRetries: 0 }); if (body.aggregations === undefined) { logger.error(`Failed to fetch frequent_item_sets, got: \n${JSON.stringify(body, null, 2)}`); emitError(`Failed to fetch frequent_item_sets.`); return { fields: [], df: [], totalDocCount: 0 }; } const totalDocCountFi = body.hits.total.value; const frequentItemSets = unwrap(body.aggregations); const shape = frequentItemSets.fi.buckets.length; let maximum = shape; if (maximum > 50000) { maximum = 50000; } const fiss = frequentItemSets.fi.buckets; fiss.length = maximum; const results = []; fiss.forEach(fis => { const result = { set: {}, size: 0, maxPValue: 0, doc_count: 0, support: 0, total_doc_count: 0 }; let maxPValue; Object.entries(fis.key).forEach(([key, value]) => { var _sortedSignificantTer; result.set[key] = value[0]; const pValue = (_sortedSignificantTer = sortedSignificantTerms.find(t => t.fieldName === key && t.fieldValue === value[0])) === null || _sortedSignificantTer === void 0 ? void 0 : _sortedSignificantTer.pValue; if (pValue !== undefined && pValue !== null) { var _maxPValue; maxPValue = Math.max((_maxPValue = maxPValue) !== null && _maxPValue !== void 0 ? _maxPValue : 0, pValue); } }); if (maxPValue === undefined) { return; } result.size = Object.keys(result.set).length; result.maxPValue = maxPValue; result.doc_count = fis.doc_count; result.support = fis.support; result.total_doc_count = totalDocCountFi; results.push(result); }); results.sort((a, b) => { return b.doc_count - a.doc_count; }); const uniqueFields = (0, _lodash.uniq)(results.flatMap(r => Object.keys(r.set))); return { fields: uniqueFields, df: results, totalDocCount: totalDocCountFi }; }