|
| 1 | +/* |
| 2 | + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one |
| 3 | + * or more contributor license agreements. Licensed under the Elastic License |
| 4 | + * 2.0; you may not use this file except in compliance with the Elastic License |
| 5 | + * 2.0. |
| 6 | + */ |
| 7 | + |
| 8 | +package org.elasticsearch.compute.operator; |
| 9 | + |
| 10 | +import org.apache.lucene.util.BytesRef; |
| 11 | +import org.elasticsearch.compute.data.Block; |
| 12 | +import org.elasticsearch.compute.data.BlockFactory; |
| 13 | +import org.elasticsearch.compute.data.BlockUtils; |
| 14 | +import org.elasticsearch.compute.data.BytesRefBlock; |
| 15 | +import org.elasticsearch.compute.data.DoubleBlock; |
| 16 | +import org.elasticsearch.compute.data.Page; |
| 17 | +import org.elasticsearch.core.Releasables; |
| 18 | +import org.elasticsearch.xpack.ml.aggs.MlAggsHelper; |
| 19 | +import org.elasticsearch.xpack.ml.aggs.changepoint.ChangePointDetector; |
| 20 | +import org.elasticsearch.xpack.ml.aggs.changepoint.ChangeType; |
| 21 | + |
| 22 | +import java.util.ArrayList; |
| 23 | +import java.util.Deque; |
| 24 | +import java.util.LinkedList; |
| 25 | +import java.util.List; |
| 26 | + |
| 27 | +/** |
| 28 | + * Find spikes, dips and change point in a list of values. |
| 29 | + * <p> |
| 30 | + * Warning: this operator cannot handle large amounts of data! It buffers all |
| 31 | + * data that is passed to it, runs the change point detector on the data (which |
| 32 | + * is a compute-heavy process), and then outputs all data with the change points. |
| 33 | + */ |
| 34 | +public class ChangePointOperator implements Operator { |
| 35 | + |
| 36 | + public static final int INPUT_VALUE_COUNT_LIMIT = 1000; |
| 37 | + |
| 38 | + public record Factory(int channel, String sourceText, int sourceLine, int sourceColumn) implements OperatorFactory { |
| 39 | + @Override |
| 40 | + public Operator get(DriverContext driverContext) { |
| 41 | + return new ChangePointOperator(driverContext, channel, sourceText, sourceLine, sourceColumn); |
| 42 | + } |
| 43 | + |
| 44 | + @Override |
| 45 | + public String describe() { |
| 46 | + return "ChangePointOperator[channel=" + channel + "]"; |
| 47 | + } |
| 48 | + } |
| 49 | + |
| 50 | + private final DriverContext driverContext; |
| 51 | + private final int channel; |
| 52 | + private final String sourceText; |
| 53 | + private final int sourceLine; |
| 54 | + private final int sourceColumn; |
| 55 | + |
| 56 | + private final Deque<Page> inputPages; |
| 57 | + private final Deque<Page> outputPages; |
| 58 | + private boolean finished; |
| 59 | + private Warnings warnings; |
| 60 | + |
| 61 | + // TODO: make org.elasticsearch.xpack.esql.core.tree.Source available here |
| 62 | + // (by modularizing esql-core) and use that instead of the individual fields. |
| 63 | + public ChangePointOperator(DriverContext driverContext, int channel, String sourceText, int sourceLine, int sourceColumn) { |
| 64 | + this.driverContext = driverContext; |
| 65 | + this.channel = channel; |
| 66 | + this.sourceText = sourceText; |
| 67 | + this.sourceLine = sourceLine; |
| 68 | + this.sourceColumn = sourceColumn; |
| 69 | + |
| 70 | + finished = false; |
| 71 | + inputPages = new LinkedList<>(); |
| 72 | + outputPages = new LinkedList<>(); |
| 73 | + warnings = null; |
| 74 | + } |
| 75 | + |
| 76 | + @Override |
| 77 | + public boolean needsInput() { |
| 78 | + return finished == false; |
| 79 | + } |
| 80 | + |
| 81 | + @Override |
| 82 | + public void addInput(Page page) { |
| 83 | + inputPages.add(page); |
| 84 | + } |
| 85 | + |
| 86 | + @Override |
| 87 | + public void finish() { |
| 88 | + if (finished == false) { |
| 89 | + finished = true; |
| 90 | + createOutputPages(); |
| 91 | + } |
| 92 | + } |
| 93 | + |
| 94 | + @Override |
| 95 | + public boolean isFinished() { |
| 96 | + return finished && outputPages.isEmpty(); |
| 97 | + } |
| 98 | + |
| 99 | + @Override |
| 100 | + public Page getOutput() { |
| 101 | + if (finished == false || outputPages.isEmpty()) { |
| 102 | + return null; |
| 103 | + } |
| 104 | + return outputPages.removeFirst(); |
| 105 | + } |
| 106 | + |
| 107 | + private void createOutputPages() { |
| 108 | + int valuesCount = 0; |
| 109 | + for (Page page : inputPages) { |
| 110 | + valuesCount += page.getPositionCount(); |
| 111 | + } |
| 112 | + boolean tooManyValues = valuesCount > INPUT_VALUE_COUNT_LIMIT; |
| 113 | + if (tooManyValues) { |
| 114 | + valuesCount = INPUT_VALUE_COUNT_LIMIT; |
| 115 | + } |
| 116 | + |
| 117 | + List<Double> values = new ArrayList<>(valuesCount); |
| 118 | + List<Integer> bucketIndexes = new ArrayList<>(valuesCount); |
| 119 | + int valuesIndex = 0; |
| 120 | + boolean hasNulls = false; |
| 121 | + boolean hasMultivalued = false; |
| 122 | + for (Page inputPage : inputPages) { |
| 123 | + Block inputBlock = inputPage.getBlock(channel); |
| 124 | + for (int i = 0; i < inputBlock.getPositionCount() && valuesIndex < valuesCount; i++) { |
| 125 | + Object value = BlockUtils.toJavaObject(inputBlock, i); |
| 126 | + if (value == null) { |
| 127 | + hasNulls = true; |
| 128 | + valuesIndex++; |
| 129 | + } else if (value instanceof List<?>) { |
| 130 | + hasMultivalued = true; |
| 131 | + valuesIndex++; |
| 132 | + } else { |
| 133 | + values.add(((Number) value).doubleValue()); |
| 134 | + bucketIndexes.add(valuesIndex++); |
| 135 | + } |
| 136 | + } |
| 137 | + } |
| 138 | + |
| 139 | + MlAggsHelper.DoubleBucketValues bucketValues = new MlAggsHelper.DoubleBucketValues( |
| 140 | + null, |
| 141 | + values.stream().mapToDouble(Double::doubleValue).toArray(), |
| 142 | + bucketIndexes.stream().mapToInt(Integer::intValue).toArray() |
| 143 | + ); |
| 144 | + ChangeType changeType = ChangePointDetector.getChangeType(bucketValues); |
| 145 | + int changePointIndex = changeType.changePoint(); |
| 146 | + |
| 147 | + BlockFactory blockFactory = driverContext.blockFactory(); |
| 148 | + int pageStartIndex = 0; |
| 149 | + while (inputPages.isEmpty() == false) { |
| 150 | + Page inputPage = inputPages.peek(); |
| 151 | + Page outputPage; |
| 152 | + Block changeTypeBlock = null; |
| 153 | + Block changePvalueBlock = null; |
| 154 | + boolean success = false; |
| 155 | + try { |
| 156 | + if (pageStartIndex <= changePointIndex && changePointIndex < pageStartIndex + inputPage.getPositionCount()) { |
| 157 | + try ( |
| 158 | + BytesRefBlock.Builder changeTypeBlockBuilder = blockFactory.newBytesRefBlockBuilder(inputPage.getPositionCount()); |
| 159 | + DoubleBlock.Builder pvalueBlockBuilder = blockFactory.newDoubleBlockBuilder(inputPage.getPositionCount()) |
| 160 | + ) { |
| 161 | + for (int i = 0; i < inputPage.getPositionCount(); i++) { |
| 162 | + if (pageStartIndex + i == changePointIndex) { |
| 163 | + changeTypeBlockBuilder.appendBytesRef(new BytesRef(changeType.getWriteableName())); |
| 164 | + pvalueBlockBuilder.appendDouble(changeType.pValue()); |
| 165 | + } else { |
| 166 | + changeTypeBlockBuilder.appendNull(); |
| 167 | + pvalueBlockBuilder.appendNull(); |
| 168 | + } |
| 169 | + } |
| 170 | + changeTypeBlock = changeTypeBlockBuilder.build(); |
| 171 | + changePvalueBlock = pvalueBlockBuilder.build(); |
| 172 | + } |
| 173 | + } else { |
| 174 | + changeTypeBlock = blockFactory.newConstantNullBlock(inputPage.getPositionCount()); |
| 175 | + changePvalueBlock = blockFactory.newConstantNullBlock(inputPage.getPositionCount()); |
| 176 | + } |
| 177 | + |
| 178 | + outputPage = inputPage.appendBlocks(new Block[] { changeTypeBlock, changePvalueBlock }); |
| 179 | + success = true; |
| 180 | + } finally { |
| 181 | + if (success == false) { |
| 182 | + Releasables.closeExpectNoException(changeTypeBlock, changePvalueBlock); |
| 183 | + } |
| 184 | + } |
| 185 | + |
| 186 | + inputPages.removeFirst(); |
| 187 | + outputPages.add(outputPage); |
| 188 | + pageStartIndex += inputPage.getPositionCount(); |
| 189 | + } |
| 190 | + |
| 191 | + if (changeType instanceof ChangeType.Indeterminable indeterminable) { |
| 192 | + warnings(false).registerException(new IllegalArgumentException(indeterminable.getReason())); |
| 193 | + } |
| 194 | + if (tooManyValues) { |
| 195 | + warnings(true).registerException( |
| 196 | + new IllegalArgumentException("too many values; keeping only first " + INPUT_VALUE_COUNT_LIMIT + " values") |
| 197 | + ); |
| 198 | + } |
| 199 | + if (hasNulls) { |
| 200 | + warnings(true).registerException(new IllegalArgumentException("values contain nulls; skipping them")); |
| 201 | + } |
| 202 | + if (hasMultivalued) { |
| 203 | + warnings(true).registerException( |
| 204 | + new IllegalArgumentException( |
| 205 | + "values contains multivalued entries; skipping them (please consider reducing them with e.g. MV_AVG or MV_SUM)" |
| 206 | + ) |
| 207 | + ); |
| 208 | + } |
| 209 | + } |
| 210 | + |
| 211 | + @Override |
| 212 | + public void close() { |
| 213 | + for (Page page : inputPages) { |
| 214 | + page.releaseBlocks(); |
| 215 | + } |
| 216 | + for (Page page : outputPages) { |
| 217 | + page.releaseBlocks(); |
| 218 | + } |
| 219 | + } |
| 220 | + |
| 221 | + @Override |
| 222 | + public String toString() { |
| 223 | + return "ChangePointOperator[channel=" + channel + "]"; |
| 224 | + } |
| 225 | + |
| 226 | + private Warnings warnings(boolean onlyWarnings) { |
| 227 | + if (warnings == null) { |
| 228 | + if (onlyWarnings) { |
| 229 | + this.warnings = Warnings.createOnlyWarnings(driverContext.warningsMode(), sourceLine, sourceColumn, sourceText); |
| 230 | + } else { |
| 231 | + this.warnings = Warnings.createWarnings(driverContext.warningsMode(), sourceLine, sourceColumn, sourceText); |
| 232 | + } |
| 233 | + } |
| 234 | + return warnings; |
| 235 | + } |
| 236 | +} |
0 commit comments