View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.statistics.descriptive;
18  
19  import java.util.Arrays;
20  import java.util.Objects;
21  import java.util.function.IntToDoubleFunction;
22  import java.util.function.IntToLongFunction;
23  import org.apache.commons.numbers.arrays.Selection;
24  
25  /**
26   * Provides quantile computation.
27   *
28   * <p>For values of length {@code n}:
29   * <ul>
30   * <li>The result is {@code NaN} if {@code n = 0}.</li>
31   * <li>The result is {@code values[0]} if {@code n = 1}.</li>
32   * <li>Otherwise the result is computed using the {@link EstimationMethod}.</li>
33   * </ul>
34   *
35   * <p>Computation of multiple quantiles will handle duplicate and unordered
36   * probabilities. Passing ordered probabilities is recommended if the order is already
37   * known as this can improve efficiency; for example using uniform spacing through the
38   * array data, or to identify extreme values from the data such as {@code [0.001, 0.999]}.
39   *
40   * <p>This implementation respects the ordering imposed by
41   * {@link Double#compare(double, double)} for {@code NaN} values. If a {@code NaN} occurs
42   * in the selected positions in the fully sorted values then the result is {@code NaN}.
43   *
44   * <p>The {@link NaNPolicy} can be used to change the behaviour on {@code NaN} values.
45   *
46   * <p>Instances of this class are immutable and thread-safe.
47   *
48   * <p><strong>Support for {@code long} arrays</strong>
49   *
50   * <p>The result on {@code long} values can be returned as a {@code double} or
51   * a {@code long} using a {@link StatisticResult}.
52   *
53   * <p>The {@code double} result is computed within 1 ULP of the exact result. In some
54   * cases this may be outside the range defined by the minimum and maximum of the input
55   * array following rounding to a 53-bit floating point representation. For example a
56   * quantile of an array containing only {@link Long#MAX_VALUE} as a {@code double} is
57   * 2<sup>63</sup>, which is the closest representation of 2<sup>63</sup> - 1.
58   *
59   * <p>The {@code long} result is returned using the nearest whole number.
60   * In the event of ties the result is rounded towards positive infinity.
61   * This value will always be within the range defined by the minimum and maximum
62   * of the input array. Due to interpolation it may be a value not observed in
63   * the input values.
64   *
65   * <p>Interpolation between two {@code long} values requires extended precision
66   * floating-point arithmetic. This can be avoided using a discontinuous {@link EstimationMethod}.
67   * In this case the {@code long} quantile will be a value observed in the input values.
68   *
69   * <p>If the array length {@code n} is zero the result as a {@code double} is
70   * {@code NaN} and the result as a {@code long} will raise an {@link ArithmeticException}.
71   *
72   * <p>Multiple quantile results required as only one of the primitive values can be converted
73   * to a primitive array using a stream, for example:
74   *
75   * <pre>{@code
76   * long[] values = ...
77   * double[] p = Quantile.probabilities(10);
78   * Quantile q = Quantile.withDefaults();
79   * long[] result = Arrays.stream(q.evaluate(values, p))
80   *                       .mapToLong(StatisticResult::getAsLong)
81   *                       .toArray();
82   * }</pre>
83   *
84   * @see #with(NaNPolicy)
85   * @see <a href="https://en.wikipedia.org/wiki/Quantile">Quantile (Wikipedia)</a>
86   * @since 1.1
87   */
88  public final class Quantile {
89      /** Message when the probability is not in the range {@code [0, 1]}. */
90      private static final String INVALID_PROBABILITY = "Invalid probability: ";
91      /** Message when no probabilities are provided for the varargs method. */
92      private static final String NO_PROBABILITIES_SPECIFIED = "No probabilities specified";
93      /** Message when the size is not valid. */
94      private static final String INVALID_SIZE = "Invalid size: ";
95      /** Message when the number of probabilities in a range is not valid. */
96      private static final String INVALID_NUMBER_OF_PROBABILITIES = "Invalid number of probabilities: ";
97  
98      /** Default instance. Method 8 is recommended by Hyndman and Fan. */
99      private static final Quantile DEFAULT = new Quantile(false, NaNPolicy.INCLUDE, EstimationMethod.HF8);
100 
101     /** Flag to indicate if the data should be copied. */
102     private final boolean copy;
103     /** NaN policy for floating point data. */
104     private final NaNPolicy nanPolicy;
105     /** Transformer for NaN data. */
106     private final NaNTransformer nanTransformer;
107     /** Estimation type used to determine the value from the quantile. */
108     private final EstimationMethod estimationType;
109 
110     /**
111      * @param copy Flag to indicate if the data should be copied.
112      * @param nanPolicy NaN policy.
113      * @param estimationType Estimation type used to determine the value from the quantile.
114      */
115     private Quantile(boolean copy, NaNPolicy nanPolicy, EstimationMethod estimationType) {
116         this.copy = copy;
117         this.nanPolicy = nanPolicy;
118         this.estimationType = estimationType;
119         nanTransformer = NaNTransformers.createNaNTransformer(nanPolicy, copy);
120     }
121 
122     /**
123      * Return a new instance with the default options.
124      *
125      * <ul>
126      * <li>{@linkplain #withCopy(boolean) Copy = false}</li>
127      * <li>{@linkplain #with(NaNPolicy) NaN policy = include}</li>
128      * <li>{@linkplain #with(EstimationMethod) Estimation method = HF8}</li>
129      * </ul>
130      *
131      * <p>Note: The default options configure for processing in-place and including
132      * {@code NaN} values in the data. This is the most efficient mode and has the
133      * smallest memory consumption.
134      *
135      * @return the quantile implementation
136      * @see #withCopy(boolean)
137      * @see #with(NaNPolicy)
138      * @see #with(EstimationMethod)
139      */
140     public static Quantile withDefaults() {
141         return DEFAULT;
142     }
143 
144     /**
145      * Return an instance with the configured copy behaviour. If {@code false} then
146      * the input array will be modified by the call to evaluate the quantiles; otherwise
147      * the computation uses a copy of the data.
148      *
149      * @param v Value.
150      * @return an instance
151      */
152     public Quantile withCopy(boolean v) {
153         return new Quantile(v, nanPolicy, estimationType);
154     }
155 
156     /**
157      * Return an instance with the configured {@link NaNPolicy}.
158      *
159      * <p>Note: This implementation respects the ordering imposed by
160      * {@link Double#compare(double, double)} for {@code NaN} values: {@code NaN} is
161      * considered greater than all other values, and all {@code NaN} values are equal. The
162      * {@link NaNPolicy} changes the computation of the statistic in the presence of
163      * {@code NaN} values.
164      *
165      * <ul>
166      * <li>{@link NaNPolicy#INCLUDE}: {@code NaN} values are moved to the end of the data;
167      * the size of the data <em>includes</em> the {@code NaN} values and the quantile will be
168      * {@code NaN} if any value used for quantile interpolation is {@code NaN}.</li>
169      * <li>{@link NaNPolicy#EXCLUDE}: {@code NaN} values are moved to the end of the data;
170      * the size of the data <em>excludes</em> the {@code NaN} values and the quantile will
171      * never be {@code NaN} for non-zero size. If all data are {@code NaN} then the size is zero
172      * and the result is {@code NaN}.</li>
173      * <li>{@link NaNPolicy#ERROR}: An exception is raised if the data contains {@code NaN}
174      * values.</li>
175      * </ul>
176      *
177      * <p>Note that the result is identical for all policies if no {@code NaN} values are present.
178      *
179      * @param v Value.
180      * @return an instance
181      */
182     public Quantile with(NaNPolicy v) {
183         return new Quantile(copy, Objects.requireNonNull(v), estimationType);
184     }
185 
186     /**
187      * Return an instance with the configured {@link EstimationMethod}.
188      *
189      * @param v Value.
190      * @return an instance
191      */
192     public Quantile with(EstimationMethod v) {
193         return new Quantile(copy, nanPolicy, Objects.requireNonNull(v));
194     }
195 
196     /**
197      * Generate {@code n} evenly spaced probabilities in the range {@code [0, 1]}.
198      *
199      * <pre>
200      * 1/(n + 1), 2/(n + 1), ..., n/(n + 1)
201      * </pre>
202      *
203      * @param n Number of probabilities.
204      * @return the probabilities
205      * @throws IllegalArgumentException if {@code n < 1}
206      */
207     public static double[] probabilities(int n) {
208         checkNumberOfProbabilities(n);
209         final double c1 = n + 1.0;
210         final double[] p = new double[n];
211         for (int i = 0; i < n; i++) {
212             p[i] = (i + 1.0) / c1;
213         }
214         return p;
215     }
216 
217     /**
218      * Generate {@code n} evenly spaced probabilities in the range {@code [p1, p2]}.
219      *
220      * <pre>
221      * w = p2 - p1
222      * p1 + w/(n + 1), p1 + 2w/(n + 1), ..., p1 + nw/(n + 1)
223      * </pre>
224      *
225      * @param n Number of probabilities.
226      * @param p1 Lower probability.
227      * @param p2 Upper probability.
228      * @return the probabilities
229      * @throws IllegalArgumentException if {@code n < 1}; if the probabilities are not in the
230      * range {@code [0, 1]}; or {@code p2 <= p1}.
231      */
232     public static double[] probabilities(int n, double p1, double p2) {
233         checkProbability(p1);
234         checkProbability(p2);
235         if (p2 <= p1) {
236             throw new IllegalArgumentException("Invalid range: [" + p1 + ", " + p2 + "]");
237         }
238         final double[] p = probabilities(n);
239         for (int i = 0; i < n; i++) {
240             p[i] = (1 - p[i]) * p1 + p[i] * p2;
241         }
242         return p;
243     }
244 
245     /**
246      * Evaluate the {@code p}-th quantile of the values.
247      *
248      * <p>Note: This method may partially sort the input values if not configured to
249      * {@link #withCopy(boolean) copy} the input data.
250      *
251      * <p><strong>Performance</strong>
252      *
253      * <p>It is not recommended to use this method for repeat calls for different quantiles
254      * within the same values. The {@link #evaluate(double[], double...)} method should be used
255      * which provides better performance.
256      *
257      * @param values Values.
258      * @param p Probability for the quantile to compute.
259      * @return the quantile
260      * @throws IllegalArgumentException if the probability {@code p} is not in the range {@code [0, 1]};
261      * or if the values contain NaN and the configuration is {@link NaNPolicy#ERROR}
262      * @see #evaluate(double[], double...)
263      * @see #with(NaNPolicy)
264      */
265     public double evaluate(double[] values, double p) {
266         return compute(values, 0, values.length, p);
267     }
268 
269     /**
270      * Evaluate the {@code p}-th quantile of the specified range of values.
271      *
272      * <p>Note: This method may partially sort the input values if not configured to
273      * {@link #withCopy(boolean) copy} the input data.
274      *
275      * <p><strong>Performance</strong>
276      *
277      * <p>It is not recommended to use this method for repeat calls for different quantiles
278      * within the same values. The {@link #evaluateRange(double[], int, int, double...)} method should be used
279      * which provides better performance.
280      *
281      * @param values Values.
282      * @param from Inclusive start of the range.
283      * @param to Exclusive end of the range.
284      * @param p Probability for the quantile to compute.
285      * @return the quantile
286      * @throws IllegalArgumentException if the probability {@code p} is not in the range {@code [0, 1]};
287      * or if the values contain NaN and the configuration is {@link NaNPolicy#ERROR}
288      * @throws IndexOutOfBoundsException if the sub-range is out of bounds
289      * @see #evaluateRange(double[], int, int, double...)
290      * @see #with(NaNPolicy)
291      * @since 1.2
292      */
293     public double evaluateRange(double[] values, int from, int to, double p) {
294         Statistics.checkFromToIndex(from, to, values.length);
295         return compute(values, from, to, p);
296     }
297 
298     /**
299      * Compute the {@code p}-th quantile of the specified range of values.
300      *
301      * @param values Values.
302      * @param from Inclusive start of the range.
303      * @param to Exclusive end of the range.
304      * @param p Probability for the quantile to compute.
305      * @return the quantile
306      * @throws IllegalArgumentException if the probability {@code p} is not in the range {@code [0, 1]}
307      */
308     private double compute(double[] values, int from, int to, double p) {
309         checkProbability(p);
310         // Floating-point data handling
311         final int[] bounds = new int[2];
312         final double[] x = nanTransformer.apply(values, from, to, bounds);
313         final int start = bounds[0];
314         final int end = bounds[1];
315         final int n = end - start;
316         // Special cases
317         if (n <= 1) {
318             return n == 0 ? Double.NaN : x[start];
319         }
320 
321         final double pos = estimationType.index(p, n);
322         final int ip = (int) pos;
323         final int i = start + ip;
324 
325         // Partition and compute
326         if (pos > ip) {
327             Selection.select(x, start, end, new int[] {i, i + 1});
328             return Interpolation.interpolate(x[i], x[i + 1], pos - ip);
329         }
330         Selection.select(x, start, end, i);
331         return x[i];
332     }
333 
334     /**
335      * Evaluate the {@code p}-th quantiles of the values.
336      *
337      * <p>Note: This method may partially sort the input values if not configured to
338      * {@link #withCopy(boolean) copy} the input data.
339      *
340      * @param values Values.
341      * @param p Probabilities for the quantiles to compute.
342      * @return the quantiles
343      * @throws IllegalArgumentException if any probability {@code p} is not in the range {@code [0, 1]};
344      * no probabilities are specified; or if the values contain NaN and the configuration is {@link NaNPolicy#ERROR}
345      * @see #with(NaNPolicy)
346      */
347     public double[] evaluate(double[] values, double... p) {
348         return compute(values, 0, values.length, p);
349     }
350 
351     /**
352      * Evaluate the {@code p}-th quantiles of the specified range of values.
353      *
354      * <p>Note: This method may partially sort the input values if not configured to
355      * {@link #withCopy(boolean) copy} the input data.
356      *
357      * @param values Values.
358      * @param from Inclusive start of the range.
359      * @param to Exclusive end of the range.
360      * @param p Probabilities for the quantiles to compute.
361      * @return the quantiles
362      * @throws IllegalArgumentException if any probability {@code p} is not in the range {@code [0, 1]};
363      * no probabilities are specified; or if the values contain NaN and the configuration is {@link NaNPolicy#ERROR}
364      * @throws IndexOutOfBoundsException if the sub-range is out of bounds
365      * @see #with(NaNPolicy)
366      * @since 1.2
367      */
368     public double[] evaluateRange(double[] values, int from, int to, double... p) {
369         Statistics.checkFromToIndex(from, to, values.length);
370         return compute(values, from, to, p);
371     }
372 
373     /**
374      * Compute the {@code p}-th quantiles of the specified range of values.
375      *
376      * @param values Values.
377      * @param from Inclusive start of the range.
378      * @param to Exclusive end of the range.
379      * @param p Probabilities for the quantiles to compute.
380      * @return the quantiles
381      * @throws IllegalArgumentException if any probability {@code p} is not in the range {@code [0, 1]};
382      * or no probabilities are specified.
383      */
384     private double[] compute(double[] values, int from, int to, double... p) {
385         checkProbabilities(p);
386         // Floating-point data handling
387         final int[] bounds = new int[2];
388         final double[] x = nanTransformer.apply(values, from, to, bounds);
389         final int start = bounds[0];
390         final int end = bounds[1];
391         final int n = end - start;
392         // Special cases
393         final double[] q = new double[p.length];
394         if (n <= 1) {
395             Arrays.fill(q, n == 0 ? Double.NaN : x[start]);
396             return q;
397         }
398 
399         // Collect interpolation positions. We use the output q as storage.
400         final int[] indices = computeIndices(n, p, q, start);
401 
402         // Partition
403         Selection.select(x, start, end, indices);
404 
405         // Compute
406         for (int k = 0; k < p.length; k++) {
407             // ip in [0, n); i in [start, end)
408             final int ip = (int) q[k];
409             final int i = start + ip;
410             if (q[k] > ip) {
411                 q[k] = Interpolation.interpolate(x[i], x[i + 1], q[k] - ip);
412             } else {
413                 q[k] = x[i];
414             }
415         }
416         return q;
417     }
418 
419     /**
420      * Evaluate the {@code p}-th quantile of the values.
421      *
422      * <p>Note: This method may partially sort the input values if not configured to
423      * {@link #withCopy(boolean) copy} the input data.
424      *
425      * <p><strong>Performance</strong>
426      *
427      * <p>It is not recommended to use this method for repeat calls for different quantiles
428      * within the same values. The {@link #evaluate(int[], double...)} method should be used
429      * which provides better performance.
430      *
431      * @param values Values.
432      * @param p Probability for the quantile to compute.
433      * @return the quantile
434      * @throws IllegalArgumentException if the probability {@code p} is not in the range {@code [0, 1]}
435      * @see #evaluate(int[], double...)
436      */
437     public double evaluate(int[] values, double p) {
438         return compute(values, 0, values.length, p);
439     }
440 
441     /**
442      * Evaluate the {@code p}-th quantile of the specified range of values.
443      *
444      * <p>Note: This method may partially sort the input values if not configured to
445      * {@link #withCopy(boolean) copy} the input data.
446      *
447      * <p><strong>Performance</strong>
448      *
449      * <p>It is not recommended to use this method for repeat calls for different quantiles
450      * within the same values. The {@link #evaluateRange(int[], int, int, double...)} method should be used
451      * which provides better performance.
452      *
453      * @param values Values.
454      * @param from Inclusive start of the range.
455      * @param to Exclusive end of the range.
456      * @param p Probability for the quantile to compute.
457      * @return the quantile
458      * @throws IllegalArgumentException if the probability {@code p} is not in the range {@code [0, 1]}
459      * @throws IndexOutOfBoundsException if the sub-range is out of bounds
460      * @see #evaluateRange(int[], int, int, double...)
461      * @since 1.2
462      */
463     public double evaluateRange(int[] values, int from, int to, double p) {
464         Statistics.checkFromToIndex(from, to, values.length);
465         return compute(values, from, to, p);
466     }
467 
468     /**
469      * Compute the {@code p}-th quantile of the specified range of values.
470      *
471      * @param values Values.
472      * @param from Inclusive start of the range.
473      * @param to Exclusive end of the range.
474      * @param p Probability for the quantile to compute.
475      * @return the quantile
476      * @throws IllegalArgumentException if the probability {@code p} is not in the range {@code [0, 1]}
477      */
478     private double compute(int[] values, int from, int to, double p) {
479         checkProbability(p);
480         final int n = to - from;
481         // Special cases
482         if (n <= 1) {
483             return n == 0 ? Double.NaN : values[from];
484         }
485 
486         // Create the range
487         final int[] x;
488         final int start;
489         final int end;
490         if (copy) {
491             x = Statistics.copy(values, from, to);
492             start = 0;
493             end = n;
494         } else {
495             x = values;
496             start = from;
497             end = to;
498         }
499 
500         final double pos = estimationType.index(p, n);
501         final int ip = (int) pos;
502         final int i = start + ip;
503 
504         // Partition and compute
505         if (pos > ip) {
506             Selection.select(x, start, end, new int[] {i, i + 1});
507             return Interpolation.interpolate((double) x[i], (double) x[i + 1], pos - ip);
508         }
509         Selection.select(x, start, end, i);
510         return x[i];
511     }
512 
513     /**
514      * Evaluate the {@code p}-th quantiles of the values.
515      *
516      * <p>Note: This method may partially sort the input values if not configured to
517      * {@link #withCopy(boolean) copy} the input data.
518      *
519      * @param values Values.
520      * @param p Probabilities for the quantiles to compute.
521      * @return the quantiles
522      * @throws IllegalArgumentException if any probability {@code p} is not in the range {@code [0, 1]};
523      * or no probabilities are specified.
524      */
525     public double[] evaluate(int[] values, double... p) {
526         return compute(values, 0, values.length, p);
527     }
528 
529     /**
530      * Evaluate the {@code p}-th quantiles of the specified range of values..
531      *
532      * <p>Note: This method may partially sort the input values if not configured to
533      * {@link #withCopy(boolean) copy} the input data.
534      *
535      * @param values Values.
536      * @param from Inclusive start of the range.
537      * @param to Exclusive end of the range.
538      * @param p Probabilities for the quantiles to compute.
539      * @return the quantiles
540      * @throws IllegalArgumentException if any probability {@code p} is not in the range {@code [0, 1]};
541      * or no probabilities are specified.
542      * @throws IndexOutOfBoundsException if the sub-range is out of bounds
543      * @since 1.2
544      */
545     public double[] evaluateRange(int[] values, int from, int to, double... p) {
546         Statistics.checkFromToIndex(from, to, values.length);
547         return compute(values, from, to, p);
548     }
549 
550     /**
551      * Evaluate the {@code p}-th quantiles of the specified range of values..
552      *
553      * <p>Note: This method may partially sort the input values if not configured to
554      * {@link #withCopy(boolean) copy} the input data.
555      *
556      * @param values Values.
557      * @param from Inclusive start of the range.
558      * @param to Exclusive end of the range.
559      * @param p Probabilities for the quantiles to compute.
560      * @return the quantiles
561      * @throws IllegalArgumentException if any probability {@code p} is not in the range {@code [0, 1]};
562      * or no probabilities are specified.
563      */
564     private double[] compute(int[] values, int from, int to, double... p) {
565         checkProbabilities(p);
566         final int n = to - from;
567         // Special cases
568         final double[] q = new double[p.length];
569         if (n <= 1) {
570             Arrays.fill(q, n == 0 ? Double.NaN : values[from]);
571             return q;
572         }
573 
574         // Create the range
575         final int[] x;
576         final int start;
577         final int end;
578         if (copy) {
579             x = Statistics.copy(values, from, to);
580             start = 0;
581             end = n;
582         } else {
583             x = values;
584             start = from;
585             end = to;
586         }
587 
588         // Collect interpolation positions. We use the output q as storage.
589         final int[] indices = computeIndices(n, p, q, start);
590 
591         // Partition
592         Selection.select(x, start, end, indices);
593 
594         // Compute
595         for (int k = 0; k < p.length; k++) {
596             // ip in [0, n); i in [start, end)
597             final int ip = (int) q[k];
598             final int i = start + ip;
599             if (q[k] > ip) {
600                 q[k] = Interpolation.interpolate((double) x[i], (double) x[i + 1], q[k] - ip);
601             } else {
602                 q[k] = x[i];
603             }
604         }
605         return q;
606     }
607 
608     /**
609      * Evaluate the {@code p}-th quantile of the values.
610      *
611      * <p>Note: This method may partially sort the input values if not configured to
612      * {@link #withCopy(boolean) copy} the input data.
613      *
614      * <p><strong>Performance</strong>
615      *
616      * <p>It is not recommended to use this method for repeat calls for different
617      * quantiles within the same values. The {@link #evaluate(long[], double...)} method
618      * should be used which provides better performance.
619      *
620      * @param values Values.
621      * @param p Probability for the quantile to compute.
622      * @return the quantile
623      * @throws IllegalArgumentException if the probability {@code p} is not in the range
624      * {@code [0, 1]}
625      * @see #evaluate(long[], double...)
626      * @since 1.3
627      */
628     public StatisticResult evaluate(long[] values, double p) {
629         return compute(values, 0, values.length, p);
630     }
631 
632     /**
633      * Evaluate the {@code p}-th quantile of the specified range of values.
634      *
635      * <p>Note: This method may partially sort the input values if not configured to
636      * {@link #withCopy(boolean) copy} the input data.
637      *
638      * <p><strong>Performance</strong>
639      *
640      * <p>It is not recommended to use this method for repeat calls for different quantiles
641      * within the same values. The {@link #evaluateRange(long[], int, int, double...)} method should be used
642      * which provides better performance.
643      *
644      * @param values Values.
645      * @param from Inclusive start of the range.
646      * @param to Exclusive end of the range.
647      * @param p Probability for the quantile to compute.
648      * @return the quantile
649      * @throws IllegalArgumentException if the probability {@code p} is not in the range {@code [0, 1]}
650      * @throws IndexOutOfBoundsException if the sub-range is out of bounds
651      * @see #evaluateRange(long[], int, int, double...)
652      * @since 1.3
653      */
654     public StatisticResult evaluateRange(long[] values, int from, int to, double p) {
655         Statistics.checkFromToIndex(from, to, values.length);
656         return compute(values, from, to, p);
657     }
658 
659     /**
660      * Compute the {@code p}-th quantile of the specified range of values.
661      *
662      * @param values Values.
663      * @param from Inclusive start of the range.
664      * @param to Exclusive end of the range.
665      * @param p Probability for the quantile to compute.
666      * @return the quantile
667      * @throws IllegalArgumentException if the probability {@code p} is not in the range {@code [0, 1]}
668      * @since 1.3
669      */
670     private StatisticResult compute(long[] values, int from, int to, double p) {
671         checkProbability(p);
672         final int n = to - from;
673         // Special cases
674         if (n <= 1) {
675             return n == 0 ?
676                 () -> Double.NaN :
677                 Statistics.createStatisticResult(values[from]);
678         }
679 
680         // Create the range
681         final long[] x;
682         final int start;
683         final int end;
684         if (copy) {
685             x = Statistics.copy(values, from, to);
686             start = 0;
687             end = n;
688         } else {
689             x = values;
690             start = from;
691             end = to;
692         }
693 
694         final double pos = estimationType.index(p, n);
695         final int ip = (int) pos;
696         final int i = start + ip;
697 
698         // Partition and compute
699         if (pos > ip) {
700             Selection.select(x, start, end, new int[] {i, i + 1});
701             return Interpolation.interpolate(x[i], x[i + 1], pos - ip);
702         }
703         Selection.select(x, start, end, i);
704         return Statistics.createStatisticResult(x[i]);
705     }
706 
707     /**
708      * Evaluate the {@code p}-th quantiles of the values.
709      *
710      * <p>Note: This method may partially sort the input values if not configured to
711      * {@link #withCopy(boolean) copy} the input data.
712      *
713      * @param values Values.
714      * @param p Probabilities for the quantiles to compute.
715      * @return the quantiles
716      * @throws IllegalArgumentException if any probability {@code p} is not in the range {@code [0, 1]};
717      * or no probabilities are specified.
718      * @since 1.3
719      */
720     public StatisticResult[] evaluate(long[] values, double... p) {
721         return compute(values, 0, values.length, p);
722     }
723 
724     /**
725      * Evaluate the {@code p}-th quantiles of the specified range of values..
726      *
727      * <p>Note: This method may partially sort the input values if not configured to
728      * {@link #withCopy(boolean) copy} the input data.
729      *
730      * @param values Values.
731      * @param from Inclusive start of the range.
732      * @param to Exclusive end of the range.
733      * @param p Probabilities for the quantiles to compute.
734      * @return the quantiles
735      * @throws IllegalArgumentException if any probability {@code p} is not in the range {@code [0, 1]};
736      * or no probabilities are specified.
737      * @throws IndexOutOfBoundsException if the sub-range is out of bounds
738      * @since 1.3
739      */
740     public StatisticResult[] evaluateRange(long[] values, int from, int to, double... p) {
741         Statistics.checkFromToIndex(from, to, values.length);
742         return compute(values, from, to, p);
743     }
744 
745     /**
746      * Evaluate the {@code p}-th quantiles of the specified range of values..
747      *
748      * <p>Note: This method may partially sort the input values if not configured to
749      * {@link #withCopy(boolean) copy} the input data.
750      *
751      * @param values Values.
752      * @param from Inclusive start of the range.
753      * @param to Exclusive end of the range.
754      * @param p Probabilities for the quantiles to compute.
755      * @return the quantiles
756      * @throws IllegalArgumentException if any probability {@code p} is not in the range {@code [0, 1]};
757      * or no probabilities are specified.
758      */
759     private StatisticResult[] compute(long[] values, int from, int to, double... p) {
760         checkProbabilities(p);
761         final int n = to - from;
762         // Special cases
763         final StatisticResult[] result = new StatisticResult[p.length];
764         if (n <= 1) {
765             final StatisticResult r = n == 0 ?
766                 () -> Double.NaN :
767                 Statistics.createStatisticResult(values[from]);
768             Arrays.fill(result, r);
769             return result;
770         }
771 
772         // Create the range
773         final long[] x;
774         final int start;
775         final int end;
776         if (copy) {
777             x = Statistics.copy(values, from, to);
778             start = 0;
779             end = n;
780         } else {
781             x = values;
782             start = from;
783             end = to;
784         }
785 
786         // Collect interpolation positions
787         final double[] q = new double[p.length];
788         final int[] indices = computeIndices(n, p, q, start);
789 
790         // Partition
791         Selection.select(x, start, end, indices);
792 
793         // Compute
794         for (int k = 0; k < p.length; k++) {
795             // ip in [0, n); i in [start, end)
796             final int ip = (int) q[k];
797             final int i = start + ip;
798             if (q[k] > ip) {
799                 result[k] = Interpolation.interpolate(x[i], x[i + 1], q[k] - ip);
800             } else {
801                 result[k] = Statistics.createStatisticResult(x[i]);
802             }
803         }
804         return result;
805     }
806 
807     /**
808      * Evaluate the {@code p}-th quantile of the sorted values provided as a {@code double}.
809      *
810      * <p>This method can be used when the values of known size are already sorted.
811      * It can be used for primitive types not supported by other evaluation methods.
812      * Numeric types {@code byte}, {@code short} and {@code float} can be converted to
813      * type {@code double} without loss of precision.
814      *
815      * <pre>{@code
816      * short[] x = ...
817      * Arrays.sort(x);
818      * double q = Quantile.withDefaults().evaluate(x.length, i -> x[i], 0.05);
819      * }</pre>
820      *
821      * <p>If the sorted array is a {@code long} datatype this method can lose information
822      * about the precision of the quantiles due to primitive type conversion. Use
823      * the method {@link #evaluateAsLong(int, IntToLongFunction, double)} to compute
824      * the {@code long} quantile result.
825      *
826      * @param n Size of the values.
827      * @param values Values function.
828      * @param p Probability for the quantile to compute.
829      * @return the quantile
830      * @throws IllegalArgumentException if {@code size < 0}; or if the probability {@code p} is
831      * not in the range {@code [0, 1]}.
832      * @see #evaluateAsLong(int, IntToLongFunction, double)
833      */
834     public double evaluate(int n, IntToDoubleFunction values, double p) {
835         checkSize(n);
836         checkProbability(p);
837         // Special case
838         if (n <= 1) {
839             return n == 0 ? Double.NaN : values.applyAsDouble(0);
840         }
841         final double pos = estimationType.index(p, n);
842         final int i = (int) pos;
843         final double v1 = values.applyAsDouble(i);
844         if (pos > i) {
845             final double v2 = values.applyAsDouble(i + 1);
846             return Interpolation.interpolate(v1, v2, pos - i);
847         }
848         return v1;
849     }
850 
851     /**
852      * Evaluate the {@code p}-th quantiles of the sorted values provided as a {@code double}.
853      *
854      * <p>This method can be used when the values of known size are already sorted.
855      * It can be used for primitive types not supported by other evaluation methods.
856      * Numeric types {@code byte}, {@code short} and {@code float} can be converted to
857      * type {@code double} without loss of precision.
858      *
859      * <pre>{@code
860      * short[] x = ...
861      * Arrays.sort(x);
862      * double[] q = Quantile.withDefaults().evaluate(x.length, i -> x[i], 0.25, 0.5, 0.75);
863      * }</pre>
864      *
865      * <p>If the sorted array is a {@code long} datatype this method can lose information
866      * about the precision of the quantiles due to primitive type conversion. Use
867      * the method {@link #evaluateAsLong(int, IntToLongFunction, double...)} to compute
868      * the {@code long} quantile result.
869      *
870      * @param n Size of the values.
871      * @param values Values function.
872      * @param p Probabilities for the quantiles to compute.
873      * @return the quantiles
874      * @throws IllegalArgumentException if {@code size < 0}; if any probability {@code p} is
875      * not in the range {@code [0, 1]}; or no probabilities are specified.
876      * @see #evaluateAsLong(int, IntToLongFunction, double...)
877      */
878     public double[] evaluate(int n, IntToDoubleFunction values, double... p) {
879         checkSize(n);
880         checkProbabilities(p);
881         // Special case
882         final double[] q = new double[p.length];
883         if (n <= 1) {
884             Arrays.fill(q, n == 0 ? Double.NaN : values.applyAsDouble(0));
885             return q;
886         }
887         for (int k = 0; k < p.length; k++) {
888             final double pos = estimationType.index(p[k], n);
889             final int i = (int) pos;
890             final double v1 = values.applyAsDouble(i);
891             if (pos > i) {
892                 final double v2 = values.applyAsDouble(i + 1);
893                 q[k] = Interpolation.interpolate(v1, v2, pos - i);
894             } else {
895                 q[k] = v1;
896             }
897         }
898         return q;
899     }
900 
901     /**
902      * Evaluate the {@code p}-th quantile of the sorted values provided as a {@code long}.
903      *
904      * <p>This method can be used when the values of known size are already sorted.
905      *
906      * <pre>{@code
907      * long[] x = ...
908      * Arrays.sort(x);
909      * StatisticResult q = Quantile.withDefaults()
910      *                             .evaluateAsLong(x.length, i -> x[i], 0.05);
911      * }</pre>
912      *
913      * <p>Note: It is not recommended to sort data for use only in the quantile computation.
914      * The {@link #evaluate(long[], double)} method will partially sort the data as required
915      * and in most cases will be more efficient.
916      *
917      * @param n Size of the values.
918      * @param values Values function.
919      * @param p Probability for the quantile to compute.
920      * @return the quantile
921      * @throws IllegalArgumentException if {@code size < 0}; or if the probability {@code p} is
922      * not in the range {@code [0, 1]}.
923      * @since 1.3
924      */
925     public StatisticResult evaluateAsLong(int n, IntToLongFunction values, double p) {
926         checkSize(n);
927         checkProbability(p);
928         // Special case
929         if (n <= 1) {
930             return n == 0 ?
931                 () -> Double.NaN :
932                 Statistics.createStatisticResult(values.applyAsLong(0));
933         }
934         final double pos = estimationType.index(p, n);
935         final int i = (int) pos;
936         final long v1 = values.applyAsLong(i);
937         if (pos > i) {
938             final long v2 = values.applyAsLong(i + 1);
939             return Interpolation.interpolate(v1, v2, pos - i);
940         }
941         return Statistics.createStatisticResult(v1);
942     }
943 
944     /**
945      * Evaluate the {@code p}-th quantiles of the sorted values provided as a {@code long}.
946      *
947      * <p>This method can be used when the values of known size are already sorted.
948      *
949      * <pre>{@code
950      * long[] x = ...
951      * Arrays.sort(x);
952      * StatisticResult[] q = Quantile.withDefaults()
953      *                               .evaluateAsLong(x.length, i -> x[i], 0.25, 0.5, 0.75);
954      * }</pre>
955      *
956      * <p>Note: It is not recommended to sort data for use only in the quantile computation.
957      * The {@link #evaluate(long[], double...)} method will partially sort the data as required
958      * and in most cases will be more efficient.
959      *
960      * @param n Size of the values.
961      * @param values Values function.
962      * @param p Probabilities for the quantiles to compute.
963      * @return the quantiles
964      * @throws IllegalArgumentException if {@code size < 0}; if any probability {@code p} is
965      * not in the range {@code [0, 1]}; or no probabilities are specified.
966      * @since 1.3
967      */
968     public StatisticResult[] evaluateAsLong(int n, IntToLongFunction values, double... p) {
969         checkSize(n);
970         checkProbabilities(p);
971         // Special case
972         final StatisticResult[] result = new StatisticResult[p.length];
973         if (n <= 1) {
974             final StatisticResult r = n == 0 ?
975                 () -> Double.NaN :
976                 Statistics.createStatisticResult(values.applyAsLong(0));
977             Arrays.fill(result, r);
978             return result;
979         }
980         for (int k = 0; k < p.length; k++) {
981             final double pos = estimationType.index(p[k], n);
982             final int i = (int) pos;
983             final long v1 = values.applyAsLong(i);
984             if (pos > i) {
985                 final long v2 = values.applyAsLong(i + 1);
986                 result[k] = Interpolation.interpolate(v1, v2, pos - i);
987             } else {
988                 result[k] = Statistics.createStatisticResult(v1);
989             }
990         }
991         return result;
992     }
993 
994     /**
995      * Check the probability {@code p} is in the range {@code [0, 1]}.
996      *
997      * @param p Probability for the quantile to compute.
998      * @throws IllegalArgumentException if the probability is not in the range {@code [0, 1]}
999      */
1000     private static void checkProbability(double p) {
1001         // Logic negation will detect NaN
1002         if (!(p >= 0 && p <= 1)) {
1003             throw new IllegalArgumentException(INVALID_PROBABILITY + p);
1004         }
1005     }
1006 
1007     /**
1008      * Check the probabilities {@code p} are in the range {@code [0, 1]}.
1009      *
1010      * @param p Probabilities for the quantiles to compute.
1011      * @throws IllegalArgumentException if any probabilities {@code p} is not in the range {@code [0, 1]};
1012      * or no probabilities are specified.
1013      */
1014     private static void checkProbabilities(double... p) {
1015         if (p.length == 0) {
1016             throw new IllegalArgumentException(NO_PROBABILITIES_SPECIFIED);
1017         }
1018         for (final double pp : p) {
1019             checkProbability(pp);
1020         }
1021     }
1022 
1023     /**
1024      * Check the {@code size} is positive.
1025      *
1026      * @param n Size of the values.
1027      * @throws IllegalArgumentException if {@code size < 0}
1028      */
1029     private static void checkSize(int n) {
1030         if (n < 0) {
1031             throw new IllegalArgumentException(INVALID_SIZE + n);
1032         }
1033     }
1034 
1035     /**
1036      * Check the number of probabilities {@code n} is strictly positive.
1037      *
1038      * @param n Number of probabilities.
1039      * @throws IllegalArgumentException if {@code c < 1}
1040      */
1041     private static void checkNumberOfProbabilities(int n) {
1042         if (n < 1) {
1043             throw new IllegalArgumentException(INVALID_NUMBER_OF_PROBABILITIES + n);
1044         }
1045     }
1046 
1047     /**
1048      * Compute the indices required for quantile interpolation.
1049      *
1050      * <p>The zero-based interpolation index in {@code [0, n)} is
1051      * saved into the working array {@code q} for each {@code p}.
1052      *
1053      * <p>The indices are incremented by the provided {@code offset} to allow
1054      * addressing sub-ranges of a larger array.
1055      *
1056      * @param n Size of the data.
1057      * @param p Probabilities for the quantiles to compute.
1058      * @param q Working array for quantiles in {@code [0, n)}.
1059      * @param offset Array offset.
1060      * @return the indices in {@code [offset, offset + n)}
1061      */
1062     private int[] computeIndices(int n, double[] p, double[] q, int offset) {
1063         final int[] indices = new int[p.length << 1];
1064         int count = 0;
1065         for (int k = 0; k < p.length; k++) {
1066             final double pos = estimationType.index(p[k], n);
1067             q[k] = pos;
1068             final int i = (int) pos;
1069             indices[count++] = offset + i;
1070             if (pos > i) {
1071                 // Require the next index for interpolation
1072                 indices[count++] = offset + i + 1;
1073             }
1074         }
1075         if (count < indices.length) {
1076             return Arrays.copyOf(indices, count);
1077         }
1078         return indices;
1079     }
1080 
1081     /**
1082      * Enumerates estimation methods for a quantile. Provides the nine quantile algorithms
1083      * defined in Hyndman and Fan (1996)[1] as {@code HF1 - HF9}.
1084      *
1085      * <p>Samples quantiles are defined by:
1086      *
1087      * <p>\[ Q(p) = (1 - \gamma) x_j + \gamma x_{j+1} \]
1088      *
1089      * <p>where \( \frac{j-m}{n} \leq p \le \frac{j-m+1}{n} \), \( x_j \) is the \( j \)th
1090      * order statistic, \( n \) is the sample size, the value of \( \gamma \) is a function
1091      * of \( j = \lfloor np+m \rfloor \) and \( g = np + m - j \), and \( m \) is a constant
1092      * determined by the sample quantile type.
1093      *
1094      * <p>Note that the real-valued position \( np + m \) is a 1-based index and
1095      * \( j \in [1, n] \). If the real valued position is computed as beyond the lowest or
1096      * highest values in the sample, this implementation will return the minimum or maximum
1097      * observation respectively.
1098      *
1099      * <p>Types 1, 2, and 3 are discontinuous functions of \( p \); types 4 to 9 are continuous
1100      * functions of \( p \).
1101      *
1102      * <p>For the continuous functions, the probability \( p_k \) is provided for the \( k \)-th order
1103      * statistic in size \( n \). Samples quantiles are equivalently obtained to \( Q(p) \) by
1104      * linear interpolation between points \( (p_k, x_k) \) and \( (p_{k+1}, x_{k+1}) \) for
1105      * any \( p_k \leq p \leq p_{k+1} \).
1106      *
1107      * <ol>
1108      * <li>Hyndman and Fan (1996)
1109      *     <i>Sample Quantiles in Statistical Packages.</i>
1110      *     The American Statistician, 50, 361-365.
1111      *     <a href="https://www.jstor.org/stable/2684934">doi.org/10.2307/2684934</a></li>
1112      * <li><a href="https://en.wikipedia.org/wiki/Quantile">Quantile (Wikipedia)</a></li>
1113      * </ol>
1114      */
1115     public enum EstimationMethod {
1116         /**
1117          * Inverse of the empirical distribution function.
1118          *
1119          * <p>\( m = 0 \). \( \gamma = 0 \) if \( g = 0 \), and 1 otherwise.
1120          */
1121         HF1 {
1122             @Override
1123             double position0(double p, int n) {
1124                 // position = np + 0. This is 1-based so adjust to 0-based.
1125                 return Math.ceil(n * p) - 1;
1126             }
1127         },
1128         /**
1129          * Similar to {@link #HF1} with averaging at discontinuities.
1130          *
1131          * <p>\( m = 0 \). \( \gamma = 0.5 \) if \( g = 0 \), and 1 otherwise.
1132          */
1133         HF2 {
1134             @Override
1135             double position0(double p, int n) {
1136                 final double pos = n * p;
1137                 // Average at discontinuities
1138                 final int j = (int) pos;
1139                 final double g = pos - j;
1140                 if (g == 0) {
1141                     return j - 0.5;
1142                 }
1143                 // As HF1 : ceil(j + g) - 1
1144                 return j;
1145             }
1146         },
1147         /**
1148          * The observation closest to \( np \). Ties are resolved to the nearest even order statistic.
1149          *
1150          * <p>\( m = -1/2 \). \( \gamma = 0 \) if \( g = 0 \) and \( j \) is even, and 1 otherwise.
1151          */
1152         HF3 {
1153             @Override
1154             double position0(double p, int n) {
1155                 // Let rint do the work for ties to even
1156                 return Math.rint(n * p) - 1;
1157             }
1158         },
1159         /**
1160          * Linear interpolation of the inverse of the empirical CDF.
1161          *
1162          * <p>\( m = 0 \). \( p_k = \frac{k}{n} \).
1163          */
1164         HF4 {
1165             @Override
1166             double position0(double p, int n) {
1167                 // np + 0 - 1
1168                 return n * p - 1;
1169             }
1170         },
1171         /**
1172          * A piecewise linear function where the knots are the values midway through the steps of
1173          * the empirical CDF. Proposed by Hazen (1914) and popular amongst hydrologists.
1174          *
1175          * <p>\( m = 1/2 \). \( p_k = \frac{k - 1/2}{n} \).
1176          */
1177         HF5 {
1178             @Override
1179             double position0(double p, int n) {
1180                 // np + 0.5 - 1
1181                 return n * p - 0.5;
1182             }
1183         },
1184         /**
1185          * Linear interpolation of the expectations for the order statistics for the uniform
1186          * distribution on [0,1]. Proposed by Weibull (1939).
1187          *
1188          * <p>\( m = p \). \( p_k = \frac{k}{n + 1} \).
1189          *
1190          * <p>This method computes the quantile as per the Apache Commons Math Percentile
1191          * legacy implementation.
1192          */
1193         HF6 {
1194             @Override
1195             double position0(double p, int n) {
1196                 // np + p - 1
1197                 return (n + 1) * p - 1;
1198             }
1199         },
1200         /**
1201          * Linear interpolation of the modes for the order statistics for the uniform
1202          * distribution on [0,1]. Proposed by Gumbull (1939).
1203          *
1204          * <p>\( m = 1 - p \). \( p_k = \frac{k - 1}{n - 1} \).
1205          */
1206         HF7 {
1207             @Override
1208             double position0(double p, int n) {
1209                 // np + 1-p - 1
1210                 return (n - 1) * p;
1211             }
1212         },
1213         /**
1214          * Linear interpolation of the approximate medians for order statistics.
1215          *
1216          * <p>\( m = (p + 1)/3 \). \( p_k = \frac{k - 1/3}{n + 1/3} \).
1217          *
1218          * <p>As per Hyndman and Fan (1996) this approach is most recommended as it provides
1219          * an approximate median-unbiased estimate regardless of distribution.
1220          */
1221         HF8 {
1222             @Override
1223             double position0(double p, int n) {
1224                 return n * p + (p + 1) / 3 - 1;
1225             }
1226         },
1227         /**
1228          * Quantile estimates are approximately unbiased for the expected order statistics if
1229          * \( x \) is normally distributed.
1230          *
1231          * <p>\( m = p/4 + 3/8 \). \( p_k = \frac{k - 3/8}{n + 1/4} \).
1232          */
1233         HF9 {
1234             @Override
1235             double position0(double p, int n) {
1236                 // np + p/4 + 3/8 - 1
1237                 return (n + 0.25) * p - 0.625;
1238             }
1239         };
1240 
1241         /**
1242          * Finds the real-valued position for calculation of the quantile.
1243          *
1244          * <p>Return {@code i + g} such that the quantile value from sorted data is:
1245          *
1246          * <p>value = data[i] + g * (data[i+1] - data[i])
1247          *
1248          * <p>Warning: Interpolation should not use {@code data[i+1]} unless {@code g != 0}.
1249          *
1250          * <p>Note: In contrast to the definition of Hyndman and Fan in the class header
1251          * which uses a 1-based position, this is a zero based index. This change is for
1252          * convenience when addressing array positions.
1253          *
1254          * @param p p<sup>th</sup> quantile.
1255          * @param n Size.
1256          * @return a real-valued position (0-based) into the range {@code [0, n)}
1257          */
1258         abstract double position0(double p, int n);
1259 
1260         /**
1261          * Finds the index {@code i} and fractional part {@code g} of a real-valued position
1262          * to interpolate the quantile.
1263          *
1264          * <p>Return {@code i + g} such that the quantile value from sorted data is:
1265          *
1266          * <p>value = data[i] + g * (data[i+1] - data[i])
1267          *
1268          * <p>Note: Interpolation should not use {@code data[i+1]} unless {@code g != 0}.
1269          *
1270          * @param p p<sup>th</sup> quantile.
1271          * @param n Size.
1272          * @return index (in [0, n-1])
1273          */
1274         final double index(double p, int n) {
1275             final double pos = position0(p, n);
1276             // Bounds check in [0, n-1]
1277             if (pos < 0) {
1278                 return 0;
1279             }
1280             if (pos > n - 1.0) {
1281                 return n - 1.0;
1282             }
1283             return pos;
1284         }
1285     }
1286 }