7#include "robuststatistics.h"
10namespace Mathematics::RobustStatistics
12using namespace Mathematics::GSLHelpers;
21template<
typename Base =
double>
22double Pn_from_sorted_data(
const Base sorted_data[],
28template<
typename Base>
29double ComputeScaleFromSortedData(
const ScaleCalculation scaleMethod,
30 const std::vector<Base> &data,
33 auto const size = data.size();
34 auto const theData = data.data();
40 auto work = std::make_unique<double[]>(size);
41 auto const adjustedMad = 9.0 * gslMAD(theData, stride, size, work.get());
42 auto const median = gslMedianFromSortedData(theData, stride, size);
44 auto const begin = Mathematics::GSLHelpers::make_strided_iter(data.cbegin(), stride);
45 auto const end = Mathematics::GSLHelpers::make_strided_iter(data.cend(), stride);
47 auto ys = std::vector<Base>(size / stride);
48 std::transform(begin, end, ys.begin(), [ = ](Base x)
50 return (x - median) / adjustedMad;
52 auto const indicator = [](Base y)
54 return std::fabs(y) < 1 ? 1 : 0;
56 auto const top = std::transform_reduce(begin, end, ys.begin(), 0, std::plus{},
59 return indicator(y) * pow(x - median, 2) * pow(1 - pow(y, 2), 4);
61 auto const bottomSum = std::transform_reduce(begin, end, 0, std::plus{},
64 return indicator(y) * (1.0 - pow(y, 2)) * (1.0 - 5.0 * pow(y, 2));
67 auto const bottom = bottomSum * (bottomSum - 1);
69 return (size / stride) * (top / bottom);
74 auto work = std::make_unique<double[]>(size);
75 return gslMAD(theData, stride, size, work.get());
77 case SCALE_SESTIMATOR:
79 auto work = std::make_unique<Base[]>(size);
80 return gslSnFromSortedData(theData, stride, size, work.get());
82 case SCALE_QESTIMATOR:
84 auto work = std::make_unique<Base[]>(3 * size);
85 auto intWork = std::make_unique<int[]>(5 * size);
87 return gslQnFromSortedData(theData, stride, size, work.get(), intWork.get());
89 case SCALE_PESTIMATOR:
91 auto work = std::make_unique<Base[]>(3 * size);
92 auto intWork = std::make_unique<int[]>(5 * size);
94 return Pn_from_sorted_data(theData, stride, size, work.get(), intWork.get());
99 return gslVariance(theData, stride, size);
104template double ComputeScaleFromSortedData(
const ScaleCalculation scaleMethod,
105 const std::vector<double> &data,
106 const size_t stride);
107template double ComputeScaleFromSortedData(
const ScaleCalculation scaleMethod,
108 const std::vector<float> &data,
109 const size_t stride);
110template double ComputeScaleFromSortedData(
const ScaleCalculation scaleMethod,
111 const std::vector<uint8_t> &data,
112 const size_t stride);
113template double ComputeScaleFromSortedData(
const ScaleCalculation scaleMethod,
114 const std::vector<uint16_t> &data,
115 const size_t stride);
116template double ComputeScaleFromSortedData(
const ScaleCalculation scaleMethod,
117 const std::vector<int16_t> &data,
118 const size_t stride);
119template double ComputeScaleFromSortedData(
const ScaleCalculation scaleMethod,
120 const std::vector<uint32_t> &data,
121 const size_t stride);
122template double ComputeScaleFromSortedData(
const ScaleCalculation scaleMethod,
123 const std::vector<int32_t> &data,
124 const size_t stride);
125#ifdef GSLHELPERS_INT64
126template double ComputeScaleFromSortedData(
const ScaleCalculation scaleMethod,
127 const std::vector<int64_t> &data,
128 const size_t stride);
131template<
typename Base>
double ComputeLocationFromSortedData(
132 const LocationCalculation locationMethod,
133 const std::vector<Base> &data,
134 double trimAmount,
const size_t stride)
136 auto const size = data.size();
137 auto const theData = data.data();
139 switch (locationMethod)
141 case LOCATION_MEDIAN:
143 return gslMedianFromSortedData(theData, stride, size);
145 case LOCATION_TRIMMEDMEAN:
147 return gslTrimmedMeanFromSortedData(trimAmount, theData, stride, size);
149 case LOCATION_GASTWIRTH:
151 return gslGastwirthMeanFromSortedData(theData, stride, size);
154 case LOCATION_SIGMACLIPPING:
156 auto const median = gslMedianFromSortedData(theData, stride, size);
159 auto const stddev = gslStandardDeviation(theData, stride, size);
160 auto begin = GSLHelpers::make_strided_iter(data.cbegin(), stride);
161 auto end = GSLHelpers::make_strided_iter(data.cend(), stride);
164 auto lower = std::lower_bound(begin, end, (median - stddev * trimAmount));
165 auto upper = std::upper_bound(lower, end, (median + stddev * trimAmount));
168 auto sum = std::reduce(lower, upper);
169 const int num_remaining = std::distance(lower, upper);
170 if (num_remaining > 0)
return sum / num_remaining;
177 return gslMean(theData, stride, size);
182template double ComputeLocationFromSortedData(
const LocationCalculation scaleMethod,
183 const std::vector<double> &data,
184 double trimAmount,
const size_t stride);
185template double ComputeLocationFromSortedData(
const LocationCalculation scaleMethod,
186 const std::vector<float> &data,
187 double trimAmount,
const size_t stride);
188template double ComputeLocationFromSortedData(
const LocationCalculation scaleMethod,
189 const std::vector<uint8_t> &data,
190 double trimAmount,
const size_t stride);
191template double ComputeLocationFromSortedData(
const LocationCalculation scaleMethod,
192 const std::vector<uint16_t> &data,
193 double trimAmount,
const size_t stride);
194template double ComputeLocationFromSortedData(
const LocationCalculation scaleMethod,
195 const std::vector<int16_t> &data,
196 double trimAmount,
const size_t stride);
197template double ComputeLocationFromSortedData(
const LocationCalculation scaleMethod,
198 const std::vector<uint32_t> &data,
199 double trimAmount,
const size_t stride);
200template double ComputeLocationFromSortedData(
const LocationCalculation scaleMethod,
201 const std::vector<int32_t> &data,
202 double trimAmount,
const size_t stride);
203#ifdef GSLHELPERS_INT64
204template double ComputeLocationFromSortedData(
const LocationCalculation scaleMethod,
205 const std::vector<int64_t> &data,
206 double trimAmount,
const size_t stride);
210SampleStatistics ComputeSampleStatistics(std::vector<double> data,
211 const RobustStatistics::LocationCalculation locationMethod,
212 const RobustStatistics::ScaleCalculation scaleMethod,
216 std::sort(data.begin(), data.end());
217 double location = RobustStatistics::ComputeLocationFromSortedData(locationMethod, data, trimAmount, stride);
218 double scale = RobustStatistics::ComputeScaleFromSortedData(scaleMethod, data, stride);
219 double weight = RobustStatistics::ConvertScaleToWeight(scaleMethod, scale);
220 return RobustStatistics::SampleStatistics{
location, scale, weight};
234template<
typename Base =
double> Base whimed(Base* a,
int * w,
int n, Base* a_cand, Base* a_srt,
int * w_cand)
238 int64_t wleft, wmid, wright, w_tot, wrest;
243 for (i = 0; i < n; ++i)
251 for (i = 0; i < n; ++i)
256 gslSort(a_srt, 1, n);
264 for (i = 0; i < n; ++i)
268 else if (a[i] > trial)
279 if (2 * (wrest + wleft) > w_tot)
281 for (i = 0; i < n; ++i)
285 a_cand[kcand] = a[i];
286 w_cand[kcand] = w[i];
291 else if (2 * (wrest + wleft + wmid) <= w_tot)
293 for (i = 0; i < n; ++i)
297 a_cand[kcand] = a[i];
298 w_cand[kcand] = w[i];
303 wrest += wleft + wmid;
311 for (i = 0; i < n; ++i)
335template<
typename Base =
double>
double Pn0k_from_sorted_data(
const Base sorted_data[],
342 const int ni = (int) n;
343 Base * a_srt = &work[n];
344 Base * a_cand = &work[2 * n];
346 int *
left = &work_int[0];
347 int *
right = &work_int[n];
348 int *p = &work_int[2 * n];
349 int *q = &work_int[3 * n];
350 int *weight = &work_int[4 * n];
352 Base trial = (Base)0.0;
358 int64_t knew, nl, nr, sump, sumq;
366 for (i = 0; i < ni; ++i)
368 left[i] = ni - i + 1;
369 right[i] = (i <= h) ? ni : ni - (i - h);
374 nl = (int64_t)n * (n + 1) / 2;
379 while (!found && nr - nl > ni)
383 for (i = 1; i < ni; ++i)
385 if (left[i] <= right[i])
388 jh =
left[i] + weight[j] / 2;
389 work[j] = (sorted_data[i * stride] + sorted_data[(ni - jh) * stride]) / 2 ;
394 trial = whimed(work, weight, j, a_cand, a_srt, p);
397 for (i = ni - 1; i >= 0; --i)
399 while (j < ni && ((
double)(sorted_data[i * stride] + sorted_data[(ni - j - 1) * stride])) / 2 < trial)
406 for (i = 0; i < ni; ++i)
408 while ((
double)(sorted_data[i * stride] + sorted_data[(ni - j + 1) * stride]) / 2 > trial)
417 for (i = 0; i < ni; ++i)
425 for (i = 0; i < ni; ++i)
430 else if (knew > sumq)
432 for (i = 0; i < ni; ++i)
450 for (i = 1; i < ni; ++i)
454 for (jj = left[i]; jj <=
right[i]; ++jj)
456 work[j] = (sorted_data[i * stride] + sorted_data[(ni - jj) * stride]) / 2;
472template<
typename Base>
473double Pn_from_sorted_data(
const Base sorted_data[],
480 return sqrt(gslVariance(sorted_data, stride, n));
482 double upper = Pn0k_from_sorted_data(sorted_data, (3.0 / 4.0), stride, n, work, work_int);
483 double lower = Pn0k_from_sorted_data(sorted_data, (1.0 / 4.0), stride, n, work, work_int);
485 const double asymptoticCorrection = 1 / 0.9539;
487 auto const asymptoticEstimate = asymptoticCorrection * (upper - lower);
489 static double correctionFactors[] = {1.128, 1.303, 1.109, 1.064, 1.166, 1.103, 1.087, 1.105, 1.047, 1.063, 1.057,
490 1.040, 1.061, 1.047, 1.043, 1.048, 1.031, 1.037, 1.035, 1.028, 1.036, 1.030,
491 1.029, 1.032, 1.023, 1.025, 1.024, 1.021, 1.026, 1.022, 1.021, 1.023, 1.018,
492 1.020, 1.019, 1.017, 1.020, 1.018, 1.017, 1.018, 1.015, 1.016, 1.016, 1.014,
493 1.016, 1.015, 1.014, 1.015
498 return asymptoticEstimate * correctionFactors[n - 2];
502 return asymptoticEstimate * (n / (n - 0.7));
QVariant location(const QVariant &res)
const QList< QKeySequence > & begin()
const QList< QKeySequence > & end()
QTextStream & left(QTextStream &stream)
QTextStream & right(QTextStream &stream)