##// END OF EJS Templates
Creates method to compute automatic thresholds
Alexandre Leroux -
r1015:bd0d48a48271
parent child
Show More
@@ -3,6 +3,7
3 3
4 4 #include "CoreGlobal.h"
5 5
6 #include <Common/SortUtils.h>
6 7 #include <Data/DataSeriesIterator.h>
7 8
8 9 #include <QLoggingCategory>
@@ -197,6 +198,27 struct SCIQLOP_CORE_EXPORT DataSeriesUtils {
197 198 */
198 199 static Mesh regularMesh(DataSeriesIterator begin, DataSeriesIterator end,
199 200 Resolution xResolution, Resolution yResolution);
201
202 /**
203 * Calculates the min and max thresholds of a dataset.
204 *
205 * The thresholds of a dataset correspond to the min and max limits of the set from which the
206 * outliers (values distant from the others) are excluded. For example, for the set [1, 2, 3, 4,
207 * 5, 10000], 10000 is an outlier and will be excluded from the thresholds.
208 *
209 * The bounds determining the thresholds are calculated according to the mean and the standard
210 * deviation of the defined data. The thresholds are limited to the min / max values of the
211 * dataset: if for example the calculated min threshold is 2 but the min value of the dataset
212 * is 4, 4 is returned as the min threshold.
213 *
214 * @param begin the beginning of the dataset
215 * @param end the end of the dataset
216 * @param logarithmic computes threshold with a logarithmic scale or not
217 * @return the thresholds computed, a couple of nan values if it couldn't be computed
218 */
219 template <typename Iterator>
220 static std::pair<double, double> thresholds(Iterator begin, Iterator end,
221 bool logarithmic = false);
200 222 };
201 223
202 224 template <typename Iterator>
@@ -225,4 +247,71 DataSeriesUtils::Resolution DataSeriesUtils::resolution(Iterator begin, Iterator
225 247 return Resolution{resolution, logarithmic};
226 248 }
227 249
250 template <typename Iterator>
251 std::pair<double, double> DataSeriesUtils::thresholds(Iterator begin, Iterator end,
252 bool logarithmic)
253 {
254 /// Lambda that converts values in case of logaritmic scale
255 auto toLog = [logarithmic](const auto &value) {
256 if (logarithmic) {
257 // Logaritmic scale doesn't include zero value
258 return !(std::isnan(value) || value < std::numeric_limits<double>::epsilon())
259 ? std::log10(value)
260 : std::numeric_limits<double>::quiet_NaN();
261 }
262 else {
263 return value;
264 }
265 };
266
267 /// Lambda that converts values to linear scale
268 auto fromLog
269 = [logarithmic](const auto &value) { return logarithmic ? std::pow(10, value) : value; };
270
271 /// Lambda used to sum data and divide the sum by the number of data. It is used to calculate
272 /// the mean and standard deviation
273 /// @param fun the data addition function
274 auto accumulate = [begin, end](auto fun) {
275 double sum;
276 int nbValues;
277 std::tie(sum, nbValues) = std::accumulate(
278 begin, end, std::make_pair(0., 0), [fun](const auto &input, const auto &value) {
279 auto computedValue = fun(value);
280
281 // NaN values are excluded from the sum
282 return !std::isnan(computedValue)
283 ? std::make_pair(input.first + computedValue, input.second + 1)
284 : input;
285 });
286
287 return nbValues != 0 ? sum / nbValues : std::numeric_limits<double>::quiet_NaN();
288 };
289
290 // Computes mean
291 auto mean = accumulate([toLog](const auto &val) { return toLog(val); });
292 if (std::isnan(mean)) {
293 return {std::numeric_limits<double>::quiet_NaN(), std::numeric_limits<double>::quiet_NaN()};
294 }
295
296 // Computes standard deviation
297 auto variance
298 = accumulate([mean, toLog](const auto &val) { return std::pow(toLog(val) - mean, 2); });
299 auto sigma = std::sqrt(variance);
300
301 // Computes thresholds
302 auto minThreshold = fromLog(mean - 3 * sigma);
303 auto maxThreshold = fromLog(mean + 3 * sigma);
304
305 // Finds min/max values
306 auto minIt = std::min_element(begin, end, [toLog](const auto &it1, const auto &it2) {
307 return SortUtils::minCompareWithNaN(toLog(it1), toLog(it2));
308 });
309 auto maxIt = std::max_element(begin, end, [toLog](const auto &it1, const auto &it2) {
310 return SortUtils::maxCompareWithNaN(toLog(it1), toLog(it2));
311 });
312
313 // Returns thresholds (bounded to min/max values)
314 return {std::max(*minIt, minThreshold), std::min(*maxIt, maxThreshold)};
315 }
316
228 317 #endif // SCIQLOP_DATASERIESUTILS_H
General Comments 0
You need to be logged in to leave comments. Login now