@@ -3,6 +3,7 | |||||
3 |
|
3 | |||
4 | #include "CoreGlobal.h" |
|
4 | #include "CoreGlobal.h" | |
5 |
|
5 | |||
|
6 | #include <Common/SortUtils.h> | |||
6 | #include <Data/DataSeriesIterator.h> |
|
7 | #include <Data/DataSeriesIterator.h> | |
7 |
|
8 | |||
8 | #include <QLoggingCategory> |
|
9 | #include <QLoggingCategory> | |
@@ -197,6 +198,27 struct SCIQLOP_CORE_EXPORT DataSeriesUtils { | |||||
197 | */ |
|
198 | */ | |
198 | static Mesh regularMesh(DataSeriesIterator begin, DataSeriesIterator end, |
|
199 | static Mesh regularMesh(DataSeriesIterator begin, DataSeriesIterator end, | |
199 | Resolution xResolution, Resolution yResolution); |
|
200 | Resolution xResolution, Resolution yResolution); | |
|
201 | ||||
|
202 | /** | |||
|
203 | * Calculates the min and max thresholds of a dataset. | |||
|
204 | * | |||
|
205 | * The thresholds of a dataset correspond to the min and max limits of the set from which the | |||

206 | * outliers are excluded (values distant from the others). For example, for the set [1, 2, 3, 4, | |||
|
207 | * 5, 10000], 10000 is an outlier and will be excluded from the thresholds. | |||
|
208 | * | |||
|
209 | * Bounds determining the thresholds are calculated according to the mean and the standard | |||
|
210 | * deviation of the defined data. The thresholds are limited to the min / max values of the | |||
|
211 | * dataset: if for example the calculated min threshold is 2 but the min value of the dataset | |||
|
212 | * is 4, 4 is returned as the min threshold. | |||
|
213 | * | |||
|
214 | * @param begin the beginning of the dataset | |||
|
215 | * @param end the end of the dataset | |||
|
216 | * @param logarithmic computes threshold with a logarithmic scale or not | |||
|
217 | * @return the thresholds computed, a couple of nan values if it couldn't be computed | |||
|
218 | */ | |||
|
219 | template <typename Iterator> | |||
|
220 | static std::pair<double, double> thresholds(Iterator begin, Iterator end, | |||
|
221 | bool logarithmic = false); | |||
200 | }; |
|
222 | }; | |
201 |
|
223 | |||
202 | template <typename Iterator> |
|
224 | template <typename Iterator> | |
@@ -225,4 +247,71 DataSeriesUtils::Resolution DataSeriesUtils::resolution(Iterator begin, Iterator | |||||
225 | return Resolution{resolution, logarithmic}; |
|
247 | return Resolution{resolution, logarithmic}; | |
226 | } |
|
248 | } | |
227 |
|
249 | |||
|
250 | template <typename Iterator> | |||
|
251 | std::pair<double, double> DataSeriesUtils::thresholds(Iterator begin, Iterator end, | |||
|
252 | bool logarithmic) | |||
|
253 | { | |||
|
254 | /// Lambda that converts values in case of logaritmic scale | |||
|
255 | auto toLog = [logarithmic](const auto &value) { | |||
|
256 | if (logarithmic) { | |||
|
257 | // Logaritmic scale doesn't include zero value | |||
|
258 | return !(std::isnan(value) || value < std::numeric_limits<double>::epsilon()) | |||
|
259 | ? std::log10(value) | |||
|
260 | : std::numeric_limits<double>::quiet_NaN(); | |||
|
261 | } | |||
|
262 | else { | |||
|
263 | return value; | |||
|
264 | } | |||
|
265 | }; | |||
|
266 | ||||
|
267 | /// Lambda that converts values to linear scale | |||
|
268 | auto fromLog | |||
|
269 | = [logarithmic](const auto &value) { return logarithmic ? std::pow(10, value) : value; }; | |||
|
270 | ||||
|
271 | /// Lambda used to sum data and divide the sum by the number of data. It is used to calculate | |||
|
272 | /// the mean and standard deviation | |||
|
273 | /// @param fun the data addition function | |||
|
274 | auto accumulate = [begin, end](auto fun) { | |||
|
275 | double sum; | |||
|
276 | int nbValues; | |||
|
277 | std::tie(sum, nbValues) = std::accumulate( | |||
|
278 | begin, end, std::make_pair(0., 0), [fun](const auto &input, const auto &value) { | |||
|
279 | auto computedValue = fun(value); | |||
|
280 | ||||
|
281 | // NaN values are excluded from the sum | |||
|
282 | return !std::isnan(computedValue) | |||
|
283 | ? std::make_pair(input.first + computedValue, input.second + 1) | |||
|
284 | : input; | |||
|
285 | }); | |||
|
286 | ||||
|
287 | return nbValues != 0 ? sum / nbValues : std::numeric_limits<double>::quiet_NaN(); | |||
|
288 | }; | |||
|
289 | ||||
|
290 | // Computes mean | |||
|
291 | auto mean = accumulate([toLog](const auto &val) { return toLog(val); }); | |||
|
292 | if (std::isnan(mean)) { | |||
|
293 | return {std::numeric_limits<double>::quiet_NaN(), std::numeric_limits<double>::quiet_NaN()}; | |||
|
294 | } | |||
|
295 | ||||
|
296 | // Computes standard deviation | |||
|
297 | auto variance | |||
|
298 | = accumulate([mean, toLog](const auto &val) { return std::pow(toLog(val) - mean, 2); }); | |||
|
299 | auto sigma = std::sqrt(variance); | |||
|
300 | ||||
|
301 | // Computes thresholds | |||
|
302 | auto minThreshold = fromLog(mean - 3 * sigma); | |||
|
303 | auto maxThreshold = fromLog(mean + 3 * sigma); | |||
|
304 | ||||
|
305 | // Finds min/max values | |||
|
306 | auto minIt = std::min_element(begin, end, [toLog](const auto &it1, const auto &it2) { | |||
|
307 | return SortUtils::minCompareWithNaN(toLog(it1), toLog(it2)); | |||
|
308 | }); | |||
|
309 | auto maxIt = std::max_element(begin, end, [toLog](const auto &it1, const auto &it2) { | |||
|
310 | return SortUtils::maxCompareWithNaN(toLog(it1), toLog(it2)); | |||
|
311 | }); | |||
|
312 | ||||
|
313 | // Returns thresholds (bounded to min/max values) | |||
|
314 | return {std::max(*minIt, minThreshold), std::min(*maxIt, maxThreshold)}; | |||
|
315 | } | |||
|
316 | ||||
228 | #endif // SCIQLOP_DATASERIESUTILS_H |
|
317 | #endif // SCIQLOP_DATASERIESUTILS_H |
General Comments 0
You need to be logged in to leave comments.
Login now