HG_REPOSITORIES/LPP/SciQLOP_Repos/SciQLop Commit - r1015:bd0d48a48271

             #ifndef SCIQLOP_DATASERIESUTILS_H
             #define SCIQLOP_DATASERIESUTILS_H
             #include "CoreGlobal.h"
+            #include <Common/SortUtils.h>
             #include <Data/DataSeriesIterator.h>
             #include <QLoggingCategory>
             #include <cmath>
             Q_DECLARE_LOGGING_CATEGORY(LOG_DataSeriesUtils)
             /**
              * Utility class with methods for data series
              */
             struct SCIQLOP_CORE_EXPORT DataSeriesUtils {
                 /**
                  * Defines a mesh.
                  *
                  * A mesh is a regular grid representing cells of the same width (in x) and of the same height
                  * (in y). A value is associated with each mesh point.
                  *
                  * Each axis of the mesh is defined by a minimum value, a number of values and a mesh step.
                  * For example: if min = 1, nbValues = 5 and step = 2 => the axis of the mesh will be [1, 3, 5,
                  * 7, 9].
                  *
                  * The values are defined in an array of size {nbX * nbY}. The data is stored along the X axis.
                  *
                  * For example, the mesh:
                  * Y = 2 [  7   ;   8   ;   9
                  * Y = 1    4   ;   5   ;   6
                  * Y = 0    1   ;   2   ;   3   ]
                  *        X = 0   X = 1   X = 2
                  *
                  * will be represented by data [1, 2, 3, 4, 5, 6, 7, 8, 9]
                  */
                 struct Mesh {
                     /// Builds an empty mesh: no value on either axis and no data
                     explicit Mesh() = default;
                     /**
                      * Builds a mesh and allocates its data array with {nbX * nbY} value-initialized
                      * elements. The arguments are stored as given: no validity check is made here.
                      * @param nbX the number of values on the x-axis
                      * @param xMin the minimum value of the x-axis
                      * @param xStep the step between two values of the x-axis
                      * @param nbY the number of values on the y-axis
                      * @param yMin the minimum value of the y-axis
                      * @param yStep the step between two values of the y-axis
                      */
                     explicit Mesh(int nbX, double xMin, double xStep, int nbY, double yMin, double yStep)
                             : m_NbX{nbX},
                               m_XMin{xMin},
                               m_XStep{xStep},
                               m_NbY{nbY},
                               m_YMin{yMin},
                               m_YStep{yStep},
                               m_Data(nbX * nbY)
                     {
                     }
                     /// @return true if the mesh holds no data
                     inline bool isEmpty() const { return m_Data.size() == 0; }
                     /// @return the last value of the x-axis: min + (number of values - 1) * step
                     inline double xMax() const { return m_XMin + (m_NbX - 1) * m_XStep; }
                     /// @return the last value of the y-axis: min + (number of values - 1) * step
                     inline double yMax() const { return m_YMin + (m_NbY - 1) * m_YStep; }
                     int m_NbX{0};     ///< Number of values on the x-axis
                     double m_XMin{};  ///< Minimum value of the x-axis
                     double m_XStep{}; ///< Step between two values of the x-axis
                     int m_NbY{0};     ///< Number of values on the y-axis
                     double m_YMin{};  ///< Minimum value of the y-axis
                     double m_YStep{}; ///< Step between two values of the y-axis
                     std::vector<double> m_Data{}; ///< Values of the mesh points, stored along the X axis
                 };
                 /**
                  * Represents a resolution used to generate the data of a mesh on the x-axis or the y-axis.
                  *
                  * A resolution is represented by a value and a flag indicating if it's in the logarithmic
                  * scale
                  * @sa Mesh
                  */
                 struct Resolution {
                     /// Value of the resolution (NaN by default)
                     double m_Val{std::numeric_limits<double>::quiet_NaN()};
                     /// True if the value is expressed in the logarithmic scale
                     bool m_Logarithmic{false};
                 };
                 /**
                  * Processes data from a data series to complete the data holes with a fill value.
                  *
                  * A data hole is determined by the resolution passed in parameter: if, between two consecutive
                  * data on the x-axis, the difference between these data is greater than the resolution, then
                  * there is one or more holes between them. The holes are filled by adding:
                  * - for the x-axis, new data corresponding to the 'step resolution' starting from the first
                  * data;
                  * - for values, a default value (fill value) for each new data added on the x-axis.
                  *
                  * For example, with :
                  * - xAxisData =  [0,    1,    5,    7,    14  ]
                  * - valuesData = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] (two components per x-axis data)
                  * - fillValue = NaN
                  * - and resolution = 2;
                  *
                  * For the x axis, we calculate as data holes: [3, 9, 11, 13]. These holes are added to the
                  * x-axis data, and NaNs (two per x-axis data) are added to the values:
                  * => xAxisData =  [0,    1,    3,        5,    7,    9,        11,       13,       14  ]
                  * => valuesData = [0, 1, 2, 3, NaN, NaN, 4, 5, 6, 7, NaN, NaN, NaN, NaN, NaN, NaN, 8, 9]
                  *
                  * It is also possible to set bounds for the data series. If these bounds are defined and exceed
                  * the limits of the data series, data holes are added to the series at the beginning and/or the
                  * end.
                  *
                  * The generation of data holes at the beginning/end of the data series is performed starting
                  * from the x-axis series limit and adding data holes at each 'resolution step' as long as the
                  * new bound is not reached.
                  *
                  * For example, with :
                  * - xAxisData =  [3,    4,    5,    6,    7  ]
                  * - valuesData = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
                  * - fillValue = NaN
                  * - minBound = 0
                  * - maxBound = 12
                  * - and resolution = 2;
                  *
                  * => Starting from 3 and decreasing 2 by 2 until reaching 0 : a data hole at value 1 will be
                  * added to the beginning of the series
                  * => Starting from 7 and increasing 2 by 2 until reaching 12 : data holes at values 9 and 11
                  * will be added to the end of the series
                  *
                  * So :
                  * => xAxisData =  [1,        3,    4,    5,    6,    7,    9,        11      ]
                  * => valuesData = [NaN, NaN, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, NaN, NaN, NaN, NaN]
                  *
                  * @param xAxisData the x-axis data of the data series
                  * @param valuesData the values data of the data series
                  * @param resolution the resolution (on x-axis) used to determine data holes
                  * @param fillValue the fill value used for data holes in the values data
                  * @param minBound the limit at which to start filling data holes for the series. If set to NaN,
                  * the limit is not used
                  * @param maxBound the limit at which to end filling data holes for the series. If set to NaN,
                  * the limit is not used
                  *
                  * @remarks There is no control over the consistency between x-axis data and values data. The
                  * method considers that the data is well formed (the total number of values data is a multiple
                  * of the number of x-axis data)
                  */
                 static void fillDataHoles(std::vector<double> &xAxisData, std::vector<double> &valuesData,
                                           double resolution,
                                           double fillValue = std::numeric_limits<double>::quiet_NaN(),
                                           double minBound = std::numeric_limits<double>::quiet_NaN(),
                                           double maxBound = std::numeric_limits<double>::quiet_NaN());
                 /**
                  * Computes the resolution of a dataset passed as a parameter.
                  *
                  * The resolution of a dataset is the minimum difference between two consecutive values of the
                  * set.
                  * For example:
                  * - for the set [0, 2, 4, 8, 10, 11, 13] => the resolution is 1 (difference between 10 and 11).
                  *
                  * A resolution can be calculated on the logarithmic scale (base 10). In this case, the
                  * dataset is first converted to logarithmic values.
                  * For example:
                  * - for the set [10, 100, 10000, 1000000], the values are converted to [1, 2, 4, 6] => the
                  * logarithmic resolution is 1 (difference between 1 and 2).
                  *
                  * @param begin the iterator pointing to the beginning of the dataset
                  * @param end the iterator pointing to the end of the dataset
                  * @param logarithmic computes a logarithmic resolution or not
                  * @return the resolution computed
                  * @warning the method considers the dataset as sorted and doesn't check it.
                  */
                 template <typename Iterator>
                 static Resolution resolution(Iterator begin, Iterator end, bool logarithmic = false);
                 /**
                  * Computes a regular mesh for a data series, according to resolutions for x-axis and y-axis
                  * passed as parameters.
                  *
                  * The mesh is created from the resolutions in x and y and the boundaries delimiting the data
                  * series. If the resolutions do not allow a regular mesh to be obtained, they are recalculated.
                  *
                  * For example :
                  * Let x-axis data = [0, 1, 3, 5, 9], its associated values = [0, 10, 30, 50, 90] and
                  * xResolution = 2.
                  * Based on the resolution, the mesh would be [0, 2, 4, 6, 8, 10] and would be invalid because
                  * it exceeds the maximum bound of the data. The resolution is thus recalculated so that the
                  * mesh fits between the data bounds.
                  * So => resolution is 1.8 and the mesh is [0, 1.8, 3.6, 5.4, 7.2, 9].
                  *
                  * Once the mesh is generated in x and y, the values are associated with each mesh point,
                  * based on the data in the series, finding the existing data at which the mesh point would be
                  * or would be closest to, without exceeding it.
                  *
                  * In the example, we determine the value of each mesh point:
                  * - x = 0 => value = 0 (existing x in the data series)
                  * - x = 1.8 => value = 10 (the closest existing x: 1)
                  * - x = 3.6 => value = 30 (the closest existing x: 3)
                  * - x = 5.4 => value = 50 (the closest existing x: 5)
                  * - x = 7.2 => value = 50 (the closest existing x: 5)
                  * - x = 9 => value = 90 (existing x in the data series)
                  *
                  * Same algorithm is applied for y-axis.
                  *
                  * @param begin the iterator pointing to the beginning of the data series
                  * @param end the iterator pointing to the end of the data series
                  * @param xResolution the resolution expected for the mesh's x-axis
                  * @param yResolution the resolution expected for the mesh's y-axis
                  * @return the mesh created, an empty mesh if the input data do not allow a regular mesh to
                  * be generated (empty data, null resolutions, logarithmic x-axis)
                  * @warning the method considers the dataset as sorted and doesn't check it.
                  */
                 static Mesh regularMesh(DataSeriesIterator begin, DataSeriesIterator end,
                                         Resolution xResolution, Resolution yResolution);
+                /**
+                 * Calculates the min and max thresholds of a dataset.
+                 *
+                 * The thresholds of a dataset correspond to the min and max limits of the set from which the
+                 * outliers (values distant from the others) are excluded. For example, for the set [1, 2, 3,
+                 * 4, 5, 10000], 10000 is an outlier and will be excluded from the thresholds.
+                 *
+                 * The bounds determining the thresholds are calculated according to the mean and the standard
+                 * deviation of the defined data. The thresholds are limited to the min / max values of the
+                 * dataset: if for example the calculated min threshold is 2 but the min value of the dataset
+                 * is 4, 4 is returned as the min threshold.
+                 *
+                 * @param begin the beginning of the dataset
+                 * @param end the end of the dataset
+                 * @param logarithmic computes threshold with a logarithmic scale or not
+                 * @return the thresholds computed as a (min, max) pair, a pair of NaN values if they
+                 * couldn't be computed
+                 */
+                template <typename Iterator>
+                static std::pair<double, double> thresholds(Iterator begin, Iterator end,
+                                                            bool logarithmic = false);
             };
             /**
              * Computes the resolution of a dataset: the smallest difference between two consecutive
              * values, optionally after converting the values to their base-10 logarithm.
              *
              * @param begin the iterator pointing to the beginning of the dataset
              * @param end the iterator pointing to the end of the dataset
              * @param logarithmic if true, the values are converted with log10 before the differences are
              * computed
              * @return the smallest consecutive difference together with the logarithmic flag. The value
              * is NaN when the dataset holds fewer than two values, since no difference exists then
              * @warning the dataset is considered as sorted; this is not checked.
              */
             template <typename Iterator>
             DataSeriesUtils::Resolution DataSeriesUtils::resolution(Iterator begin, Iterator end,
                                                                     bool logarithmic)
             {
                 // Copies the data into a work dataset, so that the input range is left untouched
                 using ValueType = typename Iterator::value_type;
                 std::vector<ValueType> values{};
                 std::copy(begin, end, std::back_inserter(values));
                 // A difference needs at least two values
                 if (values.size() < 2) {
                     return Resolution{std::numeric_limits<double>::quiet_NaN(), logarithmic};
                 }
                 // Converts data if logarithmic flag is activated
                 if (logarithmic) {
                     std::for_each(values.begin(), values.end(), [](auto &val) { val = std::log10(val); });
                 }
                 // Computes the differences between the values in the dataset (in place): after the call,
                 // values[i] holds values[i] - values[i - 1] for every i >= 1
                 std::adjacent_difference(values.begin(), values.end(), values.begin());
                 // std::adjacent_difference copies the first element unchanged: it is a raw value, not a
                 // difference, and must not take part in the search for the smallest difference
                 auto resolutionIt = std::min_element(values.begin() + 1, values.end());
                 return Resolution{static_cast<double>(*resolutionIt), logarithmic};
             }
+            template <typename Iterator>
+            std::pair<double, double> DataSeriesUtils::thresholds(Iterator begin, Iterator end,
+                                                                  bool logarithmic)
+            {
+                /**
+                 * Outline: the mean and the standard deviation of the (optionally log10-converted)
+                 * values give the bounds mean -/+ 3 * sigma. These bounds are converted back to the
+                 * linear scale and then limited to the min / max values found in the dataset.
+                 *
+                 * toLog: lambda that converts a value to log10 when the logarithmic flag is set, and
+                 * returns the value unchanged otherwise.
+                 *
+                 * NOTE(review): the cut-off used below is numeric_limits<double>::epsilon() (about
+                 * 2.2e-16 for IEEE-754 doubles), so strictly positive values smaller than that are
+                 * discarded as well as zero and negative values - confirm this is intended.
+                 */
+                auto toLog = [logarithmic](const auto &value) {
+                    if (logarithmic) {
+                        // No log10 for NaN or for values below epsilon(): they are converted to NaN
+                        return !(std::isnan(value) || value < std::numeric_limits<double>::epsilon())
+                                   ? std::log10(value)
+                                   : std::numeric_limits<double>::quiet_NaN();
+                    }
+                    else {
+                        return value;
+                    }
+                };
+                /// Lambda that converts a value back to the linear scale (10^value) if logarithmic is set
+                auto fromLog
+                    = [logarithmic](const auto &value) { return logarithmic ? std::pow(10, value) : value; };
+                /// Lambda that averages fun(value) over the dataset: it sums the results and divides the
+                /// sum by the number of non-NaN results (NaN if none). Used for the mean and the variance
+                /// @param fun the function applied to each value before it is added to the sum
+                auto accumulate = [begin, end](auto fun) {
+                    double sum;
+                    int nbValues;
+                    std::tie(sum, nbValues) = std::accumulate(
+                        begin, end, std::make_pair(0., 0), [fun](const auto &input, const auto &value) {
+                            auto computedValue = fun(value);
+                            // NaN values are excluded from the sum and from the count
+                            return !std::isnan(computedValue)
+                                       ? std::make_pair(input.first + computedValue, input.second + 1)
+                                       : input;
+                        });
+                    return nbValues != 0 ? sum / nbValues : std::numeric_limits<double>::quiet_NaN();
+                };
+                // Computes mean (NaN if no value could be used: the thresholds can't be computed then)
+                auto mean = accumulate([toLog](const auto &val) { return toLog(val); });
+                if (std::isnan(mean)) {
+                    return {std::numeric_limits<double>::quiet_NaN(), std::numeric_limits<double>::quiet_NaN()};
+                }
+                // Computes standard deviation (square root of the mean of the squared deviations)
+                auto variance
+                    = accumulate([mean, toLog](const auto &val) { return std::pow(toLog(val) - mean, 2); });
+                auto sigma = std::sqrt(variance);
+                // Computes thresholds at mean -/+ 3 sigma, converted back to the linear scale
+                auto minThreshold = fromLog(mean - 3 * sigma);
+                auto maxThreshold = fromLog(mean + 3 * sigma);
+                // Finds min/max values, compared on their converted form (presumably NaN-aware comparators - see SortUtils)
+                auto minIt = std::min_element(begin, end, [toLog](const auto &it1, const auto &it2) {
+                    return SortUtils::minCompareWithNaN(toLog(it1), toLog(it2));
+                });
+                auto maxIt = std::max_element(begin, end, [toLog](const auto &it1, const auto &it2) {
+                    return SortUtils::maxCompareWithNaN(toLog(it1), toLog(it2));
+                });
+                // Returns thresholds limited to the dataset: not below its min value, not above its max value
+                return {std::max(*minIt, minThreshold), std::min(*maxIt, maxThreshold)};
+            }
             #endif // SCIQLOP_DATASERIESUTILS_H

1

#ifndef SCIQLOP_DATASERIESUTILS_H

1

#ifndef SCIQLOP_DATASERIESUTILS_H

2

#define SCIQLOP_DATASERIESUTILS_H

2

#define SCIQLOP_DATASERIESUTILS_H

3

4

#include "CoreGlobal.h"

4

#include "CoreGlobal.h"

5

6

#include <Common/SortUtils.h>

6

#include <Data/DataSeriesIterator.h>

7

#include <Data/DataSeriesIterator.h>

7

8

#include <QLoggingCategory>

9

#include <QLoggingCategory>

9

#include <cmath>

10

#include <cmath>

10

11

Q_DECLARE_LOGGING_CATEGORY(LOG_DataSeriesUtils)

12

Q_DECLARE_LOGGING_CATEGORY(LOG_DataSeriesUtils)

12

13

/**

14

/**

14

* Utility class with methods for data series

15

* Utility class with methods for data series

15

*/

16

*/

16

struct SCIQLOP_CORE_EXPORT DataSeriesUtils {

17

struct SCIQLOP_CORE_EXPORT DataSeriesUtils {

17

/**

18

/**

18

* Define a meshs.

19

* Define a meshs.

19

*

20

*

20

* A mesh is a regular grid representing cells of the same width (in x) and of the same height

21

* A mesh is a regular grid representing cells of the same width (in x) and of the same height

21

* (in y). At each mesh point is associated a value.

22

* (in y). At each mesh point is associated a value.

22

*

23

*

23

* Each axis of the mesh is defined by a minimum value, a number of values is a mesh step.

24

* Each axis of the mesh is defined by a minimum value, a number of values is a mesh step.

24

* For example: if min = 1, nbValues = 5 and step = 2 => the axis of the mesh will be [1, 3, 5,

25

* For example: if min = 1, nbValues = 5 and step = 2 => the axis of the mesh will be [1, 3, 5,

25

* 7, 9].

26

* 7, 9].

26

*

27

*

27

* The values are defined in an array of size {nbX * nbY}. The data is stored along the X axis.

28

* The values are defined in an array of size {nbX * nbY}. The data is stored along the X axis.

28

*

29

*

29

* For example, the mesh:

30

* For example, the mesh:

30

* Y = 2 [ 7 ; 8 ; 9

31

* Y = 2 [ 7 ; 8 ; 9

31

* Y = 1 4 ; 5 ; 6

32

* Y = 1 4 ; 5 ; 6

32

* Y = 0 1 ; 2 ; 3 ]

33

* Y = 0 1 ; 2 ; 3 ]

33

* X = 0 X = 1 X = 2

34

* X = 0 X = 1 X = 2

34

*

35

*

35

* will be represented by data [1, 2, 3, 4, 5, 6, 7, 8, 9]

36

* will be represented by data [1, 2, 3, 4, 5, 6, 7, 8, 9]

36

*/

37

*/

37

struct Mesh {

38

struct Mesh {

38

explicit Mesh() = default;

39

explicit Mesh() = default;

39

explicit Mesh(int nbX, double xMin, double xStep, int nbY, double yMin, double yStep)

40

explicit Mesh(int nbX, double xMin, double xStep, int nbY, double yMin, double yStep)

40

: m_NbX{nbX},

41

: m_NbX{nbX},

41

m_XMin{xMin},

42

m_XMin{xMin},

42

m_XStep{xStep},

43

m_XStep{xStep},

43

m_NbY{nbY},

44

m_NbY{nbY},

44

m_YMin{yMin},

45

m_YMin{yMin},

45

m_YStep{yStep},

46

m_YStep{yStep},

46

m_Data(nbX * nbY)

47

m_Data(nbX * nbY)

47

{

48

{

48

}

49

}

49

50

inline bool isEmpty() const { return m_Data.size() == 0; }

51

inline bool isEmpty() const { return m_Data.size() == 0; }

51

inline double xMax() const { return m_XMin + (m_NbX - 1) * m_XStep; }

52

inline double xMax() const { return m_XMin + (m_NbX - 1) * m_XStep; }

52

inline double yMax() const { return m_YMin + (m_NbY - 1) * m_YStep; }

53

inline double yMax() const { return m_YMin + (m_NbY - 1) * m_YStep; }

53

54

int m_NbX{0};

55

int m_NbX{0};

55

double m_XMin{};

56

double m_XMin{};

56

double m_XStep{};

57

double m_XStep{};

57

int m_NbY{0};

58

int m_NbY{0};

58

double m_YMin{};

59

double m_YMin{};

59

double m_YStep{};

60

double m_YStep{};

60

std::vector<double> m_Data{};

61

std::vector<double> m_Data{};

61

};

62

};

62

63

/**

64

/**

64

* Represents a resolution used to generate the data of a mesh on the x-axis or in Y.

65

* Represents a resolution used to generate the data of a mesh on the x-axis or in Y.

65

*

66

*

66

* A resolution is represented by a value and flag indicating if it's in the logarithmic scale

67

* A resolution is represented by a value and flag indicating if it's in the logarithmic scale

67

* @sa Mesh

68

* @sa Mesh

68

*/

69

*/

69

struct Resolution {

70

struct Resolution {

70

double m_Val{std::numeric_limits<double>::quiet_NaN()};

71

double m_Val{std::numeric_limits<double>::quiet_NaN()};

71

bool m_Logarithmic{false};

72

bool m_Logarithmic{false};

72

};

73

};

73

74

/**

75

/**

75

* Processes data from a data series to complete the data holes with a fill value.

76

* Processes data from a data series to complete the data holes with a fill value.

76

*

77

*

77

* A data hole is determined by the resolution passed in parameter: if, between two continuous

78

* A data hole is determined by the resolution passed in parameter: if, between two continuous

78

* data on the x-axis, the difference between these data is greater than the resolution, then

79

* data on the x-axis, the difference between these data is greater than the resolution, then

79

* there is one or more holes between them. The holes are filled by adding:

80

* there is one or more holes between them. The holes are filled by adding:

80

* - for the x-axis, new data corresponding to the 'step resolution' starting from the first

81

* - for the x-axis, new data corresponding to the 'step resolution' starting from the first

81

* data;

82

* data;

82

* - for values, a default value (fill value) for each new data added on the x-axis.

83

* - for values, a default value (fill value) for each new data added on the x-axis.

83

*

84

*

84

* For example, with :

85

* For example, with :

85

* - xAxisData = [0, 1, 5, 7, 14 ]

86

* - xAxisData = [0, 1, 5, 7, 14 ]

86

* - valuesData = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] (two components per x-axis data)

87

* - valuesData = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] (two components per x-axis data)

87

* - fillValue = NaN

88

* - fillValue = NaN

88

* - and resolution = 2;

89

* - and resolution = 2;

89

*

90

*

90

* For the x axis, we calculate as data holes: [3, 9, 11, 13]. These holes are added to the

91

* For the x axis, we calculate as data holes: [3, 9, 11, 13]. These holes are added to the

91

* x-axis data, and NaNs (two per x-axis data) are added to the values:

92

* x-axis data, and NaNs (two per x-axis data) are added to the values:

92

* => xAxisData = [0, 1, 3, 5, 7, 9, 11, 13, 14 ]

93

* => xAxisData = [0, 1, 3, 5, 7, 9, 11, 13, 14 ]

93

* => valuesData = [0, 1, 2, 3, NaN, NaN, 4, 5, 6, 7, NaN, NaN, NaN, NaN, NaN, NaN, 8, 9]

94

* => valuesData = [0, 1, 2, 3, NaN, NaN, 4, 5, 6, 7, NaN, NaN, NaN, NaN, NaN, NaN, 8, 9]

94

*

95

*

95

* It is also possible to set bounds for the data series. If these bounds are defined and exceed

96

* It is also possible to set bounds for the data series. If these bounds are defined and exceed

96

* the limits of the data series, data holes are added to the series at the beginning and/or the

97

* the limits of the data series, data holes are added to the series at the beginning and/or the

97

* end.

98

* end.

98

*

99

*

99

* The generation of data holes at the beginning/end of the data series is performed starting

100

* The generation of data holes at the beginning/end of the data series is performed starting

100

* from the x-axis series limit and adding data holes at each 'resolution step' as long as the

101

* from the x-axis series limit and adding data holes at each 'resolution step' as long as the

101

* new bound is not reached.

102

* new bound is not reached.

102

*

103

*

103

* For example, with :

104

* For example, with :

104

* - xAxisData = [3, 4, 5, 6, 7 ]

105

* - xAxisData = [3, 4, 5, 6, 7 ]

105

* - valuesData = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

106

* - valuesData = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

106

* - fillValue = NaN

107

* - fillValue = NaN

107

* - minBound = 0

108

* - minBound = 0

108

* - maxBound = 12

109

* - maxBound = 12

109

* - and resolution = 2;

110

* - and resolution = 2;

110

*

111

*

111

* => Starting from 3 and decreasing 2 by 2 until reaching 0 : a data hole at value 1 will be

112

* => Starting from 3 and decreasing 2 by 2 until reaching 0 : a data hole at value 1 will be

112

* added to the beginning of the series

113

* added to the beginning of the series

113

* => Starting from 7 and increasing 2 by 2 until reaching 12 : data holes at values 9 and 11

114

* => Starting from 7 and increasing 2 by 2 until reaching 12 : data holes at values 9 and 11

114

* will be added to the end of the series

115

* will be added to the end of the series

115

*

116

*

116

* So :

117

* So :

117

* => xAxisData = [1, 3, 4, 5, 6, 7, 9, 11 ]

118

* => xAxisData = [1, 3, 4, 5, 6, 7, 9, 11 ]

118

* => valuesData = [NaN, NaN, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, NaN, NaN, NaN, NaN]

119

* => valuesData = [NaN, NaN, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, NaN, NaN, NaN, NaN]

119

*

120

*

120

* @param xAxisData the x-axis data of the data series

121

* @param xAxisData the x-axis data of the data series

121

* @param valuesData the values data of the data series

122

* @param valuesData the values data of the data series

122

* @param resolution the resoultion (on x-axis) used to determinate data holes

123

* @param resolution the resoultion (on x-axis) used to determinate data holes

123

* @param fillValue the fill value used for data holes in the values data

124

* @param fillValue the fill value used for data holes in the values data

124

* @param minBound the limit at which to start filling data holes for the series. If set to NaN,

125

* @param minBound the limit at which to start filling data holes for the series. If set to NaN,

125

* the limit is not used

126

* the limit is not used

126

* @param maxBound the limit at which to end filling data holes for the series. If set to NaN,

127

* @param maxBound the limit at which to end filling data holes for the series. If set to NaN,

127

* the limit is not used

128

* the limit is not used

128

*

129

*

129

* @remarks There is no control over the consistency between x-axis data and values data. The

130

* @remarks There is no control over the consistency between x-axis data and values data. The

130

* method considers that the data is well formed (the total number of values data is a multiple

131

* method considers that the data is well formed (the total number of values data is a multiple

131

* of the number of x-axis data)

132

* of the number of x-axis data)

132

*/

133

*/

133

static void fillDataHoles(std::vector<double> &xAxisData, std::vector<double> &valuesData,

134

static void fillDataHoles(std::vector<double> &xAxisData, std::vector<double> &valuesData,

134

double resolution,

135

double resolution,

135

double fillValue = std::numeric_limits<double>::quiet_NaN(),

136

double fillValue = std::numeric_limits<double>::quiet_NaN(),

136

double minBound = std::numeric_limits<double>::quiet_NaN(),

137

double minBound = std::numeric_limits<double>::quiet_NaN(),

137

double maxBound = std::numeric_limits<double>::quiet_NaN());

138

double maxBound = std::numeric_limits<double>::quiet_NaN());

138

/**

139

/**

139

* Computes the resolution of a dataset passed as a parameter.

140

* Computes the resolution of a dataset passed as a parameter.

140

*

141

*

141

* The resolution of a dataset is the minimum difference between two values that follow in the

142

* The resolution of a dataset is the minimum difference between two values that follow in the

142

* set.

143

* set.

143

* For example:

144

* For example:

144

* - for the set [0, 2, 4, 8, 10, 11, 13] => the resolution is 1 (difference between 10 and 11).

145

* - for the set [0, 2, 4, 8, 10, 11, 13] => the resolution is 1 (difference between 10 and 11).

145

*

146

*

146

* A resolution can be calculated on the logarithmic scale (base of 10). In this case, the

147

* A resolution can be calculated on the logarithmic scale (base of 10). In this case, the

147

* dataset is first converted to logarithmic values.

148

* dataset is first converted to logarithmic values.

148

* For example:

149

* For example:

149

* - for the set [10, 100, 10000, 1000000], the values are converted to [1, 2, 4, 6] => the

150

* - for the set [10, 100, 10000, 1000000], the values are converted to [1, 2, 4, 6] => the

150

* logarithmic resolution is 1 (difference between 1 and 2).

151

* logarithmic resolution is 1 (difference between 1 and 2).

151

*

152

*

152

* @param begin the iterator pointing to the beginning of the dataset

153

* @param begin the iterator pointing to the beginning of the dataset

153

* @param end the iterator pointing to the end of the dataset

154

* @param end the iterator pointing to the end of the dataset

154

* @param logarithmic computes a logarithmic resolution or not

155

* @param logarithmic computes a logarithmic resolution or not

155

* @return the resolution computed

156

* @return the resolution computed

156

* @warning the method considers the dataset as sorted and doesn't control it.

157

* @warning the method considers the dataset as sorted and doesn't control it.

157

*/

158

*/

158

template <typename Iterator>

159

template <typename Iterator>

159

static Resolution resolution(Iterator begin, Iterator end, bool logarithmic = false);

160

static Resolution resolution(Iterator begin, Iterator end, bool logarithmic = false);

160

161

/**

162

/**

162

* Computes a regular mesh for a data series, according to resolutions for x-axis and y-axis

163

* Computes a regular mesh for a data series, according to resolutions for x-axis and y-axis

163

* passed as parameters.

164

* passed as parameters.

164

*

165

*

165

* The mesh is created from the resolutions in x and y and the boundaries delimiting the data

166

* The mesh is created from the resolutions in x and y and the boundaries delimiting the data

166

* series. If the resolutions do not allow to obtain a regular mesh, they are recalculated.

167

* series. If the resolutions do not allow to obtain a regular mesh, they are recalculated.

167

*

168

*

168

* For example :

169

* For example :

169

* Let x-axis data = [0, 1, 3, 5, 9], its associated values = [0, 10, 30, 50, 90] and

170

* Let x-axis data = [0, 1, 3, 5, 9], its associated values = [0, 10, 30, 50, 90] and

170

* xResolution = 2.

171

* xResolution = 2.

171

* Based on the resolution, the mesh would be [0, 2, 4, 6, 8, 10] and would be invalid because

172

* Based on the resolution, the mesh would be [0, 2, 4, 6, 8, 10] and would be invalid because

172

* it exceeds the maximum bound of the data. The resolution is thus recalculated so that the

173

* it exceeds the maximum bound of the data. The resolution is thus recalculated so that the

173

* mesh fits within the data bounds.

174

* mesh fits within the data bounds.

174

* So => resolution is 1.8 and the mesh is [0, 1.8, 3.6, 5.4, 7.2, 9].

175

* So => resolution is 1.8 and the mesh is [0, 1.8, 3.6, 5.4, 7.2, 9].

175

*

176

*

176

* Once the mesh is generated in x and y, the values are associated with each mesh point,

177

* Once the mesh is generated in x and y, the values are associated with each mesh point,

177

* based on the data in the series, finding the existing data at which the mesh point would be

178

* based on the data in the series, finding the existing data at which the mesh point would be

178

* or would be closest to, without exceeding it.

179

* or would be closest to, without exceeding it.

179

*

180

*

180

* In the example, we determine the value of each mesh point:

181

* In the example, we determine the value of each mesh point:

181

* - x = 0 => value = 0 (existing x in the data series)

182

* - x = 0 => value = 0 (existing x in the data series)

182

* - x = 1.8 => value = 10 (the closest existing x: 1)

183

* - x = 1.8 => value = 10 (the closest existing x: 1)

183

* - x = 3.6 => value = 30 (the closest existing x: 3)

184

* - x = 3.6 => value = 30 (the closest existing x: 3)

184

* - x = 5.4 => value = 50 (the closest existing x: 5)

185

* - x = 5.4 => value = 50 (the closest existing x: 5)

185

* - x = 7.2 => value = 50 (the closest existing x: 5)

186

* - x = 7.2 => value = 50 (the closest existing x: 5)

186

* - x = 9 => value = 90 (existing x in the data series)

187

* - x = 9 => value = 90 (existing x in the data series)

187

*

188

*

188

* Same algorithm is applied for y-axis.

189

* Same algorithm is applied for y-axis.

189

*

190

*

190

* @param begin the iterator pointing to the beginning of the data series

191

* @param begin the iterator pointing to the beginning of the data series

191

* @param end the iterator pointing to the end of the data series

192

* @param end the iterator pointing to the end of the data series

192

* @param xResolution the resolution expected for the mesh's x-axis

193

* @param xResolution the resolution expected for the mesh's x-axis

193

* @param yResolution the resolution expected for the mesh's y-axis

194

* @param yResolution the resolution expected for the mesh's y-axis

194

* @return the mesh created, an empty mesh if the input data do not allow to generate a regular

195

* @return the mesh created, an empty mesh if the input data do not allow to generate a regular

195

* mesh (empty data, null resolutions, logarithmic x-axis)

196

* mesh (empty data, null resolutions, logarithmic x-axis)

196

* @warning the method considers the dataset as sorted and doesn't control it.

197

* @warning the method considers the dataset as sorted and doesn't control it.

197

*/

198

*/

198

static Mesh regularMesh(DataSeriesIterator begin, DataSeriesIterator end,

199

static Mesh regularMesh(DataSeriesIterator begin, DataSeriesIterator end,

199

Resolution xResolution, Resolution yResolution);

200

Resolution xResolution, Resolution yResolution);

201

202

/**

203

* Calculates the min and max thresholds of a dataset.

204

*

205

* The thresholds of a dataset correspond to the min and max limits of the set to which the

206

* outliers are excluded (values distant from the others). For example, for the set [1, 2, 3, 4,

207

* 5, 10000], 10000 is an outlier and will be excluded from the thresholds.

208

*

209

* The bounds determining the thresholds are calculated according to the mean and the standard

210

* deviation of the defined data. The thresholds are limited to the min / max values of the

211

* dataset: if for example the calculated min threshold is 2 but the min value of the dataset

212

* is 4, 4 is returned as the min threshold.

213

*

214

* @param begin the beginning of the dataset

215

* @param end the end of the dataset

216

* @param logarithmic computes threshold with a logarithmic scale or not

217

* @return the thresholds computed, a couple of nan values if it couldn't be computed

218

*/

219

template <typename Iterator>

220

static std::pair<double, double> thresholds(Iterator begin, Iterator end,

221

bool logarithmic = false);

200

};

222

};

201

223

202

template <typename Iterator>

224

template <typename Iterator>

203

DataSeriesUtils::Resolution DataSeriesUtils::resolution(Iterator begin, Iterator end,

225

DataSeriesUtils::Resolution DataSeriesUtils::resolution(Iterator begin, Iterator end,

204

bool logarithmic)

226

bool logarithmic)

205

{

227

{

206

// Retrieves data into a work dataset

228

// Retrieves data into a work dataset

207

using ValueType = typename Iterator::value_type;

229

using ValueType = typename Iterator::value_type;

208

std::vector<ValueType> values{};

230

std::vector<ValueType> values{};

209

std::copy(begin, end, std::back_inserter(values));

231

std::copy(begin, end, std::back_inserter(values));

210

232

211

// Converts data if logarithmic flag is activated

233

// Converts data if logarithmic flag is activated

212

if (logarithmic) {

234

if (logarithmic) {

213

std::for_each(values.begin(), values.end(),

235

std::for_each(values.begin(), values.end(),

214

[logarithmic](auto &val) { val = std::log10(val); });

236

[logarithmic](auto &val) { val = std::log10(val); });

215

}

237

}

216

238

217

// Computes the differences between the values in the dataset

239

// Computes the differences between the values in the dataset

218

std::adjacent_difference(values.begin(), values.end(), values.begin());

240

std::adjacent_difference(values.begin(), values.end(), values.begin());

219

241

220

// Retrieves the smallest difference

242

// Retrieves the smallest difference

221

auto resolutionIt = std::min_element(values.begin(), values.end());

243

auto resolutionIt = std::min_element(values.begin(), values.end());

222

auto resolution

244

auto resolution

223

= resolutionIt != values.end() ? *resolutionIt : std::numeric_limits<double>::quiet_NaN();

245

= resolutionIt != values.end() ? *resolutionIt : std::numeric_limits<double>::quiet_NaN();

224

246

225

return Resolution{resolution, logarithmic};

247

return Resolution{resolution, logarithmic};

226

}

248

}

227

249

250

template <typename Iterator>

251

std::pair<double, double> DataSeriesUtils::thresholds(Iterator begin, Iterator end,

252

bool logarithmic)

253

{

254

/// Lambda that converts values in case of logaritmic scale

255

auto toLog = [logarithmic](const auto &value) {

256

if (logarithmic) {

257

// Logaritmic scale doesn't include zero value

258

return !(std::isnan(value) || value < std::numeric_limits<double>::epsilon())

259

? std::log10(value)

260

: std::numeric_limits<double>::quiet_NaN();

261

}

262

else {

263

return value;

264

}

265

};

266

267

/// Lambda that converts values to linear scale

268

auto fromLog

269

= [logarithmic](const auto &value) { return logarithmic ? std::pow(10, value) : value; };

270

271

/// Lambda used to sum data and divide the sum by the number of data. It is used to calculate

272

/// the mean and standard deviation

273

/// @param fun the data addition function

274

auto accumulate = [begin, end](auto fun) {

275

double sum;

276

int nbValues;

277

std::tie(sum, nbValues) = std::accumulate(

278

begin, end, std::make_pair(0., 0), [fun](const auto &input, const auto &value) {

279

auto computedValue = fun(value);

280

281

// NaN values are excluded from the sum

282

return !std::isnan(computedValue)

283

? std::make_pair(input.first + computedValue, input.second + 1)

284

: input;

285

});

286

287

return nbValues != 0 ? sum / nbValues : std::numeric_limits<double>::quiet_NaN();

288

};

289

290

// Computes mean

291

auto mean = accumulate([toLog](const auto &val) { return toLog(val); });

292

if (std::isnan(mean)) {

293

return {std::numeric_limits<double>::quiet_NaN(), std::numeric_limits<double>::quiet_NaN()};

294

}

295

296

// Computes standard deviation

297

auto variance

298

= accumulate([mean, toLog](const auto &val) { return std::pow(toLog(val) - mean, 2); });

299

auto sigma = std::sqrt(variance);

300

301

// Computes thresholds

302

auto minThreshold = fromLog(mean - 3 * sigma);

303

auto maxThreshold = fromLog(mean + 3 * sigma);

304

305

// Finds min/max values

306

auto minIt = std::min_element(begin, end, [toLog](const auto &it1, const auto &it2) {

307

return SortUtils::minCompareWithNaN(toLog(it1), toLog(it2));

308

});

309

auto maxIt = std::max_element(begin, end, [toLog](const auto &it1, const auto &it2) {

310

return SortUtils::maxCompareWithNaN(toLog(it1), toLog(it2));

311

});

312

313

// Returns thresholds (bounded to min/max values)

314

return {std::max(*minIt, minThreshold), std::min(*maxIt, maxThreshold)};

315

}

316

228

#endif // SCIQLOP_DATASERIESUTILS_H

317

#endif // SCIQLOP_DATASERIESUTILS_H

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages