|
| 1 | +" |
| 2 | +I am a collection of data that statistical measures can be applied to. |
| 3 | + |
| 4 | +My input collection can be anything, but most measures need numerical data to work properly. geometricMean and harmonicMean additionally signal an Error if any element is zero or negative. |
| 5 | +
|
| 6 | +Create me by sending `new` to my class and then sending me the `data:` message |
| 7 | +
|
| 8 | + sample := PMStatisticalSample new. |
| 9 | + sample data: #(1 2 3 4 5) |
| 10 | +
|
| 11 | +or by using my `newFrom:` class message |
| 12 | +
|
| 13 | + sample := PMStatisticalSample newFrom: #(1 2 3 4 5) |
| 14 | +
|
| 15 | +Now you can get statistical measures of the data using any of the following messages |
| 16 | +
|
| 17 | + sample mode |
| 18 | + sample geometricMean |
| 19 | + sample harmonicMean |
| 20 | +
|
| 21 | +If you only want one statistical measure for a collection, you can use my class methods, each of which builds a temporary instance with `newFrom:` |
| 22 | +
|
| 23 | + gMean := PMStatisticalSample geometricMean: #(1 2 3 4 5) |
| 24 | + hMean := PMStatisticalSample harmonicMean: #(1 2 3 4 5) |
| 25 | + mode := PMStatisticalSample mode: #(1 2 3 4 5) |
| 26 | +
|
| 27 | +
|
| 28 | +" |
| 29 | +Class { |
| 30 | + #name : #PMStatisticalSample, |
| 31 | + #superclass : #Object, |
| 32 | + #instVars : [ |
| 33 | + 'data' |
| 34 | + ], |
| 35 | + #category : #'Math-Statistics' |
| 36 | +} |
| 37 | + |
| 38 | +{ #category : #information } |
| 39 | +PMStatisticalSample class >> geometricMean: aCollection [ |
| 40 | + "Answer the geometric mean of aCollection. Convenience shortcut: a new instance of the receiver is created with newFrom: aCollection and asked for its geometricMean, so the same restriction applies (an Error is signalled if any element is zero or negative)." |
| 41 | + |
| 42 | + ^ (self newFrom: aCollection) geometricMean |
| 43 | +] |
| 44 | + |
| 45 | +{ #category : #information } |
| 46 | +PMStatisticalSample class >> harmonicMean: aCollection [ |
| 47 | + "Answer the harmonic mean of aCollection. Convenience shortcut: a new instance of the receiver is created with newFrom: aCollection and asked for its harmonicMean, so the same restriction applies (an Error is signalled if any element is zero or negative)." |
| 48 | + |
| 49 | + ^ (self newFrom: aCollection) harmonicMean |
| 50 | +] |
| 51 | + |
| 52 | +{ #category : #information } |
| 53 | +PMStatisticalSample class >> mode: aCollection [ |
| 54 | + "Answer the mode (the most common value) of aCollection. Convenience shortcut: a new instance of the receiver is created with newFrom: aCollection and asked for its mode." |
| 55 | + |
| 56 | + ^ (self newFrom: aCollection) mode |
| 57 | +] |
| 58 | + |
| 59 | +{ #category : #'instance creation' } |
| 60 | +PMStatisticalSample class >> newFrom: aCollection [ |
| 61 | + "Answer a new instance of the receiver whose data is aCollection." |
| 62 | + |
| 63 | + "The cascade sends data: to the fresh instance; the closing yourself makes the expression answer that instance." |
| 64 | + ^ self new |
| 65 | + data: aCollection; |
| 66 | + yourself |
| 67 | +] |
| 68 | + |
| 69 | +{ #category : #accessing } |
| 70 | +PMStatisticalSample >> data [ |
| 71 | + "Answer the collection held in my data instance variable, i.e. the collection my statistical measures are calculated against. The collection itself is answered, not a copy." |
| 72 | + |
| 73 | + ^data |
| 74 | +] |
| 75 | + |
| 76 | +{ #category : #accessing } |
| 77 | +PMStatisticalSample >> data: aCollection [ |
| 78 | + "Store aCollection as the data my statistical measures are calculated against. The collection is kept as given (not copied) and is not validated here; geometricMean and harmonicMean check for non-positive elements when they are sent." |
| 79 | + |
| 80 | + data := aCollection |
| 81 | +] |
| 82 | + |
| 83 | +{ #category : #information } |
| 84 | +PMStatisticalSample >> geometricMean [ |
| 85 | + "Answer the geometric mean of my data, computed as the exponential of the mean of the natural logarithms of the elements. Signal an Error if any element is zero or negative." |
| 86 | + |
| 87 | + "(PMStatisticalSample new data: #(1 1 2 3 4 5 5 6 6 7 8 9)) geometricMean >>> 3.8583980015011217" |
| 88 | + |
| 89 | + "(PMStatisticalSample new data: { 4. 1. 1 / 32}) geometricMean >>> 0.49999999999999994" |
| 90 | + |
| 91 | + "(PMStatisticalSample new data: #(3.14 1 4.56 0.333)) geometricMean >>> 1.4776945822943937" |
| 92 | + |
| 93 | + "(PMStatisticalSample new data: {1/3. 2/3. 8/3. 16/3}) geometricMean >>> 1.3333333333333335" |
| 94 | + |
| 95 | + | logarithms | |
| 96 | + (data anySatisfy: [ :each | each <= 0 ]) |
| 97 | + ifTrue: [ Error new signal: 'The geometric mean should only be calculated on positive numbers' ]. |
| 98 | + |
| 99 | + "Multiplying all the elements and then raising the product to the fraction 1 / size risks |
| 100 | + Float overflow/underflow on large collections, so the logarithms are averaged instead. The side |
| 101 | + effect is that the answer is always a Float and subject to floating point arithmetic imprecision." |
| 102 | + logarithms := data collect: [ :each | each ln ]. |
| 103 | + |
| 104 | + ^ (logarithms sum / data size) exp |
| 105 | +] |
| 106 | + |
| 107 | +{ #category : #information } |
| 108 | +PMStatisticalSample >> harmonicMean [ |
| 109 | + "Answer the harmonic mean of my data: the number of elements divided by the sum of their reciprocals. Signal an Error if any element is zero or negative." |
| 110 | + |
| 111 | + "(PMStatisticalSample new data: #(2.5 3 10)) harmonicMean >>> 3.6" |
| 112 | + |
| 113 | + | reciprocalSum | |
| 114 | + (data anySatisfy: [ :each | each <= 0 ]) |
| 115 | + ifTrue: [ Error new signal: 'The harmonic mean should only be calculated on positive numbers' ]. |
| 116 | + |
| 117 | + "Fold the reciprocals together in iteration order, starting from zero." |
| 118 | + reciprocalSum := data |
| 119 | + inject: 0 into: [ :runningSum :each | runningSum + (1 / each) ]. |
| 120 | + ^ data size / reciprocalSum |
| 121 | +] |
| 122 | + |
| 123 | +{ #category : #information } |
| 124 | +PMStatisticalSample >> mode [ |
| 125 | + "Answer the most common value in my data. If several values are equally common, answer the smallest of them; in that case the tied values must understand <. With a single most common value no comparison between elements is made." |
| 126 | + |
| 127 | + "(PMStatisticalSample new data: #(1 2 2 2 3 4 5)) mode >>> 2" |
| 128 | + "(PMStatisticalSample new data: #(5 5 1 1 2 3 4)) mode >>> 1" |
| 129 | + |
| 130 | + | counts highest | |
| 131 | + counts := data asBag sortedCounts. |
| 132 | + "Entries are count -> value, ordered by count only, so ties at the top are resolved here explicitly." |
| 133 | + highest := (counts at: 1) key. |
| 134 | + ^ ((counts select: [ :each | each key = highest ]) detectMin: [ :each | each value ]) value |
| 135 | +] |
0 commit comments