Merge pull request #636 from menghanl/histogram

Change histogram to comply with benchmark worker requirements
This commit is contained in:
Qi Zhao
2016-04-21 18:06:02 -07:00
2 changed files with 82 additions and 68 deletions

View File

@ -4,6 +4,7 @@ import (
"bytes" "bytes"
"fmt" "fmt"
"io" "io"
"math"
"strconv" "strconv"
"strings" "strings"
"time" "time"
@ -15,6 +16,8 @@ type HistogramValue struct {
Count int64 Count int64
// Sum is the sum of all the values added to the histogram. // Sum is the sum of all the values added to the histogram.
Sum int64 Sum int64
// SumOfSquares is the sum of squares of all values.
SumOfSquares int64
// Min is the minimum of all the values added to the histogram. // Min is the minimum of all the values added to the histogram.
Min int64 Min int64
// Max is the maximum of all the values added to the histogram. // Max is the maximum of all the values added to the histogram.
@ -26,7 +29,7 @@ type HistogramValue struct {
// HistogramBucket is one histogram bucket. // HistogramBucket is one histogram bucket.
type HistogramBucket struct { type HistogramBucket struct {
// LowBound is the lower bound of the bucket. // LowBound is the lower bound of the bucket.
LowBound int64 LowBound float64
// Count is the number of values in the bucket. // Count is the number of values in the bucket.
Count int64 Count int64
} }
@ -40,7 +43,7 @@ func (v HistogramValue) Print(w io.Writer) {
return return
} }
maxBucketDigitLen := len(strconv.FormatInt(v.Buckets[len(v.Buckets)-1].LowBound, 10)) maxBucketDigitLen := len(strconv.FormatFloat(v.Buckets[len(v.Buckets)-1].LowBound, 'f', 6, 64))
if maxBucketDigitLen < 3 { if maxBucketDigitLen < 3 {
// For "inf". // For "inf".
maxBucketDigitLen = 3 maxBucketDigitLen = 3
@ -50,9 +53,9 @@ func (v HistogramValue) Print(w io.Writer) {
accCount := int64(0) accCount := int64(0)
for i, b := range v.Buckets { for i, b := range v.Buckets {
fmt.Fprintf(w, "[%*d, ", maxBucketDigitLen, b.LowBound) fmt.Fprintf(w, "[%*f, ", maxBucketDigitLen, b.LowBound)
if i+1 < len(v.Buckets) { if i+1 < len(v.Buckets) {
fmt.Fprintf(w, "%*d)", maxBucketDigitLen, v.Buckets[i+1].LowBound) fmt.Fprintf(w, "%*f)", maxBucketDigitLen, v.Buckets[i+1].LowBound)
} else { } else {
fmt.Fprintf(w, "%*s)", maxBucketDigitLen, "inf") fmt.Fprintf(w, "%*s)", maxBucketDigitLen, "inf")
} }
@ -73,16 +76,21 @@ func (v HistogramValue) String() string {
return b.String() return b.String()
} }
// A Histogram accumulates values in the form of a histogram. The type of the // Histogram accumulates values in the form of a histogram with
// values is int64, which is suitable for keeping track of things like RPC // exponentially increased bucket sizes.
// latency in milliseconds. New histogram objects should be obtained via the // The first bucket (with index 0) is [MinValue, MinValue + n) where n = BaseBucketSize.
// New() function. // Bucket i (i>=1) contains [MinValue + n * m^(i-1), MinValue + n * m^i), where m = 1 + GrowthFactor.
// The type of the values is int64.
type Histogram struct { type Histogram struct {
opts HistogramOptions opts HistogramOptions
buckets []bucketInternal buckets []bucketInternal
count *Counter count *Counter
sum *Counter sum *Counter
sumOfSquares *Counter
tracker *Tracker tracker *Tracker
logBaseBucketSize float64
oneOverLogOnePlusGrowthFactor float64
} }
// HistogramOptions contains the parameters that define the histogram's buckets. // HistogramOptions contains the parameters that define the histogram's buckets.
@ -92,9 +100,8 @@ type HistogramOptions struct {
// GrowthFactor is the growth factor of the buckets. A value of 0.1 // GrowthFactor is the growth factor of the buckets. A value of 0.1
// indicates that bucket N+1 will be 10% larger than bucket N. // indicates that bucket N+1 will be 10% larger than bucket N.
GrowthFactor float64 GrowthFactor float64
// SmallestBucketSize is the size of the first bucket. Bucket sizes are // BaseBucketSize is the size of the first bucket.
// rounded down to the nearest integer. BaseBucketSize float64
SmallestBucketSize float64
// MinValue is the lower bound of the first bucket. // MinValue is the lower bound of the first bucket.
MinValue int64 MinValue int64
} }
@ -102,7 +109,7 @@ type HistogramOptions struct {
// bucketInternal is the internal representation of a bucket, which includes a // bucketInternal is the internal representation of a bucket, which includes a
// rate counter. // rate counter.
type bucketInternal struct { type bucketInternal struct {
lowBound int64 lowBound float64
count *Counter count *Counter
} }
@ -112,23 +119,28 @@ func NewHistogram(opts HistogramOptions) *Histogram {
if opts.NumBuckets == 0 { if opts.NumBuckets == 0 {
opts.NumBuckets = 32 opts.NumBuckets = 32
} }
if opts.SmallestBucketSize == 0.0 { if opts.BaseBucketSize == 0.0 {
opts.SmallestBucketSize = 1.0 opts.BaseBucketSize = 1.0
} }
h := Histogram{ h := Histogram{
opts: opts, opts: opts,
buckets: make([]bucketInternal, opts.NumBuckets), buckets: make([]bucketInternal, opts.NumBuckets),
count: newCounter(), count: newCounter(),
sum: newCounter(), sum: newCounter(),
sumOfSquares: newCounter(),
tracker: newTracker(), tracker: newTracker(),
logBaseBucketSize: math.Log(opts.BaseBucketSize),
oneOverLogOnePlusGrowthFactor: 1 / math.Log(1+opts.GrowthFactor),
} }
low := opts.MinValue m := 1.0 + opts.GrowthFactor
delta := opts.SmallestBucketSize delta := opts.BaseBucketSize
for i := 0; i < opts.NumBuckets; i++ { h.buckets[0].lowBound = float64(opts.MinValue)
h.buckets[i].lowBound = low h.buckets[0].count = newCounter()
for i := 1; i < opts.NumBuckets; i++ {
h.buckets[i].lowBound = float64(opts.MinValue) + delta
h.buckets[i].count = newCounter() h.buckets[i].count = newCounter()
low = low + int64(delta) delta = delta * m
delta = delta * (1.0 + opts.GrowthFactor)
} }
return &h return &h
} }
@ -147,6 +159,7 @@ func (h *Histogram) Add(value int64) error {
h.buckets[bucket].count.Incr(1) h.buckets[bucket].count.Incr(1)
h.count.Incr(1) h.count.Incr(1)
h.sum.Incr(value) h.sum.Incr(value)
h.sumOfSquares.Incr(value * value)
h.tracker.Push(value) h.tracker.Push(value)
return nil return nil
} }
@ -169,6 +182,7 @@ func (h *Histogram) Value() HistogramValue {
v := HistogramValue{ v := HistogramValue{
Count: h.count.Value(), Count: h.count.Value(),
Sum: h.sum.Value(), Sum: h.sum.Value(),
SumOfSquares: h.sumOfSquares.Value(),
Min: h.tracker.Min(), Min: h.tracker.Min(),
Max: h.tracker.Max(), Max: h.tracker.Max(),
Buckets: b, Buckets: b,
@ -189,6 +203,7 @@ func (h *Histogram) Delta1h() HistogramValue {
v := HistogramValue{ v := HistogramValue{
Count: h.count.Delta1h(), Count: h.count.Delta1h(),
Sum: h.sum.Delta1h(), Sum: h.sum.Delta1h(),
SumOfSquares: h.sumOfSquares.Delta1h(),
Min: h.tracker.Min1h(), Min: h.tracker.Min1h(),
Max: h.tracker.Max1h(), Max: h.tracker.Max1h(),
Buckets: b, Buckets: b,
@ -209,6 +224,7 @@ func (h *Histogram) Delta10m() HistogramValue {
v := HistogramValue{ v := HistogramValue{
Count: h.count.Delta10m(), Count: h.count.Delta10m(),
Sum: h.sum.Delta10m(), Sum: h.sum.Delta10m(),
SumOfSquares: h.sumOfSquares.Delta10m(),
Min: h.tracker.Min10m(), Min: h.tracker.Min10m(),
Max: h.tracker.Max10m(), Max: h.tracker.Max10m(),
Buckets: b, Buckets: b,
@ -229,6 +245,7 @@ func (h *Histogram) Delta1m() HistogramValue {
v := HistogramValue{ v := HistogramValue{
Count: h.count.Delta1m(), Count: h.count.Delta1m(),
Sum: h.sum.Delta1m(), Sum: h.sum.Delta1m(),
SumOfSquares: h.sumOfSquares.Delta1m(),
Min: h.tracker.Min1m(), Min: h.tracker.Min1m(),
Max: h.tracker.Max1m(), Max: h.tracker.Max1m(),
Buckets: b, Buckets: b,
@ -236,20 +253,17 @@ func (h *Histogram) Delta1m() HistogramValue {
return v return v
} }
// findBucket does a binary search to find in which bucket the value goes.
func (h *Histogram) findBucket(value int64) (int, error) { func (h *Histogram) findBucket(value int64) (int, error) {
lastBucket := len(h.buckets) - 1 delta := float64(value - h.opts.MinValue)
min, max := 0, lastBucket var b int
for max >= min { if delta >= h.opts.BaseBucketSize {
b := (min + max) / 2 // b = log_{1+growthFactor} (delta / baseBucketSize) + 1
if value >= h.buckets[b].lowBound && (b == lastBucket || value < h.buckets[b+1].lowBound) { // = log(delta / baseBucketSize) / log(1+growthFactor) + 1
return b, nil // = (log(delta) - log(baseBucketSize)) * (1 / log(1+growthFactor)) + 1
} b = int((math.Log(delta)-h.logBaseBucketSize)*h.oneOverLogOnePlusGrowthFactor + 1)
if value < h.buckets[b].lowBound {
max = b - 1
continue
}
min = b + 1
} }
if b >= len(h.buckets) {
return 0, fmt.Errorf("no bucket for value: %d", value) return 0, fmt.Errorf("no bucket for value: %d", value)
} }
return b, nil
}

View File

@ -84,9 +84,9 @@ func (stats *Stats) maybeUpdate() {
} }
stats.histogram = NewHistogram(HistogramOptions{ stats.histogram = NewHistogram(HistogramOptions{
NumBuckets: numBuckets, NumBuckets: numBuckets,
// max(i.e., Nth lower bound) = min + (1 + growthFactor)^(numBuckets-2). // max - min = (1 + growthFactor)^(numBuckets-2) * baseBucketSize, where max is the lower bound of the last bucket.
GrowthFactor: math.Pow(float64(stats.max-stats.min), 1/float64(stats.numBuckets-2)) - 1, GrowthFactor: math.Pow(float64(stats.max-stats.min), 1/float64(numBuckets-2)) - 1,
SmallestBucketSize: 1.0, BaseBucketSize: 1.0,
MinValue: stats.min}) MinValue: stats.min})
for _, d := range stats.durations { for _, d := range stats.durations {