package scheduler

import (
	"container/list"
	"context"
	"errors"
	"time"

	"github.com/grafana/dskit/services"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promauto"
)

const (
	// DefaultMaxSizePerTenant is the default maximum number of items per tenant in the queue.
	DefaultMaxSizePerTenant = 100
)

var ErrQueueClosed = errors.New("queue closed")
var ErrTenantQueueFull = errors.New("tenant queue full")
var ErrNilRunnable = errors.New("cannot enqueue nil runnable")
var ErrMissingTenantID = errors.New("item requires TenantID")

type tenantQueue struct {
	id       string
	items    []func(ctx context.Context)
	isActive bool
}

func (tq *tenantQueue) len() int {
	return len(tq.items)
}

func (tq *tenantQueue) clear() {
	tq.items = nil
	tq.isActive = false
}

func (tq *tenantQueue) isEmpty() bool {
	return len(tq.items) == 0
}

func (tq *tenantQueue) isFull(maxSize int) bool {
	return maxSize > 0 && len(tq.items) >= maxSize
}

func (tq *tenantQueue) addItem(runnable func(ctx context.Context)) {
	tq.items = append(tq.items, runnable)
}

// enqueueRequest asks the dispatcher to add a runnable to a tenant's queue.
type enqueueRequest struct {
	tenantID string
	runnable func(ctx context.Context)
	respChan chan error
}

// dequeueRequest asks the dispatcher for the next runnable; the response is
// delivered on respChan once an item is available.
type dequeueRequest struct {
	respChan chan dequeueResponse
}

type dequeueResponse struct {
	runnable func(ctx context.Context)
	err      error
}

// lenRequest asks the dispatcher for the total number of queued items.
type lenRequest struct {
	respChan chan int
}

// activeTenantsLenRequest asks the dispatcher for the number of tenants that
// currently have queued items.
type activeTenantsLenRequest struct {
	respChan chan int
}

// NoopQueue is a no-op implementation that executes each runnable
// synchronously on the caller's goroutine instead of queueing it.
type NoopQueue struct{}

func (*NoopQueue) Enqueue(ctx context.Context, _ string, runnable func(ctx context.Context)) error {
	runnable(ctx)
	return nil
}

func NewNoopQueue() *NoopQueue {
	return &NoopQueue{}
}
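
// Illustrative sketch (not part of the original file): with NoopQueue the
// runnable runs before Enqueue returns, so callers observe its side effects
// immediately. The variable names below are assumptions for illustration.
//
//	q := NewNoopQueue()
//	done := false
//	_ = q.Enqueue(context.Background(), "tenant-1", func(ctx context.Context) {
//		done = true
//	})
//	// done is already true here; nothing was queued.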

// Queue implements a multi-tenant queue with round-robin fairness using a
// dispatcher goroutine.
type Queue struct {
	services.Service

	enqueueChan           chan enqueueRequest
	dequeueChan           chan dequeueRequest
	lenChan               chan lenRequest
	activeTenantsLenChan  chan activeTenantsLenRequest
	dispatcherStoppedChan chan struct{}

	// tenantQueues stores the queues for each tenant
	tenantQueues map[string]*tenantQueue
	// activeTenants is a list of tenants with items in their queues,
	// used for round-robin dequeueing
	activeTenants *list.List
	// pendingDequeueRequests is a list of dequeue requests waiting for items,
	// used for notifying when items are available
	pendingDequeueRequests *list.List
	// maxSizePerTenant is the maximum number of items per tenant
	maxSizePerTenant int

	// Metrics
	queueLength       *prometheus.GaugeVec
	discardedRequests *prometheus.CounterVec
	enqueueDuration   prometheus.Histogram
}

type QueueOptions struct {
	MaxSizePerTenant int
	Registerer       prometheus.Registerer
}

// NewQueue creates a new Queue. Its dispatcher goroutine runs once the
// returned service has been started.
func NewQueue(opts *QueueOptions) *Queue {
	if opts.MaxSizePerTenant <= 0 {
		opts.MaxSizePerTenant = DefaultMaxSizePerTenant
	}

	q := &Queue{
		enqueueChan:           make(chan enqueueRequest),
		dequeueChan:           make(chan dequeueRequest),
		lenChan:               make(chan lenRequest),
		activeTenantsLenChan:  make(chan activeTenantsLenRequest),
		dispatcherStoppedChan: make(chan struct{}),

		tenantQueues:           make(map[string]*tenantQueue),
		activeTenants:          list.New(),
		pendingDequeueRequests: list.New(),
		maxSizePerTenant:       opts.MaxSizePerTenant,
	}

	q.queueLength = promauto.With(opts.Registerer).NewGaugeVec(prometheus.GaugeOpts{
		Name: "queue_length",
		Help: "Number of items in the queue",
	}, []string{"namespace"})
	q.discardedRequests = promauto.With(opts.Registerer).NewCounterVec(prometheus.CounterOpts{
		Name: "discarded_requests_total",
		Help: "Total number of discarded requests",
	}, []string{"namespace", "reason"})
	q.enqueueDuration = promauto.With(opts.Registerer).NewHistogram(prometheus.HistogramOpts{
		Name:    "enqueue_duration_seconds",
		Help:    "Duration of enqueue operation in seconds",
		Buckets: prometheus.DefBuckets,
	})

	q.Service = services.NewBasicService(nil, q.dispatcherLoop, q.stopping)

	return q
}
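
// Construction sketch (illustrative, not from the original file): a caller
// would build the queue and start it through the dskit services API before
// enqueueing. The registry value and error handling are assumptions.
//
//	q := NewQueue(&QueueOptions{
//		MaxSizePerTenant: 50,
//		Registerer:       prometheus.NewRegistry(),
//	})
//	if err := services.StartAndAwaitRunning(ctx, q); err != nil {
//		return err
//	}
//	defer services.StopAndAwaitTerminated(context.Background(), q)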

func (q *Queue) scheduleRoundRobin() {
	// Process as long as we have both pending requests and active tenants
	for {
		// Get the front elements of both lists
		reqElem := q.pendingDequeueRequests.Front()
		tenantElem := q.activeTenants.Front()

		// Exit when either list is empty
		if reqElem == nil || tenantElem == nil {
			break
		}

		req := reqElem.Value.(*dequeueRequest)
		tq := tenantElem.Value.(*tenantQueue)

		// Get and deliver the runnable item
		item := tq.items[0]
		req.respChan <- dequeueResponse{runnable: item, err: nil}

		// Update bookkeeping
		q.pendingDequeueRequests.Remove(reqElem)
		tq.items = tq.items[1:]

		// Update metrics
		q.queueLength.WithLabelValues(tq.id).Set(float64(tq.len()))

		// Round-robin: move to back if tenant still has items, otherwise remove
		if tq.isEmpty() {
			tq.clear()
			q.activeTenants.Remove(tenantElem)
		} else {
			q.activeTenants.MoveToBack(tenantElem)
		}
	}
}
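
// Worked example (illustrative): suppose tenant A has queued [a1, a2], tenant
// B has queued [b1], and three Dequeue calls are pending. The loop above pairs
// pending requests with active tenants in round-robin order, so the items are
// handed out as a1, b1, a2: after a1 is delivered A moves to the back of
// activeTenants, and after b1 is delivered B becomes empty and is removed.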

func (q *Queue) handleEnqueueRequest(req enqueueRequest) {
	tq, exists := q.tenantQueues[req.tenantID]
	if !exists {
		tq = &tenantQueue{
			id:    req.tenantID,
			items: make([]func(ctx context.Context), 0, 8),
		}
		q.tenantQueues[req.tenantID] = tq
	}

	if tq.isFull(q.maxSizePerTenant) {
		q.discardedRequests.WithLabelValues(req.tenantID, "queue_full").Inc()
		req.respChan <- ErrTenantQueueFull
		return
	}

	tq.addItem(req.runnable)
	q.queueLength.WithLabelValues(req.tenantID).Set(float64(len(tq.items)))

	if !tq.isActive {
		q.activeTenants.PushBack(tq)
		tq.isActive = true
	}

	req.respChan <- nil
}

// handleDequeueRequest parks the request; scheduleRoundRobin delivers an item
// to it once one is available.
func (q *Queue) handleDequeueRequest(req dequeueRequest) {
	q.pendingDequeueRequests.PushBack(&req)
}

func (q *Queue) handleLenRequest(req lenRequest) {
	total := 0
	for _, tq := range q.tenantQueues {
		total += tq.len()
	}
	req.respChan <- total
}

// dispatcherLoop is the single goroutine that owns all queue state; every
// operation is serialized through its channels, so no additional locking is
// needed.
func (q *Queue) dispatcherLoop(ctx context.Context) error {
	defer close(q.dispatcherStoppedChan)

	for {
		q.scheduleRoundRobin()

		select {
		case <-ctx.Done():
			return nil

		case req := <-q.enqueueChan:
			q.handleEnqueueRequest(req)

		case req := <-q.dequeueChan:
			q.handleDequeueRequest(req)

		case req := <-q.lenChan:
			q.handleLenRequest(req)

		case req := <-q.activeTenantsLenChan:
			req.respChan <- q.activeTenants.Len()
		}
	}
}

// Enqueue adds a work item to the appropriate tenant's queue.
// It blocks only while the dispatcher is busy accepting the request; if the
// tenant's queue is full it returns ErrTenantQueueFull instead of blocking.
func (q *Queue) Enqueue(ctx context.Context, tenantID string, runnable func(ctx context.Context)) error {
	if runnable == nil {
		return ErrNilRunnable
	}
	if tenantID == "" {
		return ErrMissingTenantID
	}

	if q.State() != services.Running {
		return ErrQueueClosed
	}

	start := time.Now()

	respChan := make(chan error, 1)
	req := enqueueRequest{
		tenantID: tenantID,
		runnable: runnable,
		respChan: respChan,
	}

	var err error
	select {
	case q.enqueueChan <- req:
		err = <-respChan
		q.enqueueDuration.Observe(time.Since(start).Seconds())
	case <-q.dispatcherStoppedChan:
		q.discardedRequests.WithLabelValues(tenantID, "dispatcher_stopped").Inc()
		err = ErrQueueClosed
	case <-ctx.Done():
		q.discardedRequests.WithLabelValues(tenantID, "context_canceled").Inc()
		err = ctx.Err()
	}

	return err
}
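
// Usage sketch (illustrative, not part of the original file): producers hand
// the queue a closure bound to their tenant and branch on the sentinel errors
// declared above. The tenant ID and error handling are assumptions.
//
//	err := q.Enqueue(ctx, "tenant-1", func(ctx context.Context) {
//		// tenant-scoped work runs later, on the dequeueing goroutine
//	})
//	switch {
//	case errors.Is(err, ErrTenantQueueFull):
//		// shed load for this tenant
//	case errors.Is(err, ErrQueueClosed):
//		// queue is shutting down
//	}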

// Dequeue removes and returns a work item from the queue using linked-list
// round-robin. It blocks until an item is available for any tenant, the queue
// is closed, or the context is cancelled.
func (q *Queue) Dequeue(ctx context.Context) (func(ctx context.Context), error) {
	if q.State() != services.Running {
		return nil, ErrQueueClosed
	}

	respChan := make(chan dequeueResponse, 1)
	req := dequeueRequest{
		respChan: respChan,
	}

	select {
	case q.dequeueChan <- req:
		select {
		case resp := <-respChan:
			return resp.runnable, resp.err
		case <-ctx.Done():
			return nil, ctx.Err()
		}
	case <-ctx.Done():
		return nil, ctx.Err()
	case <-q.dispatcherStoppedChan:
		return nil, ErrQueueClosed
	}
}
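
// Consumer sketch (illustrative): a worker goroutine typically drains the
// queue until its context is cancelled or the queue closes, running each
// runnable as it arrives. The loop shape is an assumption, not prescribed by
// this package.
//
//	for {
//		runnable, err := q.Dequeue(ctx)
//		if err != nil {
//			return // ctx.Err() or ErrQueueClosed
//		}
//		runnable(ctx)
//	}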

// Len returns the total number of items across all tenants in the queue.
func (q *Queue) Len() int {
	respChan := make(chan int, 1)
	req := lenRequest{respChan: respChan}

	select {
	case q.lenChan <- req:
		select {
		case count := <-respChan:
			return count
		case <-q.dispatcherStoppedChan:
			return 0
		}
	case <-q.dispatcherStoppedChan:
		return 0
	}
}

// ActiveTenantsLen returns the number of tenants with items currently in the queue.
func (q *Queue) ActiveTenantsLen() int {
	respChan := make(chan int, 1)
	req := activeTenantsLenRequest{respChan: respChan}

	select {
	case q.activeTenantsLenChan <- req:
		select {
		case count := <-respChan:
			return count
		case <-q.dispatcherStoppedChan:
			return 0
		}
	case <-q.dispatcherStoppedChan:
		return 0
	}
}

// stopping runs when the service shuts down: it resets the metrics and drops
// all tenant and pending-dequeue state.
func (q *Queue) stopping(_ error) error {
	q.queueLength.Reset()
	q.discardedRequests.Reset()
	for _, tq := range q.tenantQueues {
		tq.clear()
	}
	q.activeTenants.Init()
	q.pendingDequeueRequests.Init()
	return nil
}