wubloader/common/common/flask_stats.py

"""
Code shared between components to gather stats from flask methods.
Note that this code requires flask, but the common module as a whole does not,
to avoid needing to install it for components that don't need it.
"""

import functools

from flask import request
from flask import g as request_store
from monotonic import monotonic
import prometheus_client as prom


# Generic metrics that all http requests get logged to (see below for specific metrics per endpoint)

# Help text for the generic latency histogram; the per-endpoint histograms reuse it.
LATENCY_HELP = "Time taken to run the request handler and create a response"
# Very long playlists / cutting can be quite slow, so these buckets span a wider
# range of latencies than the default, topping out at 10min.
LATENCY_BUCKETS = [
	0.001, 0.005, 0.01, 0.05, 0.1, 0.5,
	1, 5, 10, 30, 60, 120, 300, 600,
]
generic_latency = prom.Histogram(
	'http_request_latency_all',
	LATENCY_HELP,
	labelnames=['endpoint', 'method', 'status'],
	buckets=LATENCY_BUCKETS,
)

# Help text for the generic concurrency gauge; the per-endpoint gauges reuse it.
CONCURRENT_HELP = 'Number of requests currently ongoing'
generic_concurrent = prom.Gauge(
	'http_request_concurrency_all',
	CONCURRENT_HELP,
	labelnames=['endpoint', 'method'],
)


def request_stats(fn):
	"""Decorate a handler func so that metrics are collected for each call to it.

	The handler's keyword arguments are used as labels, together with an 'endpoint'
	label (the wrapped func's name), the request method and, where applicable,
	the response status.

	The wrapper only starts the tracking: it stashes what it knows on the flask
	request store and bumps the concurrency gauges before calling the handler.
	"""
	# The prometheus client lib insists that label names are declared up front, yet the
	# names of the handler kwargs are unknown until the handler is first invoked. The
	# endpoint-specific metrics are therefore created lazily, on first call.
	# The lib also refuses to let one metric name carry different sets of labels, so every
	# request is recorded twice: once under the generic metrics (endpoint, method and
	# status only) and once under metrics named after this endpoint with the full label set.
	handler_name = fn.__name__
	endpoint_metrics = {}

	def _define_metrics(arg_names):
		# Create the endpoint-specific metrics, using the handler's arg names as extra labels.
		base_labels = sorted(arg_names) + ['endpoint', 'method']
		endpoint_metrics['latency'] = prom.Histogram(
			'http_request_latency_{}'.format(handler_name),
			LATENCY_HELP,
			base_labels + ['status'],
			buckets=LATENCY_BUCKETS,
		)
		endpoint_metrics['concurrent'] = prom.Gauge(
			'http_request_concurrency_{}'.format(handler_name),
			CONCURRENT_HELP,
			base_labels,
		)

	@functools.wraps(fn)
	def _stats(**kwargs):
		if not endpoint_metrics:
			# nothing defined yet, so this is the first call
			_define_metrics(kwargs)

		# Stash everything needed to finish tracking once the response exists.
		request_store.metrics = endpoint_metrics
		request_store.endpoint = handler_name
		http_method = request.method
		request_store.method = http_method
		label_values = {name: str(value) for name, value in kwargs.items()}
		request_store.labels = label_values

		generic_concurrent.labels(endpoint=handler_name, method=http_method).inc()
		endpoint_gauge = endpoint_metrics['concurrent'].labels(
			endpoint=handler_name, method=http_method, **label_values
		)
		endpoint_gauge.inc()

		# Start the clock last, immediately before handing over to the handler.
		request_store.start_time = monotonic()
		return fn(**kwargs)

	return _stats


def after_request(response):
	"""Finish tracking a request and record most of its metrics.

	Must be registered to run after requests. This is done here rather than inside
	the request_stats wrapper because it lets flask normalize the handler result
	into a Response object first.

	Returns the response it was given, unmodified.
	"""
	if 'metrics' in request_store:
		finished = monotonic()

		# Pull out everything the request_stats wrapper stashed for this request.
		endpoint_metrics = request_store.metrics
		handler_name = request_store.endpoint
		http_method = request_store.method
		label_values = request_store.labels
		elapsed = finished - request_store.start_time

		# The request is no longer ongoing.
		generic_concurrent.labels(endpoint=handler_name, method=http_method).dec()
		endpoint_metrics['concurrent'].labels(
			endpoint=handler_name, method=http_method, **label_values
		).dec()

		# Record the latency under both the generic and the endpoint-specific histogram.
		status_label = str(response.status_code)
		generic_latency.labels(
			endpoint=handler_name, method=http_method, status=status_label
		).observe(elapsed)
		endpoint_metrics['latency'].labels(
			endpoint=handler_name, method=http_method, status=status_label, **label_values
		).observe(elapsed)

	# Handlers that were not wrapped by request_stats fall straight through to here.
	return response