wubloader/common/common/flask_stats.py

"""
Code shared between components to gather stats from flask methods.
Note that this code requires flask, but the common module as a whole does not
to avoid needing to install them for components that don't need it.
"""
import functools
from flask import request
from flask import g as request_store
from monotonic import monotonic
import prometheus_client as prom

# Generic metrics that all http requests get logged to (see below for specific metrics per endpoint)

LATENCY_HELP = "Time taken to run the request handler and create a response"
# buckets: very long playlists / cutting can be quite slow,
# so we have a wider range of latencies than default, up to 10min.
LATENCY_BUCKETS = [.001, .005, .01, .05, .1, .5, 1, 5, 10, 30, 60, 120, 300, 600]
generic_latency = prom.Histogram(
	'http_request_latency_all', LATENCY_HELP,
	['endpoint', 'method', 'status'],
	buckets=LATENCY_BUCKETS,
)

CONCURRENT_HELP = 'Number of requests currently ongoing'
generic_concurrent = prom.Gauge(
	'http_request_concurrency_all', CONCURRENT_HELP,
	['endpoint', 'method'],
)
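
# Illustrative only (not part of the original file): prometheus_client exposes the
# histogram above as _bucket/_count/_sum series and the gauge as a single series.
# The label values below are hypothetical, just to show the shape of the output:
#
#   http_request_latency_all_bucket{endpoint="cut",method="POST",status="200",le="30.0"} 4.0
#   http_request_latency_all_count{endpoint="cut",method="POST",status="200"} 4.0
#   http_request_concurrency_all{endpoint="cut",method="POST"} 1.0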

def request_stats(fn):
	"""Decorator that wraps a handler func to collect metrics.
	Adds handler func args as labels, along with 'endpoint' label using func's name,
	method and response status where applicable."""
	# We have to jump through some hoops here, because the prometheus client lib demands
	# we pre-define our label names, but we don't know the names of the handler kwargs
	# until the first time the function's called. So we delay defining the metrics until
	# first call.
	# In addition, it doesn't let us have different sets of labels with the same name.
	# So we record everything twice: Once under a generic name with only endpoint, method
	# and status, and once under a name specific to the endpoint with the full set of labels.
	metrics = {}
	endpoint = fn.__name__

	@functools.wraps(fn)
	def _stats(**kwargs):
		if not metrics:
			# first call, set up metrics
			labels_no_status = sorted(kwargs.keys()) + ['endpoint', 'method']
			labels = labels_no_status + ['status']
			metrics['latency'] = prom.Histogram(
				'http_request_latency_{}'.format(endpoint), LATENCY_HELP,
				labels, buckets=LATENCY_BUCKETS,
			)
			metrics['concurrent'] = prom.Gauge(
				'http_request_concurrency_{}'.format(endpoint), CONCURRENT_HELP,
				labels_no_status,
			)

		request_store.metrics = metrics
		request_store.endpoint = endpoint
		request_store.method = request.method
		request_store.labels = {k: str(v) for k, v in kwargs.items()}
		generic_concurrent.labels(endpoint=endpoint, method=request.method).inc()
		metrics['concurrent'].labels(endpoint=endpoint, method=request.method, **request_store.labels).inc()
		request_store.start_time = monotonic()
		return fn(**kwargs)
	return _stats
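
# Usage sketch (illustrative, not part of the original file): the decorator wraps a
# flask view function, and the view's keyword args (the hypothetical 'channel' and
# 'quality' route variables below) become the extra labels on the per-endpoint metrics:
#
#   @app.route('/segments/<channel>/<quality>')
#   @request_stats
#   def list_segments(channel, quality):
#       ...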

def after_request(response):
	"""Must be registered to run after requests. Finishes tracking the request
	and logs most of the metrics.
	We do it in this way, instead of inside the request_stats wrapper, because it lets flask
	normalize the handler result into a Response object.
	"""
	if 'metrics' not in request_store:
		return response  # untracked handler

	end_time = monotonic()
	metrics = request_store.metrics
	endpoint = request_store.endpoint
	method = request_store.method
	labels = request_store.labels
	start_time = request_store.start_time

	generic_concurrent.labels(endpoint=endpoint, method=method).dec()
	metrics['concurrent'].labels(endpoint=endpoint, method=method, **labels).dec()

	status = str(response.status_code)
	generic_latency.labels(endpoint=endpoint, method=method, status=status).observe(end_time - start_time)
	metrics['latency'].labels(endpoint=endpoint, method=method, status=status, **labels).observe(end_time - start_time)

	return response
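
# Wiring sketch (illustrative, not part of the original file; the app name and route
# are hypothetical, and the import path is assumed from this file's location): the
# after_request hook must be registered on the flask app so the metrics started by
# request_stats are finished with the response's status code:
#
#   from flask import Flask
#   from common.flask_stats import request_stats, after_request
#
#   app = Flask('example')
#   app.after_request(after_request)
#
#   @app.route('/thumbnail/<channel>')
#   @request_stats
#   def get_thumbnail(channel):
#       return "ok"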