Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HTTP API metrics #1969

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/ejabberd_app.erl
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,8 @@ start(normal, _Args) ->
%%ejabberd_debug:fprof_start(),
start_services(),
start_modules(),
mongoose_metrics:init(),
ejabberd_listener:start_listeners(),
mongoose_metrics:init(),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Was there some kind of problem with old order?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, mongoose_metrics:init() subscribes all reporters to all metrics which are running at the moment of its start. Since ejabberd_cowboy is started as one of the listeners, it would define metrics after the reporters were subscribed, which means that no reporter reported HTTP API metrics.

ejabberd_admin:start(),
?INFO_MSG("ejabberd ~s is started in the node ~p", [?MONGOOSE_VERSION, node()]),
Sup;
Expand Down
78 changes: 73 additions & 5 deletions src/ejabberd_cowboy.erl
Original file line number Diff line number Diff line change
Expand Up @@ -130,8 +130,9 @@ do_start_cowboy(Ref, Opts) ->
TransportOpts = gen_mod:get_opt(transport_options, Opts, []),
Modules = gen_mod:get_opt(modules, Opts),
Dispatch = cowboy_router:compile(get_routes(Modules)),
ProtocolOpts = [{env, [{dispatch, Dispatch}]} |
gen_mod:get_opt(protocol_options, Opts, [])],
{MetricsEnv, MetricsProtoOpts} = maybe_init_metrics(Opts),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hmm maybe we can move whole metrics logic to separate module ?

ProtocolOpts = [{env, [{dispatch, Dispatch} | MetricsEnv]} |
gen_mod:get_opt(protocol_options, Opts, [])] ++ MetricsProtoOpts,
case catch start_http_or_https(SSLOpts, Ref, NumAcceptors, TransportOpts, ProtocolOpts) of
{error, {{shutdown,
{failed_to_start_child, ranch_acceptors_sup,
Expand Down Expand Up @@ -180,7 +181,9 @@ get_routes([], Routes) ->
Routes;
get_routes([{Host, BasePath, Module} | Tail], Routes) ->
get_routes([{Host, BasePath, Module, []} | Tail], Routes);
get_routes([{Host, BasePath, Module, Opts} | Tail], Routes) ->
get_routes([{Host, BasePath, Module, HandlerOpts} | Tail], Routes) ->
get_routes([{Host, BasePath, Module, HandlerOpts, []} | Tail], Routes);
get_routes([{Host, BasePath, Module, HandlerOpts, _Opts} | Tail], Routes) ->
%% ejabberd_config tries to expand the atom '_' as a Macro, which fails.
%% To work around that, use "_" instead and translate it to '_' here.
CowboyHost = case Host of
Expand All @@ -190,8 +193,8 @@ get_routes([{Host, BasePath, Module, Opts} | Tail], Routes) ->
{module, Module} = code:ensure_loaded(Module),
Paths = proplists:get_value(CowboyHost, Routes, []) ++
case erlang:function_exported(Module, cowboy_router_paths, 2) of
true -> Module:cowboy_router_paths(BasePath, Opts);
_ -> [{BasePath, Module, Opts}]
true -> Module:cowboy_router_paths(BasePath, HandlerOpts);
_ -> [{BasePath, Module, HandlerOpts}]
end,
get_routes(Tail, lists:keystore(CowboyHost, 1, Routes, {CowboyHost, Paths})).

Expand Down Expand Up @@ -224,3 +227,68 @@ maybe_insert_max_connections(TransportOpts, Opts) ->
NewTuple = {Key, Value},
lists:keystore(Key, 1, TransportOpts, NewTuple)
end.

-spec measured_methods() -> [mongoose_cowboy_metrics:method()].
measured_methods() ->
[<<"GET">>,
<<"HEAD">>,
<<"POST">>,
<<"PUT">>,
<<"DELETE">>,
<<"OPTIONS">>,
<<"PATCH">>].

-spec measured_classes() -> [mongoose_cowboy_metrics:status_class()].
measured_classes() ->
[<<"1XX">>, <<"2XX">>, <<"3XX">>, <<"4XX">>, <<"5XX">>].

base_metrics_prefix() ->
[http].

middlewares_with_metrics() ->
[mongoose_cowboy_metrics_mw_before,
cowboy_router,
mongoose_cowboy_metrics_mw_after,
cowboy_handler].

-spec maybe_init_metrics(list()) -> {MetricsEnv :: list(), MetricsProtocolOpts :: list()}.
maybe_init_metrics(Opts) ->
case proplists:get_value(metrics, Opts, false) of
true ->
BasePrefix = base_metrics_prefix(),
HandlerToPrefixMappings = build_metric_prefixes(
BasePrefix, proplists:get_value(modules, Opts, []), #{}),
[create_metrics(Prefix) || Prefix <- maps:values(HandlerToPrefixMappings)],
{[{record_metrics, true}, {handler_to_metric_prefix, HandlerToPrefixMappings}],
[{middlewares, middlewares_with_metrics()}]};
false ->
{[], []}
end.

-spec build_metric_prefixes(BasePrefix :: list(), Modules :: [tuple()], Acc) -> Acc
when Acc :: #{module() => mongoose_cowboy_metrics:prefix()}.
build_metric_prefixes(_BasePrefix, [], Acc) ->
Acc;
build_metric_prefixes(BasePrefix, [{_Host, _Path, Handler, _HandlerOpts, Opts} | Tail], Acc) ->
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure if these metrics are that valuable with no path distinction. Various actions triggered by different paths (and their :parameters) may have drastically different impact on latency stats (if they all apply e.g. to 200 responses to GETs).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I agree that in general case it won't work well. However, in our HTTP API handlers usually do the exact same thing given some HTTP method (i.e. it doesn't matter what was the path or parameters).

The other solution would be hand-crafted metrics, different for each handler (i.e. we explicitly report metrics from each HTTP module). However, we would not get any metrics in case the handler has crashed.

Maybe the metrics for each handler in a single ejabberd_cowboy's scope should opt-in as well? This way we can turn them on only for endpoints which we know it makes sense to have these kinds of metrics for.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, I understand it's good enough until someone actually needs a more fine-grained names. :)

case proplists:get_value(metrics, Opts, []) of
MetricsOpts when is_list(MetricsOpts) ->
HandlerPrefix = proplists:get_value(prefix, MetricsOpts, Handler),
Prefix = BasePrefix ++ lists:flatten([HandlerPrefix]),
build_metric_prefixes(BasePrefix, Tail, maps:put(Handler, Prefix, Acc));
false ->
build_metric_prefixes(BasePrefix, Tail, Acc)
end;
build_metric_prefixes(BasePrefix, [{Host, Path, Handler, HandlerOpts} | Tail], Acc) ->
build_metric_prefixes(BasePrefix, [{Host, Path, Handler, HandlerOpts, []} | Tail], Acc);
build_metric_prefixes(BasePrefix, [{Host, Path, Handler} | Tail], Acc) ->
build_metric_prefixes(BasePrefix, [{Host, Path, Handler, [], []} | Tail], Acc).

-spec create_metrics(mongoose_cowboy_metrics:prefix()) -> any().
create_metrics(Prefix) ->
CountMetrics = [mongoose_cowboy_metrics:request_count_metric(Prefix, M) || M <- measured_methods()] ++
[mongoose_cowboy_metrics:response_count_metric(Prefix, M, C)
|| M <- measured_methods(), C <- measured_classes()],
LatencyMetrics = [mongoose_cowboy_metrics:response_latency_metric(Prefix, M, C)
|| M <- measured_methods(), C <- measured_classes()],
[mongoose_metrics:ensure_metric(global, M, spiral) || M <- CountMetrics],
[mongoose_metrics:ensure_metric(global, M, histogram) || M <- LatencyMetrics].
55 changes: 55 additions & 0 deletions src/mongoose_cowboy_metrics.erl
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
%%==============================================================================
%% Copyright 2016 Erlang Solutions Ltd.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%
%% @doc Functions for generating names of metric's updated by `mongoose_cowboy_metrics_mw_after'
%%
%% To generate metric names use `request_count_metric/2', `response_latency_metric/2' and
%% `response_count_metric/3'. See `mongoose_cowboy_metric_mw_after' module to check what values
%% `Prefix', `Method' and `Class' may take.
%%
%%==============================================================================

-module(mongoose_cowboy_metrics).

%% API
-export([request_count_metric/2]).
-export([response_count_metric/3]).
-export([response_latency_metric/3]).

-type prefix() :: list().
-type method() :: binary(). %% <<"GET">>, <<"POST">>, etc.
-type status_class() :: binary(). %% <<"2XX">>, <<"4XX">>, etc.
-type metric_name() :: list().

-export_type([prefix/0]).
-export_type([method/0]).
-export_type([status_class/0]).
-export_type([metric_name/0]).

%%-------------------------------------------------------------------
%% API
%%-------------------------------------------------------------------

-spec request_count_metric(prefix(), method()) -> metric_name().
request_count_metric(Prefix, Method) ->
Prefix ++ [Method, request, count].

-spec response_count_metric(prefix(), method(), status_class()) -> metric_name().
response_count_metric(Prefix, Method, Class) ->
Prefix ++ [Method, response, Class, count].

-spec response_latency_metric(prefix(), method(), status_class()) -> metric_name().
response_latency_metric(Prefix, Method, Class) ->
Prefix ++ [Method, response, Class, latency].
121 changes: 121 additions & 0 deletions src/mongoose_cowboy_metrics_mw_after.erl
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
%%==============================================================================
%% Copyright 2016 Erlang Solutions Ltd.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%
%% @doc Cowboy middleware updating metrics related to request handling
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be even better to have all of this information in a markdown docs. :)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we document each MongooseIM module in markdown docs? Not only the ones which are configurable by users? I mean, I know that one can configure this middleware in ejabberd_cowboy's protocol opts, but it's rather aimed at MongooseIM's developers, and not operators.

What do you think?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The metrics names and meaning must be described in user docs. The module role description may remain here.

%%
%% This middleware needs to run after `mongoose_cowboy_metrics_mw_before' and `cowboy_router'
%% middleware. However, it does not need to run after `cowboy_hander' middleware because metrics are
%% updated in `onresponse' callback which is called whenever the request is sent.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should there be whenever the response is sent instead of request?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, thanks!

%%
%% It is executed only if listener's env variable `record_metrics' is set to `true'.
%%
%% This middleware does not create any metrics, they need to be created earlier using
%% `mongoose_metrics' module. Metric names used by the middleware are contructed as follows:
%% - take some `Prefix', a list
%% - take a request method, `Method', one of:
%% - `<<"GET">>'
%% - `<<"HEAD">>'
%% - `<<"POST">>'
%% - `<<"PUT">>'
%% - `<<"DELETE">>'
%% - `<<"OPTIONS">>'
%% - `<<"PATCH">>'
%% - take a request status class, `Class', and create a string like e.g. `<<"2XX">>' for success
%% status codes
%% - for each `Method' and `Class' define following metric names
%% - `Prefix ++ [Method, request, count]' - updated by `1' whenever a request with method `Method'
%% is about to be handled
%% - `Prefix ++ [Method, response, Class, count]' - updated by `1' whenever a response of status
%% class `Class' to a request with method `Method' is sent
%% - `Prefix ++ [Method, response, Class, latency]' - updated by number of microseconds which
%% passed since request timestamp was recorded by `mongoose_cowboy_metrics_mw_before' whenever
%% a response of status class `Class' to a request with method `Method' is sent
%%
%% As you might have already guessed it makes sense to define `count' metrics as spirals, and
%% `latency' metrics as histograms. The middleware will always try to update the metric regardless
%% of whether it was created. Note that it's run after `cowboy_router' middleware, which means that
%% error responses returned by the router (such as 404 for no matching handler) won't be recorded.
%%
%% And what about `Prefix'? By default prefix is the name of the handler handling the
%% request wrapped in a list. However, you might provide `handler_to_metric_prefix' map as Cowboy
%% listener environment value, where keys are handler names and values are corresponding prefixes.
%%
%% You can use functions from `mongoose_cowboy_metrics' module to generate names of metrics recorded
%% by this module.
%%
%%==============================================================================

-module(mongoose_cowboy_metrics_mw_after).

-behaviour(cowboy_middleware).

%% cowboy_middleware callbacks
-export([execute/2]).

%%-------------------------------------------------------------------
%% cowboy_middleware callbacks
%%-------------------------------------------------------------------

execute(Req, Env) ->
case proplists:get_value(record_metrics, Env, false) of
true ->
{req_timestamp, StartTs} = proplists:lookup(req_timestamp, Env),
{handler, Handler} = proplists:lookup(handler, Env),
Method = get_req_method(Req),
HandlerToPrefixMappings = proplists:get_value(handler_to_metric_prefix, Env, #{}),
Prefix = maps:get(Handler, HandlerToPrefixMappings, [Handler]),
mongoose_metrics:update(global, mongoose_cowboy_metrics:request_count_metric(Prefix, Method), 1),
Copy link
Contributor

@michalwski michalwski Jul 9, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it make sense to move the request counter update to the before middleware?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unfortunately we can't do that 😢 "Before" middleware doesn't know which handler will serve the request, because cowboy_router has not been run yet.

On the other hand we can't merge the middleware, because in case of no matching routes cowboy_router terminates the middleware chain. In those cases we'd lose even the request count metric (currently we lose response count and latency).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, thanks for explanation :)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@michalwski sorry, I was wrong. We can't report the metric until we know the handler, which means the reporting needs to occur after cowboy_router. However, if we're willing to not report the time which it takes for cowboy_router to execute (i.e. only measure the run time of the handler itself) then we could merge the middleware into one which could be placed after cowboy_router.

OnResponse = on_response_fun(StartTs, Method, Prefix),
{ok, cowboy_req:set([{onresponse, OnResponse}], Req), Env};
false ->
{ok, Req, Env}
end.

%%-------------------------------------------------------------------
%% Internals
%%-------------------------------------------------------------------

-spec on_response_fun(erlang:timestamp(), mongoose_cowboy_metrics:method(),
mongoose_cowboy_metrics:prefix()) -> cowboy:onresponse_fun().
on_response_fun(StartTs, Method, Prefix) ->
fun(Status, _Headers, _Body, RespReq) ->
EndTs = erlang:timestamp(),
Latency = calculate_latency(StartTs, EndTs),
Class = calculate_status_class(Status),
mongoose_metrics:update(global, mongoose_cowboy_metrics:response_count_metric(Prefix, Method, Class), 1),
mongoose_metrics:update(global, mongoose_cowboy_metrics:response_latency_metric(Prefix, Method, Class), Latency),
RespReq
end.

-spec calculate_latency(erlang:timestamp(), erlang:timestamp()) -> Microsecs :: non_neg_integer().
calculate_latency(StartTs, EndTs) ->
timestamp_to_microsecs(EndTs) - timestamp_to_microsecs(StartTs).

-spec timestamp_to_microsecs(erlang:timestamp()) -> Microsecs :: non_neg_integer().
timestamp_to_microsecs({MegaSecs, Secs, MicroSecs}) ->
(MegaSecs * 1000000 + Secs) * 1000000 + MicroSecs.

-spec get_req_method(cowboy_req:req()) -> mongoose_cowboy_metrics:method().
get_req_method(Req) ->
{Method, _} = cowboy_req:method(Req),
Method.

-spec calculate_status_class(100..599) -> mongoose_cowboy_metrics:status_class().
calculate_status_class(S) when S >= 100, S < 200 -> <<"1XX">>;
calculate_status_class(S) when S >= 200, S < 300 -> <<"2XX">>;
calculate_status_class(S) when S >= 300, S < 400 -> <<"3XX">>;
calculate_status_class(S) when S >= 400, S < 500 -> <<"4XX">>;
calculate_status_class(S) when S >= 500, S < 600 -> <<"5XX">>.

46 changes: 46 additions & 0 deletions src/mongoose_cowboy_metrics_mw_before.erl
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
%%==============================================================================
%% Copyright 2016 Erlang Solutions Ltd.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%
%% @doc Cowboy middleware, one of the two responsible for recoding HTTP API metrics
%%
%% The job of this middleware is to record the timestamp of when the request comes in and to set.
%%
%% It's executed only if listener's env variable `record_metrics' is set to `true'.
%%
%% This middleware should be placed as soon as possible in the middleware chain, so that request
%% timestamp of the request will be captured as quickly as possible.
%%
%%==============================================================================

-module(mongoose_cowboy_metrics_mw_before).

-behaviour(cowboy_middleware).

%% cowboy_middleware callbacks
-export([execute/2]).

%%-------------------------------------------------------------------
%% cowboy_middleware callbacks
%%-------------------------------------------------------------------

execute(Req, Env) ->
case proplists:get_value(record_metrics, Env, false) of
true ->
Ts = erlang:timestamp(),
{ok, Req, [{req_timestamp, Ts} | Env]};
false ->
{ok, Req, Env}
end.

Loading