Skip to content

Commit

Permalink
Remove useless metrics (#983)
Browse files Browse the repository at this point in the history
* Remove useless metrics

* Remove unused functions
  • Loading branch information
macpie authored Jul 31, 2023
1 parent 2d85748 commit ab766d9
Show file tree
Hide file tree
Showing 14 changed files with 35 additions and 397 deletions.
42 changes: 2 additions & 40 deletions include/metrics.hrl
Original file line number Diff line number Diff line change
@@ -1,61 +1,23 @@
%% Tick settings: METRICS_TICK_INTERVAL is built with timer:seconds(10) and
%% METRICS_TICK is a quoted atom.
%% NOTE(review): presumably METRICS_TICK is the message tag of a periodic
%% tick sent every METRICS_TICK_INTERVAL — confirm against the module that
%% uses these macros.
-define(METRICS_TICK_INTERVAL, timer:seconds(10)).
-define(METRICS_TICK, '__router_metrics_tick').

%% Metric name strings. Each macro below expands to the string used as the
%% first element of an entry in the METRICS list.
%% NOTE(review): METRICS_CONSOLE_API_TIME and METRICS_CONSOLE_API expand to
%% the same string ("router_console_api_duration") — confirm that only one
%% of them is meant to remain.
-define(METRICS_DC, "router_dc_balance").
-define(METRICS_SC_OPENED_COUNT, "router_state_channel_opened_count").
-define(METRICS_SC_OVERSPENT_COUNT, "router_state_channel_overspent_count").
-define(METRICS_SC_ACTIVE_COUNT, "router_state_channel_active_count").
-define(METRICS_SC_ACTIVE_BALANCE, "router_state_channel_active_balance").
-define(METRICS_SC_ACTIVE_ACTORS, "router_state_channel_active_actors").
-define(METRICS_SC_CLOSE_CONFLICT, "router_state_channel_close_conflicts").
-define(METRICS_ROUTING_OFFER, "router_device_routing_offer_duration").
-define(METRICS_ROUTING_PACKET, "router_device_routing_packet_duration").
-define(METRICS_PACKET_TRIP, "router_device_packet_trip_duration").
-define(METRICS_PACKET_HOLD_TIME, "router_device_packet_hold_time_duration").
-define(METRICS_PACKET_ERROR, "router_device_packet_error_count").
-define(METRICS_DECODED_TIME, "router_decoder_decoded_duration").
-define(METRICS_FUN_DURATION, "router_function_duration").
-define(METRICS_CONSOLE_API_TIME, "router_console_api_duration").
-define(METRICS_DOWNLINK, "router_device_downlink_packet").
-define(METRICS_CONSOLE_API, "router_console_api_duration").
-define(METRICS_WS, "router_ws_state").
-define(METRICS_CHAIN_BLOCKS, "router_blockchain_blocks").
-define(METRICS_VM_CPU, "router_vm_cpu").
-define(METRICS_VM_PROC_Q, "router_vm_process_queue").
-define(METRICS_VM_ETS_MEMORY, "router_vm_ets_memory").
-define(METRICS_XOR_FILTER, "router_xor_filter").
-define(METRICS_GRPC_CONNECTION_COUNT, "router_grpc_connection_count").
-define(METRICS_SC_CLOSE_SUBMIT, "router_sc_close_submit_count").
-define(METRICS_DEVICE_TOTAL, "router_device_total_gauge").
-define(METRICS_DEVICE_RUNNING, "router_device_running_gauge").

%% List of metric declarations. Every entry is a 4-tuple
%% {Name, Type, Labels, Help}:
%%   Name   - one of the METRICS_* name macros (a string)
%%   Type   - prometheus_gauge | prometheus_histogram | prometheus_counter |
%%            prometheus_boolean
%%   Labels - list of label atoms ([] when the metric has no labels)
%%   Help   - human-readable description string
%% NOTE(review): how this list is consumed is not visible here — presumably
%% each entry is declared through the Type module at startup; confirm.
%% NOTE(review): the METRICS_CONSOLE_API_TIME and METRICS_CONSOLE_API entries
%% carry the same name string, type, labels and help text — confirm that
%% only one of them is meant to remain.
%% NOTE(review): the METRICS_XOR_FILTER help text reads "udpates" (likely a
%% typo for "updates"); it is left untouched here because it is a runtime
%% string.
-define(METRICS, [
{?METRICS_DC, prometheus_gauge, [], "Active State Channel balance"},
{?METRICS_SC_OPENED_COUNT, prometheus_gauge, [], "Opened State Channels count"},
{?METRICS_SC_OVERSPENT_COUNT, prometheus_gauge, [], "Overspent State Channels count"},
{?METRICS_SC_ACTIVE_COUNT, prometheus_gauge, [], "Active State Channels count"},
{?METRICS_SC_ACTIVE_BALANCE, prometheus_gauge, [], "Active State Channels balance"},
{?METRICS_SC_ACTIVE_ACTORS, prometheus_gauge, [], "Active State Channels actors"},
{?METRICS_SC_CLOSE_CONFLICT, prometheus_gauge, [], "State Channels close with conflicts"},
{?METRICS_ROUTING_OFFER, prometheus_histogram, [type, status, reason],
"Routing Offer duration"},
{?METRICS_ROUTING_PACKET, prometheus_histogram, [type, status, reason, downlink],
"Routing Packet duration"},
{?METRICS_PACKET_TRIP, prometheus_histogram, [type, downlink], "Packet round trip duration"},
{?METRICS_PACKET_HOLD_TIME, prometheus_histogram, [type], "Packet hold time duration"},
{?METRICS_PACKET_ERROR, prometheus_counter, [type, error], "Packet errors in routing"},
{?METRICS_DECODED_TIME, prometheus_histogram, [type, status], "Decoder decoded duration"},
{?METRICS_FUN_DURATION, prometheus_histogram, [function], "Function duration"},
{?METRICS_CONSOLE_API_TIME, prometheus_histogram, [type, status], "Console API duration"},
{?METRICS_DOWNLINK, prometheus_counter, [type, status], "Downlink count"},
{?METRICS_CONSOLE_API, prometheus_histogram, [type, status], "Console API duration"},
{?METRICS_WS, prometheus_boolean, [], "Websocket State"},
{?METRICS_CHAIN_BLOCKS, prometheus_gauge, [], "Router's blockchain blocks"},
{?METRICS_VM_CPU, prometheus_gauge, [cpu], "Router CPU usage"},
{?METRICS_VM_PROC_Q, prometheus_gauge, [name], "Router process queue"},
{?METRICS_VM_ETS_MEMORY, prometheus_gauge, [name], "Router ets memory"},
{?METRICS_XOR_FILTER, prometheus_counter, [], "Router XOR Filter udpates"},
{?METRICS_GRPC_CONNECTION_COUNT, prometheus_gauge, [], "Number of active GRPC Connections"},
{?METRICS_SC_CLOSE_SUBMIT, prometheus_counter, [status],
"Router state channels close txn status"},
{?METRICS_DEVICE_TOTAL, prometheus_gauge, [], "Device total gauge"},
{?METRICS_DEVICE_RUNNING, prometheus_gauge, [], "Device running gauge"}
]).
1 change: 0 additions & 1 deletion src/cli/router_cli_organization.erl
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,6 @@ org_unfunded_cmd() ->
]
].


reset_unfunded(_, _, _) ->
Before = router_console_dc_tracker:list_unfunded(),
ok = router_console_dc_tracker:reset_unfunded_from_api(),
Expand Down
11 changes: 2 additions & 9 deletions src/decoders/router_decoder.erl
Original file line number Diff line number Diff line change
Expand Up @@ -81,20 +81,13 @@ delete(ID) ->
UplinkDetails :: map()
) -> {ok, any()} | {error, any()}.
decode(DecoderID, Payload, Port, UplinkDetails) ->
Start = erlang:system_time(millisecond),
try decode_(DecoderID, Payload, Port, UplinkDetails) of
{Type, {ok, _} = OK} ->
End = erlang:system_time(millisecond),
ok = router_metrics:decoder_observe(Type, ok, End - Start),
{_Type, {ok, _} = OK} ->
OK;
{Type, {error, _} = Err} ->
End = erlang:system_time(millisecond),
ok = router_metrics:decoder_observe(Type, error, End - Start),
{_Type, {error, _} = Err} ->
Err
catch
_Class:_Reason:_Stacktrace ->
End = erlang:system_time(millisecond),
ok = router_metrics:decoder_observe(decoder_crashed, error, End - Start),
lager:error("decoder ~p crashed: ~p (~p) stacktrace ~p", [
DecoderID,
_Reason,
Expand Down
7 changes: 0 additions & 7 deletions src/device/router_device_channels_worker.erl
Original file line number Diff line number Diff line change
Expand Up @@ -105,10 +105,8 @@ frame_timeout(Pid, UUID, BalanceNonce) ->

-spec handle_console_downlink(binary(), map(), router_channel:channel(), first | last) -> ok.
handle_console_downlink(DeviceID, MapPayload, Channel, Position) ->
{ChannelHandler, _} = router_channel:handler(Channel),
case router_devices_sup:maybe_start_worker(DeviceID, #{}) of
{error, _Reason} ->
ok = router_metrics:downlink_inc(ChannelHandler, error),
Desc = io_lib:format("Failed to queue downlink (worker failed): ~p", [_Reason]),
ok = maybe_report_downlink_dropped(DeviceID, Desc, Channel),
lager:info("failed to start/find device ~p: ~p", [DeviceID, _Reason]);
Expand All @@ -118,7 +116,6 @@ handle_console_downlink(DeviceID, MapPayload, Channel, Position) ->
lager:info("clearing device queue because downlink payload from console"),
router_device_worker:clear_queue(Pid);
{ok, {Confirmed, Port, Region, Payload}} ->
ok = router_metrics:downlink_inc(ChannelHandler, ok),
router_device_worker:queue_downlink(
Pid,
#downlink{
Expand All @@ -135,7 +132,6 @@ handle_console_downlink(DeviceID, MapPayload, Channel, Position) ->
_Reason
]),
ok = maybe_report_downlink_dropped(DeviceID, Desc, Channel),
ok = router_metrics:downlink_inc(ChannelHandler, error),
lager:debug("could not parse json downlink message ~p for ~p", [
_Reason,
DeviceID
Expand Down Expand Up @@ -264,13 +260,11 @@ handle_cast(
{handle_downlink, BinaryPayload, Channel},
#state{device_worker = DeviceWorker} = State
) ->
{ChannelHandler, _} = router_channel:handler(Channel),
case downlink_decode(BinaryPayload) of
{ok, clear_queue} ->
lager:info("clearing device queue because downlink payload"),
router_device_worker:clear_queue(DeviceWorker);
{ok, {Confirmed, Port, Region, Payload}} ->
ok = router_metrics:downlink_inc(ChannelHandler, ok),
ok = router_device_worker:queue_downlink(DeviceWorker, #downlink{
confirmed = Confirmed,
port = Port,
Expand All @@ -279,7 +273,6 @@ handle_cast(
region = Region
});
{error, _Reason} ->
ok = router_metrics:downlink_inc(ChannelHandler, error),
lager:debug("could not parse json downlink message ~p", [_Reason])
end,
{noreply, State};
Expand Down
50 changes: 4 additions & 46 deletions src/device/router_device_routing.erl
Original file line number Diff line number Diff line change
Expand Up @@ -125,11 +125,9 @@ handle_offer(Offer, HandlerPid) ->
false ->
{error, deprecated};
true ->
Start = erlang:system_time(millisecond),
Routing = blockchain_state_channel_offer_v1:routing(Offer),
{OfferCheckTime, OfferCheck} = timer:tc(fun offer_check/1, [Offer]),
Resp =
case OfferCheck of
case offer_check(Offer) of
{error, _} = Error0 ->
Error0;
ok ->
Expand All @@ -140,18 +138,8 @@ handle_offer(Offer, HandlerPid) ->
packet_offer(Offer)
end
end,
End = erlang:system_time(millisecond),
erlang:spawn(fun() ->
ok = router_metrics:function_observe(
'router_device_routing:offer_check', OfferCheckTime
),
ok = router_metrics:packet_trip_observe_start(
blockchain_state_channel_offer_v1:packet_hash(Offer),
blockchain_state_channel_offer_v1:hotspot(Offer),
Start
),
ok = print_handle_offer_resp(Offer, HandlerPid, Resp),
ok = handle_offer_metrics(Routing, Resp, End - Start)
ok = print_handle_offer_resp(Offer, HandlerPid, Resp)
end),
case Resp of
{ok, Device} ->
Expand Down Expand Up @@ -906,7 +894,6 @@ packet(
end
end;
{error, api_not_found} ->
router_metrics:packet_routing_error(join, api_not_found),
lager:debug(
[{app_eui, AppEUI}, {dev_eui, DevEUI}],
"no key for ~p ~p received by ~s",
Expand All @@ -918,7 +905,6 @@ packet(
),
{error, undefined_app_key};
{error, _Reason} ->
router_metrics:packet_routing_error(join, bad_mic),
lager:debug(
[{app_eui, AppEUI}, {dev_eui, DevEUI}],
"Device ~s with AppEUI ~s tried to join through ~s " ++
Expand Down Expand Up @@ -1050,7 +1036,6 @@ send_to_device_worker(
undefined ->
case find_device(PubKeyBin, DevAddr, MIC, Payload) of
{error, unknown_device} ->
router_metrics:packet_routing_error(packet, device_not_found),
lager:warning(
"unable to find device for packet [devaddr: ~p / ~p] [gateway: ~p]",
[
Expand Down Expand Up @@ -1219,10 +1204,8 @@ get_device_for_offer(Offer, DevAddr, PubKeyBin) ->
PubKeyBin :: libp2p_crypto:pubkey_bin()
) -> [router_device:device()].
get_and_sort_devices(DevAddr, PubKeyBin) ->
{Time1, Devices0} = timer:tc(router_device_cache, get_by_devaddr, [DevAddr]),
router_metrics:function_observe('router_device_cache:get_by_devaddr', Time1),
Devices1 = router_device_devaddr:sort_devices(Devices0, PubKeyBin),
Devices1.
Devices0 = router_device_cache:get_by_devaddr(DevAddr),
router_device_devaddr:sort_devices(Devices0, PubKeyBin).

-spec get_device_by_mic(binary(), binary(), [router_device:device()]) ->
{router_device:device(), binary(), non_neg_integer()} | undefined.
Expand Down Expand Up @@ -1384,26 +1367,6 @@ maybe_start_worker(DeviceID) ->
WorkerID = router_devices_sup:id(DeviceID),
router_devices_sup:maybe_start_worker(WorkerID, #{}).

%% Reports the outcome of a handled offer through
%% router_metrics:routing_offer_observe/4.
%%
%% Arguments:
%%   Routing  - #routing_information_pb{}; its `data' field selects the type
%%              label: {eui, _} is reported as `join', {devaddr, _} as
%%              `packet'.
%%   Response - {ok, Device} is reported as accepted/accepted;
%%              {error, Reason} is reported as rejected/Reason, with Reason
%%              forwarded unchanged.
%%   Time     - forwarded unchanged as the observed value. The call site
%%              visible in this file passes a difference of two
%%              erlang:system_time(millisecond) readings.
%%
%% Returns `ok'. Each clause asserts `ok = ...', so any other result from
%% router_metrics crashes with badmatch. Any other routing data or response
%% shape crashes with function_clause.
%%
%% No dedicated clause is needed for {error, ?DEVADDR_NOT_IN_SUBNET}: the
%% generic {error, Reason} clause forwards exactly the same term.
-spec handle_offer_metrics(
    #routing_information_pb{},
    {ok, router_device:device()} | {error, any()},
    non_neg_integer()
) -> ok.
handle_offer_metrics(#routing_information_pb{data = {eui, _}}, {ok, _}, Time) ->
    ok = router_metrics:routing_offer_observe(join, accepted, accepted, Time);
handle_offer_metrics(#routing_information_pb{data = {eui, _}}, {error, Reason}, Time) ->
    ok = router_metrics:routing_offer_observe(join, rejected, Reason, Time);
handle_offer_metrics(#routing_information_pb{data = {devaddr, _}}, {ok, _}, Time) ->
    ok = router_metrics:routing_offer_observe(packet, accepted, accepted, Time);
handle_offer_metrics(#routing_information_pb{data = {devaddr, _}}, {error, Reason}, Time) ->
    ok = router_metrics:routing_offer_observe(packet, rejected, Reason, Time).

-spec reason_to_single_atom(any()) -> any().
reason_to_single_atom(Reason) ->
case Reason of
Expand Down Expand Up @@ -1502,9 +1465,6 @@ handle_join_offer_test() ->
meck:expect(blockchain_worker, blockchain, fun() -> chain end),
meck:new(router_console_dc_tracker, [passthrough]),
meck:expect(router_console_dc_tracker, has_enough_dc, fun(_, _) -> {ok, orgid, 0, 1} end),
meck:new(router_metrics, [passthrough]),
meck:expect(router_metrics, routing_offer_observe, fun(_, _, _, _) -> ok end),
meck:expect(router_metrics, function_observe, fun(_, _) -> ok end),
meck:new(router_devices_sup, [passthrough]),
meck:expect(router_devices_sup, maybe_start_worker, fun(_, _) -> {ok, self()} end),

Expand All @@ -1527,8 +1487,6 @@ handle_join_offer_test() ->
meck:unload(blockchain_worker),
?assert(meck:validate(router_console_dc_tracker)),
meck:unload(router_console_dc_tracker),
?assert(meck:validate(router_metrics)),
meck:unload(router_metrics),
?assert(meck:validate(router_devices_sup)),
meck:unload(router_devices_sup),
ets:delete(?BF_ETS),
Expand Down
5 changes: 1 addition & 4 deletions src/device/router_device_worker.erl
Original file line number Diff line number Diff line change
Expand Up @@ -566,7 +566,6 @@ handle_cast(
) ->
PHash = blockchain_helium_packet_v1:packet_hash(Packet0),
lager:debug("got join packet (~p) ~p", [PHash, lager:pr(Packet0, blockchain_helium_packet_v1)]),
ok = router_metrics:packet_hold_time_observe(join, HoldTime),
%% TODO we should really just call this once per join nonce
%% and have a separate function for getting the join nonce so we can check
%% the cache
Expand Down Expand Up @@ -698,15 +697,14 @@ handle_cast(
end
end;
handle_cast(
{frame, _NwkSKey, PacketFCnt, Packet, PacketTime, HoldTime, PubKeyBin, _Region, _Pid},
{frame, _NwkSKey, PacketFCnt, Packet, PacketTime, _HoldTime, PubKeyBin, _Region, _Pid},
#state{
device = Device,
db = DB,
cf = CF,
is_active = false
} = State
) ->
ok = router_metrics:packet_hold_time_observe(packet, HoldTime),
PHash = blockchain_helium_packet_v1:packet_hash(Packet),
ok = router_device_multibuy:max(PHash, 0),
ok = router_utils:event_uplink_dropped_device_inactive(
Expand Down Expand Up @@ -735,7 +733,6 @@ handle_cast(
cf = CF
} = State
) ->
ok = router_metrics:packet_hold_time_observe(packet, HoldTime),
MetricPacketType =
case Disco of
true -> discovery_packet;
Expand Down
6 changes: 2 additions & 4 deletions src/grpc/helium_packet_service.erl
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,10 @@ route(#envelope_up_v1_pb{data = {packet, PacketUp}}, StreamState) ->
Self = self(),
erlang:spawn(fun() ->
SCPacket = to_sc_packet(PacketUp),
{Time, _} = timer:tc(router_device_routing, handle_free_packet, [
router_device_routing:handle_free_packet(
SCPacket, erlang:system_time(millisecond), Self
]),
router_metrics:function_observe('router_device_routing:handle_free_packet', Time)
)
end),

{ok, StreamState}
end;
route(_EnvUp, StreamState) ->
Expand Down
5 changes: 2 additions & 3 deletions src/grpc/helium_router_service.erl
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,9 @@ route(Ctx, #blockchain_state_channel_message_v1_pb{msg = {packet, SCPacket}} = _
true ->
%% handle the packet and then await a response
%% if no response within given time, then give up and return error
{Time, _} = timer:tc(router_device_routing, handle_free_packet, [
router_device_routing:handle_free_packet(
SCPacket, erlang:system_time(millisecond), self()
]),
router_metrics:function_observe('router_device_routing:handle_free_packet', Time),
),
wait_for_response(Ctx)
end.

Expand Down
Loading

0 comments on commit ab766d9

Please sign in to comment.