Skip to content

Commit

Permalink
Migrate from ELK to Grafana (#1658)
Browse files Browse the repository at this point in the history
  • Loading branch information
ggrossetie authored Oct 13, 2023
1 parent ef023ac commit 8353366
Show file tree
Hide file tree
Showing 2 changed files with 86 additions and 22 deletions.
22 changes: 0 additions & 22 deletions server/ops/elasticsearch/template-logs.json

This file was deleted.

86 changes: 86 additions & 0 deletions server/ops/grafana/grafana-agent.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
metrics:
wal_directory: /tmp/wal
global:
remote_write:
- url: 'https://<prom-instance>.grafana.net/api/prom/push'
basic_auth:
username: '<username>'
password: '<password>'

integrations:
cadvisor:
enabled: true
docker_only: true
instance: 'a-kroki' # must match instance used in logs
relabel_configs:
- action: replace
replacement: integrations/docker
target_label: job
agent:
enabled: true
node_exporter:
enabled: true
enable_collectors:
- processes
disable_collectors:
- ipvs #high cardinality on kubelet
- btrfs
- infiniband
- xfs
- zfs
- mdadm
# exclude dynamic interfaces
netclass_ignored_devices: "^(veth.*|cali.*|[a-f0-9]{15})$"
netdev_device_exclude: "^(veth.*|cali.*|[a-f0-9]{15})$"
# disable tmpfs
filesystem_fs_types_exclude: "^(autofs|binfmt_misc|bpf|cgroup2?|configfs|debugfs|devpts|devtmpfs|tmpfs|fusectl|hugetlbfs|iso9660|mqueue|nsfs|overlay|proc|procfs|pstore|rpc_pipefs|secuurityfs|selinuxfs|squashfs|sysfs|tracefs)$"
include_exporter_metrics: true
# drop extensive scrape statistics
relabel_configs:
- replacement: '<kroki-instance>'
target_label: instance
metric_relabel_configs:
- action: drop
regex: node_scrape_collector_.+
source_labels:
- __name__
- action: keep
regex: node_arp_entries|node_boot_time_seconds|node_context_switches_total|node_cpu_seconds_total|node_disk_io_time_seconds_total|node_disk_io_time_weighted_seconds_total|node_disk__read_bytes_total|node_disk_read_time_seconds_total|node_disk_reads_completed_total|node_disk_write_time_seconds_total|node_disk_writes_completed_total|node_disk_written_bytes_total|node_ffilefd_allocated|node_filefd_maximum|node_filesystem_avail_bytes|node_filesystem_device_error|node_filesystem_files|node_filesystem_files_free|node_filesystem_readonly|node_filesystem_sizee_bytes|node_intr_total|node_load1|node_load15|node_load5|node_md_disks|node_md_disks_required|node_memory_Active_anon_bytes|node_memory_Active_bytes|node_memory_Active_file_bytes|node_memmory_AnonHugePages_bytes|node_memory_AnonPages_bytes|node_memory_Bounce_bytes|node_memory_Buffers_bytes|node_memory_Cached_bytes|node_memory_CommitLimit_bytes|node_memory_Committed_AS_bytees|node_memory_DirectMap1G_bytes|node_memory_DirectMap2M_bytes|node_memory_DirectMap4k_bytes|node_memory_Dirty_bytes|node_memory_HugePages_Free|node_memory_HugePages_Rsvd|node_memory_HugePPages_Surp|node_memory_HugePages_Total|node_memory_Hugepagesize_bytes|node_memory_Inactive_anon_bytes|node_memory_Inactive_bytes|node_memory_Inactive_file_bytes|node_memory_Mapped_bytes|node_memory_MemAvailable_bytes|node_memory_MemFree_bytes|node_memory_MemTotal_bytes|node_memory_SReclaimable_bytes|node_memory_SUnreclaim_bytes|node_memory_ShmemHugePages_bytes|node_memory__ShmemPmdMapped_bytes|node_memory_Shmem_bytes|node_memory_Slab_bytes|node_memory_SwapTotal_bytes|node_memory_VmallocChunk_bytes|node_memory_VmallocTotal_bytes|node_memory_VmallocUsed_bytess|node_memory_WritebackTmp_bytes|node_memory_Writeback_bytes|node_netstat_Icmp6_InErrors|node_netstat_Icmp6_InMsgs|node_netstat_Icmp6_OutMsgs|node_netstat_Icmp_InErrors|node_netstat_Icmp_IInMsgs|node_netstat_Icmp_OutMsgs|node_netstat_IpExt_InOctets|node_netstat_IpExt_OutOctets|node_netstat_TcpExt_ListenDrops|node_netstat_TcpExt_ListenOverflows|node_netstat_TcpExt_TCPSynRetrrans|node_netstat_Tcp_InErrs|node_netstat_Tcp_InSegs|node_netstat_Tcp_OutRsts|node_netstat_Tcp_OutSegs|node_netstat_Tcp_RetransSegs|node_netstat_Udp6_InDatagrams|node_netstat_Udp6_InErrorss|node_netstat_Udp6_NoPorts|node_netstat_Udp6_OutDatagrams|node_netstat_Udp6_RcvbufErrors|node_netstat_Udp6_SndbufErrors|node_netstat_UdpLite_InErrors|node_netstat_Udp_InDatagrams|node_nettstat_Udp_InErrors|node_netstat_Udp_NoPorts|node_netstat_Udp_OutDatagrams|node_netstat_Udp_RcvbufErrors|node_netstat_Udp_SndbufErrors|node_network_carrier|node_network_info|node_network_mttu_bytes|node_network_receive_bytes_total|node_network_receive_compressed_total|node_network_receive_drop_total|node_network_receive_errs_total|node_network_receive_fifo_total|node_networkk_receive_multicast_total|node_network_receive_packets_total|node_network_speed_bytes|node_network_transmit_bytes_total|node_network_transmit_compressed_total|node_network_transmit_drop_tootal|node_network_transmit_errs_total|node_network_transmit_fifo_total|node_network_transmit_multicast_total|node_network_transmit_packets_total|node_network_transmit_queue_length|node_nettwork_up|node_nf_conntrack_entries|node_nf_conntrack_entries_limit|node_os_info|node_sockstat_FRAG6_inuse|node_sockstat_FRAG_inuse|node_sockstat_RAW6_inuse|node_sockstat_RAW_inuse|node_socckstat_TCP6_inuse|node_sockstat_TCP_alloc|node_sockstat_TCP_inuse|node_sockstat_TCP_mem|node_sockstat_TCP_mem_bytes|node_sockstat_TCP_orphan|node_sockstat_TCP_tw|node_sockstat_UDP6_inuse|nnode_sockstat_UDPLITE6_inuse|node_sockstat_UDPLITE_inuse|node_sockstat_UDP_inuse|node_sockstat_UDP_mem|node_sockstat_UDP_mem_bytes|node_sockstat_sockets_used|node_softnet_dropped_total|nodde_softnet_processed_total|node_softnet_times_squeezed_total|node_systemd_unit_state|node_textfile_scrape_error|node_time_zone_offset_seconds|node_timex_estimated_error_seconds|node_timex__maxerror_seconds|node_timex_offset_seconds|node_timex_sync_status|node_uname_info|node_vmstat_oom_kill|node_vmstat_pgfault|node_vmstat_pgmajfault|node_vmstat_pgpgin|node_vmstat_pgpgout|node_vmstat_pswpin|node_vmstat_pswpout|process_max_fds|process_open_fds|node_processes_threads|node_processes_max_threads|node_processes_threads_state|node_processes_pids|node_processes_max_processes|node_forks_total|node_procs_running|node_procs_blocked|node_softirqs_total
source_labels:
- __name__

logs:
configs:
- name: default
positions:
filename: /tmp/positions.yaml
scrape_configs:
- job_name: varlogs
static_configs:
- targets: [localhost]
labels:
job: varlogs
__path__: /var/log/*log
- job_name: integrations/docker
docker_sd_configs:
- host: unix:///var/run/docker.sock
refresh_interval: 5s
relabel_configs:
- action: replace
replacement: integrations/docker
target_label: job
- action: replace
replacement: '<kroki-instance>' # must match instance used in cadvisor
target_label: instance
- source_labels:
- __meta_docker_container_name
regex: '/(.*)'
target_label: container
- source_labels:
- __meta_docker_container_log_stream
target_label: stream
clients:
- url: "https://<loki-instance>.grafana.net/api/prom/push"
basic_auth:
username: '<username>'
password: '<password>'

0 comments on commit 8353366

Please sign in to comment.