Filebeat in Detail
Filebeat is a log data shipper for local files. It monitors log directories or specific log files (tailing them) and forwards the data to Elasticsearch or Logstash for indexing, or to Kafka and other outputs. It ships with internal modules (auditd, Apache, Nginx, System, and MySQL) that simplify collecting, parsing, and visualizing common log formats with a single command.
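For example, a module is switched on with `filebeat modules enable nginx`, which activates the corresponding file under modules.d/. A minimal sketch of what that file looks like (the var.paths values are assumptions; adjust them to your environment):

```yaml
# modules.d/nginx.yml (sketch; the paths are assumptions)
- module: nginx
  access:
    enabled: true
    var.paths: ["/var/log/nginx/access.log*"]
  error:
    enabled: true
    var.paths: ["/var/log/nginx/error.log*"]
```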
How It Works
Filebeat involves two components, the prospector (called an input in recent versions) and the harvester, which together tail files and send event data to the configured output.
The prospector finds files that match the configured conditions; a harvester is started for each found file, reads its content, and sends it to the output.
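A minimal sketch of how the two cooperate (the path and timing values below are assumptions, not part of the original configuration):

```yaml
filebeat.inputs:
- type: log
  # The prospector/input scans for files matching these globs;
  # one harvester is started per matched file
  paths:
    - /var/log/app/*.log
  # How often the prospector checks for new or changed files
  scan_frequency: 10s
  # A harvester is closed after the file has received no new data for this long
  close_inactive: 5m
```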
Configuration
```yaml
filebeat.inputs:
# Input type; log, stdin, and filestream are supported
- type: log
# Set to true to enable this input
enabled: true
# Log paths; only local absolute paths are supported, not network paths
paths:
- D:\wamp64\logs\*.log
#- c:\programdata\elasticsearch\logs\*
# Exclude lines matching these regular expressions
#exclude_lines: ['^DBG']
# Include only lines matching these regular expressions
#include_lines: ['^ERR', '^WARN']
# Exclude files matching these regular expressions
#exclude_files: ['.gz$']
# Additional fields to attach to each event
#fields:
# level: debug
# review: 1
# log_type: mysql
# Multiline matching
# Pattern that marks the start of a new event
#multiline.pattern: ^\[
# Whether the pattern match is negated
#multiline.negate: false
# Whether continuation lines are appended after or before the matched line
#multiline.match: after
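# Example (an assumption, not part of the original config): join lines that do not
# start with a timestamp onto the previous line, e.g. for Java stack traces
#multiline.pattern: '^\d{4}-\d{2}-\d{2}'
#multiline.negate: true
#multiline.match: after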
# ====== Modules =========
filebeat.config.modules:
# Glob pattern for module configuration files
path: ${path.config}/modules.d/*.yml
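# Set to true to enable automatic config reloading (added for illustration;
# the stock configuration defaults this to false)
#reload.enabled: false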
# How often the config files are checked for changes
#reload.period: 10s
# ====== Elasticsearch template setting ======
setup.template.settings:
index.number_of_shards: 1
#index.codec: best_compression
#_source.enabled: false
# ===== General ======
# The name of the shipper that publishes the network data. It can be used to group
# all the transactions sent by a single shipper in the web interface.
#name:
# The tags of the shipper are included in their own field with each
# transaction published.
#tags: ["service-X", "web-tier"]
# Optional fields that you can specify to add additional information to the
# output.
#fields:
# env: staging
# ====== Dashboards =========
# These settings control loading the sample dashboards to the Kibana index. Loading
# the dashboards is disabled by default and can be enabled either by setting the
# options here or by using the `setup` command.
#setup.dashboards.enabled: false
# The URL from where to download the dashboards archive. By default this URL
# has a value which is computed based on the Beat name and version. For released
# versions, this URL points to the dashboard archive on the artifacts.elastic.co
# website.
#setup.dashboards.url:
# ========= Kibana ========
# Starting with Beats version 6.0.0, the dashboards are loaded via the Kibana API.
# This requires a Kibana endpoint configuration.
setup.kibana:
# Kibana Host
# Scheme and port can be left out and will be set to the default (http and 5601)
# In case you specify and additional path, the scheme is required: http://localhost:5601/path
# IPv6 addresses should always be defined as: https://[2001:db8::1]:5601
#host: "localhost:5601"
# Kibana Space ID
# ID of the Kibana Space into which the dashboards should be loaded. By default,
# the Default Space will be used.
#space.id:
# ======== Elastic Cloud =======
# These settings simplify using Filebeat with the Elastic Cloud (https://cloud.elastic.co/).
# The cloud.id setting overwrites the `output.elasticsearch.hosts` and
# `setup.kibana.host` options.
# You can find the `cloud.id` in the Elastic Cloud web UI.
#cloud.id:
# The cloud.auth setting overwrites the `output.elasticsearch.username` and
# `output.elasticsearch.password` settings. The format is `<user>:<pass>`.
#cloud.auth:
# ========= Outputs ==========
# Output for the collected logs; choose either Elasticsearch or Logstash
# -------- Elasticsearch Output -----------
#output.elasticsearch:
# Array of hosts to connect to.
#hosts: ["localhost:9200"]
# Protocol - either `http` (default) or `https`.
#protocol: "https"
# Authentication credentials - either API key or username/password.
#api_key: "id:api_key"
#username: "elastic"
#password: "changeme"
# --------- Logstash Output ----------
output.logstash:
# The Logstash hosts
hosts: ["localhost:5044"]
# Optional SSL. By default is off.
# List of root certificates for HTTPS server verifications
#ssl.certificate_authorities: ["/etc/pki/root/ca.pem"]
# Certificate for SSL client authentication
#ssl.certificate: "/etc/pki/client/cert.pem"
# Client Certificate Key
#ssl.key: "/etc/pki/client/cert.key"
# ======== Processors =============
# Processors run on each event before it is sent to the output
processors:
# Add metadata fields
- add_host_metadata:
when.not.contains.tags: forwarded
- add_cloud_metadata: ~
- add_docker_metadata: ~
- add_kubernetes_metadata: ~
# Drop unneeded fields
- drop_fields:
fields: ["input_type", "log.offset", "host.name", "input.type", "agent.hostname", "agent.type", "ecs.version", "agent.ephemeral_id", "agent.id", "agent.version", "fields.ics", "log.file.path", "log.flags" ]
# ============== Logging =================
# Log level; defaults to info. Available levels: error, warning, info, debug
#logging.level: debug
# Debug selectors control which components emit debug output; ["*"] enables all. Available selectors include "beat", "publish", "service"
#logging.selectors: ["*"]
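# To write Filebeat's own log to files rather than stderr (the values below are
# illustrative assumptions):
#logging.to_files: true
#logging.files:
#  path: /var/log/filebeat
#  name: filebeat
#  keepfiles: 7
#  permissions: 0644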
# ========= X-Pack Monitoring =========
# Filebeat can export internal metrics to a central Elasticsearch monitoring
# cluster. This requires xpack monitoring to be enabled in Elasticsearch. The
# reporting is disabled by default.
# Set to true to enable the monitoring reporter.
#monitoring.enabled: false
# Sets the UUID of the Elasticsearch cluster under which monitoring data for this
# Filebeat instance will appear in the Stack Monitoring UI. If output.elasticsearch
# is enabled, the UUID is derived from the Elasticsearch cluster referenced by output.elasticsearch.
#monitoring.cluster_uuid:
# Uncomment to send the metrics to Elasticsearch. Most settings from the
# Elasticsearch output are accepted here as well.
# Note that the settings should point to your Elasticsearch *monitoring* cluster.
# Any setting that is not set is automatically inherited from the Elasticsearch
# output configuration, so if you have the Elasticsearch output configured such
# that it is pointing to your Elasticsearch monitoring cluster, you can simply
# uncomment the following line.
#monitoring.elasticsearch:
# ========== Instrumentation =========
# Instrumentation support for the filebeat.
#instrumentation:
# Set to true to enable instrumentation of filebeat.
#enabled: false
# Environment in which filebeat is running on (eg: staging, production, etc.)
#environment: ""
# APM Server hosts to report instrumentation results to.
#hosts:
# - http://localhost:8200
# API Key for the APM Server(s).
# If api_key is set then secret_token will be ignored.
#api_key:
# Secret token for the APM Server(s).
#secret_token:
# ========== Migration =============
# This allows to enable 6.7 migration aliases
#migration.6_to_7.enabled: true
```
Startup
```bash
./filebeat -e -c filebeat.yml -d "*"
```
Here `-e` logs to stderr instead of the configured log output, `-c` points to the configuration file, and `-d "*"` enables all debug selectors.
Troubleshooting
- Logstash does not receive all of the logs collected by Filebeat
Delete data/registry and data/registry.old. These two files record the read offsets, so removing them makes Filebeat ship the files again from the beginning.
- An error is reported when collecting Docker logs
```
2021-02-22T16:41:29.812+0800 INFO [crawler] beater/crawler.go:71 Loading Inputs: 3
2021-02-22T16:41:29.812+0800 WARN [cfgwarn] docker/input.go:49 DEPRECATED: 'docker' input deprecated. Use 'container' input instead. Will be removed in version: 8.0.0
```
Since version 7.2.0 the docker input type is deprecated (to be removed in 8.0.0); collect container logs with the following instead:
```yaml
filebeat.inputs:
- type: container
  paths:
    - '/var/lib/docker/containers/*/*.log'
```
- Connection errors
```
2021-02-23T17:38:22.119+0800 ERROR [publisher_pipeline_output] pipeline/output.go:154 Failed to connect to backoff(async(tcp://148.70.118.28:5044)): dial tcp 148.70.118.28:5044: connect: connection refused
2021-02-23T17:38:22.119+0800 INFO [publisher_pipeline_output] pipeline/output.go:145 Attempting to reconnect to backoff(async(tcp://148.70.118.28:5044)) with 7 reconnect attempt(s)
```
Use telnet to check whether the port is reachable, then log in to the Logstash server and verify that the process is running.
- The connection succeeds, but pushing data fails
```
2021-02-23T17:56:17.266+0800 ERROR [publisher_pipeline_output] pipeline/output.go:180 failed to publish events: write tcp 172.16.215.82:60730->148.70.118.28:5044: write: connection reset by peer
2021-02-23T17:56:17.266+0800 INFO [publisher_pipeline_output] pipeline/output.go:143 Connecting to backoff(async(tcp://148.70.118.28:5044))
```
- Redundant fields: Filebeat attaches metadata about the machine it collects from, but in practice the only information we need is the content of the message field.
{ "@timestamp": "2021-02-24T02:43:25.030Z", "@metadata": { "beat": "filebeat", "type": "_doc", "version": "7.11.1" }, "host": { "architecture": "x86_64", "os": { "kernel": "3.10.0-957.5.1.el7.x86_64", "codename": "Core", "platform": "centos", "version": "7 (Core)", "family": "redhat", "name": "CentOS Linux" }, "name": "iZbp1t7Z", "id": "20190215172108590907433256076310", "containerized": false, "ip": [ "172.16.215.82", "172.17.0.1", "172.18.0.1", "172.19.0.1" ], "mac": [ "00:16:3e:08:d8:74", "02:42:da:8e:cd:4c", "02:42:fe:84:a3:ca", "02:42:09:1e:67:40", "be:27:c6:68:2b:02" ], "hostname": "iZbp1*w1t7Z" }, "agent": { "ephemeral_id": "3ce4e84d-b3ec-4202-8fea-87d292679cc5", "id": "8a3d0020-e670-4250-8b38-90214d98df22", "name": "iZbp1*w1t7Z", "type": "filebeat", "version": "7.11.1", "hostname": "iZbp1*w1t7Z" }, "ecs": { "version": "1.6.0" }, "container": { "name": "pms_api", "id": "d491a5e5024e0c6d2f0284e5cc9b96125ecdee9e8e8c755f696009644319a427", "labels": { "com_docker_compose_oneoff": "False", "org_opencontainers_image_created": "2020-01-14 00:00:00-08:00", "org_label-schema_build-date": "20200114", "com_docker_compose_config-hash": "da95ed189f017e29f515da4f9cb1dca489cede5b1bb6670a6a8db4229566b99d", "org_label-schema_vendor": "CentOS", "org_opencontainers_image_licenses": "GPL-2.0-only", "org_opencontainers_image_vendor": "CentOS", "com_docker_compose_container-number": "1", "com_docker_compose_service": "pms_api", "org_opencontainers_image_title": "CentOS Base Image", "org_label-schema_license": "GPLv2", "org_label-schema_name": "CentOS Base Image", "com_docker_compose_project": "pms", "com_docker_compose_version": "1.18.0", "org_label-schema_schema-version": "1.0" }, "image": { "name": "centos:latest" } }, "log": { "offset": 1118528, "file": { "path": "/var/lib/docker/containers/d491a5e502644319a427/d491a5e502755f696009644319a427-json.log" } }, "stream": "stdout", "message": "172.19.0.1 - [2021-02-24 10:43:25] \"GET /admin/etf/templates?page=1 HTTP/1.0 200 2.984831ms 1109\" \"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36\" \"\"", "input": { "type": "container" } }
The configuration file can be modified to drop this redundant data:
```yaml
# ================================= Processors =================================
processors:
  # - add_host_metadata:
  #     when.not.contains.tags: forwarded
  # - add_cloud_metadata: ~
  # - add_docker_metadata: ~
  # - add_kubernetes_metadata: ~
  - drop_fields:
      fields: ["input_type", "stream", "log", "host", "input", "agent", "ecs"]
```