1 Makefile 分析 #
Makefile:
# https://github.com/cilium/ebpf/blob/master/Makefile

# Remove a target file whose recipe failed. The ELF recipes below run two
# commands (compile, then strip); without this a failed strip would leave an
# unstripped object behind that still looks up to date.
.DELETE_ON_ERROR:

# The development version of clang is distributed as the 'clang' binary,
# while stable/released versions have a version number attached.
# Pin the default clang to a stable version.
CLANG ?= clang-14
STRIP ?= llvm-strip-14
OBJCOPY ?= llvm-objcopy-14
CFLAGS := -O2 -g -Wall -Werror $(CFLAGS)

CI_KERNEL_URL ?= https://github.com/cilium/ci-kernels/raw/master/

# Obtain an absolute path to the directory of the Makefile.
# Assume the Makefile is in the root of the repository.
REPODIR := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST))))
UIDGID := $(shell stat -c '%u:%g' ${REPODIR})

# Prefer podman if installed, otherwise use docker.
# Note: Setting the var at runtime will always override.
CONTAINER_ENGINE ?= $(if $(shell command -v podman), podman, docker)
CONTAINER_RUN_ARGS ?= $(if $(filter ${CONTAINER_ENGINE}, podman), --log-driver=none, --user "${UIDGID}")

# Container image name and version are read from files kept in the repository.
IMAGE := $(shell cat ${REPODIR}/testdata/docker/IMAGE)
VERSION := $(shell cat ${REPODIR}/testdata/docker/VERSION)

# Base names of the ELF objects to build; every entry is compiled twice,
# as <name>-el.elf and <name>-eb.elf (see the `all` prerequisites).
# clang <8 doesn't tag relocs properly (STT_NOTYPE)
# clang 9 is the first version emitting BTF
TARGETS := \
	testdata/loader-clang-7 \
	testdata/loader-clang-9 \
	testdata/loader-$(CLANG) \
	testdata/manyprogs \
	testdata/btf_map_init \
	testdata/invalid_map \
	testdata/raw_tracepoint \
	testdata/invalid_map_static \
	testdata/invalid_btf_map_init \
	testdata/strings \
	testdata/freplace \
	testdata/iproute2_map_compat \
	testdata/map_spin_lock \
	testdata/subprog_reloc \
	testdata/fwd_decl \
	testdata/kconfig \
	testdata/kconfig_config \
	testdata/kfunc \
	testdata/invalid-kfunc \
	testdata/kfunc-kmod \
	btf/testdata/relocs \
	btf/testdata/relocs_read \
	btf/testdata/relocs_read_tgt \
	cmd/bpf2go/testdata/minimal

# `format` is a command, not a file, so it is declared phony as well.
.PHONY: all clean container-all container-shell format generate

# .DEFAULT_GOAL is the special variable GNU Make consults for the default
# goal; the previous `.DEFAULT_TARGET` was an ordinary, unused variable.
.DEFAULT_GOAL := container-all

# Build all ELF binaries using a containerized LLVM toolchain.
# The leading `+` makes the line run even under `make -n`; MAKEFLAGS is
# passed through to the make invoked inside the container.
container-all:
	+${CONTAINER_ENGINE} run --rm -ti ${CONTAINER_RUN_ARGS} \
		-v "${REPODIR}":/ebpf -w /ebpf --env MAKEFLAGS \
		--env CFLAGS="-fdebug-prefix-map=/ebpf=." \
		--env HOME="/tmp" \
		"${IMAGE}:${VERSION}" \
		make all

# (debug) Drop the user into a shell inside the container as root.
container-shell:
	${CONTAINER_ENGINE} run --rm -ti \
		-v "${REPODIR}":/ebpf -w /ebpf \
		"${IMAGE}:${VERSION}"

# Remove the generated ELF objects; `-` ignores errors from $(RM).
clean:
	-$(RM) testdata/*.elf
	-$(RM) btf/testdata/*.elf

# Re-format every C source file below the current directory in place.
format:
	find . -type f -name "*.c" | xargs clang-format -i

# Build both endianness variants of every target, run `go generate`, then
# point the version-independent loader-{el,eb}.elf names at the pinned clang's
# output via relative symlinks.
all: format $(addsuffix -el.elf,$(TARGETS)) $(addsuffix -eb.elf,$(TARGETS)) generate
	ln -srf testdata/loader-$(CLANG)-el.elf testdata/loader-el.elf
	ln -srf testdata/loader-$(CLANG)-eb.elf testdata/loader-eb.elf

# $BPF_CLANG is used in go:generate invocations.
generate: export BPF_CLANG := $(CLANG)
generate: export BPF_CFLAGS := $(CFLAGS)
generate:
	go generate ./...

# loader.c is compiled once per compiler: the pattern stem ($*) is the name
# of the clang binary to run (clang-7, clang-9, $(CLANG)).
testdata/loader-%-el.elf: testdata/loader.c
	$* $(CFLAGS) -target bpfel -c $< -o $@
	$(STRIP) -g $@

testdata/loader-%-eb.elf: testdata/loader.c
	$* $(CFLAGS) -target bpfeb -c $< -o $@
	$(STRIP) -g $@

# Generic rules: compile <name>.c with the pinned clang for the little-endian
# (bpfel) or big-endian (bpfeb) target, then run $(STRIP) -g on the result.
%-el.elf: %.c
	$(CLANG) $(CFLAGS) -target bpfel -c $< -o $@
	$(STRIP) -g $@

%-eb.elf : %.c
	$(CLANG) $(CFLAGS) -target bpfeb -c $< -o $@
	$(STRIP) -g $@

# Download a CI kernel image and its bpf selftests, then dump their .BTF
# sections into btf/testdata. Override the kernel with KERNEL_VERSION=x.y.
.PHONY: generate-btf
generate-btf: KERNEL_VERSION?=5.19
generate-btf:
	$(eval TMP := $(shell mktemp -d))
	curl -fL "$(CI_KERNEL_URL)/linux-$(KERNEL_VERSION).bz" -o "$(TMP)/bzImage"
	./testdata/extract-vmlinux "$(TMP)/bzImage" > "$(TMP)/vmlinux"
	$(OBJCOPY) --dump-section .BTF=/dev/stdout "$(TMP)/vmlinux" /dev/null | gzip > "btf/testdata/vmlinux.btf.gz"
	curl -fL "$(CI_KERNEL_URL)/linux-$(KERNEL_VERSION)-selftests-bpf.tgz" -o "$(TMP)/selftests.tgz"
	tar -xf "$(TMP)/selftests.tgz" --to-stdout tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.ko | \
		$(OBJCOPY) --dump-section .BTF="btf/testdata/btf_testmod.btf" - /dev/null
	$(RM) -r "$(TMP)"
- CLANG: clang-14 版本;
- CFLAGS := -O2 -g -Wall -Werror $(CFLAGS), 指定了 -g 后会在 obj 中包含 BTF 信息,支持 CO-RE;
- generate-btf: KERNEL_VERSION?=5.19, 可以从 ci-kernels 下载指定内核的文件,然后使用 llvm-objcopy-14 来从中提取 .BTF Section, 写入 btf/testdata/vmlinux.btf.gz 文件中。后续可以使用: bpftool btf dump 来生成 vmlinux.h 头文件:
zhangjun@lima-learning-ebpf:/Users/zhangjun/go/src/github.com/cilium/ebpf$ gunzip btf/testdata/vmlinux.btf.gz
zhangjun@lima-learning-ebpf:/Users/zhangjun/go/src/github.com/cilium/ebpf$ bpftool btf dump file btf/testdata/vmlinux.btf format c >vmlinux.h
zhangjun@lima-learning-ebpf:/Users/zhangjun/go/src/github.com/cilium/ebpf$ wc -l vmlinux.h
93868 vmlinux.h
examples/headers 目录下包含 libbpf 相关的 headers 和自定义的 common.h header
- update.sh 从指定版本的 libbpf 库下载目录中的各 libbpf headers 文件;
- common.h 是 vmlinux.h 的 compact version,用作 example 中的 C code;
- 它 include 了 bpf_helpers.h, 进而 include 了 bpf_helper_defs.h;
- bpf_endian.h 和 bpf_tracing.h 需要按需自己 include。
$ ls -l examples/headers/
total 204K
-rw-r--r-- 1 zhangjun 1.7K 6 22 21:11 LICENSE.BSD-2-Clause
-rw-r--r-- 1 zhangjun 3.7K 6 22 21:11 bpf_endian.h
-rw-r--r-- 1 zhangjun 149K 6 22 21:11 bpf_helper_defs.h
-rw-r--r-- 1 zhangjun 8.7K 6 22 21:11 bpf_helpers.h
-rw-r--r-- 1 zhangjun 21K 6 22 21:11 bpf_tracing.h
-rw-r--r-- 1 zhangjun 3.5K 6 22 21:11 common.h
-rwxr-xr-x 1 zhangjun 511 6 22 21:11 update.sh*
$ wc -l examples/headers/common.h
148 examples/headers/common.h
fentry.c 示例:https://github.com/cilium/ebpf/blob/master/examples/fentry/fentry.c
- “bpf_endian.h” 和 “bpf_tracing.h” 需要按需自行 include;
- 也可以从 Linux kernel 拷贝一些定义,比如 struct sock_common {}
//go:build ignore
#include "common.h"
#include "bpf_endian.h"
#include "bpf_tracing.h"
#define AF_INET 2
#define TASK_COMM_LEN 16
char __license[] SEC("license") = "Dual MIT/GPL";
/**
* This example copies parts of struct sock_common and struct sock from
* the Linux kernel, but doesn't cause any CO-RE information to be emitted
* into the ELF object. This requires the struct layout (up until the fields
* that are being accessed) to match the kernel's, and the example will break
* or misbehave when this is no longer the case.
*
* Also note that BTF-enabled programs like fentry, fexit, fmod_ret, tp_btf,
* lsm, etc. declared using the BPF_PROG macro can read kernel memory without
* needing to call bpf_probe_read*().
*/
/**
 * struct sock_common reflects the start of the kernel's struct sock_common.
 * It only contains the fields up until skc_family that are accessed in the
 * program, with padding to match the kernel's declaration.
 *
 * No CO-RE relocation is emitted for this copy, so the field order and sizes
 * below must not be changed independently of the kernel's layout.
 */
struct sock_common {
	// First anonymous union: two 32-bit address fields (__be32).
	union {
		struct {
			__be32 skc_daddr;
			__be32 skc_rcv_saddr;
		};
	};
	union {
		// Padding out union skc_hash.
		__u32 _;
	};
	// Third anonymous union: two 16-bit port fields.
	union {
		struct {
			__be16 skc_dport;
			__u16 skc_num;
		};
	};
	// Last field mirrored from the kernel struct; everything after it is
	// intentionally left out.
	short unsigned int skc_family;
};
对应的 go 部分代码:
- -I../headers 指定了 examples/headers 头文件目录。
package main
import (
"bytes"
"encoding/binary"
"errors"
"log"
"net"
"os"
"os/signal"
"syscall"
"github.com/cilium/ebpf/link"
"github.com/cilium/ebpf/ringbuf"
"github.com/cilium/ebpf/rlimit"
)
// $BPF_CLANG and $BPF_CFLAGS are set by the Makefile.
//go:generate go run github.com/cilium/ebpf/cmd/bpf2go -cc $BPF_CLANG -cflags $BPF_CFLAGS -type event bpf fentry.c -- -I../headers
func main() {
LoadCollectionSpecFromReader() 返回的 CollectionSpec 中,各个 Programs 包含 AttachTo、AttachType 等从 SEC(“xx”) 中提取的内容。
// https://github.com/cilium/ebpf/blob/master/elf_reader.go#L60
// LoadCollectionSpecFromReader parses an ELF file into a CollectionSpec.
func LoadCollectionSpecFromReader(rd io.ReaderAt) (*CollectionSpec, error) {
f, err := internal.NewSafeELFFile(rd)
if err != nil {
return nil, err
}
。。。
for i, sec := range f.Sections {
idx := elf.SectionIndex(i)
switch {
case strings.HasPrefix(sec.Name, "license"):
licenseSection = sec
case strings.HasPrefix(sec.Name, "version"):
versionSection = sec
case strings.HasPrefix(sec.Name, "maps"):
sections[idx] = newElfSection(sec, mapSection)
case sec.Name == ".maps":
sections[idx] = newElfSection(sec, btfMapSection)
case sec.Name == ".bss" || sec.Name == ".data" || strings.HasPrefix(sec.Name, ".rodata"):
sections[idx] = newElfSection(sec, dataSection)
case sec.Type == elf.SHT_REL:
// Store relocations under the section index of the target
relSections[elf.SectionIndex(sec.Info)] = sec
case sec.Type == elf.SHT_PROGBITS && (sec.Flags&elf.SHF_EXECINSTR) != 0 && sec.Size > 0:
// SEC() 注解的 Program
sections[idx] = newElfSection(sec, programSection)
}
}
。。。
if err := ec.loadRelocations(relSections, symbols); err != nil {
return nil, fmt.Errorf("load relocations: %w", err)
}
if err := ec.loadMaps(); err != nil {
return nil, fmt.Errorf("load maps: %w", err)
}
if err := ec.loadBTFMaps(); err != nil {
return nil, fmt.Errorf("load BTF maps: %w", err)
}
if err := ec.loadDataSections(); err != nil {
return nil, fmt.Errorf("load data sections: %w", err)
}
if err := ec.loadKconfigSection(); err != nil {
return nil, fmt.Errorf("load virtual .kconfig section: %w", err)
}
if err := ec.loadKsymsSection(); err != nil {
return nil, fmt.Errorf("load virtual .ksyms section: %w", err)
}
// SEC() 注解的 Program Section
// Finally, collect programs and link them.
progs, err := ec.loadProgramSections()
if err != nil {
return nil, fmt.Errorf("load programs: %w", err)
}
return &CollectionSpec{ec.maps, progs, btfSpec, ec.ByteOrder}, nil
}
// loadProgramSections iterates ec's sections and emits a ProgramSpec
// for each function it finds.
//
// The resulting map is indexed by function name.
func (ec *elfCode) loadProgramSections() (map[string]*ProgramSpec, error) {
progs := make(map[string]*ProgramSpec)
// Generate a ProgramSpec for each function found in each program section.
var export []string
for _, sec := range ec.sections {
if sec.kind != programSection {
continue
}
if len(sec.symbols) == 0 {
return nil, fmt.Errorf("section %v: missing symbols", sec.Name)
}
// 加载各函数的字节码指令
funcs, err := ec.loadFunctions(sec)
if err != nil {
return nil, fmt.Errorf("section %v: %w", sec.Name, err)
}
// 关键点:从 SEC(XXX) 中获取 Program Type 和 Attach 信息。
// AttachTo 是 kprobe/xx 去掉前缀后的 xx 内容。
progType, attachType, progFlags, attachTo := getProgType(sec.Name)
for name, insns := range funcs {
// 返回的 ProgramSpec 中含有 AttachTo 信息!
spec := &ProgramSpec{
Name: name,
Type: progType,
Flags: progFlags,
AttachType: attachType,
AttachTo: attachTo,
SectionName: sec.Name,
License: ec.license,
KernelVersion: ec.version,
Instructions: insns,
ByteOrder: ec.ByteOrder,
}
// Function names must be unique within a single ELF blob.
if progs[name] != nil {
return nil, fmt.Errorf("duplicate program name %s", name)
}
progs[name] = spec
if spec.SectionName != ".text" {
export = append(export, name)
}
}
}
。。。
}
// https://github.com/cilium/ebpf/blob/master/elf_reader.go#L1185C1-L1287C1

// getProgType maps an ELF section name to the program type, attach type and
// program flags of the first table entry whose prefix the name starts with.
//
// The fourth return value is the remainder of the section name after the
// prefix, but only for prefixes that end in "/"; for all other prefixes it is
// the empty string. A name that matches no entry yields
// (UnspecifiedProgram, AttachNone, 0, "").
func getProgType(sectionName string) (ProgramType, AttachType, uint32, string) {
	// Entries are tried in order, so a longer prefix has to be listed before
	// any shorter prefix it starts with.
	known := []struct {
		prefix     string
		progType   ProgramType
		attachType AttachType
		progFlags  uint32
	}{
		// Please update the types from libbpf.c and follow the order of it.
		// https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/lib/bpf/libbpf.c
		{"socket", SocketFilter, AttachNone, 0},
		{"sk_reuseport/migrate", SkReuseport, AttachSkReuseportSelectOrMigrate, 0},
		{"sk_reuseport", SkReuseport, AttachSkReuseportSelect, 0},
		{"kprobe/", Kprobe, AttachNone, 0},
		{"uprobe/", Kprobe, AttachNone, 0},
		{"kretprobe/", Kprobe, AttachNone, 0},
		{"uretprobe/", Kprobe, AttachNone, 0},
		{"tc", SchedCLS, AttachNone, 0},
		{"classifier", SchedCLS, AttachNone, 0},
		{"action", SchedACT, AttachNone, 0},
		{"tracepoint/", TracePoint, AttachNone, 0},
		{"tp/", TracePoint, AttachNone, 0},
		{"raw_tracepoint/", RawTracepoint, AttachNone, 0},
		{"raw_tp/", RawTracepoint, AttachNone, 0},
		{"raw_tracepoint.w/", RawTracepointWritable, AttachNone, 0},
		{"raw_tp.w/", RawTracepointWritable, AttachNone, 0},
		{"tp_btf/", Tracing, AttachTraceRawTp, 0},
		{"fentry/", Tracing, AttachTraceFEntry, 0},
		{"fmod_ret/", Tracing, AttachModifyReturn, 0},
		{"fexit/", Tracing, AttachTraceFExit, 0},
		{"fentry.s/", Tracing, AttachTraceFEntry, unix.BPF_F_SLEEPABLE},
		{"fmod_ret.s/", Tracing, AttachModifyReturn, unix.BPF_F_SLEEPABLE},
		{"fexit.s/", Tracing, AttachTraceFExit, unix.BPF_F_SLEEPABLE},
		{"freplace/", Extension, AttachNone, 0},
		{"lsm/", LSM, AttachLSMMac, 0},
		{"lsm.s/", LSM, AttachLSMMac, unix.BPF_F_SLEEPABLE},
		{"iter/", Tracing, AttachTraceIter, 0},
		{"iter.s/", Tracing, AttachTraceIter, unix.BPF_F_SLEEPABLE},
		{"syscall", Syscall, AttachNone, 0},
		{"xdp.frags_devmap/", XDP, AttachXDPDevMap, unix.BPF_F_XDP_HAS_FRAGS},
		{"xdp_devmap/", XDP, AttachXDPDevMap, 0},
		{"xdp.frags_cpumap/", XDP, AttachXDPCPUMap, unix.BPF_F_XDP_HAS_FRAGS},
		{"xdp_cpumap/", XDP, AttachXDPCPUMap, 0},
		{"xdp.frags", XDP, AttachNone, unix.BPF_F_XDP_HAS_FRAGS},
		{"xdp", XDP, AttachNone, 0},
		{"perf_event", PerfEvent, AttachNone, 0},
		{"lwt_in", LWTIn, AttachNone, 0},
		{"lwt_out", LWTOut, AttachNone, 0},
		{"lwt_xmit", LWTXmit, AttachNone, 0},
		{"lwt_seg6local", LWTSeg6Local, AttachNone, 0},
		{"cgroup_skb/ingress", CGroupSKB, AttachCGroupInetIngress, 0},
		{"cgroup_skb/egress", CGroupSKB, AttachCGroupInetEgress, 0},
		{"cgroup/skb", CGroupSKB, AttachNone, 0},
		{"cgroup/sock_create", CGroupSock, AttachCGroupInetSockCreate, 0},
		{"cgroup/sock_release", CGroupSock, AttachCgroupInetSockRelease, 0},
		{"cgroup/sock", CGroupSock, AttachCGroupInetSockCreate, 0},
		{"cgroup/post_bind4", CGroupSock, AttachCGroupInet4PostBind, 0},
		{"cgroup/post_bind6", CGroupSock, AttachCGroupInet6PostBind, 0},
		{"cgroup/dev", CGroupDevice, AttachCGroupDevice, 0},
		{"sockops", SockOps, AttachCGroupSockOps, 0},
		{"sk_skb/stream_parser", SkSKB, AttachSkSKBStreamParser, 0},
		{"sk_skb/stream_verdict", SkSKB, AttachSkSKBStreamVerdict, 0},
		{"sk_skb", SkSKB, AttachNone, 0},
		{"sk_msg", SkMsg, AttachSkMsgVerdict, 0},
		{"lirc_mode2", LircMode2, AttachLircMode2, 0},
		{"flow_dissector", FlowDissector, AttachFlowDissector, 0},
		{"cgroup/bind4", CGroupSockAddr, AttachCGroupInet4Bind, 0},
		{"cgroup/bind6", CGroupSockAddr, AttachCGroupInet6Bind, 0},
		{"cgroup/connect4", CGroupSockAddr, AttachCGroupInet4Connect, 0},
		{"cgroup/connect6", CGroupSockAddr, AttachCGroupInet6Connect, 0},
		{"cgroup/sendmsg4", CGroupSockAddr, AttachCGroupUDP4Sendmsg, 0},
		{"cgroup/sendmsg6", CGroupSockAddr, AttachCGroupUDP6Sendmsg, 0},
		{"cgroup/recvmsg4", CGroupSockAddr, AttachCGroupUDP4Recvmsg, 0},
		{"cgroup/recvmsg6", CGroupSockAddr, AttachCGroupUDP6Recvmsg, 0},
		{"cgroup/getpeername4", CGroupSockAddr, AttachCgroupInet4GetPeername, 0},
		{"cgroup/getpeername6", CGroupSockAddr, AttachCgroupInet6GetPeername, 0},
		{"cgroup/getsockname4", CGroupSockAddr, AttachCgroupInet4GetSockname, 0},
		{"cgroup/getsockname6", CGroupSockAddr, AttachCgroupInet6GetSockname, 0},
		{"cgroup/sysctl", CGroupSysctl, AttachCGroupSysctl, 0},
		{"cgroup/getsockopt", CGroupSockopt, AttachCGroupGetsockopt, 0},
		{"cgroup/setsockopt", CGroupSockopt, AttachCGroupSetsockopt, 0},
		{"struct_ops+", StructOps, AttachNone, 0},
		{"sk_lookup/", SkLookup, AttachSkLookup, 0},
		{"seccomp", SocketFilter, AttachNone, 0},
		{"kprobe.multi", Kprobe, AttachTraceKprobeMulti, 0},
		{"kretprobe.multi", Kprobe, AttachTraceKprobeMulti, 0},
	}

	for i := range known {
		entry := &known[i]
		if strings.HasPrefix(sectionName, entry.prefix) {
			// Only "prefix/"-style entries carry an attach target after
			// the prefix.
			attachTo := ""
			if strings.HasSuffix(entry.prefix, "/") {
				attachTo = sectionName[len(entry.prefix):]
			}
			return entry.progType, entry.attachType, entry.progFlags, attachTo
		}
	}

	return UnspecifiedProgram, AttachNone, 0, ""
}
然后 bpf2go 生成的 go 文件中包含:
- loadBpf() 返回 *ebpf.CollectionSpec;
- loadBpfObjects() 内部先调用 loadBpf() 生成 CollectionSpec,然后再调用它的 spec.LoadAndAssign(obj, opts)。
// https://github.com/cilium/ebpf/blob/master/examples/kprobe/bpf_bpfeb.go#L15

// loadBpf parses the bytes held in _BpfBytes with
// ebpf.LoadCollectionSpecFromReader and returns the resulting CollectionSpec.
//
// A parse failure is returned wrapped as "can't load bpf: ...".
func loadBpf() (*ebpf.CollectionSpec, error) {
	spec, err := ebpf.LoadCollectionSpecFromReader(bytes.NewReader(_BpfBytes))
	if err == nil {
		return spec, nil
	}
	return nil, fmt.Errorf("can't load bpf: %w", err)
}
// loadBpfObjects obtains the CollectionSpec from loadBpf and hands obj and
// opts to its LoadAndAssign method.
//
// The following types are suitable as obj argument:
//
//	*bpfObjects
//	*bpfPrograms
//	*bpfMaps
//
// An error from loadBpf is returned unchanged; otherwise the result of
// LoadAndAssign is returned.
//
// See ebpf.CollectionSpec.LoadAndAssign documentation for details.
func loadBpfObjects(obj interface{}, opts *ebpf.CollectionOptions) error {
	spec, loadErr := loadBpf()
	if loadErr == nil {
		return spec.LoadAndAssign(obj, opts)
	}
	return loadErr
}
所以在用户程序中可以调用 loadBpf() 来获得 ebpf.CollectionSpec,然后从中获得 ProgramSpec,中间包括 AttachTo 信息。
2 eBPF C struct 字段定义和转换 #
- C char 类型会被转换为 go 的 int8 类型;
- C __u8 类型会被转换为 go 的 uint8 类型,即 byte 类型。
所以,如果 C struct 里定义的是字符数组(字符串),则应该使用 __u8 类型,例如:
// Event record defined on the C side. The byte buffers are declared as __u8
// rather than char so that they map to Go uint8 (byte) instead of int8.
struct event {
	pid_t pid;
	pid_t ppid;
	pid_t tgid;
	uid_t uid;
	long int cgroup_id;
	long type;
	long int retval;
	long args_count;
	unsigned int args_size;
	__u8 comm[TASK_COMM_LEN]; // __u8 is converted to Go uint8 (i.e. byte), whereas char is converted to Go int8.
	__u8 filename[TASK_FILENAME_LEN];
	__u8 args[FULL_MAX_ARGS_ARR];
	__u8 envs[FULL_MAX_ENVS_ARR];
};
3 从 ebpf C 类型生成多个 go struct 定义 #
-
在 C 中定义 struct 的同时创建一个 unused 对象,目的是在生成的 object 文件中记录该类型信息:
struct debug { __u8 index; __u8 type; pid_t pid; pid_t ppid; pid_t tgid; long int val; }; struct debug *unused_debug __attribute__((unused)); struct event { pid_t pid; pid_t ppid; pid_t tgid; uid_t uid; long int cgroup_id; __u64 type; long int retval; __u64 args_count; unsigned int args_size; char comm[TASK_COMM_LEN]; char filename[TASK_FILENAME_LEN]; char args[FULL_MAX_ARGS_ARR]; char envs[FULL_MAX_ENVS_ARR]; }; struct event *unused_event __attribute__((unused));
-
在 go 文件中,用 -type 添加用上面 unused 定义的对象 struct 类型名称:
//go:generate go run github.com/cilium/ebpf/cmd/bpf2go -type debug -type event -cc $BPF_CLANG -cflags "-O2 -g" bpf exec.bpf.c -- -I ./ -I../headers -g
另外, 在使用 cilium/ebpf 的 go generate 来生成 ebpf go 代码文件时, 如果使用了体系结构实现相关的 PT_REGS_PARM* 宏, 需要在 go generate 中明确指定 --target 参数值为对应的 GOARCH 环境变量, 否则编译时报错: “the eBPF is using target specific macros, please provide -target that is not bpf, bpfel or bpfeb”
//go:generate go run github.com/cilium/ebpf/cmd/bpf2go -cc $BPF_CLANG -cflags "-O2 -g" --target=$GOARCH bpf nettuple.bpf.c -- -I ./ -I../headers -g
如果没有使用这些体系结构宏, 则可以不指定 --target, 这样会同时生成 bpfel 和 bpfeb 两个 go 文件.
4 加载 btf 文件 #
- 使用 btf.LoadSpec("/path/to/btf/file") 来生成 btfSpec。
- 配置 ebpf.CollectionOptions,在 Programs 中添加 KernelTypes 字段,指定生成的 btfSpec。
- 然后调用 go generate 自动生成的 loadBpfObjects(),并在出错时使用 errors.As() 来判断是否是 *ebpf.VerifierError 类型,如果是的话,打印 load & verify 错误日志。
// The 4.19 kernel lacks BTF information; a custom BTF file is loaded here to
// support CO-RE.
btfSpec, err := btf.LoadSpec(options.BtfPath)
if err != nil {
	log.Fatal(err)
}
opts := ebpf.CollectionOptions{
	Programs: ebpf.ProgramOptions{
		// Request branch, instruction and stats level verifier logging.
		LogLevel: (ebpf.LogLevelBranch | ebpf.LogLevelInstruction | ebpf.LogLevelStats),
		// 1024 * 1024; presumably a size in bytes -- TODO confirm.
		LogSize: 1024 * 1024,
		// Use the BTF spec loaded above as the kernel type information.
		KernelTypes: btfSpec,
	},
}
if err := loadBpfObjects(&objs, &opts); err != nil {
	// Unwrap to *ebpf.VerifierError and print it with %+v so the complete
	// load/verify log is shown instead of a one-line summary.
	var ve *ebpf.VerifierError
	if errors.As(err, &ve) {
		log.Fatalf("verifier error: %+v", ve)
	}
	log.Fatalf("loading objects: %+v", err)
}
defer objs.Close()
5 打印 load verify error 日志 #
loadBpfObjects 返回出错时,需要使用 errors.As 将它转换为 *ebpf.VerifierError 类型后,使用 %+v 才能完整显示 load/verify 的报错信息;
- https://github.com/cilium/ebpf/discussions/838#discussioncomment-4079434
- 否则,在此之前的报错信息只有一行 2023/07/13 12:06:05 exec.go:58: loading objects: field TracepointSyscallsSysEnterExecve: program tracepoint__syscalls__sys_enter_execve: load program: permission denied: invalid indirect read from stack off -40+28 size 32 (102 line(s) omitted)
示例如下:
// https://pkg.go.dev/github.com/cilium/ebpf#example-VerifierError-RetrieveFullLog
opts := ebpf.CollectionOptions{
	Programs: ebpf.ProgramOptions{
		// Request branch, instruction and stats level verifier logging.
		LogLevel: (ebpf.LogLevelBranch | ebpf.LogLevelInstruction | ebpf.LogLevelStats),
		// 1024 * 1024; presumably a size in bytes -- TODO confirm.
		LogSize: 1024 * 1024,
		KernelTypes: btfSpec,
	},
}
if err := loadBpfObjects(&objs, &opts); err != nil {
	// Without the errors.As conversion only a one-line summary with
	// "(N line(s) omitted)" is printed; %+v on the *ebpf.VerifierError
	// shows the complete load/verify log.
	var ve *ebpf.VerifierError
	if errors.As(err, &ve) {
		log.Fatalf("verifier error: %+v", ve)
	}
	log.Fatalf("loading objects: %+v", err)
}
6 perf Reader 和 LostSample #
使用 cilium/ebpf 创建 perfReader 时,指定的 perCPU buffer 要比 Event 结构大,太小会导致 perfReader hang 的问题。perfReader 在程序退出时需要 Close();
在 Read() 到 record 时,需要先判断 record.LostSamples 是否大于零,只有当它等于 0 时才能用 binary.Read() 来解码:
// The per-CPU buffer must be larger than one event, otherwise the reader can
// hang; one page is used here instead of int(unsafe.Sizeof(Event{})).
// See https://github.com/cilium/ebpf/issues/559
perfReader, err := perf.NewReader(objs.bpfMaps.Events, os.Getpagesize())
if err != nil {
	log.Fatal(err)
}

// Close both readers once a value arrives on stopper. Closing perfReader
// makes the blocked Read() below return perf.ErrClosed.
go func() {
	<-stopper
	if err := debugPerfReader.Close(); err != nil {
		log.Fatalf("closing debug perf event buffer failed: %v", err)
	}
	if err := perfReader.Close(); err != nil {
		log.Fatalf("closing perf event buffer failed: %v", err)
	}
	os.Exit(1)
}()

var event bpfEvent
for {
	record, err := perfReader.Read()
	if err != nil {
		if errors.Is(err, perf.ErrClosed) {
			log.Println("Received signal, exiting..")
			return
		}
		log.Printf("reading from perf event reader error: %v", err)
		continue
	}
	// https://github.com/cilium/ebpf/blob/master/perf/reader.go#L72
	// Only decode the sample when no samples were lost; a record with
	// LostSamples > 0 is reported and skipped.
	if record.LostSamples > 0 {
		log.Printf("kernel perf event buff full, losted sample count: %d", record.LostSamples)
	} else if err := binary.Read(bytes.NewBuffer(record.RawSample), binary.LittleEndian, &event); err != nil {
		log.Printf("parsing perf event error: %v", err)
	} else {
		// event is a struct, so it is printed with %+v (field names and
		// values); %s would emit %!s(...) noise for non-string fields.
		log.Printf("EVENT: %+v", event)
	}
}