Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

revert xds TTL on resources #80

Merged
merged 1 commit into from
Mar 14, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -135,4 +135,6 @@ test-e2e:
-endpoint.checkPeriod=1s \
-ssl.rotation=1s \
-log.level=INFO \
-log.pretty
-log.pretty

make clean
3 changes: 0 additions & 3 deletions pkg/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ const (
AppName = "envoy-control-plane"
sslRotationPeriodDefault = 1 * time.Hour
endpointCheckPeriodDefault = 60 * time.Second
endpointTTLDefault = 30 * time.Second
configDrainPeriodDefault = 5 * time.Second
)

Expand All @@ -49,7 +48,6 @@ type Type struct {
NodeZoneLabel *string `yaml:"nodeZoneLabel"`
ConfigDrainPeriod *time.Duration `yaml:"configDrainPeriod"`
EndpointCheckPeriod *time.Duration `yaml:"endpointCheckPeriod"`
EndpointTTL *time.Duration `yaml:"endpointTtl"`
SentryDSN *string `yaml:"sentryDsn"`
SSLName *string `yaml:"sslName"`
SSLCrt *string `yaml:"sslCrt"`
Expand Down Expand Up @@ -77,7 +75,6 @@ var config = Type{
NodeZoneLabel: flag.String("node.label.zone", "topology.kubernetes.io/zone", "node label region"),
ConfigDrainPeriod: flag.Duration("config.drainPeriod", configDrainPeriodDefault, "drain period"),
EndpointCheckPeriod: flag.Duration("endpoint.checkPeriod", endpointCheckPeriodDefault, "check period"),
EndpointTTL: flag.Duration("endpoint.ttl", endpointTTLDefault, "xDS TTL"),
SentryDSN: flag.String("sentry.dsn", "", "sentry DSN"),
SSLName: flag.String("ssl.name", "envoy_control_plane_default", "name of certificate in envoy secrets"), //nolint:lll
SSLCrt: flag.String("ssl.crt", "", "path to CA cert"),
Expand Down
5 changes: 0 additions & 5 deletions pkg/config/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ package config_test

import (
"testing"
"time"

"github.com/maksim-paskal/envoy-control-plane/pkg/config"
)
Expand All @@ -29,8 +28,4 @@ func TestConfig(t *testing.T) {
if want := "/some/test/path"; *config.Get().KubeConfigFile != want {
t.Fatalf("KubeConfigFile != %s", want)
}

if want := 3 * time.Second; *config.Get().EndpointTTL != want {
t.Fatalf("EndpointTTL != %s", want)
}
}
1 change: 0 additions & 1 deletion pkg/config/config_test.yaml
Original file line number Diff line number Diff line change
@@ -1,2 +1 @@
kubeConfigFile: /some/test/path
endpointTtl: 3s
11 changes: 4 additions & 7 deletions pkg/configstore/configStore.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ type ConfigStore struct {
Version string
Config *appConfig.ConfigType
configEndpoints map[string][]*endpoint.LocalityLbEndpoints
lastEndpoints []types.ResourceWithTTL
lastEndpoints []types.Resource
lastEndpointsArray []string
log *log.Entry
mutex sync.Mutex
Expand Down Expand Up @@ -151,7 +151,7 @@ func (cs *ConfigStore) getConfigEndpoints() (map[string][]*endpoint.LocalityLbEn
lbEndpoints := make(map[string][]*endpoint.LocalityLbEndpoints)

for _, ep := range endpoints {
fixed, ok := ep.Resource.(*endpoint.ClusterLoadAssignment)
fixed, ok := ep.(*endpoint.ClusterLoadAssignment)
if !ok {
cs.log.WithError(errAssertion).Fatal("ep.(*endpoint.ClusterLoadAssignment)")
}
Expand Down Expand Up @@ -259,7 +259,7 @@ func (cs *ConfigStore) saveLastEndpoints() {
}

isInvalidIP := false
publishEp := []types.ResourceWithTTL{}
publishEp := []types.Resource{}
publishEpArray := []string{} // for reflect.DeepEqual

for clusterName, ep := range lbEndpoints {
Expand Down Expand Up @@ -290,10 +290,7 @@ func (cs *ConfigStore) saveLastEndpoints() {
Endpoints: ep,
}

publishEp = append(publishEp, types.ResourceWithTTL{
Resource: &clusterLoadAssignment,
TTL: appConfig.Get().EndpointTTL,
})
publishEp = append(publishEp, &clusterLoadAssignment)
}

if isInvalidIP {
Expand Down
9 changes: 1 addition & 8 deletions pkg/controlplane/controlPlane.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,14 +42,7 @@ const (
grpcMaxConcurrentStreams = 1000000
)

// SnapshotCache create cache with heartbeat responses for resources with a TTL.
var SnapshotCache cache.SnapshotCache = cache.NewSnapshotCacheWithHeartbeating(
context.Background(),
false,
cache.IDHash{},
&Logger{},
*config.Get().EndpointTTL,
)
var SnapshotCache cache.SnapshotCache = cache.NewSnapshotCache(false, cache.IDHash{}, &Logger{})

var grpcServer *grpc.Server

Expand Down
26 changes: 13 additions & 13 deletions pkg/utils/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ import (
"google.golang.org/protobuf/types/known/anypb"
)

func GetConfigSnapshot(version string, configType *config.ConfigType, endpoints []types.ResourceWithTTL, commonSecrets []tls.Secret) (cache.Snapshot, error) { //nolint: lll
func GetConfigSnapshot(version string, configType *config.ConfigType, endpoints []types.Resource, commonSecrets []tls.Secret) (cache.Snapshot, error) { //nolint: lll
clusters, err := YamlToResources(configType.Clusters, cluster.Cluster{})
if err != nil {
return cache.Snapshot{}, err
Expand Down Expand Up @@ -63,21 +63,21 @@ func GetConfigSnapshot(version string, configType *config.ConfigType, endpoints
}

for i := range commonSecrets {
secrets = append(secrets, types.ResourceWithTTL{Resource: &commonSecrets[i]})
secrets = append(secrets, &commonSecrets[i])
}

resources := make(map[string][]types.ResourceWithTTL)
resources := make(map[string][]types.Resource)

resources[resource.ClusterType] = clusters
resources[resource.RouteType] = routes
resources[resource.ListenerType] = listiners
resources[resource.SecretType] = secrets
resources[resource.EndpointType] = endpoints

return cache.NewSnapshotWithTTLs(version, resources)
return cache.NewSnapshot(version, resources)
}

func YamlToResources(yamlObj []interface{}, outType interface{}) ([]types.ResourceWithTTL, error) {
func YamlToResources(yamlObj []interface{}, outType interface{}) ([]types.Resource, error) {
if len(yamlObj) == 0 {
return nil, nil
}
Expand All @@ -96,7 +96,7 @@ func YamlToResources(yamlObj []interface{}, outType interface{}) ([]types.Resour
return nil, errors.Wrap(err, "json.Unmarshal(jsonObj, &resources)")
}

results := make([]types.ResourceWithTTL, len(resources))
results := make([]types.Resource, len(resources))

for k, v := range resources {
resourcesJSON, err := utils.GetJSONfromYAML(v)
Expand All @@ -115,7 +115,7 @@ func YamlToResources(yamlObj []interface{}, outType interface{}) ([]types.Resour
return nil, errors.Wrap(err, "cluster.Cluster")
}

results[k] = types.ResourceWithTTL{Resource: &resource}
results[k] = &resource

case route.RouteConfiguration:
resource := route.RouteConfiguration{}
Expand All @@ -127,7 +127,7 @@ func YamlToResources(yamlObj []interface{}, outType interface{}) ([]types.Resour
return nil, errors.Wrap(err, "route.RouteConfiguration")
}

results[k] = types.ResourceWithTTL{Resource: &resource}
results[k] = &resource
case endpoint.ClusterLoadAssignment:
resource := endpoint.ClusterLoadAssignment{}
err = protojson.Unmarshal(resourcesJSON, &resource)
Expand All @@ -138,7 +138,7 @@ func YamlToResources(yamlObj []interface{}, outType interface{}) ([]types.Resour
return nil, errors.Wrap(err, "endpoint.ClusterLoadAssignment")
}

results[k] = types.ResourceWithTTL{Resource: &resource}
results[k] = &resource
case listener.Listener:
resource := listener.Listener{}
err = protojson.Unmarshal(resourcesJSON, &resource)
Expand All @@ -149,7 +149,7 @@ func YamlToResources(yamlObj []interface{}, outType interface{}) ([]types.Resour
return nil, errors.Wrap(err, "listener.Listener")
}

results[k] = types.ResourceWithTTL{Resource: &resource}
results[k] = &resource
case tls.Secret:
resource := tls.Secret{}
err = protojson.Unmarshal(resourcesJSON, &resource)
Expand All @@ -160,7 +160,7 @@ func YamlToResources(yamlObj []interface{}, outType interface{}) ([]types.Resour
return nil, errors.Wrap(err, "tls.Secret")
}

results[k] = types.ResourceWithTTL{Resource: &resource}
results[k] = &resource
default:
return nil, errUnknownClass
}
Expand Down Expand Up @@ -231,9 +231,9 @@ func NewSecrets(dnsName string, validation interface{}) ([]tls.Secret, error) {
}

// remove require_client_certificate from all listeners.
func filterCertificates(listiners []types.ResourceWithTTL) error {
func filterCertificates(listiners []types.Resource) error {
for _, listiner := range listiners {
c, ok := listiner.Resource.(*listener.Listener)
c, ok := listiner.(*listener.Listener)
if !ok {
return errUnknownClass
}
Expand Down
4 changes: 2 additions & 2 deletions pkg/utils/utils_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,14 @@ func TestGetConfigSnapshot(t *testing.T) {
t.Parallel()

c := config.ConfigType{}
r := []types.ResourceWithTTL{}
r := []types.Resource{}
s := []tls.Secret{}

e := endpoint.ClusterLoadAssignment{
ClusterName: "clusterName",
}

r = append(r, types.ResourceWithTTL{Resource: &e})
r = append(r, &e)

version := uuid.New().String()

Expand Down
84 changes: 84 additions & 0 deletions utils/envoy-debug/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
# How to debug Envoy exceptions

## 1. Coredumps

Core dumps are the most efficient way to debug an exception. To enable them, you need to:

### 1a. Prepare env

```bash
# update soft limits on the system
ulimit -S -c unlimited

# set the path (file name pattern) where core dumps are written
sysctl -w kernel.core_pattern='/envoy/core-%e.%p.%h.%t'
```

### 1b. Run envoy with debug symbols

You need to run Envoy with debug symbols - for example, using the Docker image with debug symbols `envoyproxy/envoy-debug:<envoy-version>`

After the next exception, Linux will create a core dump.

## 2. Example: Envoy returns an exception trace

```log
[2022-02-14 23:28:53.199][22][critical][main] [source/exe/terminate_handler.cc:12] std::terminate called! (possible uncaught exception, see trace)
[2022-02-14 23:28:53.199][22][critical][backtrace] [./source/server/backtrace.h:91] Backtrace (use tools/stack_decode.py to get line numbers):
[2022-02-14 23:28:53.199][22][critical][backtrace] [./source/server/backtrace.h:92] Envoy version: a9d72603c68da3a10a1c0d021d01c7877e6f2a30/1.21.0/Clean/RELEASE/BoringSSL
[2022-02-14 23:28:53.218][22][critical][backtrace] [./source/server/backtrace.h:96] #0: Envoy::TerminateHandler::logOnTerminate()::$_0::operator()() [0x55d953a74f0e]
[2022-02-14 23:28:53.228][22][critical][backtrace] [./source/server/backtrace.h:98] #1: [0x55d953a74dd9]
[2022-02-14 23:28:53.237][22][critical][backtrace] [./source/server/backtrace.h:96] #2: std::__terminate() [0x55d953f25433]
[2022-02-14 23:28:53.246][22][critical][backtrace] [./source/server/backtrace.h:96] #3: std::__1::__function::__func<>::operator()() [0x55d9535f67a3]
[2022-02-14 23:28:53.258][22][critical][backtrace] [./source/server/backtrace.h:96] #4: event_process_active_single_queue [0x55d953915220]
[2022-02-14 23:28:53.268][22][critical][backtrace] [./source/server/backtrace.h:96] #5: event_base_loop [0x55d953913f11]
[2022-02-14 23:28:53.278][22][critical][backtrace] [./source/server/backtrace.h:96] #6: Envoy::Server::InstanceImpl::run() [0x55d95318261c]
[2022-02-14 23:28:53.287][22][critical][backtrace] [./source/server/backtrace.h:96] #7: Envoy::MainCommonBase::run() [0x55d951d5cd64]
[2022-02-14 23:28:53.297][22][critical][backtrace] [./source/server/backtrace.h:96] #8: Envoy::MainCommon::main() [0x55d951d5d5d6]
[2022-02-14 23:28:53.307][22][critical][backtrace] [./source/server/backtrace.h:96] #9: main [0x55d951d5979c]
[2022-02-14 23:28:53.310][22][critical][backtrace] [./source/server/backtrace.h:96] #10: __libc_start_main [0x7f4eda35dbf7]
[2022-02-14 23:28:53.310][22][critical][backtrace] [./source/server/backtrace.h:104] Caught Aborted, suspect faulting address 0x6500000016
[2022-02-14 23:28:53.310][22][critical][backtrace] [./source/server/backtrace.h:91] Backtrace (use tools/stack_decode.py to get line numbers):
[2022-02-14 23:28:53.310][22][critical][backtrace] [./source/server/backtrace.h:92] Envoy version: a9d72603c68da3a10a1c0d021d01c7877e6f2a30/1.21.0/Clean/RELEASE/BoringSSL
[2022-02-14 23:28:53.310][22][critical][backtrace] [./source/server/backtrace.h:96] #0: __restore_rt [0x7f4eda73f980]
[2022-02-14 23:28:53.320][22][critical][backtrace] [./source/server/backtrace.h:98] #1: [0x55d953a74dd9]
[2022-02-14 23:28:53.320][22][critical][backtrace] [./source/server/backtrace.h:96] #2: std::__terminate() [0x55d953f25433]
[2022-02-14 23:28:53.320][22][critical][backtrace] [./source/server/backtrace.h:96] #3: std::__1::__function::__func<>::operator()() [0x55d9535f67a3]
[2022-02-14 23:28:53.320][22][critical][backtrace] [./source/server/backtrace.h:96] #4: event_process_active_single_queue [0x55d953915220]
[2022-02-14 23:28:53.320][22][critical][backtrace] [./source/server/backtrace.h:96] #5: event_base_loop [0x55d953913f11]
[2022-02-14 23:28:53.320][22][critical][backtrace] [./source/server/backtrace.h:96] #6: Envoy::Server::InstanceImpl::run() [0x55d95318261c]
[2022-02-14 23:28:53.320][22][critical][backtrace] [./source/server/backtrace.h:96] #7: Envoy::MainCommonBase::run() [0x55d951d5cd64]
[2022-02-14 23:28:53.320][22][critical][backtrace] [./source/server/backtrace.h:96] #8: Envoy::MainCommon::main() [0x55d951d5d5d6]
[2022-02-14 23:28:53.320][22][critical][backtrace] [./source/server/backtrace.h:96] #9: main [0x55d951d5979c]
[2022-02-14 23:28:53.320][22][critical][backtrace] [./source/server/backtrace.h:96] #10: __libc_start_main [0x7f4eda35dbf7]
```

### 2a. Find the static address of the entry of Envoy::MainCommon::main()

```bash
objdump -Cd /usr/local/bin/envoy | fgrep '<main>' -A 20
```

For example:

```log
fef797: e8 84 3d 00 00 callq ff3520 <Envoy::MainCommon::main(int, char**, std::__1::function<void (Envoy::Server::Instance&)>)>
```

The static address of the call to `Envoy::MainCommon::main()` (the `callq` line above) is `fef797` = `0xfef797`.

### 2b. Compute the static address of exception 0x55d9535f67a3

We also need the address of `main` from the exception trace (`0x55d951d5979c`) and the static address found for `Envoy::MainCommon::main()` (`0xfef797`).

```bash
python3 -c 'print(hex(0x55d9535f67a3-0x55d951d5979c+0xfef797))'
# result: 0x288c79e
```

### 2c. Use addr2line to get the line of code

```bash
addr2line -Ce /usr/local/bin/envoy 0x288c79e
# result: /proc/self/cwd/source/common/config/ttl.cc:30
```